Commit 3cf559b9 authored by Sebastien Moretti's avatar Sebastien Moretti
Browse files

All downloads now use LWP::Simple in a more consistent & centralized way

parent e21fc09f
......@@ -331,7 +331,7 @@ for(my $r=0; $r<=$#original_names; $r++){
# Get Nt sequence from template
if ( exists($template_NT->{$short_name}) && $template_NT->{$short_name} ne '' ){
if ( $template_NT->{$short_name} !~ /^My_Seq$/i ){
download_seq($cache, $date, '', '', $template_NT->{$short_name});
download_seq($cache, '', '', $template_NT->{$short_name});
@nt_GIs = $template_NT->{$short_name};
}
else {
......@@ -374,10 +374,10 @@ for(my $r=0; $r<=$#original_names; $r++){
my ($chr, $amont, $aval) = geneID2Chr($geneID, $equivalent_blast_hits[$qq]);
print "\n\tNo gene locus found for $equivalent_blast_hits[$qq] with $geneID in $fasta_header\n\n" if ($chr eq '' and $geneID ne '');
#Get Chr seq
if ( $chr ne '' ){
#Get Chr seq
download_seq($cache, $amont, $aval, $chr);
$intronStep = 1;
$intronStep = 1; #FIXME: useful ???
@nt_GIs = ("$chr:$amont-$aval", @nt_GIs);
}
......@@ -1010,58 +1010,11 @@ sub download_seq{
}
}
my $whatNumber = 0;
if ( $amont =~ /^\d+$/ && $aval =~ /^\d+$/ ){
CHECK_CHR_DOWN:
for(my $rep=0; $rep <= 4; $rep++){
if ( !-e "$cache/$acc[$a]:$from-$to.fas" || -z "$cache/$acc[$a]:$from-$to.fas" ||
$cache eq 'none' || ($cache eq 'update' && -M "$cache/$acc[$a]:$from-$to.fas" > $cacheStorageTime) ){
getstore("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$pacc2puid&rettype=fasta&retmode=text&from=$amont&to=$aval&tool=ProtoGene&email=smoretti\@unil.ch", "$cache/$acc[$a]:$from-$to.fas");
}
my $counter = 0;
my $lines = 0;
open(my $CIBLE, '<', "$cache/$acc[$a]:$amont-$aval.fas");
while(<$CIBLE>){
$lines++ if ( $_ !~ /^>/ );
$counter = $counter+2 if ( $counter==1 && $_ !~ /^\w/ && $lines==1 && $_ !~ /^>/ );
$counter++ if ( $_ =~ /^>/ );
$counter = $counter+2 if ( $_ !~ /^>/ && ($_ =~ /Error:/ || $_ =~ /[<>]/) );
}
close $CIBLE;
$whatNumber++;
last CHECK_CHR_DOWN if ( $whatNumber==20 );
if ( $counter != 1 ){
$rep = $rep-1;
unlink("$cache/$acc[$a]:$amont-$aval.fas");
}
}
fetch_fasta("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$pacc2puid&rettype=fasta&retmode=text&from=$amont&to=$aval&tool=ProtoGene&email=smoretti\@unil.c", "$cache/$acc[$a]:$from-$to.fas");
}
else{
CHECK_OTHER_DOWN:
for(my $rep=0; $rep <= 4; $rep++){
if ( !-e "$cache/$acc[$a].fas" || -z "$cache/$acc[$a].fas" || $cache eq 'none' ||
($cache eq 'update' && -M "$cache/$acc[$a].fas" > $cacheStorageTime) ){
getstore("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$pacc2puid&rettype=fasta&retmode=text&tool=ProtoGene&email=smoretti\@unil.ch", "$cache/$acc[$a].fas");
}
my $counter = 0;
my $lines = 0;
open(my $CIBLE, '<', "$cache/$acc[$a].fas");
while(<$CIBLE>){
$lines++ if ( $_ !~ /^>/ );
$counter = $counter+2 if ( $counter==1 && $_ !~ /^\w/ && $lines==1 && $_ !~ /^>/ );
$counter++ if ( $_ =~ /^>/ );
$counter = $counter+2 if ( $_ !~ /^>/ && ($_ =~ /Error:/ || $_ =~ /[<>]/) );
}
close $CIBLE;
$whatNumber++;
last CHECK_OTHER_DOWN if ( $whatNumber==20 );
if ( $counter != 1 ){
$rep = $rep-1;
unlink("$cache/$acc[$a].fas");
}
}
fetch_fasta("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$pacc2puid&rettype=fasta&retmode=text&tool=ProtoGene&email=smoretti\@unil.ch", "$cache/$acc[$a].fas");
}
#FIXME: Don't remember exactly what all this function does
......@@ -1073,8 +1026,8 @@ sub download_seq{
unlink("$cache/$acc[$a]:$from-$to.fas") if ( $amont =~ /^\d+$/ && $aval =~ /^\d+$/ );
unlink("$cache/$acc[$a].fas") if ( $amont !~ /^\d+$/ || $aval !~ /^\d+$/ );
# $amont = '';
$aval = $aval-5000 if ( $aval =~ /^\d+$/ );
$a = $a-1;
$aval = $aval - 5000 if ( $aval =~ /^\d+$/ );
$a = $a - 1;
$cp++;
}
}
......
- sort boj pos output
- Exonerate 2+ ...
- ssearch instead of blast to be more sensitive ?
- add "respect case for seq name between in0 and in1"
......@@ -8,10 +10,8 @@
- Re-compare translated nucleotides in the result file with original input prot seq
- Taint & mask path in log files
- ProtoGene recreates the cache if it is no longer there
- LWP::Simple rather than wget
- undef variables that are no longer used, more frequently
- PUI unavailable => go through another source: UniProt, PICR, ... ?
- fetch instead of wget from NCBI
- Check seq length to optimize windows size in blast, for short seq only
- blast filter: % ID + % coverage only
- fastacmd vs fetch ?
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment