Commit 52b1fdad authored by Sebastien Moretti's avatar Sebastien Moretti
Browse files

Implement a single fetch fasta method using LWP::Simple

parent 25692be6
......@@ -363,7 +363,7 @@ for(my $r=0; $r<=$#original_names; $r++){
#Get transcript and contig seq
@nt_GIs = split(/,/, $ntGIs);
downloadSeqFromGIs($cache, $date, '', '', @nt_GIs);
downloadSeqFromGIs($cache, '', '', @nt_GIs);
#GeneID -> Chr
my @geneIDs = split(/,/, $geneID);
while(<@geneIDs>){
......@@ -373,7 +373,7 @@ for(my $r=0; $r<=$#original_names; $r++){
if ( $chr ne '' ){
#Get Chr seq
download_seq($cache, $date, $amont, $aval, $chr);
download_seq($cache, $amont, $aval, $chr);
$intronStep = 1;
@nt_GIs = ("$chr:$amont-$aval", @nt_GIs);
......@@ -829,14 +829,39 @@ sub fetch {
my $content = get($url);
return $content if defined $content;
}
print {*STDERR} "Problem with NCBI eutils, please try again later/n";
exit(4);
print {*STDERR} "Problem with NCBI eutils, please try again later\n";
exit(40);
}
sub fetch_fasta {
my ($url, $outfile) = @_;
return 1 if ( -s "$outfile" && $cache eq 'update' && -M "$outfile" <= $cacheStorageTime );
GET_FASTA:
for (my $tries=0; $tries <20; $tries++ ){
my $HTTP_status = getstore($url, $outfile);
if ( is_error($HTTP_status) ){
next GET_FASTA;
}
else {
open(my $FASTA, '<', "$outfile");
my $counter = 0;
my $lines = 0;
while(<$FASTA>){
$lines++ if ( $_ !~ /^>/ );
$counter = $counter+2 if ( $counter==1 && $_ !~ /^\w/ && $lines==1 && $_ !~ /^>/ );
$counter++ if ( $_ =~ /^>/ );
$counter = $counter+2 if ( $_ !~ /^>/ && ($_ =~ /Error:/ || $_ =~ /[<>]/) );
}
close $FASTA;
return 1 if ( $counter == 1 );
}
}
unlink("$outfile");
print {*STDERR} "Problem with NCBI eutils, please try again later.\n";
exit(45);
}
......@@ -963,58 +988,15 @@ sub geneID2Chr{
}
sub downloadSeqFromGIs{
my ($cache, $date, $amont, $aval, @acc) = @_;
my ($cache, $amont, $aval, @acc) = @_;
#FIXME: general fct for fetching fasta seq
GET_SEQ:
for(my $a=0; $a<=$#acc; $a++){
my $whatNumber = 0;
if ( $amont =~ /^\d+$/ && $aval =~ /^\d+$/ ){
GET_CHR_SEQ:
for(my $rep=0; $rep <= 4; $rep++){
system("wget -q -O $cache/$acc[$a]-$amont.fas 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$acc[$a]&rettype=fasta&retmode=text&from=$amont&to=$aval&tool=ProtoGene&email=smoretti\@unil.ch'") if ( !-e "$cache/$acc[$a]-$amont.fas" || -z "$cache/$acc[$a]-$amont.fas" || $cache eq 'none' || ($cache eq 'update' && -M "$cache/$acc[$a]-$amont.fas" > $cacheStorageTime ) );
open(my $CIBLE, '<', "$cache/$acc[$a]-$amont.fas");
my $counter = 0;
my $lines = 0;
CHECK_CHR_SEQ:
while(<$CIBLE>){
$lines++ if ( $_ !~ /^>/ );
$counter = $counter+2 if ( $counter==1 && $_ !~ /^\w/ && $lines==1 && $_ !~ /^>/ );
$counter++ if ( $_ =~ /^>/ );
$counter = $counter+2 if ( $_ !~ /^>/ && ($_ =~ /Error:/ || $_ =~ /[<>]/) );
# last CHECK_CHR_SEQ if ( $_ !~ /^>/ && $counter==1 && $_ =~ /^\w/ );
}
close $CIBLE;
$whatNumber++;
last GET_CHR_SEQ if ( $whatNumber==20 );
if ( $counter != 1 ){
$rep = $rep-1;
unlink("$cache/$acc[$a]-$amont.fas");
}
}
fetch_fasta("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$acc[$a]&rettype=fasta&retmode=text&from=$amont&to=$aval&tool=ProtoGene&email=smoretti\@unil.ch", "$cache/$acc[$a]-$amont.fas") ;
}
else{
GET_OTHER_SEQ:
for(my $rep=0; $rep <= 4; $rep++){
system("wget -q -O $cache/$acc[$a].fas 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$acc[$a]&rettype=fasta&retmode=text&tool=ProtoGene&email=smoretti\@unil.ch'") if ( !-e "$cache/$acc[$a].fas" || -z "$cache/$acc[$a].fas" || $cache eq 'none' || ($cache eq 'update' && -M "$cache/$acc[$a].fas" > $cacheStorageTime ) );
open(my $CIBLE, '<', "$cache/$acc[$a].fas");
my $counter = 0;
my $lines = 0;
CHECK_OTHER_SEQ:
while(<$CIBLE>){
$lines++ if ( $_ !~ /^>/ );
$counter = $counter+2 if ( $counter==1 && $_ !~ /^\w/ && $lines==1 && $_ !~ /^>/ );
$counter++ if ( $_ =~ /^>/ );
$counter = $counter+2 if ( $_ !~ /^>/ && ($_ =~ /Error:/ || $_ =~ /[<>]/) );
}
close $CIBLE;
$whatNumber++;
last GET_OTHER_SEQ if ( $whatNumber==20 );
if ( $counter != 1 ){
$rep=$rep-1;
unlink("$cache/$acc[$a].fas");
}
}
fetch_fasta("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$acc[$a]&rettype=fasta&retmode=text&tool=ProtoGene&email=smoretti\@unil.ch'", "$cache/$acc[$a].fas");
}
}
......@@ -1022,7 +1004,7 @@ sub downloadSeqFromGIs{
}
sub download_seq{
my ($cache, $date, $amont, $aval, @acc) = @_;
my ($cache, $amont, $aval, @acc) = @_;
my $cp = 0;
my $from = $amont;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment