Commit 1e1122f7 authored by Sebastien Moretti
Browse files

Big bug fixes: remove useless LOG file + proper e-mail address for eutils +...

Big bug fixes: remove the useless LOG file; use the proper e-mail address for eutils; allow download of entries that have only a FASTA header and no associated sequence; fix the date (the month was off by one); correct the wrong loop label; improve the checkExternalSoftware function
parent 1145262a
......@@ -21,7 +21,7 @@ use LWP::Simple; # To test gigablaster availability
use Mail::Send; # Send warnings and errors files by e-mail ==> only if the $userEMail variable is defined
use lib '/mnt/common/share/ProtoGene/'; # Local path for ProtoGene's own perl modules
###use lib '/mnt/local/lib/tcoffee_perl/';
use lib '/mnt/local/lib/tcoffee_perl/';
use Exonerate; # Exonerate runner, parser, ...
use Views; # Non-text outputs, e.g. HTML/CSS
#use CheckOutput; # Check output for cds consistency with query
......@@ -32,7 +32,6 @@ use Views; # Non-text outputs, e.g. HTML/CSS
$ENV{'PATH'} .= ':/mnt/local/bin/:./'; # Additional path for executables
my $cachePath = '/scratch/cluster/monthly/t_coffee/ProtoGene_Cache'; # Cache directory
$cachePath = '/Users/smoretti/Documents/ProtoGene/TTMMPP'; #FIXME
my $cacheStorageTime = 15; # Do not update sequences younger than X days
my $userEMail = 'moretti.sebastien@gmail.com'; # To receive e-mails with encountered problems; leave blank to inactive
......@@ -58,7 +57,7 @@ my $blast_param = { 'evalue' => 0.05,
my $VERSION = '4.0.7';
my $webblast_exe = 'webblast.pl';
my $webblast_exe = '/mnt/common/share/ProtoGene/webblast.pl';
my $blast_exe = 'blastall'; # Or wu-blastall for Wu-BLAST; for local blast usage
my $exonerate_exe = 'exonerate-1.0'; # Exonerate 1.0 because current parser only works with this version
......@@ -233,7 +232,7 @@ if ( $giga==1 ){
# Temporary file extension
my $date = sprintf("%d-%02d-%02d_%02dh%02d", localtime->year() + 1900, localtime->mon(), localtime->mday(), localtime->hour, localtime->min);
my $date = sprintf("%d-%02d-%02d_%02dh%02d", localtime->year() + 1900, localtime->mon() + 1, localtime->mday(), localtime->hour, localtime->min);
......@@ -262,7 +261,6 @@ unlink("$originalMSA.cds", "$originalMSA.cdsP", "$originalMSA.cdsP.html",
# Build the sequences, from the alignment, to perform blast search
open(my $LOG, '>', "$date.log") or die "\n\tCannot open LOG file\n\n";
EACH_SEQ:
for(my $r=0; $r<=$#original_names; $r++){
my $fasta_header = $original_names[$r];
......@@ -411,7 +409,7 @@ for(my $r=0; $r<=$#original_names; $r++){
else {
buildFailureOutputFiles($r, $equivalent_blast_hits[0], 'Alignment_failure');
}
next HIT_LINK;
next EACH_SEQ;
}
elsif ( $resultBOJ eq '' ){
if ( $failureStatus[0] eq 'PUI_unavailable' || $failureStatus[0] eq 'No_nt_link' ){
......@@ -434,8 +432,6 @@ for(my $r=0; $r<=$#original_names; $r++){
undef $original_seq[$r];
undef $original_names[$r];
}
close $LOG;
unlink "$date.log" if ( -z "$date.log" );
checkAndCleanStderrFiles("$cache/$date.ExonerateError") if ( -e "$cache/$date.ExonerateError" );
......@@ -595,7 +591,7 @@ sub failure {
# Check external programs presence in the PATH
sub checkExternalSoftware {
for my $exe ( @_ ){
if ( ! which($exe) ){
if ( ! which($exe) && !-x $exe ){
failure();
print {*STDERR} "\t'$exe' program is not reachable\n\tIt could not be in your PATH or not installed\n\n";
exit(1);
......@@ -835,13 +831,13 @@ sub fetch {
XML:
for (my $tries=0; $tries <20; $tries++ ){
my $content = get($url);
print "[$content]\n\n" if ($debug);
print {*STDERR} "[$content]\n\n" if ($debug);
if ( defined $content ){
next XML if ( $content =~ /<ERROR>/i );
return $content;
}
}
print {*STDERR} "Problem with NCBI eutils, please try again later\n";
print "ERROR: Problem with NCBI eutils, please try again later\n";
exit(40);
}
......@@ -861,7 +857,7 @@ sub fetch_fasta {
my $counter = 0;
my $lines = 0;
while(<$FASTA>){
$lines++ if ( $_ !~ /^>/ );
# $lines++ if ( $_ !~ /^>/ ); #Allow download of entries with only fasta header and no sequence associated
$counter = $counter+2 if ( $counter==1 && $_ !~ /^\w/ && $lines==1 && $_ !~ /^>/ );
$counter++ if ( $_ =~ /^>/ );
$counter = $counter+2 if ( $_ !~ /^>/ && ($_ =~ /Error:/ || $_ =~ /[<>]/) );
......@@ -871,8 +867,9 @@ sub fetch_fasta {
return 1 if ( $counter == 1 );
}
}
print {*STDERR} "[[$url]]\n" if ( $debug );
unlink("$outfile");
print {*STDERR} "Problem with NCBI eutils, please try again later.\n";
print "ERROR: Problem with NCBI eutils, please try again later.\n";
exit(45);
}
......@@ -1013,7 +1010,7 @@ sub download_seq{
}
if ( $amont =~ /^\d+$/ && $aval =~ /^\d+$/ ){
fetch_fasta("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$pacc2puid&rettype=fasta&retmode=text&from=$amont&to=$aval&tool=ProtoGene&email=smoretti\@unil.c", "$cache/$acc[$a]:$from-$to.fas");
fetch_fasta("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$pacc2puid&rettype=fasta&retmode=text&from=$amont&to=$aval&tool=ProtoGene&email=smoretti\@unil.ch", "$cache/$acc[$a]:$from-$to.fas");
}
else{
fetch_fasta("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$pacc2puid&rettype=fasta&retmode=text&tool=ProtoGene&email=smoretti\@unil.ch", "$cache/$acc[$a].fas");
......@@ -1073,7 +1070,7 @@ sub runExonerate{
my $targetNT = $gis[$b];
$targetNT = 'gi|'.$targetNT.'|' if ( $gis[$b] =~ /^\d+$/ );
print "\n@@ -> $b ... $targetNT\n" if ( $debug );
print {*STDERR} "\n@@ -> $b ... $targetNT\n" if ( $debug );
# Remove exonerate Error file if it has failed to align protein and nucleotide sequences
......@@ -1114,22 +1111,27 @@ sub testPositions{
%positions = (%positions, '0' => $currentTarget);
my @clefs = sort({$a <=> $b} keys(%positions));
foreach (@clefs) {
print "$_\t$positions{$_}\n" if ( $debug && $_>0 );
print {*STDERR} "$_\t$positions{$_}\n" if ( $debug && $_>0 );
}
my @cles = sort({$a <=> $b} keys(%$best_pos));
if ( !exists($cles[0]) ){
print {*STDERR} keys(%positions), " 1\n";
return(\%positions);
}
else{
print {*STDERR} keys(%positions), " 2\n";
return(\%positions) if ( exists($cles[0]) && $#cles < $#clefs );
print {*STDERR} keys(%$best_pos), " 3\n";
return($best_pos) if ( exists($cles[0]) && $#cles >= $#clefs );
}
}
elsif ( %$best_pos ne 0 ){
print {*STDERR} keys(%$best_pos), " 4\n";
return($best_pos);
}
else{
print {*STDERR} keys(%positions), " 5\n";
return(\%positions);
}
}
......@@ -1143,7 +1145,7 @@ sub testBOJ{
my @clefs = sort({$a <=> $b} keys(%positBOJ));
while(my ($x, $y) = each(%positBOJ) ){
print "pos:$x -> $y\n" if ( $debug && $x>0 );
print {*STDERR} "pos:$x -> $y\n" if ( $debug && $x>0 );
}
my @cles = sort({$a <=> $b} keys(%$bestBOJ));
......
......@@ -148,6 +148,7 @@ my %Strain = ('biogroup' => '',
'plasmid' => '',
'proteobacterium' => '',
'pv.' => '',
'serotype' => '',
'serovar' => '',
'sp.' => '',
'ssp.' => '',
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment