Commit eb2753fd authored by Sebastien Moretti's avatar Sebastien Moretti
Browse files

Start cleaning failed output because of blast failure

parent 446fe9d3
......@@ -136,8 +136,7 @@ cacheManagement('old', $cachePath, 1);
################## Check external software in the PATH
#TODO: remove that for production
#checkExternalSoftware($webblast_exe, $exonerate_exe);
checkExternalSoftware($webblast_exe, $exonerate_exe);
if ( $local ){
checkExternalSoftware($blast_exe);
}
......@@ -306,11 +305,10 @@ for(my $r=0; $r<=$#original_names; $r++){
push @equivalent_blast_hits, $1 if ( $_ =~ /^>\s*\d+@?\w?\w?\w?__([^\s\|\.]+).*$/ ); #Warning: double '_'
}
close $BLASTP;
exit;
unlink("$cache/$date.blastp.$r", "$cache/$date.seq2blast.fas.$r") if ( $tmp == 0 || exists($equivalent_blast_hits[0]) );
if ( !exists($equivalent_blast_hits[0]) ){
print "\n$r ...\tNo blast result found above thresholds for $fasta_header\n\n";
buildFailureOutputFiles($r, 'No_BLASTp_Result', 'Unavailable', '');
buildFailureOutputFiles($r, 'No_BLASTp_Result', 'Not_searched', '');
undef $original_seq[$r];
undef $original_names[$r];
next EACH_SEQ;
......@@ -319,7 +317,7 @@ for(my $r=0; $r<=$#original_names; $r++){
#When multiple equal blast hits ==> use, and add, every hits
#When multiple equivalent blast hits ==> use, and add, every hits
my ($resultPOS, $resultBOJ) = ('', '');
my @failureStatus = ('');
my @intronStatus = ('');
......@@ -386,8 +384,8 @@ for(my $r=0; $r<=$#original_names; $r++){
my ($POSresult, $bestNucleotide) = prepareResults4CDS($POS, $equivalent_blast_hits[$qq], $original_seq[$r], $fasta_header);
$resultPOS .= $POSresult if ( $bestNucleotide ne '' && $POSresult ne '' && $resultPOS !~ /_G_$bestNucleotide/ );
@intronStatus = ($POS->{0}, ${equivalent_blast_hits[$qq]}, '') if ( $intronStep==1 && exists($POS->{0}) && $POS->{0} ne '' && $POS->{0} =~ /\:/ );
@intronStatus = ($POS->{0}, ${equivalent_blast_hits[$qq]}, $bestNucleotide) if ( $intronStep==0 && ($bestNucleotide =~ /^N[CTWZG]_/ || $bestNucleotide =~ /^AC_/) );
@intronStatus = ($POS->{0}, $equivalent_blast_hits[$qq], '') if ( $intronStep==1 && exists($POS->{0}) && $POS->{0} ne '' && $POS->{0} =~ /\:/ );
@intronStatus = ($POS->{0}, $equivalent_blast_hits[$qq], $bestNucleotide) if ( $intronStep==0 && ($bestNucleotide =~ /^N[CTWZG]_/ || $bestNucleotide =~ /^AC_/) );
my ($intronLess, $nameLess) = ('', '');
$intronLess = $POS->{0} if ( %$BOJ eq 0 && exists($POS->{0}) && $POS->{0} ne '' );
......@@ -399,8 +397,8 @@ for(my $r=0; $r<=$#original_names; $r++){
if ( $resultPOS eq '' ){
buildFailureOutputFiles($r, $failureStatus[1], $failureStatus[0], '') if ( $failureStatus[0] ne '' );
buildFailureOutputFiles($r, $equivalent_blast_hits[0], 'Failed_Aln', '') if ( $failureStatus[0] eq '' );
buildFailureOutputFiles($r, $failureStatus[1], $failureStatus[0], '') if ( $failureStatus[0] ne '' );
buildFailureOutputFiles($r, $equivalent_blast_hits[0], 'Alignment_failure', '') if ( $failureStatus[0] eq '' );
next;
}
elsif ( $resultBOJ eq '' ){
......@@ -425,11 +423,13 @@ for(my $r=0; $r<=$#original_names; $r++){
undef $original_names[$r];
}
close $LOG;
unlink "$date.log" if ( -z "$date.log" );
checkAndCleanStderrFiles("$cache/${date}_ExonerateError") if ( -e "$cache/${date}_ExonerateError" );
checkOutputFiles();
unlink("$msa") if ( $converted==1 );
unlink 'blast_result.txt';
my @tmpFiles = glob($cache."/${date}*");
if ( exists($tmpFiles[0]) ){
mkdir("$cache/${date}_TMP");
......@@ -698,16 +698,16 @@ sub fastaHeaders4ProtoGene{
my ($QueryName) = @_;
$QueryName =~ s/ +/ /g;
my ($cqacc, $cqdesc) = ($QueryName, $QueryName);
$cqacc =~ s/^ *([^ ]*) .*$/$1/;
$cqacc =~ s/[\s\t]*$//g;
$cqacc = $cqacc.'_G_@@';
$cqdesc =~ s/^ *[^ ]* *(.*)$/$1/;
$cqdesc =~ s/^[\| \.]*//;
$cqdesc =~ s/[\|\. ]*$//;
$QueryName = $cqacc.$cqdesc;
return($QueryName);
my ($acc, $desc) = ($QueryName, $QueryName);
$acc =~ s/^ *([^ ]*) .*$/$1/;
$acc =~ s/\s+$//g;
$acc = $acc.'_G_@@';
$desc =~ s/^ *[^ ]* *(.*)$/$1/;
$desc =~ s/^[\| \.]+//;
$desc =~ s/[\|\. ]+$//;
return($acc.$desc);
}
#Prepare Fasta Header to put it on the fasta header of the result outputs
......@@ -736,14 +736,15 @@ sub getFastaHeaderAnnot{
sub reverse_trad{
my ($aa) = @_;
my %reverse_code=('A' => 'GCN', 'F' => 'TTy', 'K' => 'AAr', 'P' => 'CCN', 'T' => 'ACN',
'C' => 'TGy', 'G' => 'GGN', 'L' => 'yTN', 'Q' => 'CAr', 'V' => 'GTN',
'D' => 'GAy', 'H' => 'CAy', 'M' => 'ATG', 'R' => 'mGN', 'W' => 'TGG',
'E' => 'GAr', 'I' => 'ATh', 'N' => 'AAy', 'S' => 'wsN', 'Y' => 'TAy',
'X' => 'NNN', '-' => '---', '*' => 'Trr'); # '*' is for stop codon
my %reverse_code = ('A' => 'GCN', 'F' => 'TTy', 'K' => 'AAr', 'P' => 'CCN', 'T' => 'ACN',
'C' => 'TGy', 'G' => 'GGN', 'L' => 'yTN', 'Q' => 'CAr', 'V' => 'GTN',
'D' => 'GAy', 'H' => 'CAy', 'M' => 'ATG', 'R' => 'mGN', 'W' => 'TGG',
'E' => 'GAr', 'I' => 'ATh', 'N' => 'AAy', 'S' => 'wsN', 'Y' => 'TAy',
'X' => 'NNN', '-' => '---', '*' => 'Trr',
); # '*' is for stop codon
my $triplet = 'nnn';
while( my ($x,$y) =each(%reverse_code) ){
while( my ($x, $y) =each(%reverse_code) ){
if ( uc($aa) eq $x ){
$triplet = $y;
last;
......@@ -1377,8 +1378,8 @@ sub buildBOJseq{
sub createCDSOutputFile{
my ($CDSresultat, $original_seq, $original_name) = @_;
open(my $CDS, '>>', "${originalMSA}.cds");
open(my $CDSP, '>>', "${originalMSA}.cdsP") if ( $pep==1 );
open(my $CDS, '>>', "$originalMSA.cds");
open(my $CDSP, '>>', "$originalMSA.cdsP") if ( $pep==1 );
$CDSresultat =~ s/ +/ /g;
$CDSresultat =~ s/MATCHES_ON for >/MATCHES_ON for /;
......@@ -1408,7 +1409,7 @@ sub createBOJOutputFile{
my ($BOJresultat, $original_seq) = @_;
$BOJresultat =~ s/ +/ /g;
open(my $BOJ, '>>', "${originalMSA}.boj");
open(my $BOJ, '>>', "$originalMSA.boj");
print {$BOJ} $BOJresultat;
close $BOJ;
return;
......@@ -1421,11 +1422,11 @@ sub buildIntronlessBOJOutputFile{
my $readyname = '';
$readyname = fastaHeaders4ProtoGene($original_name);
$NTstatus =~ s/\:.+$//;
$readyname =~ s/_G_@@/_G_${NTstatus}-IntronLess _S_ $BLASTstatus _DESC_ / if ( $NTname eq '' );
$readyname =~ s/_G_@@/_G_${NTname}-IntronLess _S_ $BLASTstatus _DESC_ / if ( $NTname ne '' );
$readyname =~ s/_G_@@/_G_$NTstatus-IntronLess _S_ $BLASTstatus _DESC_ / if ( $NTname eq '' );
$readyname =~ s/_G_@@/_G_$NTname-IntronLess _S_ $BLASTstatus _DESC_ / if ( $NTname ne '' );
$readyname =~ s/(.)$/$1 MATCHES_ON $annot/ if ( $annot ne '' );
$readyname =~ s/ +/ /g;
open(my $BOJ, '>>', "${originalMSA}.boj");
open(my $BOJ, '>>', "$originalMSA.boj");
print {$BOJ} ">$readyname\n$original_seq\n";
close $BOJ;
return;
......@@ -1434,29 +1435,20 @@ sub buildIntronlessBOJOutputFile{
sub buildFailureOutputFiles{
my ($order, $BLASTstatus, $NTstatus, $annot) = @_;
my $oriname = $original_names[$order];
my $readyname = '';
open(CDS, '>>', "${originalMSA}.cds");
open(CDSP, '>>', "${originalMSA}.cdsP") if ( $pep==1 );
open(OUT, '>>', "${originalMSA}.out");
open(BOJ, '>>', "${originalMSA}.boj");
if ( $BLASTstatus eq 'No_BLASTp_Result' ){
revtransBuilding($oriname, $order, $BLASTstatus, $NTstatus, $readyname, '');
}
elsif ( $NTstatus eq 'PUI_unavailable' ){
revtransBuilding($oriname, $order, $BLASTstatus, $NTstatus, $readyname, '');
}
elsif ( $NTstatus eq 'No_nt_link' ){
revtransBuilding($oriname, $order, $BLASTstatus, $NTstatus, $readyname, '');
}
elsif ( $NTstatus eq 'Failed_Aln' ){
revtransBuilding($oriname, $order, $BLASTstatus, $NTstatus, $readyname, '');
if ( $BLASTstatus eq 'No_BLASTp_Result' || $NTstatus eq 'PUI_unavailable' || $NTstatus eq 'No_nt_link' || $NTstatus eq 'Alignment_failure' ){
open(CDS, '>>', "$originalMSA.cds");
open(CDSP, '>>', "$originalMSA.cdsP") if ( $pep==1 );
open(OUT, '>>', "$originalMSA.out");
open(BOJ, '>>', "$originalMSA.boj");
revtransBuilding($original_names[$order], $order, $BLASTstatus, $NTstatus, '', '');
close CDS;
close CDSP if ( $pep==1 );
close OUT;
close BOJ;
}
close CDS;
close CDSP if ( $pep==1 );
close OUT;
close BOJ;
return;
}
......@@ -1478,27 +1470,29 @@ sub revtransBuilding{
$readyname = fastaHeaders4ProtoGene($oriname);
$readyname =~ s/_G_@@/_G_$NTstatus _S_ $BLASTstatus _DESC_ /;
$readyname =~ s/(.)$/$1 MATCHES_ON $annot/ if ($annot ne '');
$readyname .= " MATCHES_ON $annot" if ($annot ne '');
$readyname =~ s/ +/ /g;
print OUT "$readyname\n";
my $final_seq = '';
if ( $revtrans==1 ){
$final_seq = '';
my $final_seq = '';
for(my $w=0; $w < length($original_seq[$order]); $w++){
my $aa = substr($original_seq[$order], $w, 1);
$final_seq = $final_seq.reverse_trad($aa);
}
$readyname =~ s/_G_$NTstatus _S_ $BLASTstatus /_G_revtrans /;
my $CDSreformatedSeq = ${final_seq}."---\n";
my $CDSreformatedSeq = $final_seq."---\n";#FIXME: For stop codon ???
$CDSreformatedSeq =~ s/([^\n]{60})/$1\n/g;
# print CDS "$readyname\n${final_seq}---\n";
chomp $CDSreformatedSeq if ( $CDSreformatedSeq =~ /\n+$/ );
print CDS "$readyname\n$CDSreformatedSeq";
if ( $pep==1 ){
my $peptide = $original_seq[$order];
$peptide =~ s/(.)/$1--/g;
print CDSP "$readyname\n$final_seq---\n$original_names[$order]\n$peptide\n";
}
}
if ( $pep==1 && $revtrans==1 ){
my $peptide = $original_seq[$order];
$peptide =~ s/(.)/${1}--/g;
print CDSP "$readyname\n${final_seq}---\n$original_names[$order]\n$peptide\n";
}
return;
}
......@@ -1511,7 +1505,7 @@ sub checkAndCleanStderrFiles{
my ($ExonerateStderrFiles) = @_;
my $body = '';
if ( -e "$ExonerateStderrFiles" && -s "$ExonerateStderrFiles"){
if ( -s "$ExonerateStderrFiles" ){
my %uniq;
open(my $ERRSTD, '<', "$ExonerateStderrFiles");
while(<$ERRSTD>){
......@@ -1600,7 +1594,7 @@ sub checkTemplate {
my $nuclTarget = $name;
$nuclTarget =~ s/^.*_G_ *([^ ]+)/$1/;
$nuclTarget = '' if ( $nuclTarget !~ /^[\w]+$/ || $nuclTarget eq 'No_nt_link' || $nuclTarget =~ /navailable$/ || $nuclTarget eq 'Failed_Aln' );
$nuclTarget = '' if ( $nuclTarget !~ /^[\w]+$/ || $nuclTarget eq 'No_nt_link' || $nuclTarget =~ /Not_searched/ || $nuclTarget =~ /navailable$/ || $nuclTarget eq 'Alignment_failure' );
$nucleotideTarget{$realSeqName} = $nuclTarget;
$nucleotideSeq{$realSeqName} = '';
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment