Commit fc085288 authored by Sebastien Moretti's avatar Sebastien Moretti
Browse files

simplify names

parent a4c6d7b8
......@@ -265,29 +265,31 @@ for(my $r=0; $r<=$#original_names; $r++){
undef $noGap;
my @equivalent_blast_hits; #Multiple BLAST hits with the same highest e-value/score
my @multipleequalblasthits;
if ( exists($template_NT->{$short_name}) && $template_NT->{$short_name} ne '' ){
@multipleequalblasthits = (@multipleequalblasthits, 'My_own_seq'); #To change for ?
print "\n\n$r done\n\nNucleotidic template for $r:\t$template_NT->{$short_name}\n\n**********************************************************************\n\n";
# First try nucleotide template
push @equivalent_blast_hits, 'My_own_seq';
print "\n\n$r done\n\nNucleotide template for $r:\t$template_NT->{$short_name}\n\n*******************************************************************\n\n";
}
elsif ( exists($template_AA->{$short_name}) && $template_AA->{$short_name} ne '' ){
@multipleequalblasthits = (@multipleequalblasthits, $template_AA->{$short_name});
print "\n\n$r done\n\nBlastP template for $r:\t$template_AA->{$short_name}\n\n**********************************************************************\n\n";
# Second try amino acid/BLASTp template
push @equivalent_blast_hits, $template_AA->{$short_name};
print "\n\n$r done\n\nBlastP template for $r:\t$template_AA->{$short_name}\n\n*******************************************************************\n\n";
}
else{
## --> send them to webblast
## else send them to webblast
$quiet = '-quiet=on ' if ( $r>0 ); #Show webblast parameters only for the first webblast run
launchWebblast("$cache/$date.seq2blast.fas.$r", $quiet, $local, $db, $species, $blast_exe, "$cache/$date.blastp${r}");
##Get the BLASTp hit(s) acc number
open(my $BLASTP, '<', "$cache/$date.blastp${r}");
while(<$BLASTP>){
@multipleequalblasthits = (@multipleequalblasthits, $1) if ( $_ =~ /^>\s*\d+@?\w?\w?\w?__([^\s\|\.]+).*$/ ); #Warning: double '_'
@equivalent_blast_hits = (@equivalent_blast_hits, $1) if ( $_ =~ /^>\s*\d+@?\w?\w?\w?__([^\s\|\.]+).*$/ ); #Warning: double '_'
}
close $BLASTP;
unlink("$cache/$date.blastp${r}", "$cache/$date.seq2blast.fas.$r") if ( $tmp == 0 || exists($multipleequalblasthits[0]) );
if ( !exists($multipleequalblasthits[0]) ){
unlink("$cache/$date.blastp${r}", "$cache/$date.seq2blast.fas.$r") if ( $tmp == 0 || exists($equivalent_blast_hits[0]) );
if ( !exists($equivalent_blast_hits[0]) ){
print "\n$r ...\tNo blast result found above filter thresholds for $fasta_header\n\n";
buildFailureOutputFiles($r, 'No_BLASTp_Result', 'Unavailable', '');
undef $original_seq[$r];
......@@ -302,8 +304,8 @@ for(my $r=0; $r<=$#original_names; $r++){
my ($resultPOS, $resultBOJ) = ('', '');
my @failureStatus = ('');
my @intronStatus = ('');
for(my $qq=0; $qq<=$#multipleequalblasthits; $qq++){
print "\n$fasta_header --> [$multipleequalblasthits[$qq]]\n";
for(my $qq=0; $qq<=$#equivalent_blast_hits; $qq++){
print "\n$fasta_header --> [$equivalent_blast_hits[$qq]]\n";
my @nt_GIs;
......@@ -320,19 +322,19 @@ for(my $r=0; $r<=$#original_names; $r++){
}
else{
#Prot ACC -> PUID
my $protGI = blastPAcc2PGI($multipleequalblasthits[$qq]);
my $protGI = blastPAcc2PGI($equivalent_blast_hits[$qq]);
if ( $protGI eq '' ){
print "\tNo protein (prot GI) link found for $multipleequalblasthits[$qq] in $fasta_header\n\n";
@failureStatus = ('PUI_unavailable', $multipleequalblasthits[$qq]);
print "\tNo protein (prot GI) link found for $equivalent_blast_hits[$qq] in $fasta_header\n\n";
@failureStatus = ('PUI_unavailable', $equivalent_blast_hits[$qq]);
next;
}
# print "\t$protGI\n";
#PUID -> nt GIs & GeneID
my ($ntGIs, $geneID) = protGI2NTGIs($protGI, $multipleequalblasthits[$qq]);
my ($ntGIs, $geneID) = protGI2NTGIs($protGI, $equivalent_blast_hits[$qq]);
if ( $ntGIs eq '' && $geneID eq '' ){
print "\tNo nucleotide (nt GIs) link found for $multipleequalblasthits[$qq] in $fasta_header\n\n";
@failureStatus = ('No_nt_link', $multipleequalblasthits[$qq]);
print "\tNo nucleotide (nt GIs) link found for $equivalent_blast_hits[$qq] in $fasta_header\n\n";
@failureStatus = ('No_nt_link', $equivalent_blast_hits[$qq]);
next;
}
print " linked with [$ntGIs $geneID]\n";
......@@ -345,8 +347,8 @@ for(my $r=0; $r<=$#original_names; $r++){
my @geneIDs = split(/,/, $geneID);
while(<@geneIDs>){
my $geneID = $_;
my ($chr, $amont, $aval) = geneID2Chr($geneID, $multipleequalblasthits[$qq]);
print "\n\tNo gene locus found for $multipleequalblasthits[$qq] with $geneID in $fasta_header\n\n" if ($chr eq '' and $geneID ne '');
my ($chr, $amont, $aval) = geneID2Chr($geneID, $equivalent_blast_hits[$qq]);
print "\n\tNo gene locus found for $equivalent_blast_hits[$qq] with $geneID in $fasta_header\n\n" if ($chr eq '' and $geneID ne '');
if ( $chr ne '' ){
#Get Chr seq
......@@ -360,18 +362,18 @@ for(my $r=0; $r<=$#original_names; $r++){
#Align Nt seq with our protein query seq
my ($POS, $BOJ) = runExonerate($exonerate_exe, $cache, $date, $r, $original_seq[$r], $fasta_header, $multipleequalblasthits[$qq], $r, @nt_GIs);
my ($POS, $BOJ) = runExonerate($exonerate_exe, $cache, $date, $r, $original_seq[$r], $fasta_header, $equivalent_blast_hits[$qq], $r, @nt_GIs);
my ($POSresult, $bestNucleotide) = prepareResults4CDS($POS, $multipleequalblasthits[$qq], $original_seq[$r], $fasta_header);
my ($POSresult, $bestNucleotide) = prepareResults4CDS($POS, $equivalent_blast_hits[$qq], $original_seq[$r], $fasta_header);
$resultPOS .= $POSresult if ( $bestNucleotide ne '' && $POSresult ne '' && $resultPOS !~ /_G_$bestNucleotide/ );
@intronStatus = ($POS->{0}, ${multipleequalblasthits[$qq]}, '') if ( $intronStep==1 && exists($POS->{0}) && $POS->{0} ne '' && $POS->{0} =~ /\:/ );
@intronStatus = ($POS->{0}, ${multipleequalblasthits[$qq]}, $bestNucleotide) if ( $intronStep==0 && ($bestNucleotide =~ /^N[CTWZG]_/ || $bestNucleotide =~ /^AC_/) );
@intronStatus = ($POS->{0}, ${equivalent_blast_hits[$qq]}, '') if ( $intronStep==1 && exists($POS->{0}) && $POS->{0} ne '' && $POS->{0} =~ /\:/ );
@intronStatus = ($POS->{0}, ${equivalent_blast_hits[$qq]}, $bestNucleotide) if ( $intronStep==0 && ($bestNucleotide =~ /^N[CTWZG]_/ || $bestNucleotide =~ /^AC_/) );
my ($intronLess, $nameLess) = ('', '');
$intronLess = $POS->{0} if ( %$BOJ eq 0 && exists($POS->{0}) && $POS->{0} ne '' );
$nameLess = $bestNucleotide if ( %$BOJ eq 0 && $intronStep==0 && ($bestNucleotide =~ /^N[CTWZG]_/ || $bestNucleotide =~ /^AC_/) );
my ($BOJresult, $bestGenomic) = prepareResults4BOJ($BOJ, $multipleequalblasthits[$qq], $original_seq[$r], $fasta_header, $intronLess, $nameLess);
my ($BOJresult, $bestGenomic) = prepareResults4BOJ($BOJ, $equivalent_blast_hits[$qq], $original_seq[$r], $fasta_header, $intronLess, $nameLess);
$resultBOJ .= $BOJresult if ( $bestGenomic ne '' && $BOJresult ne '' && $resultBOJ !~ /_G_$bestGenomic[ \-]/ );
# print " $intronStep $POS->{0} [@intronStatus] {$BOJresult} $bestNucleotide - $bestGenomic \t $BOJ->{0}\n";
}
......@@ -379,7 +381,7 @@ for(my $r=0; $r<=$#original_names; $r++){
if ( $resultPOS eq '' ){
buildFailureOutputFiles($r, $failureStatus[1], $failureStatus[0], '') if ( $failureStatus[0] ne '' );
buildFailureOutputFiles($r, $multipleequalblasthits[0], 'Failed_Aln', '') if ( $failureStatus[0] eq '' );
buildFailureOutputFiles($r, $equivalent_blast_hits[0], 'Failed_Aln', '') if ( $failureStatus[0] eq '' );
next;
}
elsif ( $resultBOJ eq '' ){
......@@ -390,7 +392,7 @@ for(my $r=0; $r<=$#original_names; $r++){
buildIntronlessBOJOutputFile($intronStatus[1], $intronStatus[0], $intronStatus[2], $fasta_header, $original_seq[$r]);
}
else{
buildFailureBOJOutputFile($multipleequalblasthits[0], 'Unavailable', $fasta_header, $original_seq[$r]);
buildFailureBOJOutputFile($equivalent_blast_hits[0], 'Unavailable', $fasta_header, $original_seq[$r]);
}
createCDSOutputFile($resultPOS, $original_seq[$r], $fasta_header);
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment