Commit 80d1c9ae authored by Sebastien Moretti's avatar Sebastien Moretti
Browse files

Start cleaning fct calls

parent 17bb387d
......@@ -10,6 +10,8 @@
use strict;
use warnings;
use diagnostics;
use Carp;
use Env qw(HOME); # Use only environmental (shell) HOME variable
use File::Basename;
use File::Copy qw(move); # Avoid external 'mv' command usage
......@@ -50,10 +52,10 @@ my $date = `/usr/bin/env date +%y-%m-%d_%T`; #Used to get a unique file
chomp($date);
$date =~ s/\:/\-/g;
&checkProgramsPresence($webblast_exe, $exonerate); # Check external programs presence in the PATH
checkProgramsPresence($webblast_exe, $exonerate); # Check external programs presence in the PATH
$blast_exe =~ s/${blast_prog}$//;
&checkCacheAccessibility($cachedir); # Check cache directory accessibility
checkCacheAccessibility($cachedir); # Check cache directory accessibility
......@@ -97,12 +99,12 @@ if ( $blast_exe eq '' && $version==0 && $help==0 ){
}
#Cache management
my $cache = &cacheManagement($Cache) || '.';
my $cache = cacheManagement($Cache) || '.';
#Open and check template file
my ($templG, $templP, $templSeq);
($templG, $templP, $templSeq) = &checkTemplate($template) if ($template);
($templG, $templP, $templSeq) = checkTemplate($template) if ($template);
#Short help message
......@@ -141,7 +143,7 @@ if ( $msa eq '' || $species eq '' || $db eq '' || $cache eq '' ){
##Open and Check the fasta file
my $originalMSA = $msa;
my $change = 0;
($msa, $change) = &checkFastaFile($msa, $change);
($msa, $change) = checkFastaFile($msa, $change);
my $fasta_checker = -1;
my (@input_order, @original_names, @original_seq); #Use 3 lists in parallel : original_names, seq, order(1->n)
open( my $MSA, '<', "$msa" ) if ( -e "$msa" );
......@@ -173,12 +175,12 @@ while(<$MSA>){
}
close $MSA;
if ( $fasta_checker == -1 ){
&failure;
failure();
print {*STDERR} "\tThe MSA file does NOT seem to be a protein FASTA format\n\n";
exit(1);
}
elsif ( $lim >0 && $fasta_checker > $lim ){
&failure;
failure();
print {*STDERR} "\tThe FASTA file is too large, try with less than $lim sequences\n\tor split your file\n\n";
exit(1);
}
......@@ -191,7 +193,7 @@ elsif ( exists( $original_seq[0] ) && $original_seq[0] =~ /[acgtu]/i ){
my $t = ($first_seq =~ s/[tTuU]//g) || 0;
my $non = ($first_seq =~ s/[^aAcCgGtTuUXxNn-]//g) || 0;
if ( ($a+$c+$g+$t) >= (($a+$c+$g+$t+$non)*80/100) ){
&failure();
failure();
print {*STDERR} "\tYour sequences seem already to be nucleotides\n\tthis program purpose is to turn AMINO ACID alignments into CDS nucleotide alignments\n\n";
exit(1);
}
......@@ -236,7 +238,7 @@ unlink("${originalMSA}.boj");
# Remove old files from the cache directory
&cacheManagement('old', '1'); #everytime ProtoGene is running
cacheManagement('old', '1'); #everytime ProtoGene is running
......@@ -270,7 +272,7 @@ for(my $r=0; $r<=$#input_order; $r++){
else{
## --> send them to webblast
$quiet = '-quiet=on ' if ( $r>0 ); #Show webblast parameters only for the first webblast run
&launchWebblast("$cache/${date}_seq2blast.fas${r}", $quiet, $athome, $db, $species, $blast_exe, "$cache/${date}.blastp${r}");
launchWebblast("$cache/${date}_seq2blast.fas${r}", $quiet, $athome, $db, $species, $blast_exe, "$cache/${date}.blastp${r}");
##Get the BLASTp hit(s) acc number
open(my $BLASTP, '<', "${cache}/${date}.blastp${r}");
......@@ -281,7 +283,7 @@ for(my $r=0; $r<=$#input_order; $r++){
unlink("$cache/${date}.blastp${r}", "${cache}/${date}_seq2blast.fas${r}") if ( $tmp == 0 || exists($multipleequalblasthits[0]) );
if ( !exists($multipleequalblasthits[0]) ){
print "\n$input_order[$r] ...\tNo blast result found above filter thresholds for $right_name\n\n";
&buildFailureOutputFiles($r, 'No_BLASTp_Result', 'Unavailable', '');
buildFailureOutputFiles($r, 'No_BLASTp_Result', 'Unavailable', '');
undef $original_seq[$r];
undef $original_names[$r];
undef $input_order[$r];
......@@ -302,7 +304,7 @@ for(my $r=0; $r<=$#input_order; $r++){
my @nt_GIs;
my $intronStep = 0;
if ( exists($templG->{$templ_name}) && $templG->{$templ_name} ne '' && $templG->{$templ_name} !~ /^My_Seq$/i ){
&downloadSeq($cache, $date, '', '', $templG->{$templ_name});
downloadSeq($cache, $date, '', '', $templG->{$templ_name});
@nt_GIs = $templG->{$templ_name};
}
elsif ( exists($templG->{$templ_name}) && $templG->{$templ_name} ne '' && $templG->{$templ_name} =~ /^My_Seq$/i ){
......@@ -313,7 +315,7 @@ for(my $r=0; $r<=$#input_order; $r++){
}
else{
#Prot ACC -> PUID
my $protGI = &blastPAcc2PGI($multipleequalblasthits[$qq]);
my $protGI = blastPAcc2PGI($multipleequalblasthits[$qq]);
if ( $protGI eq '' ){
print "\tNo protein (prot GI) link found for ${multipleequalblasthits[$qq]} in $right_name\n\n";
@failureStatus = ('PUI_unavailable', ${multipleequalblasthits[$qq]});
......@@ -322,7 +324,7 @@ for(my $r=0; $r<=$#input_order; $r++){
# print "\t$protGI\n";
#PUID -> nt GIs & GeneID
my ($ntGIs, $geneID) = &protGI2NTGIs($protGI, $multipleequalblasthits[$qq]);
my ($ntGIs, $geneID) = protGI2NTGIs($protGI, $multipleequalblasthits[$qq]);
if ( $ntGIs eq '' && $geneID eq '' ){
print "\tNo nucleotide (nt GIs) link found for ${multipleequalblasthits[$qq]} in $right_name\n\n";
@failureStatus = ('No_nt_link', ${multipleequalblasthits[$qq]});
......@@ -333,17 +335,17 @@ for(my $r=0; $r<=$#input_order; $r++){
#Get transcript and contig seq
@nt_GIs = split(/,/, $ntGIs);
&downloadSeqFromGIs($cache, $date, '', '', @nt_GIs);
downloadSeqFromGIs($cache, $date, '', '', @nt_GIs);
#GeneID -> Chr
my @geneIDs = split(/,/, $geneID);
while(<@geneIDs>){
my $geneID = $_;
my ($chr, $amont, $aval) = &geneID2Chr($geneID, $multipleequalblasthits[$qq]);
my ($chr, $amont, $aval) = geneID2Chr($geneID, $multipleequalblasthits[$qq]);
print "\n\tNo gene locus found for ${multipleequalblasthits[$qq]} with $geneID in $right_name\n\n" if ($chr eq '' and $geneID ne '');
if ( $chr ne '' ){
#Get Chr seq
&downloadSeq($cache, $date, $amont, $aval, $chr);
downloadSeq($cache, $date, $amont, $aval, $chr);
$intronStep = 1;
@nt_GIs = ("$chr:$amont-$aval", @nt_GIs);
......@@ -353,9 +355,9 @@ for(my $r=0; $r<=$#input_order; $r++){
#Align Nt seq with our protein query seq
my ($POS, $BOJ) = &runExonerate($exonerate, $cache, $date, ${input_order[$r]}, ${original_seq[$r]}, $right_name, $multipleequalblasthits[$qq], $r, @nt_GIs);
my ($POS, $BOJ) = runExonerate($exonerate, $cache, $date, ${input_order[$r]}, ${original_seq[$r]}, $right_name, $multipleequalblasthits[$qq], $r, @nt_GIs);
my ($POSresult, $bestNucleotide) = &prepareResults4CDS($POS, $multipleequalblasthits[$qq], ${original_seq[$r]}, $right_name);
my ($POSresult, $bestNucleotide) = prepareResults4CDS($POS, $multipleequalblasthits[$qq], ${original_seq[$r]}, $right_name);
$resultPOS .= $POSresult if ( $bestNucleotide ne '' && $POSresult ne '' && $resultPOS !~ /_G_$bestNucleotide/ );
@intronStatus = ($POS->{0}, ${multipleequalblasthits[$qq]}, '') if ( $intronStep==1 && exists($POS->{0}) && $POS->{0} ne '' && $POS->{0} =~ /\:/ );
......@@ -364,32 +366,32 @@ for(my $r=0; $r<=$#input_order; $r++){
my ($intronLess, $nameLess) = ('', '');
$intronLess = $POS->{0} if ( %$BOJ eq 0 && exists($POS->{0}) && $POS->{0} ne '' );
$nameLess = $bestNucleotide if ( %$BOJ eq 0 && $intronStep==0 && ($bestNucleotide =~ /^N[CTWZG]_/ || $bestNucleotide =~ /^AC_/) );
my ($BOJresult, $bestGenomic) = &prepareResults4BOJ($BOJ, $multipleequalblasthits[$qq], ${original_seq[$r]}, $right_name, $intronLess, $nameLess);
my ($BOJresult, $bestGenomic) = prepareResults4BOJ($BOJ, $multipleequalblasthits[$qq], ${original_seq[$r]}, $right_name, $intronLess, $nameLess);
$resultBOJ .= $BOJresult if ( $bestGenomic ne '' && $BOJresult ne '' && $resultBOJ !~ /_G_$bestGenomic[ \-]/ );
# print " $intronStep $POS->{0} [@intronStatus] {$BOJresult} $bestNucleotide - $bestGenomic \t $BOJ->{0}\n";
}
if ( $resultPOS eq '' ){
&buildFailureOutputFiles($r, $failureStatus[1], $failureStatus[0], '') if ( $failureStatus[0] ne '' );
&buildFailureOutputFiles($r, $multipleequalblasthits[0], 'Failed_Aln', '') if ( $failureStatus[0] eq '' );
buildFailureOutputFiles($r, $failureStatus[1], $failureStatus[0], '') if ( $failureStatus[0] ne '' );
buildFailureOutputFiles($r, $multipleequalblasthits[0], 'Failed_Aln', '') if ( $failureStatus[0] eq '' );
next;
}
elsif ( $resultBOJ eq '' ){
if ( $failureStatus[0] eq 'PUI_unavailable' || $failureStatus[0] eq 'No_nt_link' ){
&buildFailureBOJOutputFile($failureStatus[1], $failureStatus[0], $right_name, ${original_seq[$r]});
buildFailureBOJOutputFile($failureStatus[1], $failureStatus[0], $right_name, ${original_seq[$r]});
}
elsif ( $failureStatus[0] eq '' && $intronStatus[0] ne '' ){
&buildIntronlessBOJOutputFile($intronStatus[1], $intronStatus[0], $intronStatus[2], $right_name, ${original_seq[$r]});
buildIntronlessBOJOutputFile($intronStatus[1], $intronStatus[0], $intronStatus[2], $right_name, ${original_seq[$r]});
}
else{
&buildFailureBOJOutputFile($multipleequalblasthits[0], 'Unavailable', $right_name, ${original_seq[$r]});
buildFailureBOJOutputFile($multipleequalblasthits[0], 'Unavailable', $right_name, ${original_seq[$r]});
}
&createCDSOutputFile($resultPOS, ${original_seq[$r]}, $right_name);
createCDSOutputFile($resultPOS, ${original_seq[$r]}, $right_name);
}
else {
&createCDSOutputFile($resultPOS, ${original_seq[$r]}, $right_name);
&createBOJOutputFile($resultBOJ, ${original_seq[$r]});
createCDSOutputFile($resultPOS, ${original_seq[$r]}, $right_name);
createBOJOutputFile($resultBOJ, ${original_seq[$r]});
}
......@@ -399,8 +401,8 @@ for(my $r=0; $r<=$#input_order; $r++){
}
&checkAndCleanStderrFiles("$cache/${date}_ExonerateError") if ( -e "$cache/${date}_ExonerateError" );
&checkOutputFiles();
checkAndCleanStderrFiles("$cache/${date}_ExonerateError") if ( -e "$cache/${date}_ExonerateError" );
checkOutputFiles();
unlink("$msa") if ( $change==1 );
my @tmpFiles = glob($cache."/${date}*");
if ( exists($tmpFiles[0]) ){
......@@ -421,14 +423,14 @@ if ( -s "${originalMSA}.cds" || -s "${originalMSA}.out"){
print " #### File Type= MSA Format= fasta_CDS+Query Name= ${originalMSA}.cdsP\n"
if ( -s "${originalMSA}.cdsP");
print " #### File Type= MSA Format= colored_fasta_CDS Name= ${originalMSA}.cdsP.html\n"
if ( &Views::Html("${originalMSA}.cdsP") && -s "${originalMSA}.cdsP.html");
if ( Views::Html("${originalMSA}.cdsP") && -s "${originalMSA}.cdsP.html");
print " #### File Type= MSA Format= Rejected_seq Name= ${originalMSA}.out\n"
if ( -s "${originalMSA}.out");
print " #### File Type= MSA Format= fasta_Exon-Boundaries Name= ${originalMSA}.boj\n"
if ( -s "${originalMSA}.boj" && -s "${originalMSA}.cds" && $hideBOJ==0);
}
else{
&failure;
failure();
exit(1);
}
......@@ -549,7 +551,7 @@ PROTOGENE re-builds the original alignment with nucleotidic information it has g
###################### Management ######################
#Failure declaration
sub failure{
sub failure {
print {*STDERR} "\n\nFATAL\n\nTERMINATION STATUS: FAILURE\n\n";
return;
}
......@@ -559,12 +561,12 @@ sub checkProgramsPresence{
my ($webblast_exe, $exonerate) = @_;
if ( $webblast_exe eq '' ){
&failure;
failure();
print {*STDERR} "\twebblast.pl program is not reachable\n\tIt could not be in your PATH or not installed\n\n";
exit(1);
}
if ( $exonerate eq '' ){
&failure;
failure();
print {*STDERR} "\texonerate program is not reachable\n\tIt could not be in your PATH or not installed\n\n";
exit(1);
}
......@@ -672,7 +674,7 @@ sub checkFastaFile{
}
}
else {
&failure;
failure();
print {*STDERR} "\tThe file \'$msa\' doesn't not seem to be reachable\n\n";
exit(1);
}
......@@ -1123,11 +1125,11 @@ sub runExonerate{
#Parse Exonerate output
my ($posiTions, $posBOJ) = &loci_from_Exonerate::parser("$cache/${date}_${order}.exon", $debug);
my ($posiTions, $posBOJ) = loci_from_Exonerate::parser("$cache/${date}_${order}.exon", $debug);
%best_pos = &testPositions($posiTions, $gis[$b], %best_pos);
%bestBOJ = &testBOJ($posBOJ, $gis[$b], %bestBOJ);
%best_pos = testPositions($posiTions, $gis[$b], %best_pos);
%bestBOJ = testBOJ($posBOJ, $gis[$b], %bestBOJ);
my $whichGI = $gis[$b];
$whichGI = 'gi|'.$whichGI.'|' if ( $whichGI =~ /^\d+$/ );
print {*STDERR} "\tNo exon-intron structure found for $whichGI with $right_name\n" if ( %$posBOJ eq 0 );
......@@ -1211,9 +1213,9 @@ sub prepareResults4CDS{
my $resultPOS = '';
my ($bestOne, $annot) = ('', '');
if ( %POSITIONS ne 0 ){
($bestOne, $annot) = &prepareAnnotation($POSITIONS{0});
my $seqName = &buildAnnotation($bestOne, $annot, $input_name, $blastHit);
my $seqCDS = &buildCDSseq($input_seq, %POSITIONS);
($bestOne, $annot) = prepareAnnotation($POSITIONS{0});
my $seqName = buildAnnotation($bestOne, $annot, $input_name, $blastHit);
my $seqCDS = buildCDSseq($input_seq, %POSITIONS);
$resultPOS = ">".$seqName."\n".$seqCDS."\n";
}
......@@ -1228,16 +1230,16 @@ sub prepareResults4BOJ{
my $resultBOJ = '';
my ($bestOne, $annot) = ('', '');
if ( %EXONBORDERS ne 0 ){
($bestOne, $annot) = &prepareAnnotation($EXONBORDERS{0});
my $seqName = &buildAnnotation($bestOne, $annot, $input_name, $blastHit);
my $seqBOJ = &buildBOJseq($input_seq, %EXONBORDERS);
($bestOne, $annot) = prepareAnnotation($EXONBORDERS{0});
my $seqName = buildAnnotation($bestOne, $annot, $input_name, $blastHit);
my $seqBOJ = buildBOJseq($input_seq, %EXONBORDERS);
$resultBOJ = ">".$seqName."\n".$seqBOJ."\n";
}
elsif ( $intronLess ne '' ){
($bestOne, $annot) = &prepareAnnotation($intronLess);
my $description = $annot;#&getFastaHeaderAnnot($intronLess);
($bestOne, $annot) = prepareAnnotation($intronLess);
my $description = $annot;#getFastaHeaderAnnot($intronLess);
my $readyname = '';
$readyname = &fastaHeaders4ProtoGene($input_name);
$readyname = fastaHeaders4ProtoGene($input_name);
if ( $intronLess =~ /\:.+/ || $bestOne =~ /^N[CTWZG]_/ || $bestOne =~ /^AC_/ || $nameLess =~ /^N[CTWZG]_/ || $nameLess =~ /^AC_/ ){
$readyname =~ s/_G_@@/_G_${bestOne}-IntronLess _S_ $blastHit _DESC_ / if ( $nameLess eq '' );
$readyname =~ s/_G_@@/_G_${nameLess}-IntronLess _S_ $blastHit _DESC_ / if ( $nameLess ne '' );
......@@ -1277,7 +1279,7 @@ sub prepareAnnotation{
}
close $BEST;
}
my $annot = &getFastaHeaderAnnot($best_pos);
my $annot = getFastaHeaderAnnot($best_pos);
return($bestOne, $annot);
}
......@@ -1287,7 +1289,7 @@ sub buildAnnotation{
$annot =~ s{>}{}g; #Remove > sign in annotation if any e.g.: NM_105729
my $readyName = '';
$readyName = &fastaHeaders4ProtoGene($input_name);
$readyName = fastaHeaders4ProtoGene($input_name);
$readyName =~ s/_G_@@/_G_$bestOne _S_ $blastHit _DESC_ /;
$readyName =~ s/(.)$/$1 MATCHES_ON $annot/ if ( $annot ne '' );
$readyName =~ s/ +/ /g;
......@@ -1397,9 +1399,9 @@ sub createBOJOutputFile{
sub buildIntronlessBOJOutputFile{
my ($BLASTstatus, $NTstatus, $NTname, $original_name, $original_seq) = @_;
my $annot = &getFastaHeaderAnnot($NTstatus);
my $annot = getFastaHeaderAnnot($NTstatus);
my $readyname = '';
$readyname = &fastaHeaders4ProtoGene($original_name);
$readyname = fastaHeaders4ProtoGene($original_name);
$NTstatus =~ s/\:.+$//;
$readyname =~ s/_G_@@/_G_${NTstatus}-IntronLess _S_ $BLASTstatus _DESC_ / if ( $NTname eq '' );
$readyname =~ s/_G_@@/_G_${NTname}-IntronLess _S_ $BLASTstatus _DESC_ / if ( $NTname ne '' );
......@@ -1421,16 +1423,16 @@ sub buildFailureOutputFiles{
open(OUT, '>>', "${originalMSA}.out");
open(BOJ, '>>', "${originalMSA}.boj");
if ( $BLASTstatus eq 'No_BLASTp_Result' ){
&revtransBuilding($oriname, $order, $BLASTstatus, $NTstatus, $readyname, '');
revtransBuilding($oriname, $order, $BLASTstatus, $NTstatus, $readyname, '');
}
elsif ( $NTstatus eq 'PUI_unavailable' ){
&revtransBuilding($oriname, $order, $BLASTstatus, $NTstatus, $readyname, '');
revtransBuilding($oriname, $order, $BLASTstatus, $NTstatus, $readyname, '');
}
elsif ( $NTstatus eq 'No_nt_link' ){
&revtransBuilding($oriname, $order, $BLASTstatus, $NTstatus, $readyname, '');
revtransBuilding($oriname, $order, $BLASTstatus, $NTstatus, $readyname, '');
}
elsif ( $NTstatus eq 'Failed_Aln' ){
&revtransBuilding($oriname, $order, $BLASTstatus, $NTstatus, $readyname, '');
revtransBuilding($oriname, $order, $BLASTstatus, $NTstatus, $readyname, '');
}
close CDS;
......@@ -1444,7 +1446,7 @@ sub buildFailureBOJOutputFile{
my ($BLASTstatus, $NTstatus, $original_name, $original_seq) = @_;
my $readyname = '';
$readyname = &fastaHeaders4ProtoGene($original_name);
$readyname = fastaHeaders4ProtoGene($original_name);
$readyname =~ s/_G_@@/_G_$NTstatus _S_ $BLASTstatus _DESC_ /;
$readyname =~ s/ +/ /g;
open( my $BOJ, '>>', "${originalMSA}.boj");
......@@ -1456,7 +1458,7 @@ sub buildFailureBOJOutputFile{
sub revtransBuilding{
my ($oriname, $order, $BLASTstatus, $NTstatus, $readyname, $annot) = @_;
$readyname = &fastaHeaders4ProtoGene($oriname);
$readyname = fastaHeaders4ProtoGene($oriname);
$readyname =~ s/_G_@@/_G_$NTstatus _S_ $BLASTstatus _DESC_ /;
$readyname =~ s/(.)$/$1 MATCHES_ON $annot/ if ($annot ne '');
$readyname =~ s/ +/ /g;
......@@ -1466,7 +1468,7 @@ sub revtransBuilding{
$final_seq = '';
for(my $w=0; $w < length($original_seq[$order]); $w++){
my $aa = substr($original_seq[$order], $w, 1);
$final_seq = $final_seq.&reverse_trad($aa);
$final_seq = $final_seq.reverse_trad($aa);
}
$readyname =~ s/_G_$NTstatus _S_ $BLASTstatus /_G_revtrans /;
my $CDSreformatedSeq = ${final_seq}."---\n";
......@@ -1559,8 +1561,8 @@ sub checkTemplate{
READ_TEMPLATE:
while(<$TEMPLATE>){
if ( $_ =~ /^>/ && $_ !~ /_[GS]_ *[^ ]*/ ){
&failure();
&template_failure();
failure();
template_failure();
close $TEMPLATE;
exit 1;
return;
......@@ -1613,8 +1615,8 @@ sub checkTemplate{
}
close $TEMPLATE;
if ( $fasta_checker == -1 ){
&failure();
&template_failure();
failure();
template_failure();
exit 1;
return;
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment