Commit 69d48150 authored by Sebastien Moretti's avatar Sebastien Moretti
Browse files

Better rendering for ncbi acc.version id + Better stdout rendering + Update...

Better rendering for ncbi acc.version id + Better stdout rendering + Update ncbi prot/nt links + Fix revtrans for the tcoffee server
parent 1151b3f3
...@@ -61,7 +61,7 @@ my $exonerate_exe = 'exonerate'; # Exonerate 1.0 because current parser on ...@@ -61,7 +61,7 @@ my $exonerate_exe = 'exonerate'; # Exonerate 1.0 because current parser on
################## Option management ################## Option management
my ($msa, $revtrans, $pep, $hideBOJ, $run_name, $template, $lim) = ('', 0, 0, 0, '', '', 0); my ($msa, $revtrans, $pep, $hideBOJ, $run_name, $template, $lim) = ('', 0, 0, 0, '', '', 0);
my ($cache, $cleancache) = ('update', 'update'); my ($cache, $cleancache) = ('update', 'update');#TODO Finish to implement
my ($debug, $tmp) = (0, 0); my ($debug, $tmp) = (0, 0);
my ($db, $species, $local, $giga) = ($blast_param->{'db1'}, $blast_param->{'species'}, 0, 0); my ($db, $species, $local, $giga) = ($blast_param->{'db1'}, $blast_param->{'species'}, 0, 0);
my %opts = ('msa|in=s' => \$msa, # Input sequences my %opts = ('msa|in=s' => \$msa, # Input sequences
...@@ -89,7 +89,7 @@ my %opts = ('msa|in=s' => \$msa, # Input sequences ...@@ -89,7 +89,7 @@ my %opts = ('msa|in=s' => \$msa, # Input sequences
'tmp' => \$tmp, # To keep traces of fake intermediate files like fake xml from NCBI, fake aln, ... 'tmp' => \$tmp, # To keep traces of fake intermediate files like fake xml from NCBI, fake aln, ...
); );
my $test_option_values = Getopt::Long::GetOptions(%opts); my $test_option_values = Getopt::Long::GetOptions(%opts);
$revtrans = 1 if ( $revtrans eq '' ); # Allow revtrans to be a boolean or a string option (for tcoffee web server) #$revtrans = 1 if ( $revtrans eq '' ); # Allow revtrans to be a boolean or a string option (for tcoffee web server)
################## Short help message ################## Short help message
...@@ -304,7 +304,7 @@ for(my $r=0; $r<=$#original_names; $r++){ ...@@ -304,7 +304,7 @@ for(my $r=0; $r<=$#original_names; $r++){
# Get the BLASTp hit(s) acc number # Get the BLASTp hit(s) acc number
open(my $BLASTP, '<', "$cache/$date.blastp.$r"); open(my $BLASTP, '<', "$cache/$date.blastp.$r");
while(<$BLASTP>){ while(<$BLASTP>){
push @equivalent_blast_hits, $1 if ( $_ =~ /^>.+?\@.+?__([^\s\|\.]+).*$/ ); #Warning: double '_' push @equivalent_blast_hits, $1 if ( $_ =~ /^>.+?\@.+?__(\S+).*$/ ); #Warning: double '_'
} }
close $BLASTP; close $BLASTP;
unlink("$cache/$date.blastp.$r", "$cache/$date.seq2blast.fas.$r") if ( $tmp == 0 || exists($equivalent_blast_hits[0]) ); unlink("$cache/$date.blastp.$r", "$cache/$date.seq2blast.fas.$r") if ( $tmp == 0 || exists($equivalent_blast_hits[0]) );
...@@ -1236,7 +1236,7 @@ sub prepareAnnotation{ ...@@ -1236,7 +1236,7 @@ sub prepareAnnotation{
my ($best_pos) = @_; my ($best_pos) = @_;
my $bestOne = $best_pos; my $bestOne = $best_pos;
$bestOne =~ s{--.+$}{} if ( $best_pos !~ /^\d+$/ ); $bestOne =~ s{--.+$}{} if ( $best_pos !~ /^\d+$/ );
if ( $bestOne =~ /^\d+$/ ){ if ( $bestOne =~ /^\d+$/ ){
open(my $BEST, '<', "$cache/$bestOne.fas"); open(my $BEST, '<', "$cache/$bestOne.fas");
FIND_BEST: FIND_BEST:
...@@ -1246,8 +1246,10 @@ sub prepareAnnotation{ ...@@ -1246,8 +1246,10 @@ sub prepareAnnotation{
chomp($goodName); chomp($goodName);
$goodName =~ s/^>//; $goodName =~ s/^>//;
$goodName =~ s/^gi\|\d+\|//; $goodName =~ s/^gi\|\d+\|//;
$goodName =~ s/^ *([^ ]+) *.*$/$1/; $goodName =~ s/^ *(\S+) *.*$/$1/;
$goodName =~ s/^...?\|([\w\_\-]+).*/$1/; if ( $goodName =~ /^...?\|/ ){
$goodName =~ s/^...?\|([\w\_\-]+(\.\d+)?).*/$1/;
}
$bestOne = $goodName; $bestOne = $goodName;
last FIND_BEST; last FIND_BEST;
} }
...@@ -1438,7 +1440,7 @@ sub revtransBuilding{ ...@@ -1438,7 +1440,7 @@ sub revtransBuilding{
$readyname =~ s{ +}{ }g; $readyname =~ s{ +}{ }g;
print OUT "$readyname\n"; print OUT "$readyname\n";
if ( $revtrans==1 ){ if ( $revtrans ){
my $final_seq = ''; my $final_seq = '';
for(my $w=0; $w < length($original_seq[$order]); $w++){ for(my $w=0; $w < length($original_seq[$order]); $w++){
my $aa = substr($original_seq[$order], $w, 1); my $aa = substr($original_seq[$order], $w, 1);
......
- Replace webblast.pl by a simpler blast caller!
=> use tsv blast format to make parsing easier and more perenial
=> parser db agnostic
=> only blast NCBI ???
- Better deal with temporary files: must have a unique name !!!!
- Check what gigablaster returns differently between morning & afternoon
=> local blast instead ?
- Different options for cache path and cache mgmt!!!
- find species for View in taxonomy names.dmp file?
- sort boj pos output - sort boj pos output
- ssearch instead of blast to be more sensitive ? - ssearch instead of blast to be more sensitive ?
......
...@@ -11,8 +11,8 @@ my %css = ('species' => qq{font-style: italic;\n}, ...@@ -11,8 +11,8 @@ my %css = ('species' => qq{font-style: italic;\n},
'match_on' => qq{background-color: #BD43EF;\n font-weight: bolder;\n}, #violet 'match_on' => qq{background-color: #BD43EF;\n font-weight: bolder;\n}, #violet
); );
my $ncbi_nt = 'http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=nuccore&amp;id='; my $ncbi_nt = 'http://www.ncbi.nlm.nih.gov/nuccore/';
my $ncbi_aa = 'http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&amp;id='; my $ncbi_aa = 'http://www.ncbi.nlm.nih.gov/protein/';
my $uniprot = 'http://www.uniprot.org/uniprot/'; my $uniprot = 'http://www.uniprot.org/uniprot/';
my $pdb = 'http://www.pdb.org/pdb/explore/explore.do?structureId='; my $pdb = 'http://www.pdb.org/pdb/explore/explore.do?structureId=';
...@@ -121,8 +121,8 @@ sub Html { ...@@ -121,8 +121,8 @@ sub Html {
} }
if ( m/^&gt;.*MATCHES_ON / ){ if ( m/^&gt;.*MATCHES_ON / ){
s{^&gt;([^ ]*)_G_(\w+) }{&gt;$1_G_<a href='$ncbi_nt$2' target='_blank'>$2</a> } if ( !m/_G_My_Seq / ); s{^&gt;([^ ]*)_G_(\w+(\.\d+)?) }{&gt;$1_G_<a href='$ncbi_nt$2' target='_blank'>$2</a> } if ( !m/_G_My_Seq / );
if ( m/ _S_ (\w+) / && ! m/ _S_ My_own_seq / ){ if ( m/ _S_ (\w+(\.\d+)?) / && ! m/ _S_ My_own_seq / ){
my $blastHit = $1; my $blastHit = $1;
if ( $blastHit =~ /^[A-Z][A-Z0-9]{5}$/ ){ if ( $blastHit =~ /^[A-Z][A-Z0-9]{5}$/ ){
s{ _S_ (\w+) }{ _S_ <a href='$uniprot$1' target='_blank'>$1</a> }; s{ _S_ (\w+) }{ _S_ <a href='$uniprot$1' target='_blank'>$1</a> };
...@@ -131,7 +131,7 @@ sub Html { ...@@ -131,7 +131,7 @@ sub Html {
s{ _S_ (\w+) }{ _S_ <a href='$pdb$1' target='_blank'>$1</a> }; s{ _S_ (\w+) }{ _S_ <a href='$pdb$1' target='_blank'>$1</a> };
} }
else { else {
s{ _S_ (\w+) }{ _S_ <a href='$ncbi_aa$1' target='_blank'>$1</a> }; s{ _S_ (\w+(\.\d+)?) }{ _S_ <a href='$ncbi_aa$1' target='_blank'>$1</a> };
} }
} }
......
...@@ -11,8 +11,8 @@ my %css = ('species' => qq{font-style: italic;\n}, ...@@ -11,8 +11,8 @@ my %css = ('species' => qq{font-style: italic;\n},
'match_on' => qq{background-color: #BD43EF;\n font-weight: bolder;\n}, #violet 'match_on' => qq{background-color: #BD43EF;\n font-weight: bolder;\n}, #violet
); );
my $ncbi_nt = 'http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=nuccore&amp;id='; my $ncbi_nt = 'http://www.ncbi.nlm.nih.gov/nuccore/';
my $ncbi_aa = 'http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&amp;id='; my $ncbi_aa = 'http://www.ncbi.nlm.nih.gov/protein/';
my $uniprot = 'http://www.uniprot.org/uniprot/'; my $uniprot = 'http://www.uniprot.org/uniprot/';
my $pdb = 'http://www.pdb.org/pdb/explore/explore.do?structureId='; my $pdb = 'http://www.pdb.org/pdb/explore/explore.do?structureId=';
...@@ -323,8 +323,8 @@ sub Html { ...@@ -323,8 +323,8 @@ sub Html {
if ( $cross_sp && $desc[0] =~ /^([\w\.\-]+)$/ ){ if ( $cross_sp && $desc[0] =~ /^([\w\.\-]+)$/ ){
my $spe = $1; my $spe = $1;
#FIXME: Use hash 'longer_species_name' instead of this #FIXME: Use hash 'longer_species_name' instead of this
$spe = '' if ( ($match =~ /lupus$/ && $spe ne 'familiaris') || ($match =~ /gallus$/ && $spe ne 'gallus') || ($match =~ /gorilla$/ && $spe ne 'gorilla') ); $spe = '' if ( ($match =~ /lupus$/ && $spe ne 'familiaris') || ($match =~ /gallus$/ && $spe ne 'gallus') || ($match =~ /gorilla$/ && $spe ne 'gorilla') );
$strain = 1 if ( exists($Strain{$spe} ); $strain = 1 if ( exists($Strain{$spe}) );
$match .= ' '.$spe; $match .= ' '.$spe;
shift(@desc); shift(@desc);
} }
...@@ -353,8 +353,8 @@ sub Html { ...@@ -353,8 +353,8 @@ sub Html {
} }
if ( m/^&gt;.*MATCHES_ON / ){ if ( m/^&gt;.*MATCHES_ON / ){
s{^&gt;([^ ]*)_G_(\w+) }{&gt;$1_G_<a href='$ncbi_nt$2' target='_blank'>$2</a> } if ( !m/_G_My_Seq / ); s{^&gt;([^ ]*)_G_(\w+(\.\d+)?) }{&gt;$1_G_<a href='$ncbi_nt$2' target='_blank'>$2</a> } if ( !m/_G_My_Seq / );
if ( m/ _S_ (\w+) / && ! m/ _S_ My_own_seq / ){ if ( m/ _S_ (\w+(\.\d+)?) / && ! m/ _S_ My_own_seq / ){
my $blastHit = $1; my $blastHit = $1;
if ( $blastHit =~ /^[A-Z][A-Z0-9]{5}$/ ){ if ( $blastHit =~ /^[A-Z][A-Z0-9]{5}$/ ){
s{ _S_ (\w+) }{ _S_ <a href='$uniprot$1' target='_blank'>$1</a> }; s{ _S_ (\w+) }{ _S_ <a href='$uniprot$1' target='_blank'>$1</a> };
...@@ -363,7 +363,7 @@ sub Html { ...@@ -363,7 +363,7 @@ sub Html {
s{ _S_ (\w+) }{ _S_ <a href='$pdb$1' target='_blank'>$1</a> }; s{ _S_ (\w+) }{ _S_ <a href='$pdb$1' target='_blank'>$1</a> };
} }
else { else {
s{ _S_ (\w+) }{ _S_ <a href='$ncbi_aa$1' target='_blank'>$1</a> }; s{ _S_ (\w+(\.\d+)?) }{ _S_ <a href='$ncbi_aa$1' target='_blank'>$1</a> };
} }
} }
......
...@@ -147,7 +147,7 @@ unless ( $quiet =~ /on/i ){ ...@@ -147,7 +147,7 @@ unless ( $quiet =~ /on/i ){
} }
print "\n***************************************************************\n\n"; print "\n***********\n\n";
} }
...@@ -431,6 +431,7 @@ sub WEB_BLAST { ...@@ -431,6 +431,7 @@ sub WEB_BLAST {
} }
foreach my $encoded_seq(@list_encoded){ foreach my $encoded_seq(@list_encoded){
my $nb = 0; my $nb = 0;
print "\n\n**********************************************************************\n" if ( $database eq 'refseq_protein' );
print "BLAST $names[$i]..."; print "BLAST $names[$i]...";
#-- BUILD THE REQUEST #-- BUILD THE REQUEST
...@@ -521,14 +522,13 @@ sub WEB_BLAST { ...@@ -521,14 +522,13 @@ sub WEB_BLAST {
while (){ while (){
sleep 3; sleep 3;
$req = new HTTP::Request GET => "https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Get&FORMAT_TYPE=$format&FILTER=off&EXPECT=$Eval&ALIGNMENTS=$align&DESCRIPTIONS=$align&ALIGNMENT_VIEW=$aln_view&RID=$rid"; $req = new HTTP::Request GET => "https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Get&FORMAT_TYPE=$format&FILTER=off&EXPECT=$Eval&ALIGNMENTS=$align&DESCRIPTIONS=$align&ALIGNMENT_VIEW=$aln_view&RID=$rid";
$response = $ua -> request($req); $response = $ua -> request($req);
if ($response->content =~ /Altschul/i) { print "Search Complete\n"; push(@list_pdb,$response -> content);last; } if ($response->content =~ /Altschul/i) { print "Search Complete\n"; push(@list_pdb,$response -> content);last; }
else { next; } }
} print {$SOR1} (@list_pdb);
print {$SOR1} (@list_pdb); ++$i;
++$i;
} }
undef (@list_encoded); undef (@list_encoded);
...@@ -631,7 +631,13 @@ sub PARSING { ...@@ -631,7 +631,13 @@ sub PARSING {
if ( $distant==1 ){ if ( $distant==1 ){
my $version_d, my $database_d, my $poste_d; my $version_d, my $database_d, my $poste_d;
undef $/; ($query, $length_query) = ( $intra_res[0] =~ /Query=\s+(\S+).*?\s+Length=\s*(\d+)/smo ); undef $/;
if ( $intra_res[0] =~ /Query=\s+\S+.*?\s+Length=\s*\d+/ ){
($query, $length_query) = ( $intra_res[0] =~ /Query=\s+\S+\s+(\S+)\s+Length=\s*(\d+)/smo );
}
elsif ( $intra_res[0] =~ /Query=\s+\S+\s+Length=\s*\d+/ ){
($query, $length_query) = ( $intra_res[0] =~ /Query=\s+(\S+)\s+Length=\s*(\d+)/smo );
}
$/ = "\n"; $/ = "\n";
open (F3, '<', 'web_tempo.result') or die; open (F3, '<', 'web_tempo.result') or die;
while ($_=<F3>){ while ($_=<F3>){
...@@ -694,7 +700,7 @@ sub PARSING { ...@@ -694,7 +700,7 @@ sub PARSING {
if ( $intra_res=~ />.*?[A-Za-z0-9_\.]+?\s+/ ){ if ( $intra_res=~ />.*?[A-Za-z0-9_\.]+?\s+/ ){
while ( $intra_res=~ />.*?([A-Za-z0-9_\.]+?)\s+/sg ){ while ( $intra_res=~ />.*?([A-Za-z0-9_\.]+?)\s+/sg ){
my $refseq = $1; my $refseq = $1;
$refseq =~ s/\.\d+$//; #$refseq =~ s/\.\d+$//;
push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tncbi"); push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tncbi");
} }
} }
...@@ -770,7 +776,7 @@ sub MULTI_EQUIVALENT { ...@@ -770,7 +776,7 @@ sub MULTI_EQUIVALENT {
if ( $intra_res=~ />.*?[A-Za-z0-9_\.]+?\s+/ ){ if ( $intra_res=~ />.*?[A-Za-z0-9_\.]+?\s+/ ){
while ( $intra_res=~ />.*?([A-Za-z0-9_\.]+?)\s+/sg ){ while ( $intra_res=~ />.*?([A-Za-z0-9_\.]+?)\s+/sg ){
my $refseq = $1; my $refseq = $1;
$refseq =~ s/\.\d+$//; #$refseq =~ s/\.\d+$//;
push (@result, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tncbi"); push (@result, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tncbi");
} }
} }
...@@ -812,7 +818,7 @@ sub AFFICHAGE_REFSEQ_PARSING { ...@@ -812,7 +818,7 @@ sub AFFICHAGE_REFSEQ_PARSING {
if ($afficher) { print "\n$entete\n\n"; print $afficher; } if ($afficher) { print "\n$entete\n\n"; print $afficher; }
if (@name_gid) { print {*STDOUT} "\n**********************************************************************\n\n"; } if (@name_gid) { print {*STDOUT} "\n***********\n\n"; }
if ($out_file) { open (SOR,">$out_file") or die "can not open $out_file"; print SOR @name_gid; } if ($out_file) { open (SOR,">$out_file") or die "can not open $out_file"; print SOR @name_gid; }
print {*STDOUT} "\n", @name_gid; print {*STDOUT} "\n", @name_gid;
close SOR; close SOR;
...@@ -840,7 +846,7 @@ sub AFFICHAGE_PDB_PARSING { ...@@ -840,7 +846,7 @@ sub AFFICHAGE_PDB_PARSING {
else {next;} else {next;}
} }
undef(@result_sort); undef(@result_sort);
print {*STDOUT} "\n**********************************************************************\n\n"; print {*STDOUT} "\n***********\n\n";
#-- OUTFILE /STDOUT #-- OUTFILE /STDOUT
if ($out_file){ open (SOR,">$out_file") or die "can not open $out_file"; print SOR @sortie; } if ($out_file){ open (SOR,">$out_file") or die "can not open $out_file"; print SOR @sortie; }
...@@ -878,7 +884,7 @@ sub PROFILE { ...@@ -878,7 +884,7 @@ sub PROFILE {
unlink("tempo_file_profile"); unlink("tempo_file_profile");
undef(@list_pdb); undef(@list_pdb);
print "\n**********************************************************************\n\n"; print "\n***********\n\n";
#-- OUTFILE /STDOUT #-- OUTFILE /STDOUT
if ($out_file){ open (SOR1,">$out_file") or die "can not open $out_file"; print SOR1 @sortie; } if ($out_file){ open (SOR1,">$out_file") or die "can not open $out_file"; print SOR1 @sortie; }
print {*STDOUT} @sortie; print {*STDOUT} @sortie;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment