Commit 69d48150 authored by Sebastien Moretti's avatar Sebastien Moretti
Browse files

Better rendering for ncbi acc.version id + Better stdout rendering + Update...

Better rendering for ncbi acc.version id + Better stdout rendering + Update ncbi prot/nt links + Fix revtrans for the tcoffee server
parent 1151b3f3
......@@ -61,7 +61,7 @@ my $exonerate_exe = 'exonerate'; # Exonerate 1.0 because current parser on
################## Option management
my ($msa, $revtrans, $pep, $hideBOJ, $run_name, $template, $lim) = ('', 0, 0, 0, '', '', 0);
my ($cache, $cleancache) = ('update', 'update');
my ($cache, $cleancache) = ('update', 'update');#TODO Finish to implement
my ($debug, $tmp) = (0, 0);
my ($db, $species, $local, $giga) = ($blast_param->{'db1'}, $blast_param->{'species'}, 0, 0);
my %opts = ('msa|in=s' => \$msa, # Input sequences
......@@ -89,7 +89,7 @@ my %opts = ('msa|in=s' => \$msa, # Input sequences
'tmp' => \$tmp, # To keep traces of fake intermediate files like fake xml from NCBI, fake aln, ...
);
my $test_option_values = Getopt::Long::GetOptions(%opts);
$revtrans = 1 if ( $revtrans eq '' ); # Allow revtrans to be a boolean or a string option (for tcoffee web server)
#$revtrans = 1 if ( $revtrans eq '' ); # Allow revtrans to be a boolean or a string option (for tcoffee web server)
################## Short help message
......@@ -304,7 +304,7 @@ for(my $r=0; $r<=$#original_names; $r++){
# Get the BLASTp hit(s) acc number
open(my $BLASTP, '<', "$cache/$date.blastp.$r");
while(<$BLASTP>){
push @equivalent_blast_hits, $1 if ( $_ =~ /^>.+?\@.+?__([^\s\|\.]+).*$/ ); #Warning: double '_'
push @equivalent_blast_hits, $1 if ( $_ =~ /^>.+?\@.+?__(\S+).*$/ ); #Warning: double '_'
}
close $BLASTP;
unlink("$cache/$date.blastp.$r", "$cache/$date.seq2blast.fas.$r") if ( $tmp == 0 || exists($equivalent_blast_hits[0]) );
......@@ -1236,7 +1236,7 @@ sub prepareAnnotation{
my ($best_pos) = @_;
my $bestOne = $best_pos;
$bestOne =~ s{--.+$}{} if ( $best_pos !~ /^\d+$/ );
$bestOne =~ s{--.+$}{} if ( $best_pos !~ /^\d+$/ );
if ( $bestOne =~ /^\d+$/ ){
open(my $BEST, '<', "$cache/$bestOne.fas");
FIND_BEST:
......@@ -1246,8 +1246,10 @@ sub prepareAnnotation{
chomp($goodName);
$goodName =~ s/^>//;
$goodName =~ s/^gi\|\d+\|//;
$goodName =~ s/^ *([^ ]+) *.*$/$1/;
$goodName =~ s/^...?\|([\w\_\-]+).*/$1/;
$goodName =~ s/^ *(\S+) *.*$/$1/;
if ( $goodName =~ /^...?\|/ ){
$goodName =~ s/^...?\|([\w\_\-]+(\.\d+)?).*/$1/;
}
$bestOne = $goodName;
last FIND_BEST;
}
......@@ -1438,7 +1440,7 @@ sub revtransBuilding{
$readyname =~ s{ +}{ }g;
print OUT "$readyname\n";
if ( $revtrans==1 ){
if ( $revtrans ){
my $final_seq = '';
for(my $w=0; $w < length($original_seq[$order]); $w++){
my $aa = substr($original_seq[$order], $w, 1);
......
- Replace webblast.pl by a simpler blast caller!
=> use tsv blast format to make parsing easier and more perenial
=> parser db agnostic
=> only blast NCBI ???
- Better deal with temporary files: must have a unique name !!!!
- Check what gigablaster returns differently between morning & afternoon
=> local blast instead ?
- Different options for cache path and cache mgmt!!!
- find species for View in taxonomy names.dmp file?
- sort boj pos output
- ssearch instead of blast to be more sensitive ?
......
......@@ -11,8 +11,8 @@ my %css = ('species' => qq{font-style: italic;\n},
'match_on' => qq{background-color: #BD43EF;\n font-weight: bolder;\n}, #violet
);
my $ncbi_nt = 'http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=nuccore&amp;id=';
my $ncbi_aa = 'http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&amp;id=';
my $ncbi_nt = 'http://www.ncbi.nlm.nih.gov/nuccore/';
my $ncbi_aa = 'http://www.ncbi.nlm.nih.gov/protein/';
my $uniprot = 'http://www.uniprot.org/uniprot/';
my $pdb = 'http://www.pdb.org/pdb/explore/explore.do?structureId=';
......@@ -121,8 +121,8 @@ sub Html {
}
if ( m/^&gt;.*MATCHES_ON / ){
s{^&gt;([^ ]*)_G_(\w+) }{&gt;$1_G_<a href='$ncbi_nt$2' target='_blank'>$2</a> } if ( !m/_G_My_Seq / );
if ( m/ _S_ (\w+) / && ! m/ _S_ My_own_seq / ){
s{^&gt;([^ ]*)_G_(\w+(\.\d+)?) }{&gt;$1_G_<a href='$ncbi_nt$2' target='_blank'>$2</a> } if ( !m/_G_My_Seq / );
if ( m/ _S_ (\w+(\.\d+)?) / && ! m/ _S_ My_own_seq / ){
my $blastHit = $1;
if ( $blastHit =~ /^[A-Z][A-Z0-9]{5}$/ ){
s{ _S_ (\w+) }{ _S_ <a href='$uniprot$1' target='_blank'>$1</a> };
......@@ -131,7 +131,7 @@ sub Html {
s{ _S_ (\w+) }{ _S_ <a href='$pdb$1' target='_blank'>$1</a> };
}
else {
s{ _S_ (\w+) }{ _S_ <a href='$ncbi_aa$1' target='_blank'>$1</a> };
s{ _S_ (\w+(\.\d+)?) }{ _S_ <a href='$ncbi_aa$1' target='_blank'>$1</a> };
}
}
......
......@@ -11,8 +11,8 @@ my %css = ('species' => qq{font-style: italic;\n},
'match_on' => qq{background-color: #BD43EF;\n font-weight: bolder;\n}, #violet
);
my $ncbi_nt = 'http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=nuccore&amp;id=';
my $ncbi_aa = 'http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&amp;id=';
my $ncbi_nt = 'http://www.ncbi.nlm.nih.gov/nuccore/';
my $ncbi_aa = 'http://www.ncbi.nlm.nih.gov/protein/';
my $uniprot = 'http://www.uniprot.org/uniprot/';
my $pdb = 'http://www.pdb.org/pdb/explore/explore.do?structureId=';
......@@ -323,8 +323,8 @@ sub Html {
if ( $cross_sp && $desc[0] =~ /^([\w\.\-]+)$/ ){
my $spe = $1;
#FIXME: Use hash 'longer_species_name' instead of this
$spe = '' if ( ($match =~ /lupus$/ && $spe ne 'familiaris') || ($match =~ /gallus$/ && $spe ne 'gallus') || ($match =~ /gorilla$/ && $spe ne 'gorilla') );
$strain = 1 if ( exists($Strain{$spe} );
$spe = '' if ( ($match =~ /lupus$/ && $spe ne 'familiaris') || ($match =~ /gallus$/ && $spe ne 'gallus') || ($match =~ /gorilla$/ && $spe ne 'gorilla') );
$strain = 1 if ( exists($Strain{$spe}) );
$match .= ' '.$spe;
shift(@desc);
}
......@@ -353,8 +353,8 @@ sub Html {
}
if ( m/^&gt;.*MATCHES_ON / ){
s{^&gt;([^ ]*)_G_(\w+) }{&gt;$1_G_<a href='$ncbi_nt$2' target='_blank'>$2</a> } if ( !m/_G_My_Seq / );
if ( m/ _S_ (\w+) / && ! m/ _S_ My_own_seq / ){
s{^&gt;([^ ]*)_G_(\w+(\.\d+)?) }{&gt;$1_G_<a href='$ncbi_nt$2' target='_blank'>$2</a> } if ( !m/_G_My_Seq / );
if ( m/ _S_ (\w+(\.\d+)?) / && ! m/ _S_ My_own_seq / ){
my $blastHit = $1;
if ( $blastHit =~ /^[A-Z][A-Z0-9]{5}$/ ){
s{ _S_ (\w+) }{ _S_ <a href='$uniprot$1' target='_blank'>$1</a> };
......@@ -363,7 +363,7 @@ sub Html {
s{ _S_ (\w+) }{ _S_ <a href='$pdb$1' target='_blank'>$1</a> };
}
else {
s{ _S_ (\w+) }{ _S_ <a href='$ncbi_aa$1' target='_blank'>$1</a> };
s{ _S_ (\w+(\.\d+)?) }{ _S_ <a href='$ncbi_aa$1' target='_blank'>$1</a> };
}
}
......
......@@ -147,7 +147,7 @@ unless ( $quiet =~ /on/i ){
}
print "\n***************************************************************\n\n";
print "\n***********\n\n";
}
......@@ -431,6 +431,7 @@ sub WEB_BLAST {
}
foreach my $encoded_seq(@list_encoded){
my $nb = 0;
print "\n\n**********************************************************************\n" if ( $database eq 'refseq_protein' );
print "BLAST $names[$i]...";
#-- BUILD THE REQUEST
......@@ -521,14 +522,13 @@ sub WEB_BLAST {
while (){
sleep 3;
$req = new HTTP::Request GET => "https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Get&FORMAT_TYPE=$format&FILTER=off&EXPECT=$Eval&ALIGNMENTS=$align&DESCRIPTIONS=$align&ALIGNMENT_VIEW=$aln_view&RID=$rid";
$response = $ua -> request($req);
$req = new HTTP::Request GET => "https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Get&FORMAT_TYPE=$format&FILTER=off&EXPECT=$Eval&ALIGNMENTS=$align&DESCRIPTIONS=$align&ALIGNMENT_VIEW=$aln_view&RID=$rid";
$response = $ua -> request($req);
if ($response->content =~ /Altschul/i) { print "Search Complete\n"; push(@list_pdb,$response -> content);last; }
else { next; }
}
print {$SOR1} (@list_pdb);
++$i;
if ($response->content =~ /Altschul/i) { print "Search Complete\n"; push(@list_pdb,$response -> content);last; }
}
print {$SOR1} (@list_pdb);
++$i;
}
undef (@list_encoded);
......@@ -631,7 +631,13 @@ sub PARSING {
if ( $distant==1 ){
my $version_d, my $database_d, my $poste_d;
undef $/; ($query, $length_query) = ( $intra_res[0] =~ /Query=\s+(\S+).*?\s+Length=\s*(\d+)/smo );
undef $/;
if ( $intra_res[0] =~ /Query=\s+\S+.*?\s+Length=\s*\d+/ ){
($query, $length_query) = ( $intra_res[0] =~ /Query=\s+\S+\s+(\S+)\s+Length=\s*(\d+)/smo );
}
elsif ( $intra_res[0] =~ /Query=\s+\S+\s+Length=\s*\d+/ ){
($query, $length_query) = ( $intra_res[0] =~ /Query=\s+(\S+)\s+Length=\s*(\d+)/smo );
}
$/ = "\n";
open (F3, '<', 'web_tempo.result') or die;
while ($_=<F3>){
......@@ -694,7 +700,7 @@ sub PARSING {
if ( $intra_res=~ />.*?[A-Za-z0-9_\.]+?\s+/ ){
while ( $intra_res=~ />.*?([A-Za-z0-9_\.]+?)\s+/sg ){
my $refseq = $1;
$refseq =~ s/\.\d+$//;
#$refseq =~ s/\.\d+$//;
push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tncbi");
}
}
......@@ -770,7 +776,7 @@ sub MULTI_EQUIVALENT {
if ( $intra_res=~ />.*?[A-Za-z0-9_\.]+?\s+/ ){
while ( $intra_res=~ />.*?([A-Za-z0-9_\.]+?)\s+/sg ){
my $refseq = $1;
$refseq =~ s/\.\d+$//;
#$refseq =~ s/\.\d+$//;
push (@result, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tncbi");
}
}
......@@ -812,7 +818,7 @@ sub AFFICHAGE_REFSEQ_PARSING {
if ($afficher) { print "\n$entete\n\n"; print $afficher; }
if (@name_gid) { print {*STDOUT} "\n**********************************************************************\n\n"; }
if (@name_gid) { print {*STDOUT} "\n***********\n\n"; }
if ($out_file) { open (SOR,">$out_file") or die "can not open $out_file"; print SOR @name_gid; }
print {*STDOUT} "\n", @name_gid;
close SOR;
......@@ -840,7 +846,7 @@ sub AFFICHAGE_PDB_PARSING {
else {next;}
}
undef(@result_sort);
print {*STDOUT} "\n**********************************************************************\n\n";
print {*STDOUT} "\n***********\n\n";
#-- OUTFILE /STDOUT
if ($out_file){ open (SOR,">$out_file") or die "can not open $out_file"; print SOR @sortie; }
......@@ -878,7 +884,7 @@ sub PROFILE {
unlink("tempo_file_profile");
undef(@list_pdb);
print "\n**********************************************************************\n\n";
print "\n***********\n\n";
#-- OUTFILE /STDOUT
if ($out_file){ open (SOR1,">$out_file") or die "can not open $out_file"; print SOR1 @sortie; }
print {*STDOUT} @sortie;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment