Commit 4f06680d authored by Sebastien Moretti
Browse files

Clean webblast

parent 4427c275
......@@ -98,37 +98,37 @@ else { die "unknown method\n";}
if (($orgn !~ /All\+organisms/) && ($locale=~/1|2/))
{ print {*STDERR} "-organism option can't be used locally or with -gigablast option!\n";exit;}
##---AFFICHAGE des valeurs des options
unless ($quiet =~ /on/i )
{
print {*STDERR} "
Program : $program
##---PRINT option values
unless ( $quiet =~ /on/i ){
print {*STDERR} "
Program : $program
Database : $database
Method : $method
Method : $method
Query_file : $query_file
Out_file : $out_file
";
print {*STDOUT} "
Evalue threshold : $Eval
Matrix : $matrix
Filter : $filter
Out_file : $out_file
";
print {*STDOUT} "
Evalue threshold : $Eval
Matrix : $matrix
Filter : $filter
Blast_identity_threshold : $identity_treshold
Cover threshold : $cover_tresh
";
Cover threshold : $cover_tresh
";
print {*STDERR} "
Number of hits : $align
Number of processors used : $process
";
if ($gigablast=~ /^yes$/i) { print {*STDERR} "
gigablast: yes\n" }
unless ($locale) { print {*STDERR} "
Organism : $orgn\n" }
print {*STDERR} "
***************************************************************\n\n";
Number of hits : $align
Number of processors used : $process
";
if ( $gigablast=~ /^yes$/i ){
print {*STDERR} "\n gigablast: yes\n";
}
unless ($locale) {
print {*STDERR} "\n Organism : $orgn\n";
}
print {*STDERR} "\n***************************************************************\n\n";
}
......@@ -198,19 +198,19 @@ sub NCBI_DATABASE
#------------------------------------------------------------------------------------------------------------------------
sub HELP
{
my($org,@orga)= &LIST_ORGA();
my ($list_orga)=join(', ',@orga);
my ($org, @orga) = &LIST_ORGA();
my ($list_orga) = join(', ', @orga);
print {*STDERR} "
usage: $0 -infile <fasta file> -method <pdbid/geneid or profile> options []\n
-program ...... Program Name (blastp)
Default = blastp
Default = blastp
-database ..... Database at NCBI (nr, pdb, swissprot, refseq_protein) or indicate a local fasta file
Default = pdb at NCBI
-infile ....... Query_file = a list of sequences in fasta format
-outfile ...... Name the outfile to make a template file for t_coffee
Default = STDOUT or default.profile if method is profile
Default = pdb at NCBI
-infile ....... Query_file = a list of sequences in fasta format
-outfile ...... Name the outfile to make a template file for t_coffee
Default = STDOUT or default.profile if method is profile
-evalue ....... Evalue threshold Default = 1
-matrix ....... PAM30 PAM70 BLOSUM45 BLOSUM80
Default BLOSUM62
......@@ -222,7 +222,7 @@ sub HELP
Default = Off
-organism ..... $list_orga are available
Default is All_organisms
-identity ..... blast identity threshold = provide a % for view only the results upper or equal to the threshold
-identity ..... blast identity threshold = provide a % for view only the results upper or equal to the threshold
Default 50
-cover ........ Cover threshold = provide a % : sequence covering Default: 30
-hits ........ Number of hits
......@@ -432,66 +432,64 @@ sub WEB_BLAST
#-----------------------------------------------------------------------------------------------------------------------
# LOCAL_BLAST
# Runs a BLASTP search through an external command line and returns the raw
# output, split into chunks on the string 'Query='.
#
# Positional parameters:
#   $blast_dir           command used for the standard local searches
#   $database            database name (rewritten to pdbaa/refprot for gigablast)
#   $query_file          fasta file holding the query sequences
#   $Eval                e-value threshold passed with -e
#   $align               number of hits passed with -v / -b
#   $method              profile, geneid or pdbid (case-insensitive)
#   $matrix              substitution matrix passed with -M
#   $filter              filter setting passed with -F
#   $process             processor count passed with -a
#   $gigablast           'yes' or 'no' (case-insensitive)
#   $database_expresso   database used for the Expresso search
#   $blast_dir_expresso  command used for the Expresso search
#   $runblast            command used for the gigablast search
#
# Also reads the file-level variables $quiet, $BLASTMAT and $BLASTDB, and
# appends every chunk to the file-level array @list_pdb.
#
# Side effects: writes the complete BLAST output to blast_result.txt, prints
# progress on STDERR and a version/database summary on STDOUT, and leaves
# $/ set to 'Query='. Exits with status 1 on an invalid option combination;
# dies if the command or the result file cannot be opened.
#
# Returns @list_pdb without its first element (the text that precedes the
# first 'Query=', assuming @list_pdb was empty on entry).
sub LOCAL_BLAST
{
    my ($blast_dir, $database, $query_file, $Eval, $align,
        $method, $matrix, $filter, $process, $gigablast,
        $database_expresso, $blast_dir_expresso, $runblast) = @_;

    # NOTE(review): every command below is a single string handed to the
    # shell with option values interpolated as-is. The gigablast and Expresso
    # commands need the shell ('<' redirection, ';' separators), so they are
    # left in string form; callers must not pass untrusted values.
    my $command;
    if ( $method =~ /^profile$/i && $gigablast =~ /^no$/i ) {
        $command = "$blast_dir -p blastp -d $database -i $query_file -m 6 -M $matrix -v $align -b $align -F $filter -e $Eval -a $process";
    }
    elsif ( $method =~ /^geneid$/i && $gigablast =~ /^no$/i ) {
        $command = "$blast_dir -p blastp -d $database -i $query_file -v $align -b $align -F $filter -M $matrix -e $Eval -a $process";
    }
    elsif ( $method =~ /^geneid$|^pdbid$/i && $gigablast =~ /^yes$/i ) {
        unless ( $database eq 'nr' || $database eq 'pdb' || $database eq 'refseq_protein' || $database eq 'pdbaa' ) {
            print {*STDERR} "\nsorry invalid database for gigablast\n";
            exit 1;
        }
        # An empty database name can never get past the check above, so the
        # former "provide a valid database!" test was unreachable and is gone.
        if ( $database eq 'pdb' )            { $database = 'pdbaa'; }
        if ( $database eq 'refseq_protein' ) { $database = 'refprot'; }
        $command = "$runblast -d $database -p blastp -e $Eval -v $align -F F <$query_file";
    }
    elsif ( $method =~ /^profile$/i && $gigablast =~ /^yes$/i ) {
        print {*STDERR} "\nSorry method profile can't be used with -gigablast option\n";
        exit 1;    # an error, so report a non-zero status like the branch above
    }
    elsif ( $method =~ /^pdbid$/i && $database =~ /expressopdb/ ) {
        # BLAST for Expresso. $BLASTMAT and $BLASTDB are run as shell
        # statements ahead of the search - presumably environment settings;
        # verify against their definition.
        $command = "$BLASTMAT; $BLASTDB;$blast_dir_expresso -p blastp -d $database_expresso -i $query_file -F $filter -e $Eval -M $matrix -v $align -b $align";
    }
    else {
        # NOTE(review): -v and -b are given twice (1, then $align); kept
        # exactly as in the original command line.
        $command = "$blast_dir -p blastp -d $database -i $query_file -v 1 -b 1 -F $filter -e $Eval -M $matrix -v $align -b $align -a $process";
    }

    open( my $blast_fh, '-|', $command )
        or die "Cannot run BLAST command '$command': $!\n";

    unless ( $quiet =~ /on/ ) { print {*STDERR} "\nrun BLAST..."; }

    # Start from empty strings so the match and the summary below never
    # operate on undef when BLAST printed no header.
    my ( $name_database, $posted, $version ) = ( '', '', '' );

    open( my $SOR2, '>', 'blast_result.txt' )
        or die "Cannot write blast_result.txt: $!\n";

    # NOTE(review): this changes the input record separator for the whole
    # program and never restores it. Left as-is because code outside this
    # sub may rely on it - confirm before switching to 'local $/'.
    $/ = 'Query=';

    while ( my $chunk = <$blast_fh> ) {
        if ( $chunk =~ /Database:\s+(\S+)/ )       { $name_database = $1; }
        if ( $chunk =~ /Posted date:\s+(.+?)\n/ )  { $posted        = $1; }
        if ( $chunk =~ /(BLASTP\s+\S+)/ )          { $version       = $1; }
        print {$SOR2} $chunk;
        push @list_pdb, $chunk;
        # First whitespace-delimited token of the chunk, echoed as progress.
        if ( $chunk =~ /\s*(.+?)\s/ ) { print {*STDERR} "\n$1 done"; }
    }

    # Closing a pipe also collects the command's exit status; report a
    # failure without aborting, so whatever was read is still returned.
    close $blast_fh
        or warn "\nBLAST command did not finish cleanly (status $?)\n";
    close $SOR2
        or die "Cannot close blast_result.txt: $!\n";
    print {*STDERR} "\n";

    # BLAST reported a path instead of a plain name: show the requested name.
    $name_database = $database if ( $name_database =~ m{/} );

    unless ( $quiet =~ /on/i ) {
        print {*STDOUT} "\nVersion: $version\nDatabase: $name_database\nPosted date: $posted\n\n";
    }

    shift @list_pdb;
    return @list_pdb;
}
......@@ -499,9 +497,9 @@ sub LOCAL_BLAST
#-----------------------------------------------------------------------------------------------------------------------------
sub PARSING
{
my($list_pdb,$locale,$distant,$method,$quiet,$database,$gigablast)=@_;
my ($list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast) = @_;
my(@list_pdb)=@$list_pdb; my(@result_not_sort)=();my $n=0;
open (SOR, '>', 'webblast.log') or die;
open (my $SOR, '>', 'webblast.log') or die;
if ($gigablast=~ /^yes$/i) { $locale=2;$distant=0;}
if ($gigablast=~ /^no$/i) { $locale=1;}
......@@ -511,7 +509,7 @@ sub PARSING
{
my $query, my $length_query, my($pdb_id), my $comp=0;
if ($pdb_result=~/No hits found/m) { print SOR $pdb_result; next;}
if ($pdb_result=~/No hits found/m) { print {$SOR} $pdb_result; next;}
$pdb_result=~ s/ALIGNMENTS//;
local $/=undef;
......@@ -526,16 +524,16 @@ sub PARSING
open (F3, '<', 'web_tempo.result') or die ;
while ($_=<F3>)
{
if ($_=~ /BLASTP\s+(\S+)/o) { $version_d=$1;}
if ($_=~ /Database:\s+(.+?)$/o) { $database_d=$1;}
if ($_=~ /Posted date:\s*(.+?)$/o) { $poste_d=$1; last;}
if ($_=~ /(BLASTP\s+\S+)/o) { $version_d = $1;}
if ($_=~ /Database:\s+(.+?)$/o) { $database_d = $1;}
if ($_=~ /Posted date:\s*(.+?)$/o) { $poste_d = $1; last;}
}
close F3;
$database_d = $database if ( $database_d =~ m{/} );
# $database_d = $database if ( $database_d =~ m{/} );
unless ($quiet=~ /on/i || $n>0) {++$n;
print {*STDOUT} "
Version: BLASTP $version_d
Database: $database_d
Version: $version_d
Database: $database_d
Posted date: $poste_d\n\n";
}
......@@ -555,7 +553,7 @@ sub PARSING
unless ($method !~ /^geneid$/i) { if ($comp<=$bits) { $comp=$bits;} else { last;} }
if ( $query eq '' || $length_query eq '' || $aln_length eq '' || $identity eq '' || $recouvrement eq '' || $gap eq '' )
{ print SOR " can't parse $pdb_result"; next; }
{ print {$SOR} " can't parse $pdb_result"; next; }
if ($method =~ /^pdbid$/i)
{
......@@ -588,7 +586,7 @@ sub PARSING
if($locale==1) {($refseq) = ($intra_res=~ />(.*?)\s/o); $refseq=~ s/_//; }
else {($refseq) = ($intra_res=~ /^>pdb\|(.{6})/im);$refseq=~ s/\|//;}
unless ($refseq) { print SOR $intra_res; next; }
unless ($refseq) { print {$SOR} $intra_res; next; }
push (@result_not_sort,("$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tpdb"));
}
elsif ($distant==1 )
......@@ -599,7 +597,7 @@ sub PARSING
elsif ($database=~ /swiss/i)
{
my($refseq) = ($intra_res=~ />.*?sp\|(.+?)\|/o);
unless ($refseq) { print SOR $pdb_result; next; }
unless ($refseq) { print {$SOR} $pdb_result; next; }
$refseq=~ s/\.\d+$//;
push (@result_not_sort,("$query\t$refseq\t$identity\t$recouvrement\t$bits\tswiss_prot"));
}
......@@ -607,7 +605,7 @@ sub PARSING
else {die;}
}
}
close SOR;
close $SOR;
undef (@list_pdb);
if ($method =~/^geneid$/i) { return (@result_not_sort); }
......@@ -783,29 +781,31 @@ sub ORGN
# LIST_ORGA
# Returns the list of organism names known to the script, with every space
# replaced by an underscore (e.g. 'Homo sapiens' becomes 'Homo_sapiens').
#
# The list is returned in sorted order so that the result is the same on
# every run; callers such as HELP take the first element separately, and
# with hash ordering that element used to change from run to run.
sub LIST_ORGA
{
    my @organisms = (
        'Homo sapiens',
        'Bos taurus',
        'Gallus gallus',
        'Viruses',
        'Bacteria',
        'Eukaryota',
        'Mammalia',
        'Vertebrata',
        'All organisms',
        'Fungi',
        'Primates',
        'Archaea',
        'Arabidopsis thaliana',
        'Caenorhabditis elegans',
        'Escherichia coli',
        'Mus musculus',
        'Drosophila melanogaster',
    );

    # Work on the copy held in @cle; tr replaces every space, not just the
    # first one.
    my @cle = @organisms;
    tr/ /_/ for @cle;

    # Sort into an array first: 'return sort ...' is undefined in scalar
    # context, whereas returning an array yields its size there.
    @cle = sort @cle;
    return @cle;
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment