Commit 5fcebdb2 authored by Sebastien Moretti's avatar Sebastien Moretti
Browse files

improve webblast code

parent 61926980
......@@ -284,15 +284,14 @@ sub HELP
BLAST_DIRECTORY...............[Indicates where your BLAST directory is installed localy]
";
exit;
exit 1;
return;
}
#-----------------------------------------------------------------------------------------
sub OPTIONS_GET {
my %opt = ();
GetOptions
GetOptions
(
'infile=s' =>\$opt{infile},
'outfile=s' =>\$opt{outfile},
......@@ -301,8 +300,8 @@ sub OPTIONS_GET {
'blast_dir=s' =>\$opt{blast_dir},
'identity=f' =>\$opt{treshold},
'cover=f' =>\$opt{cover},
'evalue=f' =>\$opt{evalue},
'hits=i' =>\$opt{hits},
'evalue=f' =>\$opt{evalue},
'hits=i' =>\$opt{hits},
'matrix=s' =>\$opt{matrix},
'filter=s' =>\$opt{filter},
'method=s' =>\$opt{method},
......@@ -311,165 +310,237 @@ sub OPTIONS_GET {
'quiet=s' =>\$opt{quiet},
'gigablast=s' =>\$opt{gigablast},
);
if ($ARGV[0]) {print "Unprocessed by Getopt::Long\n $ARGV[0]\n"; &HELP();}
my($evalue_tresh) = $opt{'evalue'}; unless ($evalue_tresh) { $evalue_tresh=1;};
my($cover_tresh) = $opt{'cover'}; unless (defined $cover_tresh) { $cover_tresh=30;};
my($query_file) = $opt{'infile'}; unless ($query_file) { print {*STDERR} "Flag -infile must be defined\n"; &HELP();};
my($outfil) = $opt{'outfile'}; unless ($outfil) { $outfil='';};
my($treshold) = $opt{'treshold'}; unless (defined $treshold) { $treshold=50;};
my($blast_dir) = $opt{'blast_dir'}; unless ($blast_dir) { $blast_dir='';};
my($database) = $opt{'database'}; unless ($database) { $database='';};
my($program) = $opt{'program'}; unless ($program) { $program='blastp';};
my($align) = $opt{'hits'}; unless (defined $align) { $align=1;};
my($matrix) = $opt{'matrix'}; unless ($matrix) { $matrix='BLOSUM62';};
my($filter) = $opt{'filter'}; unless ($filter) { $filter='F';};
my($method) = $opt{'method'}; unless ($method) {print {*STDERR} "Flag -method must be defined\n"; &HELP();};
my($organism) = $opt{'organism'}; unless ($organism) { $organism='All organisms';};
my($process) = $opt{'processor'}; unless ($process) { $process=1;};
my($param) = $opt{'quiet'}; unless ($param) { $param='off';};
my($gigablast) = $opt{'gigablast'}; unless ($gigablast) {$gigablast='no';};
if ($method !~ /(^geneid$|^pdbid$|^profile$)/i) { print {*STDERR} "unknown method for the flag -method\n";&HELP(); }
if ($treshold <0 || $treshold >100) { print {*STDERR} "\nout of range for the option -treshold \n"; &HELP();}
if ($cover_tresh <0 || $cover_tresh >100) { print {*STDERR} "\nout of range for the option -cover \n"; &HELP();}
if ($align <0) { print {*STDERR} "\n error with option align\n"; &HELP();}
if ($gigablast!~/^yes$|^no$/i) { print {*STDERR} "invalid argument for gigaglast option : yes/no\n";exit;};
if ($filter!~ /^[TFRLMCV]{1}$|^off$/i) {print {*STDERR} "valid values for -filter are T,F,R,L,M,C,or V!\n";exit;}
if ($matrix!~ /PAM30|PAM70|BLOSUM45|BLOSUM80|BLOSUM62/) { print {*STDERR} "valid values for -matrix are PAM30,PAM70,BLOSUM45,BLOSUM80 or BLOSUM62\n";exit }
if ($outfil eq "" && $method=~ /^profile$/i) { $outfil='default_profile.template'}
if ($param!~ /^on$|^off$/i) { print {*STDERR} "valid values for -quiet is on or off\n";exit;}
my($orgn,@all_orgn)= &ORGN($organism);
return ($program,$database,$blast_dir,$query_file,
$outfil,$treshold,$cover_tresh,$evalue_tresh,
$align,$matrix,$filter,$method,$orgn,$process,$param,$gigablast);
if ( $ARGV[0] ){
print "Unprocessed by Getopt::Long\n $ARGV[0]\n";
&HELP();
}
my($evalue_tresh) = $opt{'evalue'}; unless ($evalue_tresh) { $evalue_tresh = 1;};
my($cover_tresh) = $opt{'cover'}; unless (defined $cover_tresh) { $cover_tresh = 30;};
my($query_file) = $opt{'infile'}; unless ($query_file) { print {*STDERR} "Flag -infile must be defined\n"; &HELP();};
my($outfil) = $opt{'outfile'}; unless ($outfil) { $outfil = '';};
my($treshold) = $opt{'treshold'}; unless (defined $treshold) { $treshold = 50;};
my($blast_dir) = $opt{'blast_dir'}; unless ($blast_dir) { $blast_dir = '';};
my($database) = $opt{'database'}; unless ($database) { $database = '';};
my($program) = $opt{'program'}; unless ($program) { $program = 'blastp';};
my($align) = $opt{'hits'}; unless (defined $align) { $align = 1;};
my($matrix) = $opt{'matrix'}; unless ($matrix) { $matrix = 'BLOSUM62';};
my($filter) = $opt{'filter'}; unless ($filter) { $filter = 'F';};
my($method) = $opt{'method'}; unless ($method) {print {*STDERR} "Flag -method must be defined\n"; &HELP();};
my($organism) = $opt{'organism'}; unless ($organism) { $organism = 'All organisms';};
my($process) = $opt{'processor'}; unless ($process) { $process = 1;};
my($param) = $opt{'quiet'}; unless ($param) { $param = 'off';};
my($gigablast) = $opt{'gigablast'}; unless ($gigablast) {$gigablast = 'no';};
if ( $method !~ /(^geneid$|^pdbid$|^profile$)/i ){
print {*STDERR} "unknown method for the flag -method\n";
&HELP();
}
if ( $treshold<0 || $treshold>100 ){
print {*STDERR} "\nout of range for the option -treshold \n";
&HELP();
}
if ( $cover_tresh<0 || $cover_tresh>100 ){
print {*STDERR} "\nout of range for the option -cover \n";
&HELP();
}
if ( $align<0 ){
print {*STDERR} "\n error with option align\n";
&HELP();
}
if ( $gigablast !~ /^yes$|^no$/i ){
print {*STDERR} "invalid argument for gigaglast option: yes/no\n";
exit 1;
}
if ( $filter !~ /^[TFRLMCV]{1}$|^off$/i ){
print {*STDERR} "valid values for -filter are T,F,R,L,M,C,or V!\n";
exit 1;
}
if ( $matrix !~ /PAM30|PAM70|BLOSUM45|BLOSUM80|BLOSUM62/ ){
print {*STDERR} "valid values for -matrix are PAM30,PAM70,BLOSUM45,BLOSUM80 or BLOSUM62\n";
exit 1;
}
if ( $outfil eq '' && $method=~ /^profile$/i ){
$outfil = 'default_profile.template';
}
if ( $param!~ /^on$|^off$/i ){
print {*STDERR} "valid values for -quiet is on or off\n";
exit 1;
}
my ($orgn, @all_orgn) = &ORGN($organism);
return ($program,$database,$blast_dir,$query_file,
$outfil,$treshold,$cover_tresh,$evalue_tresh,
$align,$matrix,$filter,$method,$orgn,$process,$param,$gigablast);
}
#--------------------------------------------------------------------------------------------------
sub RECOVER
{
my($pdb_result,$aln_length,$length_query)=@_;
sub RECOVER {
my ($pdb_result, $aln_length, $length_query) = @_;
my $nb_gap=0;
if ($pdb_result=~ /(score.+?\n\n\n).+?score/ism) #cas ou plusieurs HSP, prend que le 1er
{ $pdb_result= $1;}
if ( $pdb_result=~ /(score.+?\n\n\n).+?score/ism ){#If several HSP, take the 1st
$pdb_result = $1;
}
$length_query =~ s/,//g;
my ($requete) = join('',($pdb_result=~/^Query(.*)\n/gm));
my ($requete) = join('', ($pdb_result=~/^Query(.*)\n/gm) );
$requete =~ s/[^A-Z-]//g;
my(@sequence) = split('',$requete);
for (my $i=0; $i<=$#sequence; $i++)
{
if($sequence[$i] eq "-"){ ++$nb_gap; }
my (@sequence) = split('', $requete);
for (my $i=0; $i<=$#sequence; $i++){
if ( $sequence[$i] eq '-'){
++$nb_gap;
}
}
my($recouvrement)= sprintf("%-3d",(($aln_length-$nb_gap)/$length_query)*100);
my ($recouvrement) = sprintf("%-3d", (($aln_length-$nb_gap)/$length_query)*100 );
undef(@sequence);
return ($recouvrement,$nb_gap);
return ($recouvrement, $nb_gap);
}
#--------------------------------------------------------------------------------------------------
sub WEB_BLAST
{
open (SOR1, '>', 'web_tempo.result') or die;
my($query_file,$Eval,$program,$database,$matrix,$method,$align,$orgn,$filter)=@_;
my $aln_view, my $format='Txt';
my($description)=$align;
if ($method=~/^profile$/i) { $aln_view ='FlatQueryAnchoredNoIdentities'} else { $aln_view ='Pairwise'}
if ($filter eq 'F') { $filter='off';}
sub WEB_BLAST {
open (my $SOR1, '>', 'web_tempo.result') or die;
my ($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter) = @_;
my $aln_view;
my $format = 'Txt';
my ($description) = $align;
if ( $method=~/^profile$/i ){
$aln_view = 'FlatQueryAnchoredNoIdentities';
}
else {
$aln_view = 'Pairwise';
}
$/='>';
open(FIC,$query_file) or die "can not open $query_file $!\n";
my(@sequences)=<FIC>;
close FIC;
if ( $filter eq 'F' ){
$filter = 'off';
}
$/ = '>';
open( my $FIC, '<', $query_file) or die "cannot open $query_file $!\n";
my (@sequences) = <$FIC>;
close $FIC;
shift(@sequences);
foreach my $sequence(@sequences)
{
$sequence=~ s/>//g;
$sequence=">$sequence";
my($name)=($sequence=~ /^>(.+)\n/);
push(@names, $name);
my($encoded_query)= uri_escape($sequence);
push (@list_encoded, $encoded_query);
}
foreach my $sequence(@sequences){
$sequence =~ s/>//g;
$sequence = ">$sequence";
my ($name) = ($sequence =~ /^>(.+)\n/);
push(@names, $name);
my ($encoded_query) = uri_escape($sequence);
push (@list_encoded, $encoded_query);
}
undef(@sequences);
if (scalar (@names != @list_encoded)) { die "error $!";}
foreach my $encoded_seq(@list_encoded)
{
my $nb=0;
print {*STDERR} "BLAST $names[$i]...";
if ( scalar (@names != @list_encoded) ){
die "error $!";
}
foreach my $encoded_seq(@list_encoded){
my $nb = 0;
print {*STDERR} "BLAST $names[$i]...";
#-- BUILD THE REQUEST
my($arguments) = "CMD=Put&ENTREZ_QUERY=$orgn&CDD_SEARCH=off&FILTER=$filter&MATRIX_NAME=$matrix&PROGRAM=$program&DATABASE=$database&QUERY=" . $encoded_seq;
my($req) = new HTTP::Request POST => 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi';
$req -> content_type('application/x-www-form-urlencoded');
$req -> content($arguments);
my ($arguments) = "CMD=Put&ENTREZ_QUERY=$orgn&CDD_SEARCH=off&FILTER=$filter&MATRIX_NAME=$matrix&PROGRAM=$program&DATABASE=$database&QUERY=" . $encoded_seq;
my ($req) = new HTTP::Request POST => 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi';
$req -> content_type('application/x-www-form-urlencoded');
$req -> content($arguments);
#-- GET THE RESPONSE : PARSE OUT THE REQUEST ID and THE ESTIMATED TIME
my($response) = $ua -> request($req);
if ($response -> content =~ /Server Error/i) { die "Server Error at NCBI!!Sorry try later\n"; }
$response -> content =~ /^\s{4}RID = (.*)$/m; my($rid) = $1;
$response -> content =~ /^\s{4}RTOE = (.*)$/m; my($wait)= $1;
unless ($rid && $wait) { die "parse error: $!" };
for (my $j=0; $j<=$wait/2; $j++) { print {*STDERR} "."; sleep 2; }
my($verif)=0;
while ()
{
for (my $j=0; $j<=5; $j++) { print {*STDERR} "."; sleep 1; }
$req = new HTTP::Request GET =>
"http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=$rid";
$response = $ua->request($req);
if ($response->content =~ /Status=WAITING/im) { next; }
elsif ($response->content =~ /Status=FAILED/im) { print {*STDERR} "Search $rid failed\n"; $verif=1; last; }
elsif ($response->content =~ /Status=UNKNOWN/im) { print {*STDERR} "Search $rid expired\n"; $verif=1; last; }
elsif ($response->content =~ /Status=READY/im)
{
if ($response->content =~ /ThereAreHits=yes/im){last;}
else { print {*STDERR} "No hits found.\n";$verif=1;last; }
}
elsif ($response->content =~ /can\'t connect/im)
{
print {*STDERR} "\nCan't connect to www.ncbi.nlm.nih.gov:80...new attempt";
if ($nb <3) { ++$nb; next; }
else { print {*STDERR} "sorry, BLAST $names[$i] failed after 3 attempts!!\n"; $verif=1; last;}
}
else { print {*STDERR} "unknown error\n"; $verif=1; last; }
}
if($verif==1){ ++$i; next; }
my ($response) = $ua -> request($req);
if ( $response -> content =~ /Server Error/i ){
die "Server Error at NCBI!!Sorry try later\n";
}
$response -> content =~ /^\s{4}RID = (.*)$/m;
my ($rid) = $1;
$response -> content =~ /^\s{4}RTOE = (.*)$/m;
my ($wait)= $1;
unless ($rid && $wait){
die "parse error: $!";
}
for (my $j=0; $j<=$wait/2; $j++){
print {*STDERR} '.';
sleep 2;
}
my ($verif) = 0;
while (){
for (my $j=0; $j<=5; $j++){
print {*STDERR} '.';
sleep 1;
}
$req = new HTTP::Request GET =>
"http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=$rid";
$response = $ua->request($req);
if ( $response->content =~ /Status=WAITING/im ){
next;
}
elsif ( $response->content =~ /Status=FAILED/im ){
print {*STDERR} "Search $rid failed\n";
$verif = 1;
last;
}
elsif ( $response->content =~ /Status=UNKNOWN/im ){
print {*STDERR} "Search $rid expired\n";
$verif = 1;
last;
}
elsif ( $response->content =~ /Status=READY/im ){
if ( $response->content =~ /ThereAreHits=yes/im ){
last;
}
else {
print {*STDERR} "No hits found.\n";
$verif = 1;
last;
}
}
elsif ( $response->content =~ /can\'t connect/im ){
print {*STDERR} "\nCan't connect to www.ncbi.nlm.nih.gov:80...new attempt";
if ( $nb <3 ){
++$nb;
next;
}
else {
print {*STDERR} "sorry, BLAST $names[$i] failed after 3 attempts!!\n";
$verif = 1;
last;
}
}
else {
print {*STDERR} "unknown error\n";
$verif = 1;
last;
}
}
if( $verif==1 ){
++$i;
next;
}
#-- GET RESULT
while ()
{
sleep 3;
while (){
sleep 3;
$req = new HTTP::Request GET => "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_TYPE=$format&FILTER=off&EXPECT=$Eval&ALIGNMENTS=$align&DESCRIPTIONS=$align&ALIGNMENT_VIEW=$aln_view&RID=$rid";
$response = $ua -> request($req);
if ($response->content =~ /Altschul/i) { print {*STDERR} "Search Complete\n"; push(@list_pdb,$response -> content);last; }
else { next; }
}
print SOR1 (@list_pdb);
print {$SOR1} (@list_pdb);
++$i;
}
undef (@list_encoded);
close SOR1;
close $SOR1;
return (@list_pdb);
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment