Commit 07eece73 authored by Sebastien Moretti's avatar Sebastien Moretti
Browse files

start cleaning blast parsing

parent 922d0029
......@@ -507,130 +507,150 @@ sub LOCAL_BLAST
}
#-----------------------------------------------------------------------------------------------------------------------------
sub PARSING
{
sub PARSING {
my ($list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast) = @_;
my(@list_pdb)=@$list_pdb; my(@result_not_sort)=();my $n=0;
open (my $SOR, '>', 'webblast.log') or die;
if ($gigablast=~ /^yes$/i) { $locale=2;$distant=0;}
if ($gigablast=~ /^no$/i) { $locale=1;}
if ($distant==1) { $locale=0;}
foreach my $pdb_result(@list_pdb)
{
my $query, my $length_query, my($pdb_id), my $comp=0;
if ($pdb_result=~/No hits found/m) { print {$SOR} $pdb_result; next;}
my (@list_pdb) = @$list_pdb;
my (@result_not_sort) = ();
my $n = 0;
open (my $SOR, '>', 'webblast.log') or die;
$pdb_result=~ s/ALIGNMENTS//;
local $/=undef;
my(@intra_res)= split(/(?=\n\n>)/s,$pdb_result);
if ( $gigablast=~ /^yes$/i ) { $locale = 2;$distant = 0;}
if ( $gigablast=~ /^no$/i ) { $locale = 1;}
if ( $distant==1 ) { $locale = 0;}
foreach my $pdb_result(@list_pdb){
my $query, my $length_query, my($pdb_id), my $comp=0;
if ( $pdb_result=~/No hits found/m ){
print {$SOR} $pdb_result;
next;
}
if ( $distant==1 )
{
my $version_d, my $database_d, my $poste_d;
undef $/; ($query,$length_query)=($intra_res[0] =~ /Query=\s+(\S+)\s+Length=\s*(\d+)/smo);
$/="\n";
open (F3, '<', 'web_tempo.result') or die ;
while ($_=<F3>)
{
if ($_=~ /(BLASTP\s+\S+)/o) { $version_d = $1;}
if ($_=~ /Database:\s+(.+?)$/o) { $database_d = $1;}
if ($_=~ /Posted date:\s*(.+?)$/o) { $poste_d = $1; last;}
}
close F3;
# $database_d = $database if ( $database_d =~ m{/} );
unless ($quiet=~ /on/i || $n>0) {++$n;
print {*STDOUT} "
$pdb_result =~ s/ALIGNMENTS//;
local $/ = undef;
my (@intra_res) = split(/(?=\n\n>)/s, $pdb_result);
if ( $distant==1 ){
my $version_d, my $database_d, my $poste_d;
undef $/; ($query, $length_query) = ( $intra_res[0] =~ /Query=\s+(\S+)\s+Length=\s*(\d+)/smo );
$/ = "\n";
open (F3, '<', 'web_tempo.result') or die ;
while ($_=<F3>){
if ($_=~ /(BLASTP\s+\S+)/o) { $version_d = $1;}
if ($_=~ /Database:\s+(.+?)$/o) { $database_d = $1;}
if ($_=~ /Posted date:\s*(.+?)$/o) { $poste_d = $1; last;}
}
close F3;
# $database_d = $database if ( $database_d =~ m{/} );
unless ($quiet=~ /on/i || $n>0){
++$n;
print {*STDOUT} "
Version: $version_d
Database: $database_d
Posted date: $poste_d\n\n";
}
}
else { ($query,$length_query)=($intra_res[0] =~ /\s*(.+?)\s.+?\(([\d,]+) letters/smo);}
shift(@intra_res) if ( exists($intra_res[1]) );
}
}
else{
($query, $length_query) = ( $intra_res[0] =~ /\s*(.+?)\s.+?\(([\d,]+) letters/smo );
}
foreach my $intra_res(@intra_res) #look for the different results of the query
{
my($aln_length,$identity) = ($intra_res=~ /^\sIdentities = \d+\/(\d+)\s\((.+?)\)/im);
my($recouvrement,$gap) = &RECOVER($intra_res,$aln_length,$length_query);
my($evalue) = ($intra_res=~ /Expect = (.+?)\s/im);
my($bits ) = ($intra_res=~ /Score =\s+([\d.]+)\s/im);
unless ($method !~ /^geneid$/i) { if ($comp<=$bits) { $comp=$bits;} else { last;} }
if ( $query eq '' || $length_query eq '' || $aln_length eq '' || $identity eq '' || $recouvrement eq '' || $gap eq '' )
{ print {$SOR} " can't parse $pdb_result"; next; }
if ($method =~ /^pdbid$/i)
{
if ($locale == 1) { ($pdb_id) = ($intra_res=~ /^>(.{6})/im); $pdb_id=~ s/_//; $pdb_id=uc($pdb_id);}
else { ($pdb_id) = ($intra_res=~ /^>pdb\|(.{6})/im); $pdb_id=~ s/\|//; }
($evalue) = ($intra_res=~ /Expect = (.+?)\s/im);
push (@result_not_sort,("$query\t$pdb_id\t$evalue\t$identity\t$recouvrement\t"));
}
elsif ($method =~/^geneid$/i)
{
if ($database !~ /pdb/i && $database !~ /swiss/i && ($locale=~/1|2/))
{
while ($intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/sg)
{
my $databank =$1;
my $last =$4;
my $refseq =$2;
if ($databank eq 'pdb') { $refseq.=$last }
$refseq=~ s/\.\d+$//;
push (@result_not_sort,"$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
}
}
elsif ($database=~ /pdb|pdbaa/i && ($locale==1 || $locale==2))
{
my $refseq;
if($locale==1) {($refseq) = ($intra_res=~ />(.*?)\s/o); $refseq=~ s/_//; }
else {($refseq) = ($intra_res=~ /^>pdb\|(.{6})/im);$refseq=~ s/\|//;}
unless ($refseq) { print {$SOR} $intra_res; next; }
push (@result_not_sort,("$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tpdb"));
}
elsif ($distant==1 )
{
my($resul)=&MULTI_EQUIVALENT($query,$identity,$recouvrement,$bits,$evalue,$intra_res);
push (@result_not_sort,$resul);
}
elsif ($database=~ /swiss/i)
{
my($refseq) = ($intra_res=~ />.*?sp\|(.+?)\|/o);
unless ($refseq) { print {$SOR} $pdb_result; next; }
$refseq=~ s/\.\d+$//;
push (@result_not_sort,("$query\t$refseq\t$identity\t$recouvrement\t$bits\tswiss_prot"));
}
}
else {die;}
}
shift(@intra_res) if ( exists($intra_res[1]) );
foreach my $intra_res(@intra_res){ #look for the different results of the query
my($aln_length, $identity) = ( $intra_res=~ /^\sIdentities = \d+\/(\d+)\s\((.+?)\)/im );
my($recouvrement, $gap) = &RECOVER($intra_res, $aln_length, $length_query);
my($evalue) = ( $intra_res=~ /Expect = (.+?)\s/im );
my($bits) = ( $intra_res=~ /Score =\s+([\d.]+)\s/im );
unless ( $method !~ /^geneid$/i ){
if ( $comp<=$bits ){
$comp=$bits;
}
else{
last;
}
}
if ( $query eq '' || $length_query eq '' || $aln_length eq '' || $identity eq '' || $recouvrement eq '' || $gap eq '' ){
print {$SOR} " can't parse $pdb_result";
next;
}
if ( $method =~ /^pdbid$/i ){
if ( $locale==1 ){
($pdb_id) = ( $intra_res=~ /^>(.{6})/im );
$pdb_id =~ s/_//;
$pdb_id = uc($pdb_id);
}
else{
($pdb_id) = ( $intra_res=~ /^>pdb\|(.{6})/im );
$pdb_id =~ s/\|//;
}
($evalue) = ( $intra_res=~ /Expect = (.+?)\s/im );
push (@result_not_sort, "$query\t$pdb_id\t$evalue\t$identity\t$recouvrement\t");
}
elsif ( $method =~/^geneid$/i ){
if ( $database !~ /pdb/i && $database !~ /swiss/i && $locale =~ /1|2/){
while ( $intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/sg ){
my $databank = $1;
my $last = $4;
my $refseq = $2;
if ( $databank eq 'pdb' ){
$refseq .= $last;
}
$refseq =~ s/\.\d+$//;
push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
}
}
elsif ( $database=~ /pdb|pdbaa/i && ($locale==1 || $locale==2) ){
my $refseq;
if ( $locale==1 ){
($refseq) = ( $intra_res=~ />(.*?)\s/o );
$refseq =~ s/_//;
}
else{
($refseq) = ( $intra_res=~ /^>pdb\|(.{6})/im );
$refseq =~ s/\|//;
}
unless ($refseq){
print {$SOR} $intra_res;
next;
}
push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tpdb");
}
elsif ( $distant==1 ){
my ($resul) = &MULTI_EQUIVALENT($query, $identity, $recouvrement, $bits, $evalue, $intra_res);
push (@result_not_sort, $resul);
}
elsif ( $database=~ /swiss/i ){
my ($refseq) = ( $intra_res=~ />.*?sp\|(.+?)\|/o );
unless ($refseq){
print {$SOR} $pdb_result;
next;
}
$refseq =~ s/\.\d+$//;
push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\tswiss_prot");
}
}
else {die;}
}
}
close $SOR;
undef (@list_pdb);
if ($method =~/^geneid$/i) { return (@result_not_sort); }
else
{
my(@result_sort)=
map {$_->[1]}
sort { $b->[0]<=>$a->[0]}
map {[/\t([\d.]+)%/,$_]}
@result_not_sort;
undef(@result_not_sort);
return (@result_sort);
if ( $method =~/^geneid$/i ){
return (@result_not_sort);
}
else{
my(@result_sort) = map { $_->[1] }
sort { $b->[0]<=>$a->[0] }
map { [/\t([\d.]+)%/,$_] }
@result_not_sort;
undef(@result_not_sort);
return (@result_sort);
}
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment