Commit 7364ef64 authored by Sebastien Moretti's avatar Sebastien Moretti
Browse files

Fix template acc bug

parent a7e69e85
......@@ -269,14 +269,6 @@ for(my $r=0; $r<=$#original_names; $r++){
$fasta_header =~ s{^>}{}; # Full FASTA header
$short_name =~ s{^([^\s\t]+).*$}{$1}; # Short name without description
# Prepare sequence $r for BLAST
open(my $READY2BLAST, '>', "$cache/$date.seq2blast.fas.$r");
my $noGap = $original_seq[$r];
$noGap =~ s{\-}{}g; #remove gaps
print {$READY2BLAST} ">$r\n$noGap\n";
close $READY2BLAST;
undef $noGap;
my @equivalent_blast_hits; #Multiple BLAST hits with the same highest e-value/score
......@@ -292,6 +284,13 @@ for(my $r=0; $r<=$#original_names; $r++){
}
else{
# else send them to webblast
# Prepare sequence $r for BLAST
open(my $READY2BLAST, '>', "$cache/$date.seq2blast.fas.$r");
my $noGap = $original_seq[$r];
$noGap =~ s{\-}{}g; #remove gaps
print {$READY2BLAST} ">$r\n$noGap\n";
close $READY2BLAST;
if ($r==0){
# Show BLAST parameters only for the first BLAST run
display_blast_param();
......@@ -331,7 +330,7 @@ for(my $r=0; $r<=$#original_names; $r++){
# Get Nt sequence from template
if ( exists($template_NT->{$short_name}) && $template_NT->{$short_name} ne '' ){
if ( $template_NT->{$short_name} !~ /^My_Seq$/i ){
downloadSeq($cache, $date, '', '', $template_NT->{$short_name});
download_seq($cache, $date, '', '', $template_NT->{$short_name});
@nt_GIs = $template_NT->{$short_name};
}
else {
......@@ -359,6 +358,7 @@ for(my $r=0; $r<=$#original_names; $r++){
@failureStatus = ('No_nt_link', $equivalent_blast_hits[$qq]);
next HIT_LINK;
}
#TODO: remove redundancy here !!!
print " linked with [$ntGIs $geneID]\n";
......@@ -374,7 +374,7 @@ for(my $r=0; $r<=$#original_names; $r++){
if ( $chr ne '' ){
#Get Chr seq
downloadSeq($cache, $date, $amont, $aval, $chr);
download_seq($cache, $date, $amont, $aval, $chr);
$intronStep = 1;
@nt_GIs = ("$chr:$amont-$aval", @nt_GIs);
......@@ -1022,7 +1022,7 @@ sub downloadSeqFromGIs{
return;
}
sub downloadSeq{
sub download_seq{
my ($cache, $date, $amont, $aval, @acc) = @_;
my $cp = 0;
......@@ -1031,8 +1031,9 @@ sub downloadSeq{
DOWNL_SEQ:
for(my $a=0; $a<=$#acc; $a++){
my $pacc2puid = $acc[$a];
if ( $pacc2puid !~ /^[NAX][CGTWZM]_/ ){
if ( $pacc2puid !~ /^[NAX][CGTSWZMR]_/ ){ #Not RefSeq acc
#pacc = primary acc NOT prot acc ! #265666 -> S55551
system("wget -q -O $cache/${date}_${acc[$a]}.gui 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?&db=nucleotide&term=${pacc2puid}[pacc]&tool=ProtoGene&email=smoretti\@unil.ch'");
open(my $GUI, '<', "$cache/${date}_${acc[$a]}.gui");
DOWNL:
......@@ -1592,17 +1593,17 @@ sub checkTemplate {
chomp($name);
$realSeqName = $name;
$realSeqName =~ s/^(>.*?)_[GS]_/$1/;
$realSeqName =~ s/^(>.*?)_[GS]_.*$/$1/;
$realSeqName =~ s{\s}{}g;
$realSeqName = 'wHaT' if ($realSeqName !~ /[\w\>]/);
my $protTarget = $name;
$protTarget =~ s/^.*_S_ *([^ ]+)/$1/;
$protTarget =~ s/^.*_S_ *([^ ]+) .*$/$1/;
$protTarget = '' if ( $protTarget !~ /^[\w]+$/ || $protTarget eq 'No_BLASTp_Result' || $protTarget =~ /^My_own_seq$/ );
$proteinTarget{$realSeqName} = $protTarget;
$proteinTarget{$realSeqName} = $protTarget;
my $nuclTarget = $name;
$nuclTarget =~ s/^.*_G_ *([^ ]+)/$1/;
$nuclTarget =~ s/^.*_G_ *([^ ]+) .*$/$1/;
$nuclTarget = '' if ( $nuclTarget !~ /^[\w]+$/ || $nuclTarget eq 'No_nt_link' || $nuclTarget =~ /Not_searched/ || $nuclTarget =~ /navailable$/ || $nuclTarget eq 'Alignment_failure' );
$nucleotideTarget{$realSeqName} = $nuclTarget;
$nucleotideSeq{$realSeqName} = '';
......@@ -1623,7 +1624,7 @@ sub checkTemplate {
template_failure();
exit 1;
}
return (\%proteinTarget, \%nucleotideTarget, \%nucleotideSeq);
return (\%nucleotideTarget, \%proteinTarget, \%nucleotideSeq);
}
return(undef, undef, undef);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment