Commit 25da5587 authored by Sebastien Moretti's avatar Sebastien Moretti
Browse files

Use new NCBI eutilsAPIKey

parent dc629ff6
# #
#Version: 4.2.1 #Version: 4.2.2
#OS: Linux #OS: Linux
#Author: Sebastien Moretti #Author: Sebastien Moretti
#E-mail: moretti.sebastien [AT] gmail.com #E-mail: moretti.sebastien [AT] gmail.com
# #
4.2.2
Use new and simpler blast submitter/parser
4.2.1 4.2.1
Fix for revtrans option that cannot be strictly boolean for the new Fix for revtrans option that cannot be strictly boolean for the new
Tcoffee web server Tcoffee web server
......
...@@ -31,6 +31,9 @@ use Views; # Non-text outputs, e.g. HTML/CSS ...@@ -31,6 +31,9 @@ use Views; # Non-text outputs, e.g. HTML/CSS
my $cachePath = '/scratch/beegfs/monthly/tcoffee/ProtoGene_Cache'; # Cache directory my $cachePath = '/scratch/beegfs/monthly/tcoffee/ProtoGene_Cache'; # Cache directory
my $cacheStorageTime = 15; # Do not update sequences younger than X days my $cacheStorageTime = 15; # Do not update sequences younger than X days
# Identification parameters appended to every NCBI E-utilities request URL
# (as &tool=...&email=...&api_key=...).
# The e-mail is stored verbatim: it is interpolated into URL strings, not into
# regular expressions, so quotemeta() must NOT be applied -- it would put
# literal backslashes in the address ('tcoffee\@vital\-it\.ch') sent to NCBI.
# A single-quoted literal already prevents any '@' interpolation.
my $eutilsEmail  = 'tcoffee@vital-it.ch'; # Put your e-mail for NCBI eutils requests
my $eutilsTool   = 'ProtoGene';           # Tool name reported to NCBI
# NOTE(review): API key is committed in clear text; consider reading it from
# the environment or a private config file instead.
my $eutilsAPIKey = '2fc650885ef3f15ca562083e6332e7103c08';
my $userEMail = 'moretti.sebastien@gmail.com'; # To receive e-mails with encountered problems; leave blank to inactive my $userEMail = 'moretti.sebastien@gmail.com'; # To receive e-mails with encountered problems; leave blank to inactive
### BLAST parameters ### ### BLAST parameters ###
...@@ -61,7 +64,7 @@ my $exonerate_exe = 'exonerate'; # Exonerate 1.0 because current parser on ...@@ -61,7 +64,7 @@ my $exonerate_exe = 'exonerate'; # Exonerate 1.0 because current parser on
################## Option management ################## Option management
my ($msa, $revtrans, $pep, $hideBOJ, $run_name, $template, $lim) = ('', 0, 0, 0, '', '', 0); my ($msa, $revtrans, $pep, $hideBOJ, $run_name, $template, $lim) = ('', 0, 0, 0, '', '', 0);
my ($cache, $cleancache) = ('update', 'update'); #TODO Finish to implement my ($cache, $cleancache) = ('update', 'update');#TODO Finish to implement
my ($debug, $tmp) = (0, 0); my ($debug, $tmp) = (0, 0);
my ($db, $species, $local, $giga) = ($blast_param->{'db1'}, $blast_param->{'species'}, 0, 0); my ($db, $species, $local, $giga) = ($blast_param->{'db1'}, $blast_param->{'species'}, 0, 0);
my %opts = ('msa|in=s' => \$msa, # Input sequences my %opts = ('msa|in=s' => \$msa, # Input sequences
...@@ -578,11 +581,11 @@ PROTOGENE re-builds the original alignment with nucleotidic information it has g ...@@ -578,11 +581,11 @@ PROTOGENE re-builds the original alignment with nucleotidic information it has g
=item Vital-IT computing center =item Vital-IT computing center
=item Swiss Institute of Bioinformatics =item SIB Swiss Institute of Bioinformatics
=item Lausanne, Switzerland =item Lausanne, Switzerland
=item http://www.vital-it.ch/ =item https://www.vital-it.ch/
=back =back
...@@ -891,7 +894,7 @@ sub blastPAcc2PGI{ ...@@ -891,7 +894,7 @@ sub blastPAcc2PGI{
my $protGI = ''; my $protGI = '';
#FIXME: should be ${blastHit}[pacc] but something is broken at NCBI #FIXME: should be ${blastHit}[pacc] but something is broken at NCBI
my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=protein&term=$blastHit&retmode=xml&tool=ProtoGene&email=smoretti\@unil.ch"); my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=protein&term=$blastHit&retmode=xml&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey");
if ( $content =~ /<Id>(\d+)<\/Id>/ ){ if ( $content =~ /<Id>(\d+)<\/Id>/ ){
$protGI = $1; $protGI = $1;
} }
...@@ -905,7 +908,7 @@ sub protGI2NTGIs{ ...@@ -905,7 +908,7 @@ sub protGI2NTGIs{
my $ntGIs = ''; my $ntGIs = '';
my $geneID = ''; my $geneID = '';
my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=protein&db=nuccore,gene&id=$protGI&retmode=xml&tool=ProtoGene&email=smoretti\@unil.ch"); my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=protein&db=nuccore,gene&id=$protGI&retmode=xml&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey");
my @xml = split("\n", $content); my @xml = split("\n", $content);
my $flag = 0; my $flag = 0;
...@@ -942,8 +945,8 @@ sub geneID2Chr{ ...@@ -942,8 +945,8 @@ sub geneID2Chr{
my $chr = ''; my $chr = '';
my ($amont, $aval) = ('', ''); my ($amont, $aval) = ('', '');
# my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=$geneID&retmode=xml&tool=ProtoGene&email=smoretti\@unil.ch"); # my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=$geneID&retmode=xml&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey");
my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id=$geneID&retmode=xml&tool=ProtoGene&email=smoretti\@unil.ch"); my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id=$geneID&retmode=xml&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey");
my @xml = split("\n", $content); my @xml = split("\n", $content);
my $flag = 0; my $flag = 0;
...@@ -992,10 +995,10 @@ sub downloadSeqFromGIs{ ...@@ -992,10 +995,10 @@ sub downloadSeqFromGIs{
GET_SEQ: GET_SEQ:
for(my $a=0; $a<=$#acc; $a++){ for(my $a=0; $a<=$#acc; $a++){
if ( $amont =~ /^\d+$/ && $aval =~ /^\d+$/ ){ if ( $amont =~ /^\d+$/ && $aval =~ /^\d+$/ ){
fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$acc[$a]&rettype=fasta&retmode=text&from=$amont&to=$aval&tool=ProtoGene&email=smoretti\@unil.ch", "$cache/$acc[$a]-$amont.fas") ; fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$acc[$a]&rettype=fasta&retmode=text&from=$amont&to=$aval&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey", "$cache/$acc[$a]-$amont.fas") ;
} }
else{ else{
fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$acc[$a]&rettype=fasta&retmode=text&tool=ProtoGene&email=smoretti\@unil.ch'", "$cache/$acc[$a].fas"); fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$acc[$a]&rettype=fasta&retmode=text&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey", "$cache/$acc[$a].fas");
} }
} }
...@@ -1014,17 +1017,17 @@ sub download_seq{ ...@@ -1014,17 +1017,17 @@ sub download_seq{
if ( $pacc2puid !~ /^[NAX][CGTSWZMR]_/ ){ #Not RefSeq acc if ( $pacc2puid !~ /^[NAX][CGTSWZMR]_/ ){ #Not RefSeq acc
#pacc = primary acc NOT prot acc ! #265666 -> S55551 #pacc = primary acc NOT prot acc ! #265666 -> S55551
my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=nucleotide&term=${pacc2puid}[pacc]&tool=ProtoGene&email=smoretti\@unil.ch"); my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=nucleotide&term=${pacc2puid}[pacc]&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey");
if ( $content =~ /<Id>(\d+)<\/Id>/ ){ if ( $content =~ /<Id>(\d+)<\/Id>/ ){
$pacc2puid = $1; $pacc2puid = $1;
} }
} }
if ( $amont =~ /^\d+$/ && $aval =~ /^\d+$/ ){ if ( $amont =~ /^\d+$/ && $aval =~ /^\d+$/ ){
fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$pacc2puid&rettype=fasta&retmode=text&from=$amont&to=$aval&tool=ProtoGene&email=smoretti\@unil.ch", "$cache/$acc[$a]--$from-$to.fas"); fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$pacc2puid&rettype=fasta&retmode=text&from=$amont&to=$aval&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey", "$cache/$acc[$a]--$from-$to.fas");
} }
else{ else{
fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$pacc2puid&rettype=fasta&retmode=text&tool=ProtoGene&email=smoretti\@unil.ch", "$cache/$acc[$a].fas"); fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$pacc2puid&rettype=fasta&retmode=text&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey", "$cache/$acc[$a].fas");
} }
#FIXME Don't remember exactly what all this function does #FIXME Don't remember exactly what all this function does
......
- Replace webblast.pl by a simpler blast caller! Replace webblast.pl by a simpler blast caller!
=> use tsv blast format to make parsing easier and more perennial => use tsv blast format to make parsing easier and more perennial
=> parser db agnostic => parser db agnostic
=> only blast NCBI ??? => only blast NCBI ???
- Better deal with temporary files: must have a unique name !!!! Better deal with temporary files: must have a unique name !!!!
- Check what gigablaster returns differently between morning & afternoon - Check what gigablaster returns differently between morning & afternoon
=> local blast instead ? => local blast instead ?
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment