Commit 25da5587 authored by Sebastien Moretti's avatar Sebastien Moretti
Browse files

Use new NCBI eutilsAPIKey

parent dc629ff6
#
#Version: 4.2.1
#Version: 4.2.2
#OS: Linux
#Author: Sebastien Moretti
#E-mail: moretti.sebastien [AT] gmail.com
#
4.2.2
Use new and simpler blast submitter/parser
4.2.1
Fix for revtrans option that cannot be strictly boolean for the new
Tcoffee web server
......
......@@ -31,6 +31,9 @@ use Views; # Non-text outputs, e.g. HTML/CSS
# Site-specific configuration: cache location/lifetime and NCBI E-utilities identification.
my $cachePath = '/scratch/beegfs/monthly/tcoffee/ProtoGene_Cache'; # Cache directory
my $cacheStorageTime = 15; # Do not update sequences younger than X days
# NOTE(review): quotemeta() backslash-escapes every non-word character, so this holds
# 'tcoffee\@vital\-it\.ch'. The value is interpolated as-is into the E-utilities URLs
# (&email=$eutilsEmail), so the backslashes are sent to NCBI -- confirm this is intended.
my $eutilsEmail = quotemeta('tcoffee@vital-it.ch'); # Put your e-mail for NCBI eutils requests
my $eutilsTool = 'ProtoGene'; # Sent as the "tool" parameter of NCBI eutils requests
# NOTE(review): the API key is a credential committed in clear text; consider reading it
# from the environment or a non-versioned configuration file instead.
my $eutilsAPIKey = '2fc650885ef3f15ca562083e6332e7103c08'; # Sent as the "api_key" parameter of NCBI eutils requests
my $userEMail = 'moretti.sebastien@gmail.com'; # Address that receives e-mails about encountered problems; leave blank to disable
### BLAST parameters ###
......@@ -61,7 +64,7 @@ my $exonerate_exe = 'exonerate'; # Exonerate 1.0 because current parser on
################## Option management
my ($msa, $revtrans, $pep, $hideBOJ, $run_name, $template, $lim) = ('', 0, 0, 0, '', '', 0);
my ($cache, $cleancache) = ('update', 'update'); #TODO Finish to implement
my ($cache, $cleancache) = ('update', 'update');#TODO Finish to implement
my ($debug, $tmp) = (0, 0);
my ($db, $species, $local, $giga) = ($blast_param->{'db1'}, $blast_param->{'species'}, 0, 0);
my %opts = ('msa|in=s' => \$msa, # Input sequences
......@@ -578,11 +581,11 @@ PROTOGENE re-builds the original alignment with nucleotidic information it has g
=item Vital-IT computing center
=item Swiss Institute of Bioinformatics
=item SIB Swiss Institute of Bioinformatics
=item Lausanne, Switzerland
=item http://www.vital-it.ch/
=item https://www.vital-it.ch/
=back
......@@ -891,7 +894,7 @@ sub blastPAcc2PGI{
my $protGI = '';
#FIXME: should be ${blastHit}[pacc] but something is broken at NCBI
my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=protein&term=$blastHit&retmode=xml&tool=ProtoGene&email=smoretti\@unil.ch");
my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=protein&term=$blastHit&retmode=xml&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey");
if ( $content =~ /<Id>(\d+)<\/Id>/ ){
$protGI = $1;
}
......@@ -905,7 +908,7 @@ sub protGI2NTGIs{
my $ntGIs = '';
my $geneID = '';
my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=protein&db=nuccore,gene&id=$protGI&retmode=xml&tool=ProtoGene&email=smoretti\@unil.ch");
my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=protein&db=nuccore,gene&id=$protGI&retmode=xml&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey");
my @xml = split("\n", $content);
my $flag = 0;
......@@ -942,8 +945,8 @@ sub geneID2Chr{
my $chr = '';
my ($amont, $aval) = ('', '');
# my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=$geneID&retmode=xml&tool=ProtoGene&email=smoretti\@unil.ch");
my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id=$geneID&retmode=xml&tool=ProtoGene&email=smoretti\@unil.ch");
# my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=$geneID&retmode=xml&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey");
my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id=$geneID&retmode=xml&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey");
my @xml = split("\n", $content);
my $flag = 0;
......@@ -992,10 +995,10 @@ sub downloadSeqFromGIs{
GET_SEQ:
for(my $a=0; $a<=$#acc; $a++){
if ( $amont =~ /^\d+$/ && $aval =~ /^\d+$/ ){
fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$acc[$a]&rettype=fasta&retmode=text&from=$amont&to=$aval&tool=ProtoGene&email=smoretti\@unil.ch", "$cache/$acc[$a]-$amont.fas") ;
fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$acc[$a]&rettype=fasta&retmode=text&from=$amont&to=$aval&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey", "$cache/$acc[$a]-$amont.fas") ;
}
else{
fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$acc[$a]&rettype=fasta&retmode=text&tool=ProtoGene&email=smoretti\@unil.ch'", "$cache/$acc[$a].fas");
fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$acc[$a]&rettype=fasta&retmode=text&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey", "$cache/$acc[$a].fas");
}
}
......@@ -1014,17 +1017,17 @@ sub download_seq{
if ( $pacc2puid !~ /^[NAX][CGTSWZMR]_/ ){ #Not RefSeq acc
#pacc = primary acc NOT prot acc ! #265666 -> S55551
my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=nucleotide&term=${pacc2puid}[pacc]&tool=ProtoGene&email=smoretti\@unil.ch");
my $content = fetch("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=nucleotide&term=${pacc2puid}[pacc]&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey");
if ( $content =~ /<Id>(\d+)<\/Id>/ ){
$pacc2puid = $1;
}
}
if ( $amont =~ /^\d+$/ && $aval =~ /^\d+$/ ){
fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$pacc2puid&rettype=fasta&retmode=text&from=$amont&to=$aval&tool=ProtoGene&email=smoretti\@unil.ch", "$cache/$acc[$a]--$from-$to.fas");
fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$pacc2puid&rettype=fasta&retmode=text&from=$amont&to=$aval&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey", "$cache/$acc[$a]--$from-$to.fas");
}
else{
fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$pacc2puid&rettype=fasta&retmode=text&tool=ProtoGene&email=smoretti\@unil.ch", "$cache/$acc[$a].fas");
fetch_fasta("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=$pacc2puid&rettype=fasta&retmode=text&tool=$eutilsTool&email=$eutilsEmail&api_key=$eutilsAPIKey", "$cache/$acc[$a].fas");
}
#FIXME Don't remember exactly what all this function does
......
- Replace webblast.pl by a simpler blast caller!
Replace webblast.pl by a simpler blast caller!
=> use tsv blast format to make parsing easier and more perennial
=> parser db agnostic
=> only blast NCBI ???
- Better deal with temporary files: must have a unique name !!!!
Better deal with temporary files: must have a unique name !!!!
- Check what gigablaster returns differently between morning & afternoon
=> local blast instead ?
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment