Commit e5a2a4df authored by Sebastien Moretti
Browse files

Add a simple way to get chromosomal sequence information (gene accession and positions) via eSummary

parent 67e149c1
......@@ -33,7 +33,7 @@ $ENV{'PATH'} .= ':/mnt/local/bin/'; # additional path for executable on the serv
my $Version = '3.2.3';
my $Version = '3.2.4';
my $uct = 15; # UpdateCacheThreshold: number of days before update
my $cachedir = '/scratch/frt/tcoffee/ProtoGene_Cache'; # Cache directory
##### User settings ####################################
......@@ -506,9 +506,9 @@ PROTOGENE re-builds the original alignment with nucleotidic information it has g
=over 8
=item version 3.2.3
=item version 3.2.4
=item on Nov 1st, 2007
=item on Nov 28th, 2007
=back
......@@ -862,7 +862,8 @@ sub geneID2Chr{
for(my $rep=0;$rep <= 8; $rep++){
$count++;
#Gene Acc
system("wget -q -O $cache/${date}_${blastHit}gene.tmp 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=$geneID&retmode=xml'");
# system("wget -q -O $cache/${date}_${blastHit}gene.tmp 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&id=$geneID&retmode=xml'");
system("wget -q -O $cache/${date}_${blastHit}gene.tmp 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id=$geneID&retmode=xml'");
open(my $GACC, '<', "$cache/${date}_${blastHit}gene.tmp");
my $flag = 0;
GENE_CHR:
......@@ -870,24 +871,28 @@ sub geneID2Chr{
if ( $_ =~ /\<ERROR\>Empty id list \- nothing todo\<\/ERROR\>/ && $flag==0 ){
$rep = $rep-15 if ( $count==1 );
}
if ( $_ =~ /\<Entrezgene_locus\>/ && $flag==0 ){
$flag = 1;
}
elsif ( $_ =~ /\<Gene-commentary_type value=\"genomic\"\>/ && $flag==1 ){
$flag = 2;
# if ( $_ =~ /\<Entrezgene_locus\>/ && $flag==0 ){
if ( $_ =~ /<Item Name="GenomicInfoType" Type="Structure">/ && $flag==0 ){
$flag = 1;
}
elsif ( $_ =~ /\<Gene-commentary_type value=/ && $flag==2 ){
last GENE_CHR;
}
elsif ( $_ =~ /\<Gene-commentary_accession\>([\w\_\-\.]+)\<\/Gene-commentary_accession\>/ && $flag==2 ){
# elsif ( $_ =~ /\<Gene-commentary_type value=\"genomic\"\>/ && $flag==1 ){
# $flag = 2;
# }
# elsif ( $_ =~ /\<Gene-commentary_type value=/ && $flag==2 ){
# last GENE_CHR;
# }
# elsif ( $_ =~ /\<Gene-commentary_accession\>([\w\_\-\.]+)\<\/Gene-commentary_accession\>/ && $flag==2 ){
elsif ( $_ =~ /<Item Name="ChrAccVer" Type="String">([\w\_\-\.]+)\.?\d*<\/Item>/ && $flag==1 ){
$chr = $1;
$flag = 3;
}
elsif ( $_ =~ /\<Seq-interval_from\>(\d+)\<\/Seq-interval_from\>/ && $flag==3 ){
# elsif ( $_ =~ /\<Seq-interval_from\>(\d+)\<\/Seq-interval_from\>/ && $flag==3 ){
elsif ( $_ =~ /<Item Name="ChrStart" Type="Integer">(\d+)<\/Item>/ && $flag==3 ){
$amont = $1;
$flag = 4;
}
elsif ( $_ =~ /\<Seq-interval_to\>(\d+)\<\/Seq-interval_to\>/ && $flag==4 ){
# elsif ( $_ =~ /\<Seq-interval_to\>(\d+)\<\/Seq-interval_to\>/ && $flag==4 ){
elsif ( $_ =~ /<Item Name="ChrStop" Type="Integer">(\d+)<\/Item>/ && $flag==4 ){
$aval = $1;
last GENE_CHR;
}
......
#
#Version: 3.2.3
#Version: 3.2.4
#OS: Linux
#Author: Sebastien Moretti
#E-mail: moretti.sebastien [AT] gmail.com
#
History of ProtoGene/PACMAN improvements:
3.2.4
Add a simple way to get chromosomal sequence information (gene
accession and positions) via eSummary
3.2.3
Add more equivalent hits to ProtoGene
......
......@@ -241,8 +241,7 @@ sub NCBI_DATABASE {
}
}
#------------------------------------------------------------------------------------------------------------------------
sub HELP
{
sub HELP {
my ($org, @orga) = &LIST_ORGA();
my ($list_orga) = join(', ', @orga);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment