Commit f6a3e51a authored by Sebastien Moretti
Browse files

Version compliant with ProtoGene RPM 4.2.0 and its manually edited files in /software

parent d536e753
...@@ -12,26 +12,28 @@ use warnings; ...@@ -12,26 +12,28 @@ use warnings;
use diagnostics; use diagnostics;
use Carp; use Carp;
use File::Which qw(which); # Locate external executable programs in the PATH use File::Which qw(which); # Locate external executable programs in the PATH
use Time::localtime; # Use localtime+PID for a pseudo-uniq temp file name use Time::localtime; # Use localtime+PID for a pseudo-uniq temp file name
use Getopt::Long; # Options specifications use Getopt::Long; # Options specifications
use File::Copy qw(move); # Avoid external 'mv' command usage use File::Copy qw(move); # Avoid external 'mv' command usage
use LWP::Simple; # To test gigablaster availability use LWP::Simple; # To test gigablaster availability
use Mail::Send; # Send warnings and errors files by e-mail ==> only if the $userEMail variable is defined use Mail::Send; # Send warnings and errors files by e-mail ==> only if the $userEMail variable is defined
use lib '/mnt/common/share/ProtoGene/'; # Local path for ProtoGene's own perl modules use lib '/mnt/common/share/ProtoGene/'; # Local path for ProtoGene's own perl modules
use Exonerate; # Exonerate runner, parser, ... use Exonerate; # Exonerate runner, parser, ...
use Views; # Non-text outputs, e.g. HTML/CSS use Views; # Non-text outputs, e.g. HTML/CSS
#use CheckOutput; # Check output for cds consistancy with query #use CheckOutput; # Check output for cds consistancy with query
################## CONFIGURATION ################## ################## CONFIGURATION ##################
my $cachePath = '/scratch/fhgfs/tcoffee/ProtoGene_Cache'; # Cache directory #$ENV{'PATH'} .= ':/mnt/local/bin/:./'; # Additional path for executables
my $cacheStorageTime = 15; # Do not update sequences younger than X days
my $userEMail = 'moretti.sebastien@gmail.com'; # To receive e-mails with encountered problems; leave blank to inactive my $cachePath = '/scratch/fhgfs/tcoffee/ProtoGene_Cache'; # Cache directory
my $cacheStorageTime = 15; # Do not update sequences younger than X days
my $userEMail = 'moretti.sebastien@gmail.com'; # To receive e-mails with encountered problems; leave blank to inactive
### BLAST parameters ### ### BLAST parameters ###
my $blast_param = { 'evalue' => 0.05, my $blast_param = { 'evalue' => 0.05,
...@@ -52,7 +54,7 @@ my $blast_param = { 'evalue' => 0.05, ...@@ -52,7 +54,7 @@ my $blast_param = { 'evalue' => 0.05,
################################################### ###################################################
my $VERSION = '4.2.1'; my $VERSION = '4.2.0';
my $webblast_exe = '/mnt/common/share/ProtoGene/webblast.pl'; my $webblast_exe = '/mnt/common/share/ProtoGene/webblast.pl';
my $blast_exe = 'blastall'; # Or wu-blastall for Wu-BLAST; for local blast usage my $blast_exe = 'blastall'; # Or wu-blastall for Wu-BLAST; for local blast usage
...@@ -60,10 +62,9 @@ my $exonerate_exe = 'exonerate'; # Exonerate 1.0 because current parser on ...@@ -60,10 +62,9 @@ my $exonerate_exe = 'exonerate'; # Exonerate 1.0 because current parser on
################## Option management ################## Option management
my ($msa, $revtrans, $pep, $hideBOJ, $run_name, $template, $lim) = ('', 0, 0, 0, '', '', 0); my ($msa, $revtrans, $pep, $hideBOJ, $run_name, $template, $lim, $cache) = ('', 0, 0, 0, '', '', 0, 'update');
my ($cache, $cleancache) = ('update', 'update'); my ($debug, $tmp) = (0, 0);
my ($debug, $tmp) = (0, 0); my ($db, $species, $local, $giga) = ($blast_param->{'db1'}, $blast_param->{'species'}, 0, 0);
my ($db, $species, $local, $giga) = ($blast_param->{'db1'}, $blast_param->{'species'}, 0, 0);
my %opts = ('msa|in=s' => \$msa, # Input sequences my %opts = ('msa|in=s' => \$msa, # Input sequences
'revtrans:s' => \$revtrans, # Use to reverse-translate sequences with no match 'revtrans:s' => \$revtrans, # Use to reverse-translate sequences with no match
'pep' => \$pep, # Add the original peptide query beneath the related CDS seq 'pep' => \$pep, # Add the original peptide query beneath the related CDS seq
...@@ -71,8 +72,7 @@ my %opts = ('msa|in=s' => \$msa, # Input sequences ...@@ -71,8 +72,7 @@ my %opts = ('msa|in=s' => \$msa, # Input sequences
'run_name=s' => \$run_name, # Use another name, instead of input seq name, for result files 'run_name=s' => \$run_name, # Use another name, instead of input seq name, for result files
'template=s' => \$template, # Use a template file 'template=s' => \$template, # Use a template file
'lim=i' => \$lim, # Limit number of input query sequences 'lim=i' => \$lim, # Limit number of input query sequences
'cachedir=s' => \$cache, # Cache directory 'cache=s' => \$cache, # Cache behavior
'cacheclean=s' => \$cleancache, # Cache behavior
'orgm|species=s' => \$species, # Organism(s) to blast against 'orgm|species=s' => \$species, # Organism(s) to blast against
'db|database=s' => \$db, # Database to blast against 'db|database=s' => \$db, # Database to blast against
...@@ -89,7 +89,6 @@ my %opts = ('msa|in=s' => \$msa, # Input sequences ...@@ -89,7 +89,6 @@ my %opts = ('msa|in=s' => \$msa, # Input sequences
'tmp' => \$tmp, # To keep traces of fake intermediate files like fake xml from NCBI, fake aln, ... 'tmp' => \$tmp, # To keep traces of fake intermediate files like fake xml from NCBI, fake aln, ...
); );
my $test_option_values = Getopt::Long::GetOptions(%opts); my $test_option_values = Getopt::Long::GetOptions(%opts);
$revtrans = 1 if ( $revtrans eq '' ); # Allow revtrans to be a boolean or a string option (for tcoffee web server)
################## Short help message ################## Short help message
...@@ -97,30 +96,29 @@ if ( !$test_option_values || ($msa eq '' && $cache ne 'empty' && $cache ne 'old' ...@@ -97,30 +96,29 @@ if ( !$test_option_values || ($msa eq '' && $cache ne 'empty' && $cache ne 'old'
print {*STDERR} "\n\tCannot open the MSA file in FASTA format print {*STDERR} "\n\tCannot open the MSA file in FASTA format
\tTry: $0 --msa=path_of_the_fasta_msa_file [Options] \tTry: $0 --msa=path_of_the_fasta_msa_file [Options]
\tOptions: --orgm =All_organisms, Bacteria, Viruses, Vertebrata, \tOptions: --orgm=All_organisms, Bacteria, Viruses, Vertebrata,
\t Eukaryota, Mammalia, Primates, Homo_sapiens, \t Eukaryota, Mammalia, Primates, Homo_sapiens,
\t Gallus_gallus, Bos_taurus, Escherichia_coli, \t Gallus_gallus, Bos_taurus, Escherichia_coli,
\t Arabidopsis_thaliana, Mus_musculus, \t Arabidopsis_thaliana, Mus_musculus,
\t Drosophila_Melanogaster, ... \t Drosophila_Melanogaster, ...
\t default is '$blast_param->{'species'}' \t default is '$blast_param->{'species'}'
\t --db =nr, pdb, swissprot, refseq_protein \t --db=nr, pdb, swissprot, refseq_protein
\t default is '$blast_param->{'db1'}' \t default is '$blast_param->{'db1'}'
\t --local to execute a local BLAST query with \t --local to execute a local BLAST query with
\t --db=path_for_a_local_db_blast_formated\n \t --db=path_for_a_local_db_blast_formated\n
\t --template to provide your own nucleotidic sequences \t --template to provide your own nucleotidic sequences
\t following the cds file format \t following the cds file format
\t --revtrans reverse-translates sequences with no \t --revtrans reverse-translates sequences with no
\t blast hit, in IUB (IUPAC) depiction code \t blast hit, in IUB (IUPAC) depiction code
\t They are removed from the alignement by default \t They are removed from the alignement by default
\t --pep adds the original peptide query beneath the \t --pep adds the original peptide query beneath the
\t back-translated sequence \t back-translated sequence
\t --cachedir ='own_PATH_directory' \t --cache=none, update, use, 'own_PATH_directory', old, empty
\t (default is '$cachePath') \t to select the cache mode
\t --cacheclean=none, update, use, old, empty \t default is 'update'\n
\t to select the cache behavior (default is 'update')\n \t --debug prints extra information when running
\t --debug prints extra information when running \t --version prints version information
\t --version prints version information \t --help prints a full help message\n\n";
\t --help prints a full help message\n\n";
exit(1); exit(1);
} }
...@@ -508,13 +506,7 @@ with these options available: ...@@ -508,13 +506,7 @@ with these options available:
=item I<--pep> to add the original peptide query beneath the back-translated sequence =item I<--pep> to add the original peptide query beneath the back-translated sequence
=item I<--cachedir>=your_own_path_directory =item I<--cache>=none, update, use, own_path_directory, old, empty
=item cachedir sets cache directory
=item I<--cacheclean>=none, update, use, old, empty
=item cacheclean manages cache behavior
=item none: no cache usage, none temporary files are stored =item none: no cache usage, none temporary files are stored
...@@ -522,6 +514,8 @@ with these options available: ...@@ -522,6 +514,8 @@ with these options available:
=item use: force use cache, whatever the age of files =item use: force use cache, whatever the age of files
=item own_path_directory: use my own directory, and its files
=item old: remove the old files in the cache directory =item old: remove the old files in the cache directory
=item empty: empty completely the cache directory =item empty: empty completely the cache directory
...@@ -552,9 +546,9 @@ PROTOGENE re-builds the original alignment with nucleotidic information it has g ...@@ -552,9 +546,9 @@ PROTOGENE re-builds the original alignment with nucleotidic information it has g
=item - =item -
=item I<exonerate> version 2 from http://www.ebi.ac.uk/~guy/exonerate/ =item I<exonerate> from http://www.ebi.ac.uk/~guy/exonerate/
=item I<blast> from ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ =item I<blast> from http://www.ncbi.nlm.nih.gov/BLAST/download.shtml or http://blast.wustl.edu/
=back =back
...@@ -562,9 +556,9 @@ PROTOGENE re-builds the original alignment with nucleotidic information it has g ...@@ -562,9 +556,9 @@ PROTOGENE re-builds the original alignment with nucleotidic information it has g
=over 8 =over 8
=item version 4.2.1 =item version 4.2.0
=item on Dec 18th, 2013 =item on Aug 08th, 2013
=back =back
...@@ -842,7 +836,7 @@ sub fetch { ...@@ -842,7 +836,7 @@ sub fetch {
my $content = get($url); my $content = get($url);
print {*STDERR} "[$content]\n\n" if ($debug); print {*STDERR} "[$content]\n\n" if ($debug);
if ( defined $content ){ if ( defined $content ){
next XML if ( $content =~ /<ERROR>/i ); next XML if ( $content =~ /<ERROR>/i && $content !~ /<ERROR>Can not find description/i );
return $content; return $content;
} }
} }
...@@ -905,7 +899,7 @@ sub protGI2NTGIs{ ...@@ -905,7 +899,7 @@ sub protGI2NTGIs{
my $ntGIs = ''; my $ntGIs = '';
my $geneID = ''; my $geneID = '';
my $content = fetch("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=protein&db=nuccore,nucleotide,gene&id=$protGI&retmode=xml&tool=ProtoGene&email=smoretti\@unil.ch"); my $content = fetch("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=protein&db=nuccore,gene&id=$protGI&retmode=xml&tool=ProtoGene&email=smoretti\@unil.ch");
my @xml = split("\n", $content); my @xml = split("\n", $content);
my $flag = 0; my $flag = 0;
...@@ -1438,7 +1432,7 @@ sub revtransBuilding{ ...@@ -1438,7 +1432,7 @@ sub revtransBuilding{
$readyname =~ s{ +}{ }g; $readyname =~ s{ +}{ }g;
print OUT "$readyname\n"; print OUT "$readyname\n";
if ( $revtrans ){ if ( $revtrans==1 ){
my $final_seq = ''; my $final_seq = '';
for(my $w=0; $w < length($original_seq[$order]); $w++){ for(my $w=0; $w < length($original_seq[$order]); $w++){
my $aa = substr($original_seq[$order], $w, 1); my $aa = substr($original_seq[$order], $w, 1);
......
...@@ -161,7 +161,7 @@ elsif ( $distant==1 ){ ...@@ -161,7 +161,7 @@ elsif ( $distant==1 ){
@list_pdb = &WEB_BLAST($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter); @list_pdb = &WEB_BLAST($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter);
} }
else { else {
die " Report bug to armougom\@igs.cnrs-mrs.fr\n"; die " Report bug to poirot\@igs.cnrs-mrs.fr\n";
} }
#-- PARSE BLAST RESULTS -> MAKE A PDB_ID LIST #-- PARSE BLAST RESULTS -> MAKE A PDB_ID LIST
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment