Commit 67e149c1 authored by Sebastien Moretti's avatar Sebastien Moretti
Browse files

continue cleaning code

parent c2edd580
......@@ -28,8 +28,6 @@ use loci_from_Exonerate; # Exonerate parser
############## Specific to our server
#L'installer doit creer $HOME/.PACMAN/ and $HOME/.PACMAN/.perllib/
#et mettre les differents modules dans $HOME/.PACMAN/.perllib/
$ENV{'PATH'} .= ':/mnt/local/bin/'; # additional path for executable on the server
##############
......@@ -520,11 +518,11 @@ PROTOGENE re-builds the original alignment with nucleotidic information it has g
=item Sebastien MORETTI
=item Sebastien.Moretti@igs.cnrs-mrs.fr
=item moretti.sebastien [AT] gmail.com
=item Frederic REINIER
=item Frederic.Reinier@igs.cnrs-mrs.fr
=item reinier [AT] crs4.it
=item Lab. Information Genomique et Structurale - IGS
......@@ -622,9 +620,9 @@ sub cacheManagement{
elsif ( $Cache eq 'none' ){
}
elsif ( -d $Cache ){
die "\n\tPACMAN cannot access to your directory\n\n" if ( ! -x $Cache );
die "\n\tPACMAN cannot write into your directory\n\n" if ( ! -w $Cache );
die "\n\tPACMAN cannot read into your directory\n\n" if ( ! -r $Cache );
die "\n\tProtoGene cannot access to your directory\n\n" if ( ! -x $Cache );
die "\n\tProtoGene cannot write into your directory\n\n" if ( ! -w $Cache );
die "\n\tProtoGene cannot read into your directory\n\n" if ( ! -r $Cache );
$cache = $Cache;
}
else {
......
......@@ -2,7 +2,7 @@
#Version: 3.2.3
#OS: Linux
#Author: Sebastien Moretti
#E-mail: moretti.sebastien@gmail.com
#E-mail: moretti.sebastien [AT] gmail.com
#
History of ProtoGene/PACMAN improvements:
......@@ -157,3 +157,4 @@ History of ProtoGene/PACMAN improvements:
17 oct 2005
Addition of a version option and variable
......@@ -5,26 +5,27 @@ use warnings;
use diagnostics;
my $infile = $ARGV[0] or die "\n\tYou must specify a fasta file to process\n\n";
open(FILE,"$infile") if ( -e "$infile" );
open(my $FILE, '<', "$infile") if ( -e "$infile" );
my $seq='';
my $flag=0;
while(<FILE>){
if ($_ =~ /^>/){
print $_ if ($flag==0);
print "\n$_" if ($flag>0);
$flag++;
my $seq = '';
my $flag = 0;
while(<$FILE>){
if ( $_ =~ /^>/ ){
print $_ if ( $flag==0 );
print "\n$_" if ( $flag>0 );
$flag++;
}
else {
$seq=$_;
chomp($seq);
$seq =~ s/ //g;
$seq =~ s/\d//g;
$seq =~ s/(.)/_${1}_/g;
print $seq;
$seq = $_;
chomp($seq);
$seq =~ s/ //g;
$seq =~ s/\d//g;
$seq =~ s/(.)/_${1}_/g;
print $seq;
}
}
close FILE;
close $FILE;
print "\n";
exit;
exit 0;
......@@ -171,4 +171,3 @@ sub Usage{
return;
}
......@@ -761,163 +761,160 @@ sub PARSING {
#-------------------------------------------------------------------------------------------------------------------
sub MULTI_EQUIVALENT
{
my($query,$identity,$recouvrement,$bits,$evalue,$intra_res)=@_;
sub MULTI_EQUIVALENT {
my($query, $identity, $recouvrement, $bits, $evalue, $intra_res) = @_;
my @result=();
while ($intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/g)
{
my $databank =$1;
my $last =$4;
my $refseq =$2;
while ( $intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/g ){
my $databank = $1;
my $last = $4;
my $refseq = $2;
if ($databank eq 'pdb') { $refseq.=$last }
$refseq=~ s/\.\d+$//;
push (@result,"$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
if ( $databank eq 'pdb' ){ $refseq .= $last; }
$refseq =~ s/\.\d+$//;
push (@result, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
}
return (@result);
}
#--------------------------------------------------------------------------------------------------------------------
sub AFFICHAGE_REFSEQ_PARSING
{
my($result_sort,$cover_tresh,$identity_treshold,$out_file)=@_;
my(@result_sort)=@$result_sort, my(@name_gid)=();my@resultats=();my $afficher="";
(my($entete)= sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s","Sequence Name","Accession number","Databank","%Identity","%Cover","BITS","Evalue"));
foreach my $result_sort(@result_sort)
{
my($seq_name,$refseq_name,$identiq,$cover,$bits,$evalue,$bank)= split("\t",$result_sort);
$evalue =~ s/,$//; #To remove an additional comment with new blast release (2.2.17)
($identiq)= split(/%/,$identiq);
if ($identiq >= $identity_treshold && $cover >= $cover_tresh)
{
push (@name_gid,">$seq_name\@$bank\_\_$refseq_name\n");
(($afficher).= sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s ",$seq_name,$refseq_name,$bank,$identiq,$cover,$bits,$evalue));
$afficher.="\n";
}
else {next;}
sub AFFICHAGE_REFSEQ_PARSING {
my($result_sort, $cover_tresh, $identity_treshold, $out_file) = @_;
my(@result_sort) = @$result_sort, my(@name_gid) = ();
my@resultats = ();
my $afficher = '';
(my($entete)= sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s", 'Sequence Name', 'Accession number', 'Databank', "%Identity", "%Cover", 'BITS', 'Evalue'));
foreach my $result_sort(@result_sort){
my($seq_name, $refseq_name, $identiq, $cover, $bits, $evalue, $bank) = split("\t", $result_sort);
$evalue =~ s/,$//; #To remove an additional comment with new blast release (2.2.17)
($identiq) = split(/%/,$identiq);
if ($identiq >= $identity_treshold && $cover >= $cover_tresh){
push (@name_gid, ">$seq_name\@$bank\_\_$refseq_name\n");
(($afficher) .= sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s ", $seq_name, $refseq_name, $bank, $identiq, $cover, $bits, $evalue));
$afficher .= "\n";
}
else {next;}
}
if ($afficher) { print "\n$entete\n\n"; print $afficher; }
if ($afficher) { print "\n$entete\n\n"; print $afficher; }
if (@name_gid) { print {*STDOUT} "\n**********************************************************************\n\n"; }
if ($out_file) { open (SOR,">$out_file") or die "can not open $out_file"; print SOR @name_gid; }
print {*STDOUT} "\n", @name_gid;
close SOR;
if (@name_gid) { print {*STDOUT} "\n**********************************************************************\n\n"; }
if ($out_file) { open (SOR,">$out_file") or die "can not open $out_file"; print SOR @name_gid; }
print {*STDOUT} "\n", @name_gid;
close SOR;
return;
}
#-------------------------------------------------------------------------------------------------------------
sub AFFICHAGE_PDB_PARSING
{
sub AFFICHAGE_PDB_PARSING {
my($result_sort, $cover_tresh, $identity_treshold, $out_file) = @_;
my($result_sort,$cover_tresh,$identity_treshold,$out_file)=@_;
my(@result_sort)=@$result_sort, my @sortie=();
my(@result_sort) = @$result_sort, my @sortie=();
print {*STDOUT} "\n\n",(my($en_tete)= sprintf("%-40s %-10s %-10s %-12s %-10s","Sequence Name","PDB_id","Evalue","Identity(%)","Cover(%)")),"\n\n";
print {*STDOUT} "\n\n",(my($en_tete) = sprintf("%-40s %-10s %-10s %-12s %-10s", 'Sequence Name', 'PDB_id', 'Evalue', 'Identity(%)', 'Cover(%)')),"\n\n";
foreach my $result_sort(@result_sort)
{
my($seq_name,$pdb_name,$EValue,$identiq,$cover)= split("\t",$result_sort);
($identiq)= split(/%/,$identiq);
foreach my $result_sort(@result_sort){
my($seq_name, $pdb_name, $EValue, $identiq, $cover) = split("\t", $result_sort);
($identiq) = split(/%/, $identiq);
if ($identiq >= $identity_treshold && $cover >= $cover_tresh)
{
push (@pdb_list,$pdb_name);
$EValue =~ s/,$//;
print {*STDOUT} ((my $afficher)= sprintf("%-40s %-10s %-10s %-12s %-10s",$seq_name,$pdb_name,$EValue,$identiq,$cover)),"\n";
push (@sortie,">$seq_name _P_ $pdb_name\n");
}
else {next;}
if ( $identiq >= $identity_treshold && $cover >= $cover_tresh ){
push (@pdb_list, $pdb_name);
$EValue =~ s/,$//;
print {*STDOUT} ((my $afficher) = sprintf("%-40s %-10s %-10s %-12s %-10s", $seq_name, $pdb_name, $EValue, $identiq, $cover)),"\n";
push (@sortie, ">$seq_name _P_ $pdb_name\n");
}
else {next;}
}
undef(@result_sort);
print {*STDOUT} "\n**********************************************************************\n\n";
#-- OUTFILE /STDOUT
if ($out_file) { open (SOR,">$out_file") or die "can not open $out_file"; print SOR @sortie; }
if ($out_file){ open (SOR,">$out_file") or die "can not open $out_file"; print SOR @sortie; }
print {*STDOUT} @sortie;
close SOR;
return;
}
#-----------------------------------------------------------------------------------------------------------------------------------
sub PROFILE
{
my($list_pdb,$out_file,$distant)=@_;
my(@list_pdb)=@$list_pdb, my(@sortie)=();
my %names=(); my $i=0; my($name)='';
sub PROFILE {
my($list_pdb, $out_file, $distant) = @_;
my(@list_pdb) = @$list_pdb, my(@sortie)=();
my %names = (); my $i = 0; my($name) = '';
open (SOR1,">$out_file") or die;
foreach my $pdb_result(@list_pdb)
{
if ($pdb_result =~ /No hits found/i) { next; }
else
{
++$i;
if ($distant==1) {($name) =($pdb_result =~ /Query=\s*(.+?)Length/smoi) or die "\nparse error in distant profile\n";}
else {($name) =($pdb_result =~ /\s*(.+?)\(.*?letters/ismo) or die "\nparse error in profile\n";}
foreach my $pdb_result(@list_pdb){
if ( $pdb_result =~ /No hits found/i ){ next; }
else{
++$i;
if ( $distant==1 ){($name) = ($pdb_result =~ /Query=\s*(.+?)Length/smoi) or die "\nparse error in distant profile\n";}
else {($name) = ($pdb_result =~ /\s*(.+?)\(.*?letters/ismo) or die "\nparse error in profile\n";}
my($name1)= ($name=~ /(.+?)\s+$/);
my($name1) = ($name=~ /(.+?)\s+$/);
open (SOR,">tempo_file_profile") or die "can not open tempo_file_profile";
print SOR "Query= $pdb_result";
close SOR;
open (my $SOR, '>', "tempo_file_profile") or die "can not open tempo_file_profile";
print {$SOR} "Query= $pdb_result";
close $SOR;
open(COM,"|t_coffee -other_pg seq_reformat -input blast_aln -in tempo_file_profile -output fasta_aln -out ${i}.profile");
close COM;
push (@sortie,">$name1 _R_ ${i}.profile\n");
open(COM,"|t_coffee -other_pg seq_reformat -input blast_aln -in tempo_file_profile -output fasta_aln -out ${i}.profile");
close COM;
push (@sortie,">$name1 _R_ ${i}.profile\n");
}
}
}
unlink("tempo_file_profile");
undef(@list_pdb);
print {*STDERR} "\n**********************************************************************\n\n";
#-- OUTFILE /STDOUT
if ($out_file) { open (SOR1,">$out_file") or die "can not open $out_file"; print SOR1 @sortie; }
if ($out_file){ open (SOR1,">$out_file") or die "can not open $out_file"; print SOR1 @sortie; }
print {*STDOUT} @sortie;
close SOR1;
return;
}
#--------------------------------------------------------------------------------------------------------------------------------
sub ORGN
{
my($organism)=@_;
$organism=~ s/_/ /;
my(%orgs)= (
'Homo sapiens' =>'1',
'Bos taurus' =>'1',
'Gallus gallus' =>'1',
'Viruses' =>'1',
'Bacteria' =>'1',
'Eukaryota' =>'1',
'Mammalia' =>'1',
sub ORGN {
my($organism) = @_;
$organism =~ s/_/ /;
my(%orgs) = (
'Homo sapiens' =>'1',
'Bos taurus' =>'1',
'Gallus gallus' =>'1',
'Viruses' =>'1',
'Bacteria' =>'1',
'Eukaryota' =>'1',
'Mammalia' =>'1',
'Vertebrata' =>'1',
'All organisms' =>'1',
'Fungi' =>'1',
'Primates' =>'1',
'Archaea' =>'1',
'Arabidopsis thaliana' =>'1',
'Caenorhabditis elegans' =>'1',
'Escherichia coli' =>'1',
'Mus musculus' =>'1',
'Drosophila melanogaster' =>'1',
'All organisms' =>'1',
'Fungi' =>'1',
'Primates' =>'1',
'Archaea' =>'1',
'Arabidopsis thaliana' =>'1',
'Caenorhabditis elegans' =>'1',
'Escherichia coli' =>'1',
'Mus musculus' =>'1',
'Drosophila melanogaster' =>'1',
);
if (exists $orgs{$organism})
{ $organism=~ s/ /+/g; return ($organism); }
else { print {*STDERR} "organism not valid or syntax error, replace space by \"_\" \n"; &HELP(); }
if ( exists $orgs{$organism} ){
$organism =~ s/ /+/g;
return ($organism);
}
else{
print {*STDERR} "organism not valid or syntax error, replace space by \"_\" \n";
&HELP();
return;
}
}
#------------------------------------------------------------------------------------------------------------------------------------
sub LIST_ORGA
{
sub LIST_ORGA {
my (%orgs) = (
'Homo sapiens' =>'1',
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment