webblast.pl 35.6 KB
Newer Older
Sebastien Moretti's avatar
Sebastien Moretti committed
1
2
3
#!/usr/bin/env perl
#
#
4
#date : 2007/11/19
Sebastien Moretti's avatar
Sebastien Moretti committed
5
6
#prog : webblast.pl
#subj : make a BLAST/WU-BLAST (by HTTP request or locally) against a database with a file containing sequences in fasta format
Sebastien Moretti's avatar
Sebastien Moretti committed
7
####### method genid, pdbid and profile
Sebastien Moretti's avatar
Sebastien Moretti committed
8
9
10
11
#
#############################################################################################
#use Env qw(HOME);
#use lib "$HOME/.lib_webblast/";
Sebastien Moretti's avatar
Sebastien Moretti committed
12
13
14
use LWP::UserAgent;
use HTML::Parser;                                            # @@@@@@@ #
use HTTP::Request::Common qw(POST);                         # @/^   ^\@ #
Sebastien Moretti's avatar
Sebastien Moretti committed
15
16
17
18
use URI::Escape;                                           # @/ -   - \@ #
use Getopt::Long;                                         ##  \   ^   /  ##
use strict;                                              ##    |  0  |    ##
use warnings;                                           ####### \ _ / #######
19
##############################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
20
21


22
23
24
25
############################  EXPRESSO PARAM  ##########################################################
my $database_expresso  = 'pdb';                                            #PDB database name          #
my $blast_dir_expresso = '/mnt/local/bin/blastall';                        #blastall executable        #
my $BLASTMAT           = 'export BLASTMAT=/mnt/local/ncbi/data/';          #matrix directory for blast #
Sebastien Moretti's avatar
Sebastien Moretti committed
26
my $BLASTDB            = 'export BLASTDB=/scratch/frt/blastnet/database/'; #PDB_seqres directoty       #
27
########################################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
28

29
my $runblast = '/mnt/local/bin/runblast.pl';
Sebastien Moretti's avatar
Sebastien Moretti committed
30

31
my(@list_encoded)=(), my(@list_pdb)=(), my(%deja_vu)=(), my(@pdb_list)=(),my($i)=0, my(@names)=(), my $locale=0, my $distant=0, my $database, my $blast_way;
Sebastien Moretti's avatar
Sebastien Moretti committed
32
33
my($ua)= LWP::UserAgent->new;

34

Sebastien Moretti's avatar
Sebastien Moretti committed
35
36
37
38
##-- Environmental Variables

my ($database_var) = $ENV { 'DATABASE' };
my ($blast_var)    = $ENV { 'BLAST_DIRECTORY' };
Sebastien Moretti's avatar
Sebastien Moretti committed
39

Sebastien Moretti's avatar
Sebastien Moretti committed
40
##-- Get BLAST Options/parameters && check user options
Sebastien Moretti's avatar
Sebastien Moretti committed
41

Sebastien Moretti's avatar
Sebastien Moretti committed
42
43
44
my ($program, $database_line, $blast_line, $query_file,
   $out_file, $identity_treshold, $cover_tresh, $Eval,
   $align, $matrix, $filter, $method, $orgn, $process, $quiet, $gigablast) = &OPTIONS_GET();
Sebastien Moretti's avatar
Sebastien Moretti committed
45

Sebastien Moretti's avatar
Sebastien Moretti committed
46
##-- Define local or remote BLAST && Check database/program
Sebastien Moretti's avatar
Sebastien Moretti committed
47

48
49
unless (-e $query_file ) { print {*STDERR} "\nfile does not exist!\n";exit;}
unless (-s $query_file ) { print {*STDERR} "\nyour file is empty!\n";exit; }
Sebastien Moretti's avatar
Sebastien Moretti committed
50

Sebastien Moretti's avatar
Sebastien Moretti committed
51
52
53
54
55
56
57
58
if ( ($database_line || $database_var) && ($blast_line || $blast_var) ){
    if ( $database_line=~/expressopdb/ && $blast_line=~/blastexpresso/ ){
        #special mode for configuration file of Expresso server
        $locale   = 1;
        $database = 'expressopdb';
        unless ( $quiet=~ /on/i ){
            print {*STDERR} "\nRUN BLAST LOCALY\n";
        }
59
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
60
61
62
63
64
65
66
67
    else{
        ($database)    = $database_line || $database_var;
        my ($blast_tp) = $blast_var || $blast_line;
        $locale = 1;
        $blast_way = &CONTROLE_DB_PG($database, $blast_tp, $program);
        unless ( $quiet=~ /on/i ){
            print {*STDERR} "\nRUN BLAST LOCALY\n";
        }
68
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
69
}
Sebastien Moretti's avatar
Sebastien Moretti committed
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
else{
    $database = &NCBI_DATABASE($database_line);
    $distant  = 1;
    if ( $gigablast=~ /^yes$/i ){
        $locale  = 2;
        $distant = 0;
        unless ( $quiet=~ /on/i ){
            print {*STDERR} "\nRUN GIGABLASTER\n";
        }
    }
    else{
        unless ( $quiet=~ /on/i ){
            print {*STDERR} "\nRUN BLAST AT THE NCBI\n";
        }
    }
}

##- Define parameters through -method flag
88

Sebastien Moretti's avatar
Sebastien Moretti committed
89
90
91
92
93
94
95
96
97
98
99
100
101
if ( $method=~ /^pdbid$/i ){
    if ( $gigablast=~ /^yes$/i ){
        if ( $database ne 'pdb' ){
            print {*STDERR} "\nprovide a valid database name to RUN GIGABLASTER: nr, pdb or refseq_protein\n";
            exit 1;
        }
        else {
            $database = 'pdbaa';
        }
    }
    elsif ( $gigablast=~ /^no$/i && $distant==1 ){
        $database = 'pdb';
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
102
}
Sebastien Moretti's avatar
Sebastien Moretti committed
103
104
105
106
107
elsif ( $method=~ /^geneid$/i ){
    if ( $distant==1 ){
        unless ( $database eq 'nr' || $database eq 'swissprot' || $database eq 'pdb'){
            $database = 'refseq_protein';
        }
108
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
109
}
Sebastien Moretti's avatar
Sebastien Moretti committed
110
111
112
113
114
115
elsif ( $method=~/^profile$/i ){
    if ( $distant==1 ){
        unless ( $database eq 'nr' || $database eq 'swissprot' || $database eq 'refseq_protein' ){
            $database = 'pdb';
        }
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
116
}
Sebastien Moretti's avatar
Sebastien Moretti committed
117
118
else {
    die "unknown method\n";
119
120
}

Sebastien Moretti's avatar
Sebastien Moretti committed
121
122
123
124
if ( $orgn !~ /All\+organisms/ && $locale=~/1|2/ ){
    print {*STDERR} "-organism option can't be used locally or with -gigablast option!\n";
    exit 1;
}
Sebastien Moretti's avatar
Sebastien Moretti committed
125

Sebastien Moretti's avatar
Sebastien Moretti committed
126
127
128
129
130
##---PRINT option values
unless ( $quiet =~ /on/i ){
    print {*STDERR} "

             Program :  $program
Sebastien Moretti's avatar
Sebastien Moretti committed
131
             Database : $database
Sebastien Moretti's avatar
Sebastien Moretti committed
132
133
             Method :   $method

Sebastien Moretti's avatar
Sebastien Moretti committed
134
             Query_file : $query_file
Sebastien Moretti's avatar
Sebastien Moretti committed
135
136
137
138
139
140
             Out_file :   $out_file
";
    print {*STDOUT} "
             Evalue threshold :         $Eval
             Matrix :                   $matrix
             Filter :                   $filter
Sebastien Moretti's avatar
Sebastien Moretti committed
141
             Blast_identity_threshold : $identity_treshold
Sebastien Moretti's avatar
Sebastien Moretti committed
142
143
             Cover threshold :          $cover_tresh
";
144
    print {*STDERR} "
Sebastien Moretti's avatar
Sebastien Moretti committed
145
146
147
148
149
150
151
152
153
154
155
156
             Number of hits :            $align
             Number of processors used : $process
";
    if ( $gigablast=~ /^yes$/i ){
        print {*STDERR} "\n             gigablast: yes\n";
    }
    unless ($locale) {
        print {*STDERR} "\n             Organism : $orgn\n";
    }


    print {*STDERR} "\n***************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
157
158
}

159

Sebastien Moretti's avatar
Sebastien Moretti committed
160
161
162
163
164
165
166
167
168
169
170
171
#-- Local/Remote BLASTP

if ( $locale==1 || $locale==2 ){
    @list_pdb = &LOCAL_BLAST($blast_way, $database, $query_file, $Eval, $align, $method, $matrix, $filter,
                             $process, $gigablast, $database_expresso, $blast_dir_expresso, $runblast);
}
elsif ( $distant==1 ){
    @list_pdb = &WEB_BLAST($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter);
}
else {
    die " Report bug to armougom\@igs.cnrs-mrs.fr\n";
}
Sebastien Moretti's avatar
Sebastien Moretti committed
172
173

#-- PARSE BLAST RESULTS -> MAKE A PDB_ID LIST
Sebastien Moretti's avatar
Sebastien Moretti committed
174
175
176
177
if ( $method =~ /^pdbid$/i ){
    my (@result_sort) = &PARSING(\@list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast);
    &AFFICHAGE_PDB_PARSING(\@result_sort, $cover_tresh, $identity_treshold, $out_file);
    exit 0;
Sebastien Moretti's avatar
Sebastien Moretti committed
178
179
180
}

#-- PARSE BLAST RESULT -> MAKE LIST OF REFSEQ ID
Sebastien Moretti's avatar
Sebastien Moretti committed
181
182
183
184
elsif ( $method =~ /^geneid$/i ){
    my (@result_sort) = &PARSING(\@list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast);
    &AFFICHAGE_REFSEQ_PARSING(\@result_sort, $cover_tresh, $identity_treshold, $out_file);
    exit 0;
Sebastien Moretti's avatar
Sebastien Moretti committed
185
186
187
}

#-- PARSE BLAST RESULT -> MAKE PROFILE
Sebastien Moretti's avatar
Sebastien Moretti committed
188
189
190
191
192
193
194
elsif ( $method=~ /^profile$/i ){
    &PROFILE(\@list_pdb, $out_file, $distant);
    exit 0;
}
else {
    die " \nFATAL ERROR : Method or database error\n";
}
195

Sebastien Moretti's avatar
Sebastien Moretti committed
196
exit 0;
197

Sebastien Moretti's avatar
Sebastien Moretti committed
198
199
200
                                               ##############
###############################################  FONCTIONS  ####################################################################
                                              ##############
Sebastien Moretti's avatar
Sebastien Moretti committed
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
sub CONTROLE_DB_PG {
    my ($database, $blast_dir, $program) = @_;

    if ( !-e "$database" && !-e "$database.pin"){
        die "$database file does not exist\n";
    }
    if ( -d $database ){
        die "$database must be a file, not a directory\n";
    }
    if ( $blast_dir !~ /\/$/ ){
        $blast_dir .= '/';
    }
    my ($blastall) = $blast_dir.'blastall';
    if ( !-e $blastall){
        die "$blastall program not found \n";
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
217

218
    return ($blastall);
Sebastien Moretti's avatar
Sebastien Moretti committed
219
220
221
}

#-------------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
sub NCBI_DATABASE {
    my ($ncbi_db) = @_;

    my (%all_db) =
    (
            'nr'             =>'1',
            'pdb'            =>'1',
            'swissprot'      =>'1',
            'refseq_protein' =>'1',
    );

    if ( exists $all_db{$ncbi_db} ){
        return($ncbi_db);
    }
    elsif ( $ncbi_db eq '' ){
        return ('');
    }
    else{
        return (1);
    }
242
}
Sebastien Moretti's avatar
Sebastien Moretti committed
243
244
#------------------------------------------------------------------------------------------------------------------------
sub HELP
245
{
Sebastien Moretti's avatar
Sebastien Moretti committed
246
247
    my ($org, @orga) = &LIST_ORGA();
    my ($list_orga)  = join(', ', @orga);
Sebastien Moretti's avatar
Sebastien Moretti committed
248

249
250
    print {*STDERR} "
                      usage: $0 -infile <fasta file> -method <pdbid/geneid or profile> options []\n
Sebastien Moretti's avatar
Sebastien Moretti committed
251
252

                            -program ...... Program Name (blastp)
Sebastien Moretti's avatar
Sebastien Moretti committed
253
                                            Default = blastp
254
                            -database ..... Database at NCBI (nr, pdb, swissprot, refseq_protein) or indicate a local fasta file
Sebastien Moretti's avatar
Sebastien Moretti committed
255
256
257
258
                                            Default = pdb at NCBI
                            -infile ....... Query_file = a list of sequences in fasta format
                            -outfile ...... Name the outfile to make a template file for t_coffee
                                            Default = STDOUT or default.profile if method is profile
259
260
                            -evalue ....... Evalue threshold Default = 1
                            -matrix ....... PAM30 PAM70 BLOSUM45 BLOSUM80
Sebastien Moretti's avatar
Sebastien Moretti committed
261
                                            Default BLOSUM62
262
263
                            -method ....... geneid, pdbid, profile
                            -gigablast..... yes/no FASTER REMOTE BLAST with Gigablaster
Sebastien Moretti's avatar
Sebastien Moretti committed
264
                                            (Stephane Audic program: http://www.igs.cnrs-mrs.fr/Giga2/~database/remoteblast.cgi)
Sebastien Moretti's avatar
Sebastien Moretti committed
265
                                            Default no
266
                            -filter ....... T or F locally, L or R or M or C or V for distant blast
Sebastien Moretti's avatar
Sebastien Moretti committed
267
                                            Default = Off
268
269
                            -organism ..... $list_orga are available
                                            Default is All_organisms
Sebastien Moretti's avatar
Sebastien Moretti committed
270
                            -identity ..... blast identity threshold = provide a % for view only the results upper or equal to the threshold
271
272
273
                                            Default 50
                            -cover ........ Cover threshold = provide a % : sequence covering Default: 30
                            -hits ........  Number of hits
Sebastien Moretti's avatar
Sebastien Moretti committed
274
                                            Default = 1
275
276
277
278
279
                            -processor .... Number of processors to use
                                            Default = 1
                            -blast_dir .... Indicates where your BLAST directory is installed localy
                            -quiet ........ on : do not display all the default/defined blast parameters
                                            Default off
Sebastien Moretti's avatar
Sebastien Moretti committed
280
281
282
283
284

                     Environement Variables
                     These variables can be set from the environement
           DATABASE......................[Indicates where your database file must be fetched (localy)]
           BLAST_DIRECTORY...............[Indicates where your BLAST directory is installed localy]
285

Sebastien Moretti's avatar
Sebastien Moretti committed
286
";
Sebastien Moretti's avatar
Sebastien Moretti committed
287
288
    exit 1;
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
289
290
}
#-----------------------------------------------------------------------------------------
291
sub OPTIONS_GET {
Sebastien Moretti's avatar
Sebastien Moretti committed
292
    my %opt = ();
Sebastien Moretti's avatar
Sebastien Moretti committed
293
294

    GetOptions
Sebastien Moretti's avatar
Sebastien Moretti committed
295
              (
296
297
298
299
300
301
302
303
304
305
306
307
           'infile=s'    =>\$opt{infile},
           'outfile=s'    =>\$opt{outfile},
           'program=s'     =>\$opt{program},
           'database=s'     =>\$opt{database},
           'blast_dir=s'     =>\$opt{blast_dir},
           'identity=f'       =>\$opt{treshold},
           'cover=f'           =>\$opt{cover},
           'evalue=f'           =>\$opt{evalue},
           'hits=i'              =>\$opt{hits},
           'matrix=s'             =>\$opt{matrix},
           'filter=s'              =>\$opt{filter},
           'method=s'               =>\$opt{method},
308
           'organism=s'              =>\$opt{organism},
309
310
311
312
           'processor=i'              =>\$opt{processor},
           'quiet=s'                   =>\$opt{quiet},
           'gigablast=s'                =>\$opt{gigablast},
           );
Sebastien Moretti's avatar
Sebastien Moretti committed
313
314
315
316

    if ( $ARGV[0] ){
        print "Unprocessed by Getopt::Long\n $ARGV[0]\n";
        &HELP();
317
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339

    my($evalue_tresh) = $opt{'evalue'};       unless ($evalue_tresh)        { $evalue_tresh = 1;};
    my($cover_tresh)  = $opt{'cover'};         unless (defined $cover_tresh) { $cover_tresh = 30;};
    my($query_file)   = $opt{'infile'};         unless ($query_file)          { print {*STDERR} "Flag -infile must be defined\n"; &HELP();};
    my($outfil)       = $opt{'outfile'};         unless ($outfil)              { $outfil = '';};
    my($treshold)     = $opt{'treshold'};         unless (defined $treshold)    { $treshold = 50;};
    my($blast_dir)    = $opt{'blast_dir'};         unless ($blast_dir)           { $blast_dir = '';};
    my($database)     = $opt{'database'};           unless ($database)            { $database = '';};
    my($program)      = $opt{'program'};             unless ($program)             { $program = 'blastp';};
    my($align)        = $opt{'hits'};                 unless (defined $align)       { $align = 1;};
    my($matrix)       = $opt{'matrix'};                unless ($matrix)              { $matrix = 'BLOSUM62';};
    my($filter)       = $opt{'filter'};                 unless ($filter)              { $filter = 'F';};
    my($method)       = $opt{'method'};                  unless ($method)              {print {*STDERR} "Flag -method must be defined\n"; &HELP();};
    my($organism)     = $opt{'organism'};                 unless ($organism)            { $organism = 'All organisms';};
    my($process)      = $opt{'processor'};                 unless ($process)             { $process = 1;};
    my($param)        = $opt{'quiet'};                      unless ($param)               { $param = 'off';};
    my($gigablast)    = $opt{'gigablast'};                   unless ($gigablast)           {$gigablast = 'no';};

    if ( $method !~ /(^geneid$|^pdbid$|^profile$)/i ){
        print {*STDERR} "unknown method for the flag -method\n";
        &HELP();
    }
340
    if ( $treshold<0 || $treshold>100 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
341
342
343
        print {*STDERR} "\nout of range for the option -treshold \n";
        &HELP();
    }
344
    if ( $cover_tresh<0 || $cover_tresh>100 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
345
346
347
        print {*STDERR} "\nout of range for the option -cover \n";
        &HELP();
    }
348
    if ( $align<0 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
349
350
351
        print {*STDERR} "\n error with option align\n";
        &HELP();
    }
352
    if ( $gigablast !~ /^yes$|^no$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
353
354
355
        print {*STDERR} "invalid argument for gigaglast option: yes/no\n";
        exit 1;
    }
356
    if ( $filter !~ /^[TFRLMCV]{1}$|^off$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
357
358
359
        print {*STDERR} "valid values for -filter are T,F,R,L,M,C,or V!\n";
        exit 1;
    }
360
    if ( $matrix !~ /PAM30|PAM70|BLOSUM45|BLOSUM80|BLOSUM62/ ){
Sebastien Moretti's avatar
Sebastien Moretti committed
361
362
363
        print {*STDERR} "valid values for -matrix are PAM30,PAM70,BLOSUM45,BLOSUM80 or BLOSUM62\n";
        exit 1;
    }
364
    if ( $outfil eq '' && $method=~ /^profile$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
365
366
367
        $outfil = 'default_profile.template';
    }

368
    if ( $param!~ /^on$|^off$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
369
370
371
372
373
374
375
        print {*STDERR} "valid values for -quiet is on or off\n";
        exit 1;
    }
    my ($orgn, @all_orgn) = &ORGN($organism);
    return ($program,$database,$blast_dir,$query_file,
            $outfil,$treshold,$cover_tresh,$evalue_tresh,
            $align,$matrix,$filter,$method,$orgn,$process,$param,$gigablast);
Sebastien Moretti's avatar
Sebastien Moretti committed
376
377
378
}

#--------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
379
380
sub RECOVER {
    my ($pdb_result, $aln_length, $length_query) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
381
    my $nb_gap=0;
Sebastien Moretti's avatar
Sebastien Moretti committed
382
383
384
385
386

    if ( $pdb_result=~ /(score.+?\n\n\n).+?score/ism ){#If several HSP, take the 1st
        $pdb_result = $1;
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
387
    $length_query  =~ s/,//g;
Sebastien Moretti's avatar
Sebastien Moretti committed
388
    my ($requete)  =  join('', ($pdb_result=~/^Query(.*)\n/gm) );
Sebastien Moretti's avatar
Sebastien Moretti committed
389
    $requete       =~ s/[^A-Z-]//g;
Sebastien Moretti's avatar
Sebastien Moretti committed
390
391
392
393
394
395
    my (@sequence) =  split('', $requete);

    for (my $i=0; $i<=$#sequence; $i++){
        if ( $sequence[$i] eq '-'){
            ++$nb_gap;
        }
Sebastien Moretti's avatar
Sebastien Moretti committed
396
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
397
    my ($recouvrement) = sprintf("%-3d", (($aln_length-$nb_gap)/$length_query)*100 );
Sebastien Moretti's avatar
Sebastien Moretti committed
398
    undef(@sequence);
Sebastien Moretti's avatar
Sebastien Moretti committed
399
    return ($recouvrement, $nb_gap);
Sebastien Moretti's avatar
Sebastien Moretti committed
400
401
}
#--------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
sub WEB_BLAST {
    open (my $SOR1, '>', 'web_tempo.result') or die;
    my ($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter) = @_;
    my $aln_view;
    my $format = 'Txt';
    my ($description) = $align;
    if ( $method=~/^profile$/i ){
        $aln_view = 'FlatQueryAnchoredNoIdentities';
    }
    else {
        $aln_view = 'Pairwise';
    }

    if ( $filter eq 'F' ){
        $filter = 'off';
    }

    $/ = '>';
    open( my $FIC, '<', $query_file) or die "cannot open $query_file $!\n";
    my (@sequences) = <$FIC>;
    close $FIC;
Sebastien Moretti's avatar
Sebastien Moretti committed
423
424
    shift(@sequences);

Sebastien Moretti's avatar
Sebastien Moretti committed
425
426
427
428
429
430
431
432
433
434
    foreach my $sequence(@sequences){
        $sequence =~ s/>//g;
        $sequence = ">$sequence";

        my ($name) = ($sequence =~ /^>(.+)\n/);
        push(@names, $name);
        my ($encoded_query) = uri_escape($sequence);
        push (@list_encoded, $encoded_query);
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
435
    undef(@sequences);
Sebastien Moretti's avatar
Sebastien Moretti committed
436
437
438
439
440
441
442
    if ( scalar (@names != @list_encoded) ){
        die "error $!";
    }
    foreach my $encoded_seq(@list_encoded){
        my $nb = 0;
        print {*STDERR} "BLAST $names[$i]...";

Sebastien Moretti's avatar
Sebastien Moretti committed
443
#-- BUILD THE REQUEST
Sebastien Moretti's avatar
Sebastien Moretti committed
444
445
446
447
448
449
        my ($arguments) = "CMD=Put&ENTREZ_QUERY=$orgn&CDD_SEARCH=off&FILTER=$filter&MATRIX_NAME=$matrix&PROGRAM=$program&DATABASE=$database&QUERY=" . $encoded_seq;

        my ($req) = new HTTP::Request POST => 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi';
        $req -> content_type('application/x-www-form-urlencoded');
        $req -> content($arguments);

Sebastien Moretti's avatar
Sebastien Moretti committed
450
#-- GET THE RESPONSE : PARSE OUT THE REQUEST ID and THE ESTIMATED TIME
Sebastien Moretti's avatar
Sebastien Moretti committed
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
        my ($response) = $ua -> request($req);

        if ( $response -> content =~ /Server Error/i ){
            die "Server Error at NCBI!!Sorry try later\n";
        }
        $response -> content =~ /^\s{4}RID = (.*)$/m;
        my ($rid) = $1;

        $response -> content =~ /^\s{4}RTOE = (.*)$/m;
        my ($wait)= $1;
        unless ($rid && $wait){
            die "parse error: $!";
        }
        for (my $j=0; $j<=$wait/2; $j++){
            print {*STDERR} '.';
            sleep 2;
        }

        my ($verif) = 0;

        while (){
            for (my $j=0; $j<=5; $j++){
                print  {*STDERR} '.';
                sleep 1;
            }

            $req = new HTTP::Request GET =>
                "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=$rid";
            $response = $ua->request($req);
            if ( $response->content =~ /Status=WAITING/im ){
                next;
            }
            elsif ( $response->content =~ /Status=FAILED/im ){
                print {*STDERR} "Search $rid failed\n";
                $verif = 1;
                last;
            }
            elsif ( $response->content =~ /Status=UNKNOWN/im ){
                print {*STDERR} "Search $rid expired\n";
                $verif = 1;
                last;
            }
            elsif ( $response->content =~ /Status=READY/im ){
                if ( $response->content =~ /ThereAreHits=yes/im ){
                    last;
                }
                else {
                    print {*STDERR} "No hits found.\n";
                    $verif = 1;
                    last;
                }
            }
            elsif ( $response->content =~ /can\'t connect/im ){
                print {*STDERR} "\nCan't connect to www.ncbi.nlm.nih.gov:80...new attempt";
                if ( $nb <3 ){
                    ++$nb;
                    next;
                }
                else {
                    print {*STDERR} "sorry, BLAST $names[$i] failed after 3 attempts!!\n";
                    $verif = 1;
                    last;
                }
            }
            else {
                print {*STDERR} "unknown error\n";
                $verif = 1;
                last;
            }
        }

        if( $verif==1 ){
            ++$i;
            next;
        }

Sebastien Moretti's avatar
Sebastien Moretti committed
527
#-- GET RESULT
Sebastien Moretti's avatar
Sebastien Moretti committed
528
529
530

        while (){
            sleep 3;
531
532
533
534
535
536
537
538
539
540
        $req = new HTTP::Request GET => "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_TYPE=$format&FILTER=off&EXPECT=$Eval&ALIGNMENTS=$align&DESCRIPTIONS=$align&ALIGNMENT_VIEW=$aln_view&RID=$rid";
        $response = $ua -> request($req);

        if   ($response->content =~ /Altschul/i) {  print {*STDERR} "Search Complete\n"; push(@list_pdb,$response -> content);last; }
        else { next; }
    }
    print {$SOR1} (@list_pdb);
    ++$i;
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
541
    undef (@list_encoded);
542

Sebastien Moretti's avatar
Sebastien Moretti committed
543
    close $SOR1;
Sebastien Moretti's avatar
Sebastien Moretti committed
544
    return (@list_pdb);
545

Sebastien Moretti's avatar
Sebastien Moretti committed
546
547
548
549
}
#-----------------------------------------------------------------------------------------------------------------------
sub LOCAL_BLAST
{
Sebastien Moretti's avatar
Sebastien Moretti committed
550
551
552
553
    my ($blast_dir, $database, $query_file, $Eval, $align,
        $method, $matrix, $filter, $process, $gigablast,
        $database_expresso, $blast_dir_expresso, $runblast) = @_;
    my $n = 0;
554
    if ( $method=~ /^profile$/i && $gigablast=~ /^no$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
555
        open (COM,"$blast_dir -p blastp -d $database -i $query_file -m 6 -M $matrix -v $align -b $align -F $filter -e $Eval -a $process|") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
556
    }
557
558
    elsif ( $method=~ /^geneid$/i && $gigablast=~ /^no$/i )
    {
Sebastien Moretti's avatar
Sebastien Moretti committed
559
        open (COM,"$blast_dir -p blastp -d $database -i $query_file  -v $align  -b $align -F $filter -M $matrix -e $Eval -a $process|") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
560
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
561
    elsif ( $method=~ /^geneid$|^pdbid$/i && $gigablast=~ /^yes$/i )
Sebastien Moretti's avatar
Sebastien Moretti committed
562
    {
Sebastien Moretti's avatar
Sebastien Moretti committed
563
564
565
566
567
568
569
        unless ( $database eq "nr" || $database eq "pdb" || $database eq "refseq_protein" || $database eq "pdbaa" ){
            print {*STDERR} "\nsorry invalid database for gigablast\n";exit 1;
        }
        if ( $database eq 'pdb')            { $database = 'pdbaa';}
        if ( $database eq 'refseq_protein') { $database = 'refprot';}
        if ( $database eq '')               { print {*STDERR} "provide a valid database!\n"; exit;}
        open (COM,"$runblast -d $database -p blastp  -e $Eval -v $align -F F \<$query_file |");
570
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
571
572
    elsif ( $method=~ /^profile$/i && $gigablast=~ /^yes$/i){
        print {*STDERR} "\nSorry method profile can't be used with -gigablast option\n";exit;
Sebastien Moretti's avatar
Sebastien Moretti committed
573
    }
574
575
576
577
578
    elsif ( $method=~ /^pdbid$/i && $database=~ /expressopdb/ ){
        #BLAST pour Expresso
        open (COM,"$BLASTMAT; $BLASTDB; $blast_dir_expresso -p blastp -d $database_expresso -i $query_file -F $filter -e $Eval -M $matrix -v $align -b $align |") or die;
    }
    else{
Sebastien Moretti's avatar
Sebastien Moretti committed
579
        open (COM,"$blast_dir -p blastp -d $database -i $query_file -v 1 -b 1 -F $filter -e $Eval -M $matrix -v $align -b $align -a $process |") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
580
    }
581
582
583

    unless ( $quiet=~ /on/ ) { print {*STDERR} "\nrun BLAST..."; }

Sebastien Moretti's avatar
Sebastien Moretti committed
584
    my ($name_database, $posted, $version) = ('', '', '');
Sebastien Moretti's avatar
Sebastien Moretti committed
585

Sebastien Moretti's avatar
Sebastien Moretti committed
586
    open (my $SOR2, '>', 'blast_result.txt') or die;
587

Sebastien Moretti's avatar
Sebastien Moretti committed
588
589
590
591
592
593
    $/ = 'Query=';
    while (<COM>){
        if ( $_=~ /Database:\s+(\S+)/g )     { $name_database = $1;}
        if ( $_=~ /Posted date:\s+(.+?)\n/ ) { $posted        = $1;}
        if ( $_=~ /(BLASTP\s+\S+)/o )        { $version       = $1;}
        print {$SOR2} $_;
Sebastien Moretti's avatar
Sebastien Moretti committed
594
        push (@list_pdb, $_);
Sebastien Moretti's avatar
Sebastien Moretti committed
595
596
        if ( $_=~ /\s*(.+?)\s/ ){
            print {*STDERR} "\n$1 done";
597
        }
Sebastien Moretti's avatar
Sebastien Moretti committed
598
599
    }
    close COM;
Sebastien Moretti's avatar
Sebastien Moretti committed
600
    close $SOR2;
601
    print {*STDERR} "\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
602

Sebastien Moretti's avatar
Sebastien Moretti committed
603
    $name_database = $database if ( $name_database =~ m{/} );
604
    unless ($quiet=~ /on/i) {
Sebastien Moretti's avatar
Sebastien Moretti committed
605
606
607
        print {*STDOUT} "
             Version:     $version
             Database:    $name_database
Sebastien Moretti's avatar
Sebastien Moretti committed
608
             Posted date: $posted\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
609
    }
610
    shift (@list_pdb);
Sebastien Moretti's avatar
Sebastien Moretti committed
611
612
613
614
    return (@list_pdb);
}

#-----------------------------------------------------------------------------------------------------------------------------
615
sub PARSING {
Sebastien Moretti's avatar
Sebastien Moretti committed
616
    my ($list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
617

618
619
620
621
    my (@list_pdb)        = @$list_pdb;
    my (@result_not_sort) = ();
    my $n = 0;
    open (my $SOR, '>', 'webblast.log') or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
622

623
624
625
    if ( $gigablast=~ /^yes$/i ) { $locale = 2;$distant = 0;}
    if ( $gigablast=~ /^no$/i )  { $locale = 1;}
    if ( $distant==1 )           { $locale = 0;}
Sebastien Moretti's avatar
Sebastien Moretti committed
626

627
628
629
630
631
632
    foreach my $pdb_result(@list_pdb){
        my $query, my $length_query, my($pdb_id), my $comp=0;
        if ( $pdb_result=~/No hits found/m ){
            print {$SOR} $pdb_result;
            next;
        }
633

634
635
636
637
638
639
640
641
642
        $pdb_result =~ s/ALIGNMENTS//;
        local $/ = undef;
        my (@intra_res) = split(/(?=\n\n>)/s, $pdb_result);


        if ( $distant==1 ){
            my $version_d, my $database_d, my $poste_d;
            undef $/; ($query, $length_query) = ( $intra_res[0] =~ /Query=\s+(\S+)\s+Length=\s*(\d+)/smo );
            $/ = "\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
643
            open (F3, '<', 'web_tempo.result') or die;
644
645
646
647
648
649
650
651
652
            while ($_=<F3>){
                if ($_=~ /(BLASTP\s+\S+)/o)        { $version_d  = $1;}
                if ($_=~ /Database:\s+(.+?)$/o)    { $database_d = $1;}
                if ($_=~ /Posted date:\s*(.+?)$/o) { $poste_d    = $1; last;}
            }
            close F3;
#           $database_d = $database if ( $database_d =~ m{/} );
            unless ($quiet=~ /on/i || $n>0){
                ++$n;
653
                print {*STDOUT} "
Sebastien Moretti's avatar
Sebastien Moretti committed
654
655
             Version:     $version_d
             Database:    $database_d
Sebastien Moretti's avatar
Sebastien Moretti committed
656
             Posted date: $poste_d\n\n";
657
658
659
660
661
            }
        }
        else{
            ($query, $length_query) = ( $intra_res[0] =~ /\s*(.+?)\s.+?\(([\d,]+) letters/smo );
        }
662

663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
        shift(@intra_res) if ( exists($intra_res[1]) );

        foreach my $intra_res(@intra_res){ #look for the different results of the query
            my($aln_length, $identity) = ( $intra_res=~ /^\sIdentities = \d+\/(\d+)\s\((.+?)\)/im );
            my($recouvrement, $gap)    = &RECOVER($intra_res, $aln_length, $length_query);
            my($evalue)                = ( $intra_res=~ /Expect = (.+?)\s/im );
            my($bits)                  = ( $intra_res=~ /Score =\s+([\d.]+)\s/im );

            unless ( $method !~ /^geneid$/i ){
                if ( $comp<=$bits ){
                    $comp=$bits;
                }
                else{
                    last;
                }
            }

            if ( $query eq '' || $length_query eq '' || $aln_length eq '' || $identity eq '' || $recouvrement eq '' || $gap eq '' ){
                print {$SOR} " can't parse $pdb_result";
                next;
            }

            if ( $method =~ /^pdbid$/i ){
                if ( $locale==1 ){
                    ($pdb_id) = ( $intra_res=~ /^>(.{6})/im );
                    $pdb_id   =~ s/_//;
                    $pdb_id   = uc($pdb_id);
                }
                else{
                    ($pdb_id) = ( $intra_res=~ /^>pdb\|(.{6})/im );
                     $pdb_id  =~ s/\|//;
                }
                ($evalue) = ( $intra_res=~ /Expect = (.+?)\s/im );

                push (@result_not_sort, "$query\t$pdb_id\t$evalue\t$identity\t$recouvrement\t");
            }
            elsif ( $method =~/^geneid$/i ){
                if ( $database !~ /pdb/i && $database !~ /swiss/i && $locale =~ /1|2/){
                    while ( $intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/sg ){
                        my $databank = $1;
                        my $last     = $4;
                        my $refseq   = $2;
                        if ( $databank eq 'pdb' ){
                            $refseq .= $last;
                        }
                        $refseq =~ s/\.\d+$//;
                        push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
                    }
                }
                elsif ( $database=~ /pdb|pdbaa/i && ($locale==1 || $locale==2) ){
                    my $refseq;
                    if ( $locale==1 ){
                        ($refseq) = ( $intra_res=~ />(.*?)\s/o );
                        $refseq   =~ s/_//;
                    }
                    else{
                        ($refseq) = ( $intra_res=~ /^>pdb\|(.{6})/im );
                        $refseq   =~ s/\|//;
                    }

                    unless ($refseq){
                        print {$SOR} $intra_res;
                        next;
                    }
                    push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tpdb");
                }
                elsif ( $distant==1 ){
                    my ($resul) = &MULTI_EQUIVALENT($query, $identity, $recouvrement, $bits, $evalue, $intra_res);
                    push (@result_not_sort, $resul);
                }
                elsif ( $database=~ /swiss/i ){
                    my ($refseq) = ( $intra_res=~ />.*?sp\|(.+?)\|/o );
                    unless ($refseq){
                        print {$SOR} $pdb_result;
                        next;
                    }
                    $refseq =~ s/\.\d+$//;
                    push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\tswiss_prot");
                }
            }
            else {die;}
        }
745
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
746
    close $SOR;
Sebastien Moretti's avatar
Sebastien Moretti committed
747
    undef (@list_pdb);
748

749
750
751
752
753
754
755
756
757
758
    if ( $method =~/^geneid$/i ){
        return (@result_not_sort);
    }
    else{
        my(@result_sort) = map  { $_->[1] }
                           sort { $b->[0]<=>$a->[0] }
                           map  { [/\t([\d.]+)%/,$_] }
                           @result_not_sort;
        undef(@result_not_sort);
        return (@result_sort);
759
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
760
761
762
763
}

#-------------------------------------------------------------------------------------------------------------------

Sebastien Moretti's avatar
Sebastien Moretti committed
764
765
sub MULTI_EQUIVALENT {
    my($query, $identity, $recouvrement, $bits, $evalue, $intra_res) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
766
767

    my @result=();
Sebastien Moretti's avatar
Sebastien Moretti committed
768
769
770
771
    while ( $intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/g ){
        my $databank = $1;
        my $last     = $4;
        my $refseq   = $2;
772

Sebastien Moretti's avatar
Sebastien Moretti committed
773
774
775
        if ( $databank eq 'pdb' ){ $refseq .= $last; }
        $refseq =~ s/\.\d+$//;
        push (@result, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
Sebastien Moretti's avatar
Sebastien Moretti committed
776
777
778
779
    }
    return (@result);
}
#--------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
sub AFFICHAGE_REFSEQ_PARSING {
    my($result_sort, $cover_tresh, $identity_treshold, $out_file) = @_;
    my(@result_sort) = @$result_sort, my(@name_gid) = ();
    my@resultats = ();
    my $afficher = '';

    (my($entete)= sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s", 'Sequence Name', 'Accession number', 'Databank', "%Identity", "%Cover", 'BITS', 'Evalue'));

    foreach my $result_sort(@result_sort){
        my($seq_name, $refseq_name, $identiq, $cover, $bits, $evalue, $bank) = split("\t", $result_sort);
        $evalue =~ s/,$//; #To remove an additional comment with new blast release (2.2.17)
        ($identiq) = split(/%/,$identiq);

        if ($identiq >= $identity_treshold && $cover >= $cover_tresh){
            push (@name_gid, ">$seq_name\@$bank\_\_$refseq_name\n");
            (($afficher) .=  sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s ", $seq_name, $refseq_name, $bank, $identiq, $cover, $bits, $evalue));
            $afficher .= "\n";
        }
        else {next;}
Sebastien Moretti's avatar
Sebastien Moretti committed
799
800
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
801
    if ($afficher) { print "\n$entete\n\n"; print $afficher; }
Sebastien Moretti's avatar
Sebastien Moretti committed
802
803


Sebastien Moretti's avatar
Sebastien Moretti committed
804
805
806
807
808
    if (@name_gid) { print {*STDOUT} "\n**********************************************************************\n\n"; }
    if ($out_file) { open (SOR,">$out_file") or die "can not open $out_file"; print SOR @name_gid; }
    print {*STDOUT} "\n", @name_gid;
    close SOR;
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
809
810
811
}
#-------------------------------------------------------------------------------------------------------------

Sebastien Moretti's avatar
Sebastien Moretti committed
812
813
sub AFFICHAGE_PDB_PARSING {
    my($result_sort, $cover_tresh, $identity_treshold, $out_file) = @_;
814

Sebastien Moretti's avatar
Sebastien Moretti committed
815
    my(@result_sort) = @$result_sort, my @sortie=();
816

Sebastien Moretti's avatar
Sebastien Moretti committed
817
    print {*STDOUT} "\n\n",(my($en_tete) = sprintf("%-40s %-10s %-10s %-12s %-10s", 'Sequence Name', 'PDB_id', 'Evalue', 'Identity(%)', 'Cover(%)')),"\n\n";
818

Sebastien Moretti's avatar
Sebastien Moretti committed
819
820
821
    foreach my $result_sort(@result_sort){
        my($seq_name, $pdb_name, $EValue, $identiq, $cover) = split("\t", $result_sort);
        ($identiq)                                          = split(/%/, $identiq);
822

Sebastien Moretti's avatar
Sebastien Moretti committed
823
824
825
826
827
828
829
        if ( $identiq >= $identity_treshold && $cover >= $cover_tresh ){
            push (@pdb_list, $pdb_name);
            $EValue =~ s/,$//;
            print {*STDOUT} ((my $afficher) = sprintf("%-40s %-10s %-10s %-12s %-10s", $seq_name, $pdb_name, $EValue, $identiq, $cover)),"\n";
            push (@sortie, ">$seq_name _P_ $pdb_name\n");
        }
        else {next;}
Sebastien Moretti's avatar
Sebastien Moretti committed
830
831
    }
    undef(@result_sort);
832
    print {*STDOUT} "\n**********************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
833
834

#-- OUTFILE /STDOUT
Sebastien Moretti's avatar
Sebastien Moretti committed
835
    if ($out_file){ open (SOR,">$out_file") or die "can not open $out_file"; print SOR @sortie; }
836
    print {*STDOUT} @sortie;
Sebastien Moretti's avatar
Sebastien Moretti committed
837
    close SOR;
Sebastien Moretti's avatar
Sebastien Moretti committed
838
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
839
840
}
#-----------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
841
842
843
844
845
sub PROFILE {
    my($list_pdb, $out_file, $distant) = @_;

    my(@list_pdb) = @$list_pdb, my(@sortie)=();
    my %names = ();   my $i = 0;    my($name) = '';
Sebastien Moretti's avatar
Sebastien Moretti committed
846
847

    open (SOR1,">$out_file") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
848
849
850
851
852
853
    foreach my $pdb_result(@list_pdb){
        if ( $pdb_result =~ /No hits found/i ){ next; }
        else{
            ++$i;
            if ( $distant==1 ){($name) = ($pdb_result =~ /Query=\s*(.+?)Length/smoi) or die "\nparse error in distant profile\n";}
            else              {($name) = ($pdb_result =~ /\s*(.+?)\(.*?letters/ismo) or die "\nparse error in profile\n";}
854

Sebastien Moretti's avatar
Sebastien Moretti committed
855
            my($name1) = ($name=~ /(.+?)\s+$/);
856

Sebastien Moretti's avatar
Sebastien Moretti committed
857
858
859
            open (my $SOR, '>', "tempo_file_profile") or die "can not open tempo_file_profile";
            print {$SOR} "Query= $pdb_result";
            close $SOR;
860

Sebastien Moretti's avatar
Sebastien Moretti committed
861
862
863
            open(COM,"|t_coffee -other_pg seq_reformat -input blast_aln -in tempo_file_profile -output fasta_aln -out ${i}.profile");
            close COM;
            push (@sortie,">$name1 _R_ ${i}.profile\n");
864

Sebastien Moretti's avatar
Sebastien Moretti committed
865
        }
866
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
867
    unlink("tempo_file_profile");
868
869
    undef(@list_pdb);

870
    print {*STDERR} "\n**********************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
871
#-- OUTFILE /STDOUT
Sebastien Moretti's avatar
Sebastien Moretti committed
872
    if ($out_file){ open (SOR1,">$out_file") or die "can not open $out_file"; print SOR1 @sortie; }
873
    print {*STDOUT} @sortie;
Sebastien Moretti's avatar
Sebastien Moretti committed
874
    close SOR1;
Sebastien Moretti's avatar
Sebastien Moretti committed
875
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
876
877
878
}

#--------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
879
880
881
882
883
884
885
886
887
888
889
890
891
892
sub ORGN {
    my($organism) = @_;

    $organism =~ s/_/ /;

    my(%orgs) = (

        'Homo sapiens'            =>'1',
        'Bos taurus'              =>'1',
        'Gallus gallus'           =>'1',
        'Viruses'                 =>'1',
        'Bacteria'                =>'1',
        'Eukaryota'               =>'1',
        'Mammalia'                =>'1',
893
        'Vertebrata'              =>'1',
Sebastien Moretti's avatar
Sebastien Moretti committed
894
895
896
897
898
899
900
901
902
        'All organisms'           =>'1',
        'Fungi'                   =>'1',
        'Primates'                =>'1',
        'Archaea'                 =>'1',
        'Arabidopsis thaliana'    =>'1',
        'Caenorhabditis elegans'  =>'1',
        'Escherichia coli'        =>'1',
        'Mus musculus'            =>'1',
        'Drosophila melanogaster' =>'1',
903
        );
Sebastien Moretti's avatar
Sebastien Moretti committed
904

Sebastien Moretti's avatar
Sebastien Moretti committed
905
906
907
908
909
910
911
912
913
    if ( exists $orgs{$organism} ){
        $organism =~ s/ /+/g;
        return ($organism);
    }
    else{
        print {*STDERR} "organism not valid or syntax error, replace space by \"_\" \n";
        &HELP();
        return;
    }
914
}
Sebastien Moretti's avatar
Sebastien Moretti committed
915
916
#------------------------------------------------------------------------------------------------------------------------------------

Sebastien Moretti's avatar
Sebastien Moretti committed
917
sub LIST_ORGA {
Sebastien Moretti's avatar
Sebastien Moretti committed
918
919
920
921
922
923
    my (%orgs) = (

        'Homo sapiens'            =>'1',
        'Bos taurus'              =>'1',
        'Gallus gallus'           =>'1',
        'Viruses'                 =>'1',
924
        'Bacteria'                =>'1',
Sebastien Moretti's avatar
Sebastien Moretti committed
925
926
927
928
929
930
931
932
933
934
935
936
        'Eukaryota'               =>'1',
        'Mammalia'                =>'1',
        'Vertebrata'              =>'1',
        'All organisms'           =>'1',
        'Fungi'                   =>'1',
        'Primates'                =>'1',
        'Archaea'                 =>'1',
        'Arabidopsis thaliana'    =>'1',
        'Caenorhabditis elegans'  =>'1',
        'Escherichia coli'        =>'1',
        'Mus musculus'            =>'1',
        'Drosophila melanogaster' =>'1',
937
938
        );

Sebastien Moretti's avatar
Sebastien Moretti committed
939
940
941
942
    my (@cle) = keys(%orgs);
    foreach my $cle(@cle){
        $cle=~ s/ /_/g;
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
943
944
    return (@cle);
}
945