webblast.pl 34.8 KB
Newer Older
Sebastien Moretti's avatar
Sebastien Moretti committed
1
#!/usr/bin/env perl
2

3
#date : 2007/11/19
Sebastien Moretti's avatar
Sebastien Moretti committed
4
5
#prog : webblast.pl
#subj : make a BLAST/WU-BLAST (by HTTP request or locally) against a database with a file containing sequences in fasta format
Sebastien Moretti's avatar
Sebastien Moretti committed
6
####### method genid, pdbid and profile
7

Sebastien Moretti's avatar
Sebastien Moretti committed
8
#############################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
9
use LWP::UserAgent;
10
11
use HTTP::Request::Common qw(POST);
use Getopt::Long;
Sebastien Moretti's avatar
Sebastien Moretti committed
12
use URI::Escape;
13
14
use strict;
use warnings;
15
##############################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
16
17


18
19
############################  EXPRESSO PARAM  #################################################
my $database_expresso  = 'pdb';                                   #PDB database name          #
20
my $blast_dir_expresso = 'blastall';                              #blastall executable        #
21
###############################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
22

23
my $runblast = 'runblast.pl';
Sebastien Moretti's avatar
Sebastien Moretti committed
24

Sebastien Moretti's avatar
Sebastien Moretti committed
25
my(@list_encoded)=(), my(@list_pdb)=(), my(%deja_vu)=(), my(@pdb_list)=(), my($i)=0, my(@names)=(), my $locale=0, my $distant=0, my $database, my $blast_way;
Sebastien Moretti's avatar
Sebastien Moretti committed
26
27
my($ua)= LWP::UserAgent->new;

28

Sebastien Moretti's avatar
Sebastien Moretti committed
29
30
31
32
##-- Environmental Variables

my ($database_var) = $ENV { 'DATABASE' };
my ($blast_var)    = $ENV { 'BLAST_DIRECTORY' };
Sebastien Moretti's avatar
Sebastien Moretti committed
33

Sebastien Moretti's avatar
Sebastien Moretti committed
34
##-- Get BLAST Options/parameters && check user options
Sebastien Moretti's avatar
Sebastien Moretti committed
35

Sebastien Moretti's avatar
Sebastien Moretti committed
36
37
38
my ($program, $database_line, $blast_line, $query_file,
   $out_file, $identity_treshold, $cover_tresh, $Eval,
   $align, $matrix, $filter, $method, $orgn, $process, $quiet, $gigablast) = &OPTIONS_GET();
Sebastien Moretti's avatar
Sebastien Moretti committed
39

Sebastien Moretti's avatar
Sebastien Moretti committed
40
##-- Define local or remote BLAST && Check database/program
Sebastien Moretti's avatar
Sebastien Moretti committed
41

42
43
unless (-e $query_file ) { print {*STDERR} "\nfile does not exist!\n";exit;}
unless (-s $query_file ) { print {*STDERR} "\nyour file is empty!\n";exit; }
Sebastien Moretti's avatar
Sebastien Moretti committed
44

Sebastien Moretti's avatar
Sebastien Moretti committed
45
46
47
48
49
50
if ( ($database_line || $database_var) && ($blast_line || $blast_var) ){
    if ( $database_line=~/expressopdb/ && $blast_line=~/blastexpresso/ ){
        #special mode for configuration file of Expresso server
        $locale   = 1;
        $database = 'expressopdb';
        unless ( $quiet=~ /on/i ){
51
            print "\nRUN BLAST LOCALLY\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
52
        }
53
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
54
55
56
57
58
59
    else{
        ($database)    = $database_line || $database_var;
        my ($blast_tp) = $blast_var || $blast_line;
        $locale = 1;
        $blast_way = &CONTROLE_DB_PG($database, $blast_tp, $program);
        unless ( $quiet=~ /on/i ){
60
            print "\nRUN BLAST LOCALLY\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
61
        }
62
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
63
}
Sebastien Moretti's avatar
Sebastien Moretti committed
64
65
66
67
68
69
70
else{
    $database = &NCBI_DATABASE($database_line);
    $distant  = 1;
    if ( $gigablast=~ /^yes$/i ){
        $locale  = 2;
        $distant = 0;
        unless ( $quiet=~ /on/i ){
71
            print "\nRUN ON GIGABLASTER\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
72
73
74
75
        }
    }
    else{
        unless ( $quiet=~ /on/i ){
76
            print "\nRUN BLAST AT THE NCBI\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
77
78
79
80
81
        }
    }
}

##- Define parameters through -method flag
82

Sebastien Moretti's avatar
Sebastien Moretti committed
83
84
85
if ( $method=~ /^pdbid$/i ){
    if ( $gigablast=~ /^yes$/i ){
        if ( $database ne 'pdb' ){
86
            print {*STDERR} "\nprovide a valid database name to RUN ON GIGABLASTER: nr, pdb or refseq_protein\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
87
88
89
90
91
92
93
94
95
            exit 1;
        }
        else {
            $database = 'pdbaa';
        }
    }
    elsif ( $gigablast=~ /^no$/i && $distant==1 ){
        $database = 'pdb';
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
96
}
Sebastien Moretti's avatar
Sebastien Moretti committed
97
98
99
100
101
elsif ( $method=~ /^geneid$/i ){
    if ( $distant==1 ){
        unless ( $database eq 'nr' || $database eq 'swissprot' || $database eq 'pdb'){
            $database = 'refseq_protein';
        }
102
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
103
}
Sebastien Moretti's avatar
Sebastien Moretti committed
104
105
106
107
108
109
elsif ( $method=~/^profile$/i ){
    if ( $distant==1 ){
        unless ( $database eq 'nr' || $database eq 'swissprot' || $database eq 'refseq_protein' ){
            $database = 'pdb';
        }
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
110
}
Sebastien Moretti's avatar
Sebastien Moretti committed
111
112
else {
    die "unknown method\n";
113
114
}

Sebastien Moretti's avatar
Sebastien Moretti committed
115
116
117
118
if ( $orgn !~ /All\+organisms/ && $locale=~/1|2/ ){
    print {*STDERR} "-organism option can't be used locally or with -gigablast option!\n";
    exit 1;
}
Sebastien Moretti's avatar
Sebastien Moretti committed
119

Sebastien Moretti's avatar
Sebastien Moretti committed
120
121
122
123
124
##---PRINT option values
unless ( $quiet =~ /on/i ){
    print {*STDERR} "

             Program :  $program
Sebastien Moretti's avatar
Sebastien Moretti committed
125
             Database : $database
Sebastien Moretti's avatar
Sebastien Moretti committed
126
127
             Method :   $method

Sebastien Moretti's avatar
Sebastien Moretti committed
128
             Query_file : $query_file
Sebastien Moretti's avatar
Sebastien Moretti committed
129
130
131
132
133
134
             Out_file :   $out_file
";
    print {*STDOUT} "
             Evalue threshold :         $Eval
             Matrix :                   $matrix
             Filter :                   $filter
Sebastien Moretti's avatar
Sebastien Moretti committed
135
             Blast_identity_threshold : $identity_treshold
Sebastien Moretti's avatar
Sebastien Moretti committed
136
137
             Cover threshold :          $cover_tresh
";
138
    print {*STDERR} "
Sebastien Moretti's avatar
Sebastien Moretti committed
139
140
141
142
143
144
145
146
147
148
149
             Number of hits :            $align
             Number of processors used : $process
";
    if ( $gigablast=~ /^yes$/i ){
        print {*STDERR} "\n             gigablast: yes\n";
    }
    unless ($locale) {
        print {*STDERR} "\n             Organism : $orgn\n";
    }


150
    print "\n***************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
151
152
}

153

Sebastien Moretti's avatar
Sebastien Moretti committed
154
155
156
157
158
159
160
161
162
163
#-- Local/Remote BLASTP

if ( $locale==1 || $locale==2 ){
    @list_pdb = &LOCAL_BLAST($blast_way, $database, $query_file, $Eval, $align, $method, $matrix, $filter,
                             $process, $gigablast, $database_expresso, $blast_dir_expresso, $runblast);
}
elsif ( $distant==1 ){
    @list_pdb = &WEB_BLAST($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter);
}
else {
164
    die " Report bug to poirot\@igs.cnrs-mrs.fr\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
165
}
Sebastien Moretti's avatar
Sebastien Moretti committed
166
167

#-- PARSE BLAST RESULTS -> MAKE A PDB_ID LIST
Sebastien Moretti's avatar
Sebastien Moretti committed
168
169
170
171
if ( $method =~ /^pdbid$/i ){
    my (@result_sort) = &PARSING(\@list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast);
    &AFFICHAGE_PDB_PARSING(\@result_sort, $cover_tresh, $identity_treshold, $out_file);
    exit 0;
Sebastien Moretti's avatar
Sebastien Moretti committed
172
173
174
}

#-- PARSE BLAST RESULT -> MAKE LIST OF REFSEQ ID
Sebastien Moretti's avatar
Sebastien Moretti committed
175
176
177
178
elsif ( $method =~ /^geneid$/i ){
    my (@result_sort) = &PARSING(\@list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast);
    &AFFICHAGE_REFSEQ_PARSING(\@result_sort, $cover_tresh, $identity_treshold, $out_file);
    exit 0;
Sebastien Moretti's avatar
Sebastien Moretti committed
179
180
181
}

#-- PARSE BLAST RESULT -> MAKE PROFILE
Sebastien Moretti's avatar
Sebastien Moretti committed
182
183
184
185
186
187
188
elsif ( $method=~ /^profile$/i ){
    &PROFILE(\@list_pdb, $out_file, $distant);
    exit 0;
}
else {
    die " \nFATAL ERROR : Method or database error\n";
}
189

Sebastien Moretti's avatar
Sebastien Moretti committed
190
exit 0;
191

Sebastien Moretti's avatar
Sebastien Moretti committed
192
193
194
                                               ##############
###############################################  FONCTIONS  ####################################################################
                                              ##############
Sebastien Moretti's avatar
Sebastien Moretti committed
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
sub CONTROLE_DB_PG {
    my ($database, $blast_dir, $program) = @_;

    if ( !-e "$database" && !-e "$database.pin"){
        die "$database file does not exist\n";
    }
    if ( -d $database ){
        die "$database must be a file, not a directory\n";
    }
    if ( $blast_dir !~ /\/$/ ){
        $blast_dir .= '/';
    }
    my ($blastall) = $blast_dir.'blastall';
    if ( !-e $blastall){
        die "$blastall program not found \n";
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
211

212
    return ($blastall);
Sebastien Moretti's avatar
Sebastien Moretti committed
213
214
215
}

#-------------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
sub NCBI_DATABASE {
    my ($ncbi_db) = @_;

    my (%all_db) =
    (
            'nr'             =>'1',
            'pdb'            =>'1',
            'swissprot'      =>'1',
            'refseq_protein' =>'1',
    );

    if ( exists $all_db{$ncbi_db} ){
        return($ncbi_db);
    }
    elsif ( $ncbi_db eq '' ){
        return ('');
    }
    else{
        return (1);
    }
236
}
Sebastien Moretti's avatar
Sebastien Moretti committed
237
#------------------------------------------------------------------------------------------------------------------------
238
sub HELP {
Sebastien Moretti's avatar
Sebastien Moretti committed
239
240
    my ($org, @orga) = &LIST_ORGA();
    my ($list_orga)  = join(', ', @orga);
Sebastien Moretti's avatar
Sebastien Moretti committed
241

242
243
    print {*STDERR} "
                      usage: $0 -infile <fasta file> -method <pdbid/geneid or profile> options []\n
Sebastien Moretti's avatar
Sebastien Moretti committed
244
245

                            -program ...... Program Name (blastp)
Sebastien Moretti's avatar
Sebastien Moretti committed
246
                                            Default = blastp
247
                            -database ..... Database at NCBI (nr, pdb, swissprot, refseq_protein) or indicate a local fasta file
Sebastien Moretti's avatar
Sebastien Moretti committed
248
249
250
251
                                            Default = pdb at NCBI
                            -infile ....... Query_file = a list of sequences in fasta format
                            -outfile ...... Name the outfile to make a template file for t_coffee
                                            Default = STDOUT or default.profile if method is profile
252
253
                            -evalue ....... Evalue threshold Default = 1
                            -matrix ....... PAM30 PAM70 BLOSUM45 BLOSUM80
Sebastien Moretti's avatar
Sebastien Moretti committed
254
                                            Default BLOSUM62
255
                            -method ....... geneid, pdbid, profile
256
                            -gigablast .... yes/no FASTER REMOTE BLAST with Gigablaster
Sebastien Moretti's avatar
Sebastien Moretti committed
257
                                            (Stephane Audic program: http://www.igs.cnrs-mrs.fr/public_g/remoteblast.cgi)
Sebastien Moretti's avatar
Sebastien Moretti committed
258
                                            Default no
259
                            -filter ....... T or F locally, L or R or M or C or V for distant blast
Sebastien Moretti's avatar
Sebastien Moretti committed
260
                                            Default = Off
261
262
                            -organism ..... $list_orga are available
                                            Default is All_organisms
Sebastien Moretti's avatar
Sebastien Moretti committed
263
                            -identity ..... blast identity threshold = provide a % for view only the results upper or equal to the threshold
264
265
266
                                            Default 50
                            -cover ........ Cover threshold = provide a % : sequence covering Default: 30
                            -hits ........  Number of hits
Sebastien Moretti's avatar
Sebastien Moretti committed
267
                                            Default = 1
268
269
                            -processor .... Number of processors to use
                                            Default = 1
270
                            -blast_dir .... Indicates where your BLAST directory is installed locally
271
272
                            -quiet ........ on : do not display all the default/defined blast parameters
                                            Default off
Sebastien Moretti's avatar
Sebastien Moretti committed
273

274
275
                     Environment Variables
                     These variables can be set from the environment
276
277
           DATABASE......................[Indicates where your database file must be fetched (locally)]
           BLAST_DIRECTORY...............[Indicates where your BLAST directory is installed locally]
278

Sebastien Moretti's avatar
Sebastien Moretti committed
279
";
Sebastien Moretti's avatar
Sebastien Moretti committed
280
281
    exit 1;
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
282
283
}
#-----------------------------------------------------------------------------------------
284
sub OPTIONS_GET {
Sebastien Moretti's avatar
Sebastien Moretti committed
285
    my %opt = ();
Sebastien Moretti's avatar
Sebastien Moretti committed
286
287

    GetOptions
Sebastien Moretti's avatar
Sebastien Moretti committed
288
              (
289
290
291
292
293
294
295
296
297
298
299
300
           'infile=s'    =>\$opt{infile},
           'outfile=s'    =>\$opt{outfile},
           'program=s'     =>\$opt{program},
           'database=s'     =>\$opt{database},
           'blast_dir=s'     =>\$opt{blast_dir},
           'identity=f'       =>\$opt{treshold},
           'cover=f'           =>\$opt{cover},
           'evalue=f'           =>\$opt{evalue},
           'hits=i'              =>\$opt{hits},
           'matrix=s'             =>\$opt{matrix},
           'filter=s'              =>\$opt{filter},
           'method=s'               =>\$opt{method},
301
           'organism=s'              =>\$opt{organism},
302
303
304
305
           'processor=i'              =>\$opt{processor},
           'quiet=s'                   =>\$opt{quiet},
           'gigablast=s'                =>\$opt{gigablast},
           );
Sebastien Moretti's avatar
Sebastien Moretti committed
306
307
308
309

    if ( $ARGV[0] ){
        print "Unprocessed by Getopt::Long\n $ARGV[0]\n";
        &HELP();
310
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332

    my($evalue_tresh) = $opt{'evalue'};       unless ($evalue_tresh)        { $evalue_tresh = 1;};
    my($cover_tresh)  = $opt{'cover'};         unless (defined $cover_tresh) { $cover_tresh = 30;};
    my($query_file)   = $opt{'infile'};         unless ($query_file)          { print {*STDERR} "Flag -infile must be defined\n"; &HELP();};
    my($outfil)       = $opt{'outfile'};         unless ($outfil)              { $outfil = '';};
    my($treshold)     = $opt{'treshold'};         unless (defined $treshold)    { $treshold = 50;};
    my($blast_dir)    = $opt{'blast_dir'};         unless ($blast_dir)           { $blast_dir = '';};
    my($database)     = $opt{'database'};           unless ($database)            { $database = '';};
    my($program)      = $opt{'program'};             unless ($program)             { $program = 'blastp';};
    my($align)        = $opt{'hits'};                 unless (defined $align)       { $align = 1;};
    my($matrix)       = $opt{'matrix'};                unless ($matrix)              { $matrix = 'BLOSUM62';};
    my($filter)       = $opt{'filter'};                 unless ($filter)              { $filter = 'F';};
    my($method)       = $opt{'method'};                  unless ($method)              {print {*STDERR} "Flag -method must be defined\n"; &HELP();};
    my($organism)     = $opt{'organism'};                 unless ($organism)            { $organism = 'All organisms';};
    my($process)      = $opt{'processor'};                 unless ($process)             { $process = 1;};
    my($param)        = $opt{'quiet'};                      unless ($param)               { $param = 'off';};
    my($gigablast)    = $opt{'gigablast'};                   unless ($gigablast)           {$gigablast = 'no';};

    if ( $method !~ /(^geneid$|^pdbid$|^profile$)/i ){
        print {*STDERR} "unknown method for the flag -method\n";
        &HELP();
    }
333
    if ( $treshold<0 || $treshold>100 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
334
335
336
        print {*STDERR} "\nout of range for the option -treshold \n";
        &HELP();
    }
337
    if ( $cover_tresh<0 || $cover_tresh>100 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
338
339
340
        print {*STDERR} "\nout of range for the option -cover \n";
        &HELP();
    }
341
    if ( $align<0 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
342
343
344
        print {*STDERR} "\n error with option align\n";
        &HELP();
    }
345
    if ( $gigablast !~ /^yes$|^no$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
346
347
348
        print {*STDERR} "invalid argument for gigaglast option: yes/no\n";
        exit 1;
    }
349
    if ( $filter !~ /^[TFRLMCV]{1}$|^off$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
350
351
352
        print {*STDERR} "valid values for -filter are T,F,R,L,M,C,or V!\n";
        exit 1;
    }
353
    if ( $matrix !~ /PAM30|PAM70|BLOSUM45|BLOSUM80|BLOSUM62/ ){
Sebastien Moretti's avatar
Sebastien Moretti committed
354
355
356
        print {*STDERR} "valid values for -matrix are PAM30,PAM70,BLOSUM45,BLOSUM80 or BLOSUM62\n";
        exit 1;
    }
357
    if ( $outfil eq '' && $method=~ /^profile$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
358
359
360
        $outfil = 'default_profile.template';
    }

361
    if ( $param!~ /^on$|^off$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
362
363
364
365
366
367
368
        print {*STDERR} "valid values for -quiet is on or off\n";
        exit 1;
    }
    my ($orgn, @all_orgn) = &ORGN($organism);
    return ($program,$database,$blast_dir,$query_file,
            $outfil,$treshold,$cover_tresh,$evalue_tresh,
            $align,$matrix,$filter,$method,$orgn,$process,$param,$gigablast);
Sebastien Moretti's avatar
Sebastien Moretti committed
369
370
371
}

#--------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
372
373
sub RECOVER {
    my ($pdb_result, $aln_length, $length_query) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
374
    my $nb_gap=0;
Sebastien Moretti's avatar
Sebastien Moretti committed
375
376
377
378
379

    if ( $pdb_result=~ /(score.+?\n\n\n).+?score/ism ){#If several HSP, take the 1st
        $pdb_result = $1;
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
380
    $length_query  =~ s/,//g;
Sebastien Moretti's avatar
Sebastien Moretti committed
381
    my ($requete)  =  join('', ($pdb_result=~/^Query(.*)\n/gm) );
Sebastien Moretti's avatar
Sebastien Moretti committed
382
    $requete       =~ s/[^A-Z-]//g;
Sebastien Moretti's avatar
Sebastien Moretti committed
383
384
385
386
387
388
    my (@sequence) =  split('', $requete);

    for (my $i=0; $i<=$#sequence; $i++){
        if ( $sequence[$i] eq '-'){
            ++$nb_gap;
        }
Sebastien Moretti's avatar
Sebastien Moretti committed
389
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
390
    my ($recouvrement) = sprintf("%-3d", (($aln_length-$nb_gap)/$length_query)*100 );
Sebastien Moretti's avatar
Sebastien Moretti committed
391
    undef(@sequence);
Sebastien Moretti's avatar
Sebastien Moretti committed
392
    return ($recouvrement, $nb_gap);
Sebastien Moretti's avatar
Sebastien Moretti committed
393
394
}
#--------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
sub WEB_BLAST {
    open (my $SOR1, '>', 'web_tempo.result') or die;
    my ($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter) = @_;
    my $aln_view;
    my $format = 'Txt';
    my ($description) = $align;
    if ( $method=~/^profile$/i ){
        $aln_view = 'FlatQueryAnchoredNoIdentities';
    }
    else {
        $aln_view = 'Pairwise';
    }

    if ( $filter eq 'F' ){
        $filter = 'off';
    }

    $/ = '>';
    open( my $FIC, '<', $query_file) or die "cannot open $query_file $!\n";
    my (@sequences) = <$FIC>;
    close $FIC;
Sebastien Moretti's avatar
Sebastien Moretti committed
416
417
    shift(@sequences);

Sebastien Moretti's avatar
Sebastien Moretti committed
418
419
420
421
422
423
424
425
426
427
    foreach my $sequence(@sequences){
        $sequence =~ s/>//g;
        $sequence = ">$sequence";

        my ($name) = ($sequence =~ /^>(.+)\n/);
        push(@names, $name);
        my ($encoded_query) = uri_escape($sequence);
        push (@list_encoded, $encoded_query);
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
428
    undef(@sequences);
Sebastien Moretti's avatar
Sebastien Moretti committed
429
430
431
432
433
    if ( scalar (@names != @list_encoded) ){
        die "error $!";
    }
    foreach my $encoded_seq(@list_encoded){
        my $nb = 0;
434
        print "BLAST $names[$i]...";
Sebastien Moretti's avatar
Sebastien Moretti committed
435

Sebastien Moretti's avatar
Sebastien Moretti committed
436
#-- BUILD THE REQUEST
Sebastien Moretti's avatar
Sebastien Moretti committed
437
438
        my ($arguments) = "CMD=Put&ENTREZ_QUERY=$orgn&CDD_SEARCH=off&FILTER=$filter&MATRIX_NAME=$matrix&PROGRAM=$program&DATABASE=$database&QUERY=" . $encoded_seq;

439
        my ($req) = new HTTP::Request POST => 'https://blast.ncbi.nlm.nih.gov/Blast.cgi';
Sebastien Moretti's avatar
Sebastien Moretti committed
440
441
442
        $req -> content_type('application/x-www-form-urlencoded');
        $req -> content($arguments);

Sebastien Moretti's avatar
Sebastien Moretti committed
443
#-- GET THE RESPONSE : PARSE OUT THE REQUEST ID and THE ESTIMATED TIME
Sebastien Moretti's avatar
Sebastien Moretti committed
444
445
446
447
448
449
450
451
452
453
454
455
456
457
        my ($response) = $ua -> request($req);

        if ( $response -> content =~ /Server Error/i ){
            die "Server Error at NCBI!!Sorry try later\n";
        }
        $response -> content =~ /^\s{4}RID = (.*)$/m;
        my ($rid) = $1;

        $response -> content =~ /^\s{4}RTOE = (.*)$/m;
        my ($wait)= $1;
        unless ($rid && $wait){
            die "parse error: $!";
        }
        for (my $j=0; $j<=$wait/2; $j++){
458
            print '.';
Sebastien Moretti's avatar
Sebastien Moretti committed
459
460
461
462
463
464
465
            sleep 2;
        }

        my ($verif) = 0;

        while (){
            for (my $j=0; $j<=5; $j++){
466
                print '.';
Sebastien Moretti's avatar
Sebastien Moretti committed
467
468
469
470
                sleep 1;
            }

            $req = new HTTP::Request GET =>
471
                "https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=$rid";
Sebastien Moretti's avatar
Sebastien Moretti committed
472
473
474
475
476
477
478
479
480
481
            $response = $ua->request($req);
            if ( $response->content =~ /Status=WAITING/im ){
                next;
            }
            elsif ( $response->content =~ /Status=FAILED/im ){
                print {*STDERR} "Search $rid failed\n";
                $verif = 1;
                last;
            }
            elsif ( $response->content =~ /Status=UNKNOWN/im ){
482
                print {*STDERR} "Search  [$rid]  expired\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
483
484
485
486
487
488
489
490
                $verif = 1;
                last;
            }
            elsif ( $response->content =~ /Status=READY/im ){
                if ( $response->content =~ /ThereAreHits=yes/im ){
                    last;
                }
                else {
491
                    print {*STDERR} "No hits found\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
492
493
494
495
496
                    $verif = 1;
                    last;
                }
            }
            elsif ( $response->content =~ /can\'t connect/im ){
497
                print {*STDERR} "\nCan't connect to blast.ncbi.nlm.nih.gov... new attempt";
Sebastien Moretti's avatar
Sebastien Moretti committed
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
                if ( $nb <3 ){
                    ++$nb;
                    next;
                }
                else {
                    print {*STDERR} "sorry, BLAST $names[$i] failed after 3 attempts!!\n";
                    $verif = 1;
                    last;
                }
            }
            else {
                print {*STDERR} "unknown error\n";
                $verif = 1;
                last;
            }
        }

        if( $verif==1 ){
            ++$i;
            next;
        }

Sebastien Moretti's avatar
Sebastien Moretti committed
520
#-- GET RESULT
Sebastien Moretti's avatar
Sebastien Moretti committed
521
522
523

        while (){
            sleep 3;
524
        $req = new HTTP::Request GET => "https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Get&FORMAT_TYPE=$format&FILTER=off&EXPECT=$Eval&ALIGNMENTS=$align&DESCRIPTIONS=$align&ALIGNMENT_VIEW=$aln_view&RID=$rid";
525
526
        $response = $ua -> request($req);

527
        if   ($response->content =~ /Altschul/i) {  print "Search Complete\n"; push(@list_pdb,$response -> content);last; }
528
529
530
531
532
533
        else { next; }
    }
    print {$SOR1} (@list_pdb);
    ++$i;
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
534
    undef (@list_encoded);
535

Sebastien Moretti's avatar
Sebastien Moretti committed
536
    close $SOR1;
Sebastien Moretti's avatar
Sebastien Moretti committed
537
    return (@list_pdb);
538

Sebastien Moretti's avatar
Sebastien Moretti committed
539
540
541
542
}
#-----------------------------------------------------------------------------------------------------------------------
sub LOCAL_BLAST
{
Sebastien Moretti's avatar
Sebastien Moretti committed
543
544
545
546
    my ($blast_dir, $database, $query_file, $Eval, $align,
        $method, $matrix, $filter, $process, $gigablast,
        $database_expresso, $blast_dir_expresso, $runblast) = @_;
    my $n = 0;
547
    if ( $method=~ /^profile$/i && $gigablast=~ /^no$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
548
        open (COM,"$blast_dir -p blastp -d $database -i $query_file -m 6 -M $matrix -v $align -b $align -F $filter -e $Eval -a $process|") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
549
    }
550
551
    elsif ( $method=~ /^geneid$/i && $gigablast=~ /^no$/i )
    {
Sebastien Moretti's avatar
Sebastien Moretti committed
552
        open (COM,"$blast_dir -p blastp -d $database -i $query_file  -v $align  -b $align -F $filter -M $matrix -e $Eval -a $process|") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
553
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
554
    elsif ( $method=~ /^geneid$|^pdbid$/i && $gigablast=~ /^yes$/i )
Sebastien Moretti's avatar
Sebastien Moretti committed
555
    {
556
        unless ( $database eq 'nr' || $database eq 'pdb' || $database eq 'refseq_protein' || $database eq 'pdbaa' ){
Sebastien Moretti's avatar
Sebastien Moretti committed
557
558
559
            print {*STDERR} "\nsorry invalid database for gigablast\n";exit 1;
        }
        if ( $database eq 'pdb')            { $database = 'pdbaa';}
560
#        if ( $database eq 'refseq_protein') { $database = 'refseq_protein';}
Sebastien Moretti's avatar
Sebastien Moretti committed
561
562
        if ( $database eq '')               { print {*STDERR} "provide a valid database!\n"; exit;}
        open (COM,"$runblast -d $database -p blastp  -e $Eval -v $align -F F \<$query_file |");
563
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
564
565
    elsif ( $method=~ /^profile$/i && $gigablast=~ /^yes$/i){
        print {*STDERR} "\nSorry method profile can't be used with -gigablast option\n";exit;
Sebastien Moretti's avatar
Sebastien Moretti committed
566
    }
567
568
    elsif ( $method=~ /^pdbid$/i && $database=~ /expressopdb/ ){
        #BLAST pour Expresso
Sebastien Moretti's avatar
Sebastien Moretti committed
569
        open (COM,"$blast_dir_expresso -p blastp -d $database_expresso -i $query_file -F $filter -e $Eval -M $matrix -v $align -b $align |") or die;
570
571
    }
    else{
Sebastien Moretti's avatar
Sebastien Moretti committed
572
        open (COM,"$blast_dir -p blastp -d $database -i $query_file -v 1 -b 1 -F $filter -e $Eval -M $matrix -v $align -b $align -a $process |") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
573
    }
574

575
    unless ( $quiet=~ /on/ ) { print "\nrun BLAST..."; }
576

Sebastien Moretti's avatar
Sebastien Moretti committed
577
    my ($name_database, $posted, $version) = ('', '', '');
Sebastien Moretti's avatar
Sebastien Moretti committed
578

Sebastien Moretti's avatar
Sebastien Moretti committed
579
    open (my $SOR2, '>', 'blast_result.txt') or die;
580

Sebastien Moretti's avatar
Sebastien Moretti committed
581
582
583
584
585
586
    $/ = 'Query=';
    while (<COM>){
        if ( $_=~ /Database:\s+(\S+)/g )     { $name_database = $1;}
        if ( $_=~ /Posted date:\s+(.+?)\n/ ) { $posted        = $1;}
        if ( $_=~ /(BLASTP\s+\S+)/o )        { $version       = $1;}
        print {$SOR2} $_;
Sebastien Moretti's avatar
Sebastien Moretti committed
587
        push (@list_pdb, $_);
588
#        if ( $_=~ /\s*(.+?)\s/ ){
589
#            print "\n$1 done";
590
#        }
Sebastien Moretti's avatar
Sebastien Moretti committed
591
592
    }
    close COM;
Sebastien Moretti's avatar
Sebastien Moretti committed
593
    close $SOR2;
594
#    print "\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
595

Sebastien Moretti's avatar
Sebastien Moretti committed
596
    $name_database = $database if ( $name_database =~ m{/} );
597
    unless ($quiet=~ /on/i) {
Sebastien Moretti's avatar
Sebastien Moretti committed
598
599
600
        print {*STDOUT} "
             Version:     $version
             Database:    $name_database
Sebastien Moretti's avatar
Sebastien Moretti committed
601
             Posted date: $posted\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
602
    }
603
    shift (@list_pdb);
Sebastien Moretti's avatar
Sebastien Moretti committed
604
605
606
607
    return (@list_pdb);
}

#-----------------------------------------------------------------------------------------------------------------------------
608
sub PARSING {
Sebastien Moretti's avatar
Sebastien Moretti committed
609
    my ($list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
610

611
612
613
614
    my (@list_pdb)        = @$list_pdb;
    my (@result_not_sort) = ();
    my $n = 0;
    open (my $SOR, '>', 'webblast.log') or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
615

616
617
618
    if ( $gigablast=~ /^yes$/i ) { $locale = 2;$distant = 0;}
    if ( $gigablast=~ /^no$/i )  { $locale = 1;}
    if ( $distant==1 )           { $locale = 0;}
Sebastien Moretti's avatar
Sebastien Moretti committed
619

620
621
622
623
624
625
    foreach my $pdb_result(@list_pdb){
        my $query, my $length_query, my($pdb_id), my $comp=0;
        if ( $pdb_result=~/No hits found/m ){
            print {$SOR} $pdb_result;
            next;
        }
626

627
628
629
630
631
632
633
634
635
        $pdb_result =~ s/ALIGNMENTS//;
        local $/ = undef;
        my (@intra_res) = split(/(?=\n\n>)/s, $pdb_result);


        if ( $distant==1 ){
            my $version_d, my $database_d, my $poste_d;
            undef $/; ($query, $length_query) = ( $intra_res[0] =~ /Query=\s+(\S+)\s+Length=\s*(\d+)/smo );
            $/ = "\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
636
            open (F3, '<', 'web_tempo.result') or die;
637
638
639
640
641
642
643
644
645
            while ($_=<F3>){
                if ($_=~ /(BLASTP\s+\S+)/o)        { $version_d  = $1;}
                if ($_=~ /Database:\s+(.+?)$/o)    { $database_d = $1;}
                if ($_=~ /Posted date:\s*(.+?)$/o) { $poste_d    = $1; last;}
            }
            close F3;
#           $database_d = $database if ( $database_d =~ m{/} );
            unless ($quiet=~ /on/i || $n>0){
                ++$n;
646
                print {*STDOUT} "
Sebastien Moretti's avatar
Sebastien Moretti committed
647
648
             Version:     $version_d
             Database:    $database_d
Sebastien Moretti's avatar
Sebastien Moretti committed
649
             Posted date: $poste_d\n\n";
650
651
652
653
654
            }
        }
        else{
            ($query, $length_query) = ( $intra_res[0] =~ /\s*(.+?)\s.+?\(([\d,]+) letters/smo );
        }
655

656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
        shift(@intra_res) if ( exists($intra_res[1]) );

        foreach my $intra_res(@intra_res){ #look for the different results of the query
            my($aln_length, $identity) = ( $intra_res=~ /^\sIdentities = \d+\/(\d+)\s\((.+?)\)/im );
            my($recouvrement, $gap)    = &RECOVER($intra_res, $aln_length, $length_query);
            my($evalue)                = ( $intra_res=~ /Expect = (.+?)\s/im );
            my($bits)                  = ( $intra_res=~ /Score =\s+([\d.]+)\s/im );

            unless ( $method !~ /^geneid$/i ){
                if ( $comp<=$bits ){
                    $comp=$bits;
                }
                else{
                    last;
                }
            }

            if ( $query eq '' || $length_query eq '' || $aln_length eq '' || $identity eq '' || $recouvrement eq '' || $gap eq '' ){
                print {$SOR} " can't parse $pdb_result";
                next;
            }

            if ( $method =~ /^pdbid$/i ){
                if ( $locale==1 ){
                    ($pdb_id) = ( $intra_res=~ /^>(.{6})/im );
                    $pdb_id   =~ s/_//;
                    $pdb_id   = uc($pdb_id);
                }
                else{
                    ($pdb_id) = ( $intra_res=~ /^>pdb\|(.{6})/im );
                     $pdb_id  =~ s/\|//;
                }
                ($evalue) = ( $intra_res=~ /Expect = (.+?)\s/im );

                push (@result_not_sort, "$query\t$pdb_id\t$evalue\t$identity\t$recouvrement\t");
            }
            elsif ( $method =~/^geneid$/i ){
                if ( $database !~ /pdb/i && $database !~ /swiss/i && $locale =~ /1|2/){
                    while ( $intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/sg ){
                        my $databank = $1;
                        my $last     = $4;
                        my $refseq   = $2;
                        if ( $databank eq 'pdb' ){
                            $refseq .= $last;
                        }
                        $refseq =~ s/\.\d+$//;
                        push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
                    }
                }
                elsif ( $database=~ /pdb|pdbaa/i && ($locale==1 || $locale==2) ){
                    my $refseq;
                    if ( $locale==1 ){
                        ($refseq) = ( $intra_res=~ />(.*?)\s/o );
                        $refseq   =~ s/_//;
                    }
                    else{
                        ($refseq) = ( $intra_res=~ /^>pdb\|(.{6})/im );
                        $refseq   =~ s/\|//;
                    }

                    unless ($refseq){
                        print {$SOR} $intra_res;
                        next;
                    }
                    push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tpdb");
                }
                elsif ( $distant==1 ){
                    my ($resul) = &MULTI_EQUIVALENT($query, $identity, $recouvrement, $bits, $evalue, $intra_res);
                    push (@result_not_sort, $resul);
                }
                elsif ( $database=~ /swiss/i ){
                    my ($refseq) = ( $intra_res=~ />.*?sp\|(.+?)\|/o );
                    unless ($refseq){
                        print {$SOR} $pdb_result;
                        next;
                    }
                    $refseq =~ s/\.\d+$//;
                    push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\tswiss_prot");
                }
            }
            else {die;}
        }
738
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
739
    close $SOR;
Sebastien Moretti's avatar
Sebastien Moretti committed
740
    undef (@list_pdb);
741

742
743
744
745
746
747
748
749
750
751
    if ( $method =~/^geneid$/i ){
        return (@result_not_sort);
    }
    else{
        my(@result_sort) = map  { $_->[1] }
                           sort { $b->[0]<=>$a->[0] }
                           map  { [/\t([\d.]+)%/,$_] }
                           @result_not_sort;
        undef(@result_not_sort);
        return (@result_sort);
752
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
753
754
755
756
}

#-------------------------------------------------------------------------------------------------------------------

Sebastien Moretti's avatar
Sebastien Moretti committed
757
758
sub MULTI_EQUIVALENT {
    my($query, $identity, $recouvrement, $bits, $evalue, $intra_res) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
759
760

    my @result=();
Sebastien Moretti's avatar
Sebastien Moretti committed
761
762
763
764
    while ( $intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/g ){
        my $databank = $1;
        my $last     = $4;
        my $refseq   = $2;
765

Sebastien Moretti's avatar
Sebastien Moretti committed
766
767
768
        if ( $databank eq 'pdb' ){ $refseq .= $last; }
        $refseq =~ s/\.\d+$//;
        push (@result, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
Sebastien Moretti's avatar
Sebastien Moretti committed
769
770
771
772
    }
    return (@result);
}
#--------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
sub AFFICHAGE_REFSEQ_PARSING {
    my($result_sort, $cover_tresh, $identity_treshold, $out_file) = @_;
    my(@result_sort) = @$result_sort, my(@name_gid) = ();
    my@resultats = ();
    my $afficher = '';

    (my($entete)= sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s", 'Sequence Name', 'Accession number', 'Databank', "%Identity", "%Cover", 'BITS', 'Evalue'));

    foreach my $result_sort(@result_sort){
        my($seq_name, $refseq_name, $identiq, $cover, $bits, $evalue, $bank) = split("\t", $result_sort);
        $evalue =~ s/,$//; #To remove an additional comment with new blast release (2.2.17)
        ($identiq) = split(/%/,$identiq);

        if ($identiq >= $identity_treshold && $cover >= $cover_tresh){
            push (@name_gid, ">$seq_name\@$bank\_\_$refseq_name\n");
            (($afficher) .=  sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s ", $seq_name, $refseq_name, $bank, $identiq, $cover, $bits, $evalue));
            $afficher .= "\n";
        }
        else {next;}
Sebastien Moretti's avatar
Sebastien Moretti committed
792
793
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
794
    if ($afficher) { print "\n$entete\n\n"; print $afficher; }
Sebastien Moretti's avatar
Sebastien Moretti committed
795
796


Sebastien Moretti's avatar
Sebastien Moretti committed
797
798
799
800
801
    if (@name_gid) { print {*STDOUT} "\n**********************************************************************\n\n"; }
    if ($out_file) { open (SOR,">$out_file") or die "can not open $out_file"; print SOR @name_gid; }
    print {*STDOUT} "\n", @name_gid;
    close SOR;
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
802
803
804
}
#-------------------------------------------------------------------------------------------------------------

Sebastien Moretti's avatar
Sebastien Moretti committed
805
806
sub AFFICHAGE_PDB_PARSING {
    my($result_sort, $cover_tresh, $identity_treshold, $out_file) = @_;
807

Sebastien Moretti's avatar
Sebastien Moretti committed
808
    my(@result_sort) = @$result_sort, my @sortie=();
809

Sebastien Moretti's avatar
Sebastien Moretti committed
810
    print {*STDOUT} "\n\n",(my($en_tete) = sprintf("%-40s %-10s %-10s %-12s %-10s", 'Sequence Name', 'PDB_id', 'Evalue', 'Identity(%)', 'Cover(%)')),"\n\n";
811

Sebastien Moretti's avatar
Sebastien Moretti committed
812
813
814
    foreach my $result_sort(@result_sort){
        my($seq_name, $pdb_name, $EValue, $identiq, $cover) = split("\t", $result_sort);
        ($identiq)                                          = split(/%/, $identiq);
815

Sebastien Moretti's avatar
Sebastien Moretti committed
816
817
818
819
820
821
822
        if ( $identiq >= $identity_treshold && $cover >= $cover_tresh ){
            push (@pdb_list, $pdb_name);
            $EValue =~ s/,$//;
            print {*STDOUT} ((my $afficher) = sprintf("%-40s %-10s %-10s %-12s %-10s", $seq_name, $pdb_name, $EValue, $identiq, $cover)),"\n";
            push (@sortie, ">$seq_name _P_ $pdb_name\n");
        }
        else {next;}
Sebastien Moretti's avatar
Sebastien Moretti committed
823
824
    }
    undef(@result_sort);
825
    print {*STDOUT} "\n**********************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
826
827

#-- OUTFILE /STDOUT
Sebastien Moretti's avatar
Sebastien Moretti committed
828
    if ($out_file){ open (SOR,">$out_file") or die "can not open $out_file"; print SOR @sortie; }
829
    print {*STDOUT} @sortie;
Sebastien Moretti's avatar
Sebastien Moretti committed
830
    close SOR;
Sebastien Moretti's avatar
Sebastien Moretti committed
831
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
832
833
}
#-----------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
834
835
836
837
838
sub PROFILE {
    my($list_pdb, $out_file, $distant) = @_;

    my(@list_pdb) = @$list_pdb, my(@sortie)=();
    my %names = ();   my $i = 0;    my($name) = '';
Sebastien Moretti's avatar
Sebastien Moretti committed
839
840

    open (SOR1,">$out_file") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
841
842
843
844
845
846
    foreach my $pdb_result(@list_pdb){
        if ( $pdb_result =~ /No hits found/i ){ next; }
        else{
            ++$i;
            if ( $distant==1 ){($name) = ($pdb_result =~ /Query=\s*(.+?)Length/smoi) or die "\nparse error in distant profile\n";}
            else              {($name) = ($pdb_result =~ /\s*(.+?)\(.*?letters/ismo) or die "\nparse error in profile\n";}
847

Sebastien Moretti's avatar
Sebastien Moretti committed
848
            my($name1) = ($name=~ /(.+?)\s+$/);
849

Sebastien Moretti's avatar
Sebastien Moretti committed
850
851
852
            open (my $SOR, '>', "tempo_file_profile") or die "can not open tempo_file_profile";
            print {$SOR} "Query= $pdb_result";
            close $SOR;
853

Sebastien Moretti's avatar
Sebastien Moretti committed
854
855
856
            open(COM,"|t_coffee -other_pg seq_reformat -input blast_aln -in tempo_file_profile -output fasta_aln -out ${i}.profile");
            close COM;
            push (@sortie,">$name1 _R_ ${i}.profile\n");
857

Sebastien Moretti's avatar
Sebastien Moretti committed
858
        }
859
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
860
    unlink("tempo_file_profile");
861
862
    undef(@list_pdb);

863
    print "\n**********************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
864
#-- OUTFILE /STDOUT
Sebastien Moretti's avatar
Sebastien Moretti committed
865
    if ($out_file){ open (SOR1,">$out_file") or die "can not open $out_file"; print SOR1 @sortie; }
866
    print {*STDOUT} @sortie;
Sebastien Moretti's avatar
Sebastien Moretti committed
867
    close SOR1;
Sebastien Moretti's avatar
Sebastien Moretti committed
868
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
869
870
871
}

#--------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
872
873
874
875
876
877
878
879
880
881
882
883
884
885
sub ORGN {
    my($organism) = @_;

    $organism =~ s/_/ /;

    my(%orgs) = (

        'Homo sapiens'            =>'1',
        'Bos taurus'              =>'1',
        'Gallus gallus'           =>'1',
        'Viruses'                 =>'1',
        'Bacteria'                =>'1',
        'Eukaryota'               =>'1',
        'Mammalia'                =>'1',
886
        'Vertebrata'              =>'1',
Sebastien Moretti's avatar
Sebastien Moretti committed
887
888
889
890
891
892
893
894
895
        'All organisms'           =>'1',
        'Fungi'                   =>'1',
        'Primates'                =>'1',
        'Archaea'                 =>'1',
        'Arabidopsis thaliana'    =>'1',
        'Caenorhabditis elegans'  =>'1',
        'Escherichia coli'        =>'1',
        'Mus musculus'            =>'1',
        'Drosophila melanogaster' =>'1',
896
        );
Sebastien Moretti's avatar
Sebastien Moretti committed
897

Sebastien Moretti's avatar
Sebastien Moretti committed
898
899
900
901
902
903
904
905
906
    if ( exists $orgs{$organism} ){
        $organism =~ s/ /+/g;
        return ($organism);
    }
    else{
        print {*STDERR} "organism not valid or syntax error, replace space by \"_\" \n";
        &HELP();
        return;
    }
907
}
Sebastien Moretti's avatar
Sebastien Moretti committed
908
909
#------------------------------------------------------------------------------------------------------------------------------------

Sebastien Moretti's avatar
Sebastien Moretti committed
910
sub LIST_ORGA {
Sebastien Moretti's avatar
Sebastien Moretti committed
911
912
913
914
915
916
    my (%orgs) = (

        'Homo sapiens'            =>'1',
        'Bos taurus'              =>'1',
        'Gallus gallus'           =>'1',
        'Viruses'                 =>'1',
917
        'Bacteria'                =>'1',
Sebastien Moretti's avatar
Sebastien Moretti committed
918
919
920
921
922
923
924
925
926
927
928
929
        'Eukaryota'               =>'1',
        'Mammalia'                =>'1',
        'Vertebrata'              =>'1',
        'All organisms'           =>'1',
        'Fungi'                   =>'1',
        'Primates'                =>'1',
        'Archaea'                 =>'1',
        'Arabidopsis thaliana'    =>'1',
        'Caenorhabditis elegans'  =>'1',
        'Escherichia coli'        =>'1',
        'Mus musculus'            =>'1',
        'Drosophila melanogaster' =>'1',
930
931
        );

Sebastien Moretti's avatar
Sebastien Moretti committed
932
933
934
935
    my (@cle) = keys(%orgs);
    foreach my $cle(@cle){
        $cle=~ s/ /_/g;
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
936
937
    return (@cle);
}
938