webblast.pl 35.3 KB
Newer Older
Sebastien Moretti's avatar
Sebastien Moretti committed
1
2
3
#!/usr/bin/env perl
#
#
4
#date : 2007/11/19
Sebastien Moretti's avatar
Sebastien Moretti committed
5
6
#prog : webblast.pl
#subj : make a BLAST/WU-BLAST (by HTTP request or locally) against a database with a file containing sequences in fasta format
Sebastien Moretti's avatar
Sebastien Moretti committed
7
####### method genid, pdbid and profile
Sebastien Moretti's avatar
Sebastien Moretti committed
8
9
10
11
#
#############################################################################################
#use Env qw(HOME);
#use lib "$HOME/.lib_webblast/";
Sebastien Moretti's avatar
Sebastien Moretti committed
12
13
14
use LWP::UserAgent;
use HTML::Parser;                                            # @@@@@@@ #
use HTTP::Request::Common qw(POST);                         # @/^   ^\@ #
Sebastien Moretti's avatar
Sebastien Moretti committed
15
16
17
18
use URI::Escape;                                           # @/ -   - \@ #
use Getopt::Long;                                         ##  \   ^   /  ##
use strict;                                              ##    |  0  |    ##
use warnings;                                           ####### \ _ / #######
19
##############################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
20
21


22
23
24
25
############################  EXPRESSO PARAM  ##########################################################
my $database_expresso  = 'pdb';                                            #PDB database name          #
my $blast_dir_expresso = '/mnt/local/bin/blastall';                        #blastall executable        #
my $BLASTMAT           = 'export BLASTMAT=/mnt/local/ncbi/data/';          #matrix directory for blast #
Sebastien Moretti's avatar
Sebastien Moretti committed
26
my $BLASTDB            = 'export BLASTDB=/scratch/frt/blastnet/database/'; #PDB_seqres directoty       #
27
########################################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
28

29
my $runblast = '/mnt/local/bin/runblast.pl';
Sebastien Moretti's avatar
Sebastien Moretti committed
30

31
my(@list_encoded)=(), my(@list_pdb)=(), my(%deja_vu)=(), my(@pdb_list)=(),my($i)=0, my(@names)=(), my $locale=0, my $distant=0, my $database, my $blast_way;
Sebastien Moretti's avatar
Sebastien Moretti committed
32
33
my($ua)= LWP::UserAgent->new;

34

Sebastien Moretti's avatar
Sebastien Moretti committed
35
36
37
38
##-- Environmental Variables

my ($database_var) = $ENV { 'DATABASE' };
my ($blast_var)    = $ENV { 'BLAST_DIRECTORY' };
Sebastien Moretti's avatar
Sebastien Moretti committed
39

Sebastien Moretti's avatar
Sebastien Moretti committed
40
##-- Get BLAST Options/parameters && check user options
Sebastien Moretti's avatar
Sebastien Moretti committed
41

Sebastien Moretti's avatar
Sebastien Moretti committed
42
43
44
my ($program, $database_line, $blast_line, $query_file,
   $out_file, $identity_treshold, $cover_tresh, $Eval,
   $align, $matrix, $filter, $method, $orgn, $process, $quiet, $gigablast) = &OPTIONS_GET();
Sebastien Moretti's avatar
Sebastien Moretti committed
45

Sebastien Moretti's avatar
Sebastien Moretti committed
46
##-- Define local or remote BLAST && Check database/program
Sebastien Moretti's avatar
Sebastien Moretti committed
47

48
49
unless (-e $query_file ) { print {*STDERR} "\nfile does not exist!\n";exit;}
unless (-s $query_file ) { print {*STDERR} "\nyour file is empty!\n";exit; }
Sebastien Moretti's avatar
Sebastien Moretti committed
50

Sebastien Moretti's avatar
Sebastien Moretti committed
51
52
53
54
55
56
57
58
if ( ($database_line || $database_var) && ($blast_line || $blast_var) ){
    if ( $database_line=~/expressopdb/ && $blast_line=~/blastexpresso/ ){
        #special mode for configuration file of Expresso server
        $locale   = 1;
        $database = 'expressopdb';
        unless ( $quiet=~ /on/i ){
            print {*STDERR} "\nRUN BLAST LOCALY\n";
        }
59
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
60
61
62
63
64
65
66
67
    else{
        ($database)    = $database_line || $database_var;
        my ($blast_tp) = $blast_var || $blast_line;
        $locale = 1;
        $blast_way = &CONTROLE_DB_PG($database, $blast_tp, $program);
        unless ( $quiet=~ /on/i ){
            print {*STDERR} "\nRUN BLAST LOCALY\n";
        }
68
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
69
}
Sebastien Moretti's avatar
Sebastien Moretti committed
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
else{
    $database = &NCBI_DATABASE($database_line);
    $distant  = 1;
    if ( $gigablast=~ /^yes$/i ){
        $locale  = 2;
        $distant = 0;
        unless ( $quiet=~ /on/i ){
            print {*STDERR} "\nRUN GIGABLASTER\n";
        }
    }
    else{
        unless ( $quiet=~ /on/i ){
            print {*STDERR} "\nRUN BLAST AT THE NCBI\n";
        }
    }
}

##- Define parameters through -method flag
88

Sebastien Moretti's avatar
Sebastien Moretti committed
89
90
91
92
93
94
95
96
97
98
99
100
101
if ( $method=~ /^pdbid$/i ){
    if ( $gigablast=~ /^yes$/i ){
        if ( $database ne 'pdb' ){
            print {*STDERR} "\nprovide a valid database name to RUN GIGABLASTER: nr, pdb or refseq_protein\n";
            exit 1;
        }
        else {
            $database = 'pdbaa';
        }
    }
    elsif ( $gigablast=~ /^no$/i && $distant==1 ){
        $database = 'pdb';
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
102
}
Sebastien Moretti's avatar
Sebastien Moretti committed
103
104
105
106
107
elsif ( $method=~ /^geneid$/i ){
    if ( $distant==1 ){
        unless ( $database eq 'nr' || $database eq 'swissprot' || $database eq 'pdb'){
            $database = 'refseq_protein';
        }
108
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
109
}
Sebastien Moretti's avatar
Sebastien Moretti committed
110
111
112
113
114
115
elsif ( $method=~/^profile$/i ){
    if ( $distant==1 ){
        unless ( $database eq 'nr' || $database eq 'swissprot' || $database eq 'refseq_protein' ){
            $database = 'pdb';
        }
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
116
}
Sebastien Moretti's avatar
Sebastien Moretti committed
117
118
else {
    die "unknown method\n";
119
120
}

Sebastien Moretti's avatar
Sebastien Moretti committed
121
122
123
124
if ( $orgn !~ /All\+organisms/ && $locale=~/1|2/ ){
    print {*STDERR} "-organism option can't be used locally or with -gigablast option!\n";
    exit 1;
}
Sebastien Moretti's avatar
Sebastien Moretti committed
125

Sebastien Moretti's avatar
Sebastien Moretti committed
126
127
128
129
130
##---PRINT option values
unless ( $quiet =~ /on/i ){
    print {*STDERR} "

             Program :  $program
Sebastien Moretti's avatar
Sebastien Moretti committed
131
             Database : $database
Sebastien Moretti's avatar
Sebastien Moretti committed
132
133
             Method :   $method

Sebastien Moretti's avatar
Sebastien Moretti committed
134
             Query_file : $query_file
Sebastien Moretti's avatar
Sebastien Moretti committed
135
136
137
138
139
140
             Out_file :   $out_file
";
    print {*STDOUT} "
             Evalue threshold :         $Eval
             Matrix :                   $matrix
             Filter :                   $filter
Sebastien Moretti's avatar
Sebastien Moretti committed
141
             Blast_identity_threshold : $identity_treshold
Sebastien Moretti's avatar
Sebastien Moretti committed
142
143
             Cover threshold :          $cover_tresh
";
144
    print {*STDERR} "
Sebastien Moretti's avatar
Sebastien Moretti committed
145
146
147
148
149
150
151
152
153
154
155
156
             Number of hits :            $align
             Number of processors used : $process
";
    if ( $gigablast=~ /^yes$/i ){
        print {*STDERR} "\n             gigablast: yes\n";
    }
    unless ($locale) {
        print {*STDERR} "\n             Organism : $orgn\n";
    }


    print {*STDERR} "\n***************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
157
158
}

159

Sebastien Moretti's avatar
Sebastien Moretti committed
160
161
162
163
164
165
166
167
168
169
170
171
#-- Local/Remote BLASTP

if ( $locale==1 || $locale==2 ){
    @list_pdb = &LOCAL_BLAST($blast_way, $database, $query_file, $Eval, $align, $method, $matrix, $filter,
                             $process, $gigablast, $database_expresso, $blast_dir_expresso, $runblast);
}
elsif ( $distant==1 ){
    @list_pdb = &WEB_BLAST($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter);
}
else {
    die " Report bug to armougom\@igs.cnrs-mrs.fr\n";
}
Sebastien Moretti's avatar
Sebastien Moretti committed
172
173

#-- PARSE BLAST RESULTS -> MAKE A PDB_ID LIST
Sebastien Moretti's avatar
Sebastien Moretti committed
174
175
176
177
if ( $method =~ /^pdbid$/i ){
    my (@result_sort) = &PARSING(\@list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast);
    &AFFICHAGE_PDB_PARSING(\@result_sort, $cover_tresh, $identity_treshold, $out_file);
    exit 0;
Sebastien Moretti's avatar
Sebastien Moretti committed
178
179
180
}

#-- PARSE BLAST RESULT -> MAKE LIST OF REFSEQ ID
Sebastien Moretti's avatar
Sebastien Moretti committed
181
182
183
184
elsif ( $method =~ /^geneid$/i ){
    my (@result_sort) = &PARSING(\@list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast);
    &AFFICHAGE_REFSEQ_PARSING(\@result_sort, $cover_tresh, $identity_treshold, $out_file);
    exit 0;
Sebastien Moretti's avatar
Sebastien Moretti committed
185
186
187
}

#-- PARSE BLAST RESULT -> MAKE PROFILE
Sebastien Moretti's avatar
Sebastien Moretti committed
188
189
190
191
192
193
194
elsif ( $method=~ /^profile$/i ){
    &PROFILE(\@list_pdb, $out_file, $distant);
    exit 0;
}
else {
    die " \nFATAL ERROR : Method or database error\n";
}
195

Sebastien Moretti's avatar
Sebastien Moretti committed
196
exit 0;
197

Sebastien Moretti's avatar
Sebastien Moretti committed
198
199
200
                                               ##############
###############################################  FONCTIONS  ####################################################################
                                              ##############
Sebastien Moretti's avatar
Sebastien Moretti committed
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
sub CONTROLE_DB_PG {
    my ($database, $blast_dir, $program) = @_;

    if ( !-e "$database" && !-e "$database.pin"){
        die "$database file does not exist\n";
    }
    if ( -d $database ){
        die "$database must be a file, not a directory\n";
    }
    if ( $blast_dir !~ /\/$/ ){
        $blast_dir .= '/';
    }
    my ($blastall) = $blast_dir.'blastall';
    if ( !-e $blastall){
        die "$blastall program not found \n";
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
217

218
    return ($blastall);
Sebastien Moretti's avatar
Sebastien Moretti committed
219
220
221
}

#-------------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
sub NCBI_DATABASE {
    my ($ncbi_db) = @_;

    my (%all_db) =
    (
            'nr'             =>'1',
            'pdb'            =>'1',
            'swissprot'      =>'1',
            'refseq_protein' =>'1',
    );

    if ( exists $all_db{$ncbi_db} ){
        return($ncbi_db);
    }
    elsif ( $ncbi_db eq '' ){
        return ('');
    }
    else{
        return (1);
    }
242
}
Sebastien Moretti's avatar
Sebastien Moretti committed
243
244
#------------------------------------------------------------------------------------------------------------------------
sub HELP
245
{
Sebastien Moretti's avatar
Sebastien Moretti committed
246
247
    my ($org, @orga) = &LIST_ORGA();
    my ($list_orga)  = join(', ', @orga);
Sebastien Moretti's avatar
Sebastien Moretti committed
248

249
250
    print {*STDERR} "
                      usage: $0 -infile <fasta file> -method <pdbid/geneid or profile> options []\n
Sebastien Moretti's avatar
Sebastien Moretti committed
251
252

                            -program ...... Program Name (blastp)
Sebastien Moretti's avatar
Sebastien Moretti committed
253
                                            Default = blastp
254
                            -database ..... Database at NCBI (nr, pdb, swissprot, refseq_protein) or indicate a local fasta file
Sebastien Moretti's avatar
Sebastien Moretti committed
255
256
257
258
                                            Default = pdb at NCBI
                            -infile ....... Query_file = a list of sequences in fasta format
                            -outfile ...... Name the outfile to make a template file for t_coffee
                                            Default = STDOUT or default.profile if method is profile
259
260
                            -evalue ....... Evalue threshold Default = 1
                            -matrix ....... PAM30 PAM70 BLOSUM45 BLOSUM80
Sebastien Moretti's avatar
Sebastien Moretti committed
261
                                            Default BLOSUM62
262
263
                            -method ....... geneid, pdbid, profile
                            -gigablast..... yes/no FASTER REMOTE BLAST with Gigablaster
Sebastien Moretti's avatar
Sebastien Moretti committed
264
                                            (Stephane Audic program: http://www.igs.cnrs-mrs.fr/Giga2/~database/remoteblast.cgi)
Sebastien Moretti's avatar
Sebastien Moretti committed
265
                                            Default no
266
                            -filter ....... T or F locally, L or R or M or C or V for distant blast
Sebastien Moretti's avatar
Sebastien Moretti committed
267
                                            Default = Off
268
269
                            -organism ..... $list_orga are available
                                            Default is All_organisms
Sebastien Moretti's avatar
Sebastien Moretti committed
270
                            -identity ..... blast identity threshold = provide a % for view only the results upper or equal to the threshold
271
272
273
                                            Default 50
                            -cover ........ Cover threshold = provide a % : sequence covering Default: 30
                            -hits ........  Number of hits
Sebastien Moretti's avatar
Sebastien Moretti committed
274
                                            Default = 1
275
276
277
278
279
                            -processor .... Number of processors to use
                                            Default = 1
                            -blast_dir .... Indicates where your BLAST directory is installed localy
                            -quiet ........ on : do not display all the default/defined blast parameters
                                            Default off
Sebastien Moretti's avatar
Sebastien Moretti committed
280
281
282
283
284

                     Environement Variables
                     These variables can be set from the environement
           DATABASE......................[Indicates where your database file must be fetched (localy)]
           BLAST_DIRECTORY...............[Indicates where your BLAST directory is installed localy]
285

Sebastien Moretti's avatar
Sebastien Moretti committed
286
";
Sebastien Moretti's avatar
Sebastien Moretti committed
287
288
    exit 1;
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
289
290
}
#-----------------------------------------------------------------------------------------
291
sub OPTIONS_GET {
Sebastien Moretti's avatar
Sebastien Moretti committed
292
    my %opt = ();
Sebastien Moretti's avatar
Sebastien Moretti committed
293
294

    GetOptions
Sebastien Moretti's avatar
Sebastien Moretti committed
295
              (
296
297
298
299
300
301
302
303
304
305
306
307
           'infile=s'    =>\$opt{infile},
           'outfile=s'    =>\$opt{outfile},
           'program=s'     =>\$opt{program},
           'database=s'     =>\$opt{database},
           'blast_dir=s'     =>\$opt{blast_dir},
           'identity=f'       =>\$opt{treshold},
           'cover=f'           =>\$opt{cover},
           'evalue=f'           =>\$opt{evalue},
           'hits=i'              =>\$opt{hits},
           'matrix=s'             =>\$opt{matrix},
           'filter=s'              =>\$opt{filter},
           'method=s'               =>\$opt{method},
308
           'organism=s'              =>\$opt{organism},
309
310
311
312
           'processor=i'              =>\$opt{processor},
           'quiet=s'                   =>\$opt{quiet},
           'gigablast=s'                =>\$opt{gigablast},
           );
Sebastien Moretti's avatar
Sebastien Moretti committed
313
314
315
316

    if ( $ARGV[0] ){
        print "Unprocessed by Getopt::Long\n $ARGV[0]\n";
        &HELP();
317
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339

    my($evalue_tresh) = $opt{'evalue'};       unless ($evalue_tresh)        { $evalue_tresh = 1;};
    my($cover_tresh)  = $opt{'cover'};         unless (defined $cover_tresh) { $cover_tresh = 30;};
    my($query_file)   = $opt{'infile'};         unless ($query_file)          { print {*STDERR} "Flag -infile must be defined\n"; &HELP();};
    my($outfil)       = $opt{'outfile'};         unless ($outfil)              { $outfil = '';};
    my($treshold)     = $opt{'treshold'};         unless (defined $treshold)    { $treshold = 50;};
    my($blast_dir)    = $opt{'blast_dir'};         unless ($blast_dir)           { $blast_dir = '';};
    my($database)     = $opt{'database'};           unless ($database)            { $database = '';};
    my($program)      = $opt{'program'};             unless ($program)             { $program = 'blastp';};
    my($align)        = $opt{'hits'};                 unless (defined $align)       { $align = 1;};
    my($matrix)       = $opt{'matrix'};                unless ($matrix)              { $matrix = 'BLOSUM62';};
    my($filter)       = $opt{'filter'};                 unless ($filter)              { $filter = 'F';};
    my($method)       = $opt{'method'};                  unless ($method)              {print {*STDERR} "Flag -method must be defined\n"; &HELP();};
    my($organism)     = $opt{'organism'};                 unless ($organism)            { $organism = 'All organisms';};
    my($process)      = $opt{'processor'};                 unless ($process)             { $process = 1;};
    my($param)        = $opt{'quiet'};                      unless ($param)               { $param = 'off';};
    my($gigablast)    = $opt{'gigablast'};                   unless ($gigablast)           {$gigablast = 'no';};

    if ( $method !~ /(^geneid$|^pdbid$|^profile$)/i ){
        print {*STDERR} "unknown method for the flag -method\n";
        &HELP();
    }
340
    if ( $treshold<0 || $treshold>100 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
341
342
343
        print {*STDERR} "\nout of range for the option -treshold \n";
        &HELP();
    }
344
    if ( $cover_tresh<0 || $cover_tresh>100 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
345
346
347
        print {*STDERR} "\nout of range for the option -cover \n";
        &HELP();
    }
348
    if ( $align<0 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
349
350
351
        print {*STDERR} "\n error with option align\n";
        &HELP();
    }
352
    if ( $gigablast !~ /^yes$|^no$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
353
354
355
        print {*STDERR} "invalid argument for gigaglast option: yes/no\n";
        exit 1;
    }
356
    if ( $filter !~ /^[TFRLMCV]{1}$|^off$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
357
358
359
        print {*STDERR} "valid values for -filter are T,F,R,L,M,C,or V!\n";
        exit 1;
    }
360
    if ( $matrix !~ /PAM30|PAM70|BLOSUM45|BLOSUM80|BLOSUM62/ ){
Sebastien Moretti's avatar
Sebastien Moretti committed
361
362
363
        print {*STDERR} "valid values for -matrix are PAM30,PAM70,BLOSUM45,BLOSUM80 or BLOSUM62\n";
        exit 1;
    }
364
    if ( $outfil eq '' && $method=~ /^profile$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
365
366
367
        $outfil = 'default_profile.template';
    }

368
    if ( $param!~ /^on$|^off$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
369
370
371
372
373
374
375
        print {*STDERR} "valid values for -quiet is on or off\n";
        exit 1;
    }
    my ($orgn, @all_orgn) = &ORGN($organism);
    return ($program,$database,$blast_dir,$query_file,
            $outfil,$treshold,$cover_tresh,$evalue_tresh,
            $align,$matrix,$filter,$method,$orgn,$process,$param,$gigablast);
Sebastien Moretti's avatar
Sebastien Moretti committed
376
377
378
}

#--------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
379
380
sub RECOVER {
    my ($pdb_result, $aln_length, $length_query) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
381
    my $nb_gap=0;
Sebastien Moretti's avatar
Sebastien Moretti committed
382
383
384
385
386

    if ( $pdb_result=~ /(score.+?\n\n\n).+?score/ism ){#If several HSP, take the 1st
        $pdb_result = $1;
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
387
    $length_query  =~ s/,//g;
Sebastien Moretti's avatar
Sebastien Moretti committed
388
    my ($requete)  =  join('', ($pdb_result=~/^Query(.*)\n/gm) );
Sebastien Moretti's avatar
Sebastien Moretti committed
389
    $requete       =~ s/[^A-Z-]//g;
Sebastien Moretti's avatar
Sebastien Moretti committed
390
391
392
393
394
395
    my (@sequence) =  split('', $requete);

    for (my $i=0; $i<=$#sequence; $i++){
        if ( $sequence[$i] eq '-'){
            ++$nb_gap;
        }
Sebastien Moretti's avatar
Sebastien Moretti committed
396
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
397
    my ($recouvrement) = sprintf("%-3d", (($aln_length-$nb_gap)/$length_query)*100 );
Sebastien Moretti's avatar
Sebastien Moretti committed
398
    undef(@sequence);
Sebastien Moretti's avatar
Sebastien Moretti committed
399
    return ($recouvrement, $nb_gap);
Sebastien Moretti's avatar
Sebastien Moretti committed
400
401
}
#--------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
sub WEB_BLAST {
    open (my $SOR1, '>', 'web_tempo.result') or die;
    my ($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter) = @_;
    my $aln_view;
    my $format = 'Txt';
    my ($description) = $align;
    if ( $method=~/^profile$/i ){
        $aln_view = 'FlatQueryAnchoredNoIdentities';
    }
    else {
        $aln_view = 'Pairwise';
    }

    if ( $filter eq 'F' ){
        $filter = 'off';
    }

    $/ = '>';
    open( my $FIC, '<', $query_file) or die "cannot open $query_file $!\n";
    my (@sequences) = <$FIC>;
    close $FIC;
Sebastien Moretti's avatar
Sebastien Moretti committed
423
424
    shift(@sequences);

Sebastien Moretti's avatar
Sebastien Moretti committed
425
426
427
428
429
430
431
432
433
434
    foreach my $sequence(@sequences){
        $sequence =~ s/>//g;
        $sequence = ">$sequence";

        my ($name) = ($sequence =~ /^>(.+)\n/);
        push(@names, $name);
        my ($encoded_query) = uri_escape($sequence);
        push (@list_encoded, $encoded_query);
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
435
    undef(@sequences);
Sebastien Moretti's avatar
Sebastien Moretti committed
436
437
438
439
440
441
442
    if ( scalar (@names != @list_encoded) ){
        die "error $!";
    }
    foreach my $encoded_seq(@list_encoded){
        my $nb = 0;
        print {*STDERR} "BLAST $names[$i]...";

Sebastien Moretti's avatar
Sebastien Moretti committed
443
#-- BUILD THE REQUEST
Sebastien Moretti's avatar
Sebastien Moretti committed
444
445
446
447
448
449
        my ($arguments) = "CMD=Put&ENTREZ_QUERY=$orgn&CDD_SEARCH=off&FILTER=$filter&MATRIX_NAME=$matrix&PROGRAM=$program&DATABASE=$database&QUERY=" . $encoded_seq;

        my ($req) = new HTTP::Request POST => 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi';
        $req -> content_type('application/x-www-form-urlencoded');
        $req -> content($arguments);

Sebastien Moretti's avatar
Sebastien Moretti committed
450
#-- GET THE RESPONSE : PARSE OUT THE REQUEST ID and THE ESTIMATED TIME
Sebastien Moretti's avatar
Sebastien Moretti committed
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
        my ($response) = $ua -> request($req);

        if ( $response -> content =~ /Server Error/i ){
            die "Server Error at NCBI!!Sorry try later\n";
        }
        $response -> content =~ /^\s{4}RID = (.*)$/m;
        my ($rid) = $1;

        $response -> content =~ /^\s{4}RTOE = (.*)$/m;
        my ($wait)= $1;
        unless ($rid && $wait){
            die "parse error: $!";
        }
        for (my $j=0; $j<=$wait/2; $j++){
            print {*STDERR} '.';
            sleep 2;
        }

        my ($verif) = 0;

        while (){
            for (my $j=0; $j<=5; $j++){
                print  {*STDERR} '.';
                sleep 1;
            }

            $req = new HTTP::Request GET =>
                "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=$rid";
            $response = $ua->request($req);
            if ( $response->content =~ /Status=WAITING/im ){
                next;
            }
            elsif ( $response->content =~ /Status=FAILED/im ){
                print {*STDERR} "Search $rid failed\n";
                $verif = 1;
                last;
            }
            elsif ( $response->content =~ /Status=UNKNOWN/im ){
                print {*STDERR} "Search $rid expired\n";
                $verif = 1;
                last;
            }
            elsif ( $response->content =~ /Status=READY/im ){
                if ( $response->content =~ /ThereAreHits=yes/im ){
                    last;
                }
                else {
                    print {*STDERR} "No hits found.\n";
                    $verif = 1;
                    last;
                }
            }
            elsif ( $response->content =~ /can\'t connect/im ){
                print {*STDERR} "\nCan't connect to www.ncbi.nlm.nih.gov:80...new attempt";
                if ( $nb <3 ){
                    ++$nb;
                    next;
                }
                else {
                    print {*STDERR} "sorry, BLAST $names[$i] failed after 3 attempts!!\n";
                    $verif = 1;
                    last;
                }
            }
            else {
                print {*STDERR} "unknown error\n";
                $verif = 1;
                last;
            }
        }

        if( $verif==1 ){
            ++$i;
            next;
        }

Sebastien Moretti's avatar
Sebastien Moretti committed
527
#-- GET RESULT
Sebastien Moretti's avatar
Sebastien Moretti committed
528
529
530

        while (){
            sleep 3;
531
532
533
534
535
536
537
538
539
540
        $req = new HTTP::Request GET => "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_TYPE=$format&FILTER=off&EXPECT=$Eval&ALIGNMENTS=$align&DESCRIPTIONS=$align&ALIGNMENT_VIEW=$aln_view&RID=$rid";
        $response = $ua -> request($req);

        if   ($response->content =~ /Altschul/i) {  print {*STDERR} "Search Complete\n"; push(@list_pdb,$response -> content);last; }
        else { next; }
    }
    print {$SOR1} (@list_pdb);
    ++$i;
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
541
    undef (@list_encoded);
542

Sebastien Moretti's avatar
Sebastien Moretti committed
543
    close $SOR1;
Sebastien Moretti's avatar
Sebastien Moretti committed
544
    return (@list_pdb);
545

Sebastien Moretti's avatar
Sebastien Moretti committed
546
547
548
549
}
#-----------------------------------------------------------------------------------------------------------------------
sub LOCAL_BLAST
{
Sebastien Moretti's avatar
Sebastien Moretti committed
550
551
552
553
    my ($blast_dir, $database, $query_file, $Eval, $align,
        $method, $matrix, $filter, $process, $gigablast,
        $database_expresso, $blast_dir_expresso, $runblast) = @_;
    my $n = 0;
554
    if ( $method=~ /^profile$/i && $gigablast=~ /^no$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
555
        open (COM,"$blast_dir -p blastp -d $database -i $query_file -m 6 -M $matrix -v $align -b $align -F $filter -e $Eval -a $process|") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
556
    }
557
558
    elsif ( $method=~ /^geneid$/i && $gigablast=~ /^no$/i )
    {
Sebastien Moretti's avatar
Sebastien Moretti committed
559
        open (COM,"$blast_dir -p blastp -d $database -i $query_file  -v $align  -b $align -F $filter -M $matrix -e $Eval -a $process|") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
560
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
561
    elsif ( $method=~ /^geneid$|^pdbid$/i && $gigablast=~ /^yes$/i )
Sebastien Moretti's avatar
Sebastien Moretti committed
562
    {
Sebastien Moretti's avatar
Sebastien Moretti committed
563
564
565
566
567
568
569
        unless ( $database eq "nr" || $database eq "pdb" || $database eq "refseq_protein" || $database eq "pdbaa" ){
            print {*STDERR} "\nsorry invalid database for gigablast\n";exit 1;
        }
        if ( $database eq 'pdb')            { $database = 'pdbaa';}
        if ( $database eq 'refseq_protein') { $database = 'refprot';}
        if ( $database eq '')               { print {*STDERR} "provide a valid database!\n"; exit;}
        open (COM,"$runblast -d $database -p blastp  -e $Eval -v $align -F F \<$query_file |");
570
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
571
572
    elsif ( $method=~ /^profile$/i && $gigablast=~ /^yes$/i){
        print {*STDERR} "\nSorry method profile can't be used with -gigablast option\n";exit;
Sebastien Moretti's avatar
Sebastien Moretti committed
573
    }
574
575
576
577
578
    elsif ( $method=~ /^pdbid$/i && $database=~ /expressopdb/ ){
        #BLAST pour Expresso
        open (COM,"$BLASTMAT; $BLASTDB; $blast_dir_expresso -p blastp -d $database_expresso -i $query_file -F $filter -e $Eval -M $matrix -v $align -b $align |") or die;
    }
    else{
Sebastien Moretti's avatar
Sebastien Moretti committed
579
        open (COM,"$blast_dir -p blastp -d $database -i $query_file -v 1 -b 1 -F $filter -e $Eval -M $matrix -v $align -b $align -a $process |") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
580
    }
581
582
583

    unless ( $quiet=~ /on/ ) { print {*STDERR} "\nrun BLAST..."; }

Sebastien Moretti's avatar
Sebastien Moretti committed
584
    my ($name_database, $posted, $version) = ('', '', '');
Sebastien Moretti's avatar
Sebastien Moretti committed
585

Sebastien Moretti's avatar
Sebastien Moretti committed
586
    open (my $SOR2, '>', 'blast_result.txt') or die;
587

Sebastien Moretti's avatar
Sebastien Moretti committed
588
589
590
591
592
593
    $/ = 'Query=';
    while (<COM>){
        if ( $_=~ /Database:\s+(\S+)/g )     { $name_database = $1;}
        if ( $_=~ /Posted date:\s+(.+?)\n/ ) { $posted        = $1;}
        if ( $_=~ /(BLASTP\s+\S+)/o )        { $version       = $1;}
        print {$SOR2} $_;
Sebastien Moretti's avatar
Sebastien Moretti committed
594
        push (@list_pdb, $_);
Sebastien Moretti's avatar
Sebastien Moretti committed
595
596
        if ( $_=~ /\s*(.+?)\s/ ){
            print {*STDERR} "\n$1 done";
597
        }
Sebastien Moretti's avatar
Sebastien Moretti committed
598
599
    }
    close COM;
Sebastien Moretti's avatar
Sebastien Moretti committed
600
    close $SOR2;
601
    print {*STDERR} "\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
602

Sebastien Moretti's avatar
Sebastien Moretti committed
603
    $name_database = $database if ( $name_database =~ m{/} );
604
    unless ($quiet=~ /on/i) {
Sebastien Moretti's avatar
Sebastien Moretti committed
605
606
607
        print {*STDOUT} "
             Version:     $version
             Database:    $name_database
Sebastien Moretti's avatar
Sebastien Moretti committed
608
             Posted date: $posted\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
609
    }
610
    shift (@list_pdb);
Sebastien Moretti's avatar
Sebastien Moretti committed
611
612
613
614
    return (@list_pdb);
}

#-----------------------------------------------------------------------------------------------------------------------------
615
sub PARSING {
Sebastien Moretti's avatar
Sebastien Moretti committed
616
    my ($list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
617

618
619
620
621
    my (@list_pdb)        = @$list_pdb;
    my (@result_not_sort) = ();
    my $n = 0;
    open (my $SOR, '>', 'webblast.log') or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
622

623
624
625
    if ( $gigablast=~ /^yes$/i ) { $locale = 2;$distant = 0;}
    if ( $gigablast=~ /^no$/i )  { $locale = 1;}
    if ( $distant==1 )           { $locale = 0;}
Sebastien Moretti's avatar
Sebastien Moretti committed
626

627
628
629
630
631
632
    foreach my $pdb_result(@list_pdb){
        my $query, my $length_query, my($pdb_id), my $comp=0;
        if ( $pdb_result=~/No hits found/m ){
            print {$SOR} $pdb_result;
            next;
        }
633

634
635
636
637
638
639
640
641
642
        $pdb_result =~ s/ALIGNMENTS//;
        local $/ = undef;
        my (@intra_res) = split(/(?=\n\n>)/s, $pdb_result);


        if ( $distant==1 ){
            my $version_d, my $database_d, my $poste_d;
            undef $/; ($query, $length_query) = ( $intra_res[0] =~ /Query=\s+(\S+)\s+Length=\s*(\d+)/smo );
            $/ = "\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
643
            open (F3, '<', 'web_tempo.result') or die;
644
645
646
647
648
649
650
651
652
            while ($_=<F3>){
                if ($_=~ /(BLASTP\s+\S+)/o)        { $version_d  = $1;}
                if ($_=~ /Database:\s+(.+?)$/o)    { $database_d = $1;}
                if ($_=~ /Posted date:\s*(.+?)$/o) { $poste_d    = $1; last;}
            }
            close F3;
#           $database_d = $database if ( $database_d =~ m{/} );
            unless ($quiet=~ /on/i || $n>0){
                ++$n;
653
                print {*STDOUT} "
Sebastien Moretti's avatar
Sebastien Moretti committed
654
655
             Version:     $version_d
             Database:    $database_d
Sebastien Moretti's avatar
Sebastien Moretti committed
656
             Posted date: $poste_d\n\n";
657
658
659
660
661
            }
        }
        else{
            ($query, $length_query) = ( $intra_res[0] =~ /\s*(.+?)\s.+?\(([\d,]+) letters/smo );
        }
662

663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
        shift(@intra_res) if ( exists($intra_res[1]) );

        foreach my $intra_res(@intra_res){ #look for the different results of the query
            my($aln_length, $identity) = ( $intra_res=~ /^\sIdentities = \d+\/(\d+)\s\((.+?)\)/im );
            my($recouvrement, $gap)    = &RECOVER($intra_res, $aln_length, $length_query);
            my($evalue)                = ( $intra_res=~ /Expect = (.+?)\s/im );
            my($bits)                  = ( $intra_res=~ /Score =\s+([\d.]+)\s/im );

            unless ( $method !~ /^geneid$/i ){
                if ( $comp<=$bits ){
                    $comp=$bits;
                }
                else{
                    last;
                }
            }

            if ( $query eq '' || $length_query eq '' || $aln_length eq '' || $identity eq '' || $recouvrement eq '' || $gap eq '' ){
                print {$SOR} " can't parse $pdb_result";
                next;
            }

            if ( $method =~ /^pdbid$/i ){
                if ( $locale==1 ){
                    ($pdb_id) = ( $intra_res=~ /^>(.{6})/im );
                    $pdb_id   =~ s/_//;
                    $pdb_id   = uc($pdb_id);
                }
                else{
                    ($pdb_id) = ( $intra_res=~ /^>pdb\|(.{6})/im );
                     $pdb_id  =~ s/\|//;
                }
                ($evalue) = ( $intra_res=~ /Expect = (.+?)\s/im );

                push (@result_not_sort, "$query\t$pdb_id\t$evalue\t$identity\t$recouvrement\t");
            }
            elsif ( $method =~/^geneid$/i ){
                if ( $database !~ /pdb/i && $database !~ /swiss/i && $locale =~ /1|2/){
                    while ( $intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/sg ){
                        my $databank = $1;
                        my $last     = $4;
                        my $refseq   = $2;
                        if ( $databank eq 'pdb' ){
                            $refseq .= $last;
                        }
                        $refseq =~ s/\.\d+$//;
                        push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
                    }
                }
                elsif ( $database=~ /pdb|pdbaa/i && ($locale==1 || $locale==2) ){
                    my $refseq;
                    if ( $locale==1 ){
                        ($refseq) = ( $intra_res=~ />(.*?)\s/o );
                        $refseq   =~ s/_//;
                    }
                    else{
                        ($refseq) = ( $intra_res=~ /^>pdb\|(.{6})/im );
                        $refseq   =~ s/\|//;
                    }

                    unless ($refseq){
                        print {$SOR} $intra_res;
                        next;
                    }
                    push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tpdb");
                }
                elsif ( $distant==1 ){
                    my ($resul) = &MULTI_EQUIVALENT($query, $identity, $recouvrement, $bits, $evalue, $intra_res);
                    push (@result_not_sort, $resul);
                }
                elsif ( $database=~ /swiss/i ){
                    my ($refseq) = ( $intra_res=~ />.*?sp\|(.+?)\|/o );
                    unless ($refseq){
                        print {$SOR} $pdb_result;
                        next;
                    }
                    $refseq =~ s/\.\d+$//;
                    push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\tswiss_prot");
                }
            }
            else {die;}
        }
745
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
746
    close $SOR;
Sebastien Moretti's avatar
Sebastien Moretti committed
747
    undef (@list_pdb);
748

749
750
751
752
753
754
755
756
757
758
    if ( $method =~/^geneid$/i ){
        return (@result_not_sort);
    }
    else{
        my(@result_sort) = map  { $_->[1] }
                           sort { $b->[0]<=>$a->[0] }
                           map  { [/\t([\d.]+)%/,$_] }
                           @result_not_sort;
        undef(@result_not_sort);
        return (@result_sort);
759
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
760
761
762
763
764
765
766
767
768
}

#-------------------------------------------------------------------------------------------------------------------

sub MULTI_EQUIVALENT
{
    my($query,$identity,$recouvrement,$bits,$evalue,$intra_res)=@_;

    my @result=();
769
770
771
772
773
774
775
776
777
    while  ($intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/g)
    {
    my $databank =$1;
    my $last     =$4;
    my $refseq   =$2;

    if ($databank eq 'pdb') { $refseq.=$last }
    $refseq=~ s/\.\d+$//;
    push (@result,"$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
Sebastien Moretti's avatar
Sebastien Moretti committed
778
779
780
781
782
783
784
785
    }
    return (@result);
}
#--------------------------------------------------------------------------------------------------------------------
sub AFFICHAGE_REFSEQ_PARSING
{
    my($result_sort,$cover_tresh,$identity_treshold,$out_file)=@_;
    my(@result_sort)=@$result_sort, my(@name_gid)=();my@resultats=();my $afficher="";
786
787
788

(my($entete)= sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s","Sequence Name","Accession number","Databank","%Identity","%Cover","BITS","Evalue"));

Sebastien Moretti's avatar
Sebastien Moretti committed
789
    foreach my $result_sort(@result_sort)
790
791
    {
    my($seq_name,$refseq_name,$identiq,$cover,$bits,$evalue,$bank)= split("\t",$result_sort);
792
    $evalue =~ s/,$//; #To remove an additional comment with new blast release (2.2.17)
793
794
795
796
797
798
799
800
801
    ($identiq)= split(/%/,$identiq);

    if ($identiq >= $identity_treshold && $cover >= $cover_tresh)
    {
        push (@name_gid,">$seq_name\@$bank\_\_$refseq_name\n");
        (($afficher).=  sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s ",$seq_name,$refseq_name,$bank,$identiq,$cover,$bits,$evalue));
        $afficher.="\n";
    }
    else {next;}
Sebastien Moretti's avatar
Sebastien Moretti committed
802
803
804
805
806
    }

if ($afficher) { print "\n$entete\n\n"; print $afficher; }


807
808
809
810
if (@name_gid) { print {*STDOUT} "\n**********************************************************************\n\n"; }
if ($out_file) { open (SOR,">$out_file") or die "can not open $out_file"; print SOR @name_gid; }
print {*STDOUT} "\n", @name_gid;
close SOR;
Sebastien Moretti's avatar
Sebastien Moretti committed
811
812
813
}
#-------------------------------------------------------------------------------------------------------------

814
sub AFFICHAGE_PDB_PARSING
Sebastien Moretti's avatar
Sebastien Moretti committed
815
{
816

Sebastien Moretti's avatar
Sebastien Moretti committed
817
818
    my($result_sort,$cover_tresh,$identity_treshold,$out_file)=@_;
    my(@result_sort)=@$result_sort, my @sortie=();
819
820
821

    print {*STDOUT} "\n\n",(my($en_tete)= sprintf("%-40s %-10s %-10s %-12s %-10s","Sequence Name","PDB_id","Evalue","Identity(%)","Cover(%)")),"\n\n";

Sebastien Moretti's avatar
Sebastien Moretti committed
822
    foreach my $result_sort(@result_sort)
823
824
825
826
827
828
829
    {
    my($seq_name,$pdb_name,$EValue,$identiq,$cover)= split("\t",$result_sort);
    ($identiq)= split(/%/,$identiq);

    if ($identiq >= $identity_treshold && $cover >= $cover_tresh)
    {
        push (@pdb_list,$pdb_name);
830
        $EValue =~ s/,$//;
831
832
833
834
        print {*STDOUT} ((my $afficher)= sprintf("%-40s %-10s %-10s %-12s %-10s",$seq_name,$pdb_name,$EValue,$identiq,$cover)),"\n";
        push (@sortie,">$seq_name _P_ $pdb_name\n");
    }
    else {next;}
Sebastien Moretti's avatar
Sebastien Moretti committed
835
836
    }
    undef(@result_sort);
837
    print {*STDOUT} "\n**********************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
838
839
840

#-- OUTFILE /STDOUT
    if   ($out_file) { open (SOR,">$out_file") or die "can not open $out_file"; print SOR @sortie; }
841
    print {*STDOUT} @sortie;
Sebastien Moretti's avatar
Sebastien Moretti committed
842
843
844
845
846
847
    close SOR;

}
#-----------------------------------------------------------------------------------------------------------------------------------
sub PROFILE
{
848
849
850
    my($list_pdb,$out_file,$distant)=@_;
    my(@list_pdb)=@$list_pdb,  my(@sortie)=();
    my %names=();   my $i=0;    my($name)='';
Sebastien Moretti's avatar
Sebastien Moretti committed
851
852
853

    open (SOR1,">$out_file") or die;
    foreach my $pdb_result(@list_pdb)
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
    {
    if ($pdb_result =~ /No hits found/i) { next; }
    else
    {
        ++$i;
        if ($distant==1) {($name)   =($pdb_result =~ /Query=\s*(.+?)Length/smoi)  or die "\nparse error in distant profile\n";}
        else             {($name)   =($pdb_result =~ /\s*(.+?)\(.*?letters/ismo)  or die "\nparse error in profile\n";}

        my($name1)= ($name=~ /(.+?)\s+$/);

        open (SOR,">tempo_file_profile") or die "can not open tempo_file_profile";
        print SOR "Query= $pdb_result";
        close SOR;

        open(COM,"|t_coffee -other_pg seq_reformat -input blast_aln -in tempo_file_profile -output fasta_aln -out ${i}.profile");
        close COM;
        push (@sortie,">$name1 _R_ ${i}.profile\n");

    }
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
874
    unlink("tempo_file_profile");
875
876
    undef(@list_pdb);

877
    print {*STDERR} "\n**********************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
878
879
#-- OUTFILE /STDOUT
    if   ($out_file) { open (SOR1,">$out_file") or die "can not open $out_file"; print SOR1 @sortie; }
880
    print {*STDOUT} @sortie;
Sebastien Moretti's avatar
Sebastien Moretti committed
881
    close SOR1;
882

Sebastien Moretti's avatar
Sebastien Moretti committed
883
884
885
886
}

#--------------------------------------------------------------------------------------------------------------------------------
sub ORGN
887
{
Sebastien Moretti's avatar
Sebastien Moretti committed
888
889
890
891
    my($organism)=@_;
    $organism=~ s/_/ /;

    my(%orgs)= (
892
893
894
895
896
897
898
899
900
901
902
903
904

        'Homo sapiens'         =>'1',
        'Bos taurus'             =>'1',
        'Gallus gallus'         =>'1',
        'Viruses'              =>'1',
        'Bacteria'            =>'1',
        'Eukaryota'            =>'1',
        'Mammalia'              =>'1',
        'Vertebrata'              =>'1',
        'All organisms'          =>'1',
        'Fungi'                 =>'1',
        'Primates'             =>'1',
        'Archaea'               =>'1',
Sebastien Moretti's avatar
Sebastien Moretti committed
905
906
907
                'Arabidopsis thaliana'   =>'1',
                'Caenorhabditis elegans'  =>'1',
                'Escherichia coli'        =>'1',
908
        'Mus musculus'             =>'1',
Sebastien Moretti's avatar
Sebastien Moretti committed
909
                'Drosophila melanogaster'   =>'1',
910
        );
Sebastien Moretti's avatar
Sebastien Moretti committed
911

912
    if (exists $orgs{$organism})
Sebastien Moretti's avatar
Sebastien Moretti committed
913
    { $organism=~ s/ /+/g; return ($organism); }
914
    else { print {*STDERR} "organism not valid or syntax error, replace space by \"_\" \n"; &HELP(); }
915
916

}
Sebastien Moretti's avatar
Sebastien Moretti committed
917
918
919
920
#------------------------------------------------------------------------------------------------------------------------------------

sub LIST_ORGA
{
Sebastien Moretti's avatar
Sebastien Moretti committed
921
922
923
924
925
926
    my (%orgs) = (

        'Homo sapiens'            =>'1',
        'Bos taurus'              =>'1',
        'Gallus gallus'           =>'1',
        'Viruses'                 =>'1',
927
        'Bacteria'                =>'1',
Sebastien Moretti's avatar
Sebastien Moretti committed
928
929
930
931
932
933
934
935
936
937
938
939
        'Eukaryota'               =>'1',
        'Mammalia'                =>'1',
        'Vertebrata'              =>'1',
        'All organisms'           =>'1',
        'Fungi'                   =>'1',
        'Primates'                =>'1',
        'Archaea'                 =>'1',
        'Arabidopsis thaliana'    =>'1',
        'Caenorhabditis elegans'  =>'1',
        'Escherichia coli'        =>'1',
        'Mus musculus'            =>'1',
        'Drosophila melanogaster' =>'1',
940
941
        );

Sebastien Moretti's avatar
Sebastien Moretti committed
942
943
944
945
    my (@cle) = keys(%orgs);
    foreach my $cle(@cle){
        $cle=~ s/ /_/g;
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
946
947
    return (@cle);
}
948