webblast.pl 34 KB
Newer Older
Sebastien Moretti's avatar
Sebastien Moretti committed
1
2
3
#!/usr/bin/env perl
#
#
4
#date : 2007/11/19
Sebastien Moretti's avatar
Sebastien Moretti committed
5
6
#prog : webblast.pl
#subj : make a BLAST/WU-BLAST (by HTTP request or locally) against a database with a file containing sequences in fasta format
Sebastien Moretti's avatar
Sebastien Moretti committed
7
####### method genid, pdbid and profile
Sebastien Moretti's avatar
Sebastien Moretti committed
8
9
10
11
#
#############################################################################################
#use Env qw(HOME);
#use lib "$HOME/.lib_webblast/";
Sebastien Moretti's avatar
Sebastien Moretti committed
12
13
14
use LWP::UserAgent;
use HTML::Parser;                                            # @@@@@@@ #
use HTTP::Request::Common qw(POST);                         # @/^   ^\@ #
Sebastien Moretti's avatar
Sebastien Moretti committed
15
16
17
18
use URI::Escape;                                           # @/ -   - \@ #
use Getopt::Long;                                         ##  \   ^   /  ##
use strict;                                              ##    |  0  |    ##
use warnings;                                           ####### \ _ / #######
19
##############################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
20
21


22
23
24
25
############################  EXPRESSO PARAM  ##########################################################
# Site-specific settings; in the code visible here they are only used by the
# Expresso branch of LOCAL_BLAST.
my $database_expresso  = 'pdb';                                            # PDB database name
my $blast_dir_expresso = '/mnt/local/bin/blastall';                        # blastall executable
my $BLASTMAT           = 'export BLASTMAT=/mnt/local/ncbi/data/';          # shell snippet: matrix directory for blast
my $BLASTDB            = 'export BLASTDB=/scratch/frt/blastnet/database/'; # shell snippet: PDB_seqres directory
########################################################################################################

# Program piped to by LOCAL_BLAST when the -gigablast option is 'yes'.
my $runblast = '/mnt/local/bin/runblast.pl';

##-- Script-wide state (file-scoped lexicals, also read/written by the subroutines)
my @list_encoded = ();    # URI-escaped query sequences (filled by WEB_BLAST)
my @list_pdb     = ();    # raw BLAST reports, one entry per query
my %deja_vu      = ();
my @pdb_list     = ();
my $i            = 0;     # index of the query currently processed by WEB_BLAST
my @names        = ();    # query names, parallel to @list_encoded
my $locale       = 0;     # 0 = not local, 1 = local blastall, 2 = gigablast
my $distant      = 0;     # 1 = BLAST at the NCBI
my $database;
my $blast_way;            # path to blastall, as returned by CONTROLE_DB_PG
my $ua = LWP::UserAgent->new;

##-- Environmental Variables

my $database_var = $ENV{'DATABASE'};
my $blast_var    = $ENV{'BLAST_DIRECTORY'};
Sebastien Moretti's avatar
Sebastien Moretti committed
39

Sebastien Moretti's avatar
Sebastien Moretti committed
40
##-- Get BLAST Options/parameters && check user options
Sebastien Moretti's avatar
Sebastien Moretti committed
41

Sebastien Moretti's avatar
Sebastien Moretti committed
42
43
44
# OPTIONS_GET parses @ARGV and returns the option values in this fixed order.
my ($program, $database_line, $blast_line, $query_file,
    $out_file, $identity_treshold, $cover_tresh, $Eval,
    $align, $matrix, $filter, $method, $orgn, $process, $quiet, $gigablast) = OPTIONS_GET();

##-- Define local or remote BLAST && Check database/program

# A missing or empty query file is a fatal error: report it and leave with a
# non-zero status (the script's other fatal argument checks use "exit 1").
unless ( -e $query_file ) { print {*STDERR} "\nfile does not exist!\n"; exit 1; }
unless ( -s $query_file ) { print {*STDERR} "\nyour file is empty!\n";  exit 1; }
Sebastien Moretti's avatar
Sebastien Moretti committed
50

Sebastien Moretti's avatar
Sebastien Moretti committed
51
52
53
54
55
56
57
58
# Choose between a local BLAST ($locale = 1), gigablast ($locale = 2) and a
# BLAST at the NCBI ($distant = 1), then announce the choice on STDERR.
{
    my $local_db    = $database_line || $database_var;
    my $local_blast = $blast_line    || $blast_var;
    my $banner;

    if ( $local_db && $local_blast ) {
        # A database and a BLAST location are both known: run locally.
        $locale = 1;
        if ( $database_line =~ /expressopdb/ && $blast_line =~ /blastexpresso/ ) {
            # special mode for configuration file of Expresso server
            $database = 'expressopdb';
        }
        else {
            $database  = $local_db;
            # Here the environment variable takes precedence over -blast_dir.
            $blast_way = CONTROLE_DB_PG($database, $blast_var || $blast_line, $program);
        }
        $banner = "\nRUN BLAST LOCALY\n";
    }
    else {
        $database = NCBI_DATABASE($database_line);
        if ( $gigablast =~ /^yes$/i ) {
            $locale  = 2;
            $distant = 0;
            $banner  = "\nRUN GIGABLASTER\n";
        }
        else {
            $distant = 1;
            $banner  = "\nRUN BLAST AT THE NCBI\n";
        }
    }

    print {*STDERR} $banner unless $quiet =~ /on/i;
}

##- Define parameters through -method flag
88

Sebastien Moretti's avatar
Sebastien Moretti committed
89
90
91
92
93
94
95
96
97
98
99
100
101
# Adjust the database name according to -method, then reject -organism for
# local/gigablast runs.
{
    my $giga_on  = ( $gigablast =~ /^yes$/i );
    my $giga_off = ( $gigablast =~ /^no$/i );

    if ( $method =~ /^pdbid$/i ) {
        if ($giga_on) {
            # gigablast + pdbid only accepts 'pdb', which is renamed 'pdbaa'
            if ( $database ne 'pdb' ) {
                print {*STDERR} "\nprovide a valid database name to RUN GIGABLASTER: nr, pdb or refseq_protein\n";
                exit 1;
            }
            $database = 'pdbaa';
        }
        elsif ( $giga_off && $distant == 1 ) {
            $database = 'pdb';
        }
    }
    elsif ( $method =~ /^geneid$/i ) {
        # remote geneid searches fall back to refseq_protein
        if ( $distant == 1 && !grep { $database eq $_ } qw(nr swissprot pdb) ) {
            $database = 'refseq_protein';
        }
    }
    elsif ( $method =~ /^profile$/i ) {
        # remote profile searches fall back to pdb
        if ( $distant == 1 && !grep { $database eq $_ } qw(nr swissprot refseq_protein) ) {
            $database = 'pdb';
        }
    }
    else {
        die "unknown method\n";
    }
}

# $orgn is expected to hold 'All+organisms' when no organism filter is set.
if ( $orgn !~ /All\+organisms/ && ( $locale == 1 || $locale == 2 ) ) {
    print {*STDERR} "-organism option can't be used locally or with -gigablast option!\n";
    exit 1;
}
Sebastien Moretti's avatar
Sebastien Moretti committed
125

Sebastien Moretti's avatar
Sebastien Moretti committed
126
127
128
129
130
##---PRINT option values
# Summary of the effective settings; skipped when -quiet is on.
# The thresholds go to STDOUT, everything else to STDERR.
if ( $quiet !~ /on/i ) {
    print {*STDERR} <<"END_FILES";


             Program :  $program
             Database : $database
             Method :   $method

             Query_file : $query_file
             Out_file :   $out_file
END_FILES
    print {*STDOUT} <<"END_THRESHOLDS";

             Evalue threshold :         $Eval
             Matrix :                   $matrix
             Filter :                   $filter
             Blast_identity_threshold : $identity_treshold
             Cover threshold :          $cover_tresh
END_THRESHOLDS
    print {*STDERR} <<"END_RESOURCES";

             Number of hits :            $align
             Number of processors used : $process
END_RESOURCES

    print {*STDERR} "\n             gigablast: yes\n" if $gigablast =~ /^yes$/i;

    # The organism filter is only reported for remote (NCBI) searches.
    print {*STDERR} "\n             Organism : $orgn\n" if !$locale;

    print {*STDERR} "\n***************************************************************\n\n";
}

159

Sebastien Moretti's avatar
Sebastien Moretti committed
160
161
162
163
164
165
166
167
168
169
170
171
#-- Local/Remote BLASTP

# Run the search: local blastall / gigablast when $locale is 1 or 2,
# otherwise the NCBI web service when $distant is set.
if ( $locale != 1 && $locale != 2 ) {
    die " Report bug to armougom\@igs.cnrs-mrs.fr\n" unless $distant == 1;
    @list_pdb = WEB_BLAST($query_file, $Eval, $program, $database, $matrix,
                          $method, $align, $orgn, $filter);
}
else {
    @list_pdb = LOCAL_BLAST($blast_way, $database, $query_file, $Eval, $align,
                            $method, $matrix, $filter, $process, $gigablast,
                            $database_expresso, $blast_dir_expresso, $runblast);
}
Sebastien Moretti's avatar
Sebastien Moretti committed
172
173

#-- PARSE BLAST RESULTS -> MAKE A PDB_ID LIST
Sebastien Moretti's avatar
Sebastien Moretti committed
174
175
176
177
# Post-process the BLAST reports according to -method, then leave with
# status 0.
if ( $method =~ /^profile$/i ) {
    #-- PARSE BLAST RESULT -> MAKE PROFILE
    PROFILE(\@list_pdb, $out_file, $distant);
}
elsif ( $method =~ /^pdbid$/i || $method =~ /^geneid$/i ) {
    my @sorted_hits = PARSING(\@list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast);
    if ( $method =~ /^pdbid$/i ) {
        #-- MAKE A PDB_ID LIST
        AFFICHAGE_PDB_PARSING(\@sorted_hits, $cover_tresh, $identity_treshold, $out_file);
    }
    else {
        #-- MAKE LIST OF REFSEQ ID
        AFFICHAGE_REFSEQ_PARSING(\@sorted_hits, $cover_tresh, $identity_treshold, $out_file);
    }
}
else {
    die " \nFATAL ERROR : Method or database error\n";
}

exit 0;
197
                   
Sebastien Moretti's avatar
Sebastien Moretti committed
198
199
200
                                               ##############
###############################################  FONCTIONS  ####################################################################
                                              ##############
Sebastien Moretti's avatar
Sebastien Moretti committed
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# Check a local BLAST setup and return the path of the blastall executable.
#
#   $database  : path of the database; either that file or "$database.pin"
#                must exist, and $database must not be a directory
#   $blast_dir : directory holding blastall (a trailing '/' is optional)
#   $program   : accepted for the caller's convenience, not used here
#
# Dies with an explanatory message when a check fails.
sub CONTROLE_DB_PG {
    my ($database, $blast_dir, $program) = @_;

    die "$database file does not exist\n"
        unless ( -e "$database" ) || ( -e "$database.pin" );
    die "$database must be a file, not a directory\n"
        if -d $database;

    $blast_dir .= '/' unless $blast_dir =~ m{/$};

    my $blastall = "${blast_dir}blastall";
    die "$blastall program not found \n" unless -e $blastall;

    return $blastall;
}

#-------------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# Validate a database name for a BLAST at the NCBI.
#
# Returns the name unchanged when it is one of nr, pdb, swissprot or
# refseq_protein, the empty string when no name was given, and 1 for any
# other name.
sub NCBI_DATABASE {
    my ($ncbi_db) = @_;

    my %is_ncbi_db = map { $_ => '1' } qw(nr pdb swissprot refseq_protein);

    return $ncbi_db if exists $is_ncbi_db{$ncbi_db};
    return ''       if $ncbi_db eq '';
    return 1;
}
#------------------------------------------------------------------------------------------------------------------------
# Print the usage message on STDERR and terminate the script.
# The list of organisms shown for -organism comes from LIST_ORGA(); its
# first return value is not used here.
sub HELP
{
    my (undef, @organisms) = LIST_ORGA();
    my $list_orga = join(', ', @organisms);

    print {*STDERR} <<"END_USAGE";

                      usage: $0 -infile <fasta file> -method <pdbid/geneid or profile> options []\n

                            -program ...... Program Name (blastp)
                                            Default = blastp
                            -database ..... Database at NCBI (nr, pdb, swissprot, refseq_protein) or indicate a local fasta file
                                            Default = pdb at NCBI
                            -infile ....... Query_file = a list of sequences in fasta format
                            -outfile ...... Name the outfile to make a template file for t_coffee
                                            Default = STDOUT or default.profile if method is profile
                            -evalue ....... Evalue threshold Default = 1
                            -matrix ....... PAM30 PAM70 BLOSUM45 BLOSUM80
                                            Default BLOSUM62
                            -method ....... geneid, pdbid, profile
                            -gigablast..... yes/no FASTER REMOTE BLAST with Gigablaster
                                            (Stephane Audic program: http://www.igs.cnrs-mrs.fr/Giga2/~database/remoteblast.cgi)
                                            Default no
                            -filter ....... T or F locally, L or R or M or C or V for distant blast
                                            Default = Off
                            -organism ..... $list_orga are available
                                            Default is All_organisms
                            -identity ..... blast identity threshold = provide a % for view only the results upper or equal to the threshold
                                            Default 50
                            -cover ........ Cover threshold = provide a % : sequence covering Default: 30
                            -hits ........  Number of hits
                                            Default = 1
                            -processor .... Number of processors to use
                                            Default = 1
                            -blast_dir .... Indicates where your BLAST directory is installed localy
                            -quiet ........ on : do not display all the default/defined blast parameters
                                            Default off

                     Environement Variables
                     These variables can be set from the environement
           DATABASE......................[Indicates where your database file must be fetched (localy)]
           BLAST_DIRECTORY...............[Indicates where your BLAST directory is installed localy]

END_USAGE

    exit;
}
#-----------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
292
293
# Parse the command line, apply the defaults and validate the values.
#
# Returns, in this order:
#   program, database, blast_dir, query file, out file, identity threshold,
#   cover threshold, evalue threshold, number of hits, matrix, filter,
#   method, organism (as returned by ORGN), processors, quiet, gigablast.
#
# Calls HELP() (which terminates the script) when a mandatory flag is missing
# or a value is out of range, and exits with status 1 on an invalid
# -gigablast, -filter, -matrix or -quiet value.
sub OPTIONS_GET {
    my %opt = ();

    GetOptions(
        'infile=s'    => \$opt{infile},
        'outfile=s'   => \$opt{outfile},
        'program=s'   => \$opt{program},
        'database=s'  => \$opt{database},
        'blast_dir=s' => \$opt{blast_dir},
        'identity=f'  => \$opt{treshold},
        'cover=f'     => \$opt{cover},
        'evalue=f'    => \$opt{evalue},
        'hits=i'      => \$opt{hits},
        'matrix=s'    => \$opt{matrix},
        'filter=s'    => \$opt{filter},
        'method=s'    => \$opt{method},
        'organism=s'  => \$opt{organism},
        'processor=i' => \$opt{processor},
        'quiet=s'     => \$opt{quiet},
        'gigablast=s' => \$opt{gigablast},
    );

    # Whatever Getopt::Long left in @ARGV was not understood. Test the array
    # itself (a leftover "0" is still a leftover) and report on STDERR, since
    # STDOUT may carry the results.
    if (@ARGV) {
        print {*STDERR} "Unprocessed by Getopt::Long\n $ARGV[0]\n";
        HELP();
    }

    # Defaults. Options tested with "defined" accept 0 as a real value; the
    # others fall back to their default for any false value.
    my $evalue_tresh = $opt{'evalue'} || 1;
    my $cover_tresh  = defined $opt{'cover'} ? $opt{'cover'} : 30;
    my $query_file   = $opt{'infile'};
    unless ($query_file) { print {*STDERR} "Flag -infile must be defined\n"; HELP(); }
    my $outfil       = $opt{'outfile'}   || '';
    my $treshold     = defined $opt{'treshold'} ? $opt{'treshold'} : 50;
    my $blast_dir    = $opt{'blast_dir'} || '';
    my $database     = $opt{'database'}  || '';
    my $program      = $opt{'program'}   || 'blastp';
    my $align        = defined $opt{'hits'} ? $opt{'hits'} : 1;
    my $matrix       = $opt{'matrix'}    || 'BLOSUM62';
    my $filter       = $opt{'filter'}    || 'F';
    my $method       = $opt{'method'};
    unless ($method) { print {*STDERR} "Flag -method must be defined\n"; HELP(); }
    my $organism     = $opt{'organism'}  || 'All organisms';
    my $process      = $opt{'processor'} || 1;
    my $param        = $opt{'quiet'}     || 'off';
    my $gigablast    = $opt{'gigablast'} || 'no';

    # Validation.
    if ( $method !~ /(^geneid$|^pdbid$|^profile$)/i ) {
        print {*STDERR} "unknown method for the flag -method\n";
        HELP();
    }
    if ( $treshold < 0 || $treshold > 100 ) {
        print {*STDERR} "\nout of range for the option -identity \n";
        HELP();
    }
    if ( $cover_tresh < 0 || $cover_tresh > 100 ) {
        print {*STDERR} "\nout of range for the option -cover \n";
        HELP();
    }
    if ( $align < 0 ) {
        print {*STDERR} "\n error with option -hits\n";
        HELP();
    }
    if ( $gigablast !~ /^yes$|^no$/i ) {
        print {*STDERR} "invalid argument for gigablast option : yes/no\n";
        exit 1;
    }
    if ( $filter !~ /^[TFRLMCV]{1}$|^off$/i ) {
        print {*STDERR} "valid values for -filter are T,F,R,L,M,C,or V!\n";
        exit 1;
    }
    # Anchored, so only an exact matrix name is accepted.
    if ( $matrix !~ /\A(?:PAM30|PAM70|BLOSUM45|BLOSUM80|BLOSUM62)\z/ ) {
        print {*STDERR} "valid values for -matrix  are PAM30,PAM70,BLOSUM45,BLOSUM80 or BLOSUM62\n";
        exit 1;
    }
    if ( $outfil eq "" && $method =~ /^profile$/i ) {
        $outfil = 'default_profile.template';
    }
    if ( $param !~ /^on$|^off$/i ) {
        print {*STDERR} "valid values for -quiet is on or off\n";
        exit 1;
    }

    # ORGN returns a list; only its first element is passed on.
    my ($orgn) = ORGN($organism);

    return ($program, $database, $blast_dir, $query_file,
            $outfil, $treshold, $cover_tresh, $evalue_tresh,
            $align, $matrix, $filter, $method, $orgn, $process, $param, $gigablast);
}

#--------------------------------------------------------------------------------------------------
# Compute how much of the query an alignment covers.
#
#   $pdb_result   : text of one BLAST hit; when it holds several HSPs only
#                   the first one is considered
#   $aln_length   : alignment length
#   $length_query : query length (thousands separators ',' are removed)
#
# Returns (coverage, gaps): coverage is (aln_length - gaps) / query length
# as a percentage formatted with "%-3d"; gaps is the number of '-' found on
# the "Query" lines.
sub RECOVER
{
    my ($pdb_result, $aln_length, $length_query) = @_;

    # several HSPs: keep the first one only
    $pdb_result = $1 if $pdb_result =~ /(score.+?\n\n\n).+?score/ism;

    $length_query =~ s/,//g;

    # Concatenate what follows "Query" on each such line, then keep only the
    # residues and gap characters.
    my $aligned_query = join('', $pdb_result =~ /^Query(.*)\n/gm);
    $aligned_query =~ s/[^A-Z-]//g;

    my $nb_gap = ( $aligned_query =~ tr/-// );

    my $recouvrement = sprintf("%-3d", (($aln_length - $nb_gap) / $length_query) * 100);
    return ($recouvrement, $nb_gap);
}
#--------------------------------------------------------------------------------------------------
# Run one BLAST search per query sequence through the NCBI Blast.cgi service.
#
#   $query_file : fasta file holding the query sequences
#   $Eval       : E-value threshold (EXPECT when the report is fetched)
#   $program, $database, $matrix, $orgn, $filter : sent with the search
#   $method     : 'profile' selects the query-anchored alignment view,
#                 anything else the pairwise view
#   $align      : number of alignments/descriptions requested
#
# Each report is appended to the file-level @list_pdb and written to
# 'web_tempo.result'; the accumulated @list_pdb is returned. The sub also
# uses the file-level $ua, @names, @list_encoded and $i.
#
# NOTE: the input record separator $/ is set to '>' and deliberately left
# that way on return, as before; code running afterwards may rely on it.
sub WEB_BLAST
{
    my ($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter) = @_;

    open(my $result_fh, '>', 'web_tempo.result')
        or die "can not open web_tempo.result for writing: $!\n";

    my $format   = 'Txt';
    my $aln_view = ( $method =~ /^profile$/i ) ? 'FlatQueryAnchoredNoIdentities' : 'Pairwise';

    if ( $filter eq 'F' ) { $filter = 'off'; }

    # Read the fasta file one '>'-terminated record at a time.
    $/ = '>';
    open(my $query_fh, '<', $query_file) or die "can not open $query_file $!\n";
    my @sequences = <$query_fh>;
    close $query_fh;
    shift(@sequences);    # first record is what precedes the first '>'

    foreach my $sequence (@sequences) {
        $sequence =~ s/>//g;
        $sequence = ">$sequence";

        my ($name) = ( $sequence =~ /^>(.+)\n/ );
        push(@names, $name);
        push(@list_encoded, uri_escape($sequence));
    }

    undef(@sequences);
    if ( @names != @list_encoded ) { die "error $!"; }

    foreach my $encoded_seq (@list_encoded) {
        my $nb = 0;    # number of "can't connect" retries for this query
        print {*STDERR} "BLAST $names[$i]...";

        #-- BUILD THE REQUEST

        my $arguments = "CMD=Put&ENTREZ_QUERY=$orgn&CDD_SEARCH=off&FILTER=$filter&MATRIX_NAME=$matrix&PROGRAM=$program&DATABASE=$database&QUERY=" . $encoded_seq;

        my $req = HTTP::Request->new(POST => 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi');
        $req->content_type('application/x-www-form-urlencoded');
        $req->content($arguments);

        #-- GET THE RESPONSE : PARSE OUT THE REQUEST ID and THE ESTIMATED TIME
        my $response = $ua->request($req);

        if ( $response->content =~ /Server Error/i ) { die "Server Error at NCBI!!Sorry try later\n"; }

        # Capture in list context: a failed match yields undef instead of
        # silently reusing $1 from an earlier successful match.
        my ($rid)  = ( $response->content =~ /^\s{4}RID = (.*)$/m );
        my ($wait) = ( $response->content =~ /^\s{4}RTOE = (.*)$/m );
        unless ( $rid && $wait ) {
            die "parse error: no RID/RTOE found in the NCBI response\n";
        }

        # Wait roughly the estimated time before polling.
        for (my $j = 0; $j <= $wait / 2; $j++) { print {*STDERR} "."; sleep 2; }

        my $verif = 0;    # set to 1 when this query must be skipped

        #-- POLL THE SEARCH STATUS
        while (1) {
            for (my $j = 0; $j <= 5; $j++) { print {*STDERR} "."; sleep 1; }

            $req = HTTP::Request->new(
                GET => "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=$rid");
            $response = $ua->request($req);

            if ( $response->content =~ /Status=WAITING/im ) {
                next;
            }
            elsif ( $response->content =~ /Status=FAILED/im ) {
                print {*STDERR} "Search $rid failed\n";
                $verif = 1;
                last;
            }
            elsif ( $response->content =~ /Status=UNKNOWN/im ) {
                print {*STDERR} "Search $rid expired\n";
                $verif = 1;
                last;
            }
            elsif ( $response->content =~ /Status=READY/im ) {
                if ( $response->content =~ /ThereAreHits=yes/im ) { last; }
                print {*STDERR} "No hits found.\n";
                $verif = 1;
                last;
            }
            elsif ( $response->content =~ /can\'t connect/im ) {
                print {*STDERR} "\nCan't connect to www.ncbi.nlm.nih.gov:80...new attempt";
                if ( $nb < 3 ) { ++$nb; next; }
                print {*STDERR} "sorry, BLAST $names[$i] failed after 3 attempts!!\n";
                $verif = 1;
                last;
            }
            else {
                print {*STDERR} "unknown error\n";
                $verif = 1;
                last;
            }
        }

        if ( $verif == 1 ) { ++$i; next; }

        #-- GET RESULT

        # Fetch until the report is complete (recognised by "Altschul").
        while (1) {
            sleep 3;
            $req = HTTP::Request->new(
                GET => "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_TYPE=$format&FILTER=off&EXPECT=$Eval&ALIGNMENTS=$align&DESCRIPTIONS=$align&ALIGNMENT_VIEW=$aln_view&RID=$rid");
            $response = $ua->request($req);

            if ( $response->content =~ /Altschul/i ) {
                print {*STDERR} "Search Complete\n";
                push(@list_pdb, $response->content);
                # Write this report only; dumping the whole of @list_pdb
                # after every query duplicated the earlier reports.
                print {$result_fh} $response->content;
                last;
            }
        }
        ++$i;
    }

    undef(@list_encoded);

    close $result_fh or die "can not close web_tempo.result: $!\n";
    return (@list_pdb);
}
#-----------------------------------------------------------------------------------------------------------------------
# Run blastall (or the gigablast wrapper) and collect the report per query.
#
#   $blast_dir          : path of the blastall executable
#   $database           : database name or path
#   $query_file         : fasta file holding the query sequences
#   $Eval, $align, $matrix, $filter, $process : passed on the command line
#   $method             : geneid, pdbid or profile
#   $gigablast          : 'yes' pipes the query file into $runblast
#   $database_expresso, $blast_dir_expresso : used when $database matches
#                         'expressopdb' with method pdbid
#
# The raw output is copied to 'blast_result.txt' and split on 'Query=';
# every chunk is appended to the file-level @list_pdb, the first element of
# @list_pdb is then dropped, and the list is returned. The sub also reads
# the file-level $quiet, $BLASTMAT and $BLASTDB.
#
# NOTE: the input record separator $/ is set to 'Query=' and deliberately
# left that way on return, as before; code running afterwards may rely on it.
# NOTE(review): the command is handed to the shell with the arguments
# interpolated as-is — confirm they are trusted.
sub LOCAL_BLAST
{
    my ($blast_dir, $database, $query_file, $Eval, $align,
        $method, $matrix, $filter, $process, $gigablast,
        $database_expresso, $blast_dir_expresso, $runblast) = @_;

    my $giga_on  = ( $gigablast =~ /^yes$/i );
    my $giga_off = ( $gigablast =~ /^no$/i );

    my $command;
    if ( $method =~ /^profile$/i && $giga_off ) {
        $command = "$blast_dir -p blastp -d $database -i $query_file -m 6 -M $matrix -v $align -b $align -F $filter -e $Eval -a $process";
    }
    elsif ( $method =~ /^geneid$/i && $giga_off ) {
        $command = "$blast_dir -p blastp -d $database -i $query_file  -v $align  -b $align -F $filter -M $matrix -e $Eval -a $process";
    }
    elsif ( $method =~ /^geneid$|^pdbid$/i && $giga_on ) {
        unless ( $database eq "nr" || $database eq "pdb" || $database eq "refseq_protein" || $database eq "pdbaa" ) {
            print {*STDERR} "\nsorry invalid database for gigablast\n";
            exit 1;
        }
        # the gigablast wrapper is given its own names for these databases
        if ( $database eq 'pdb' )            { $database = 'pdbaa'; }
        if ( $database eq 'refseq_protein' ) { $database = 'refprot'; }
        $command = "$runblast -d $database -p blastp  -e $Eval -v $align -F F <$query_file";
    }
    elsif ( $method =~ /^profile$/i && $giga_on ) {
        print {*STDERR} "\nSorry method profile can't be used with -gigablast option\n";
        exit 1;
    }
    elsif ( $method =~ /^pdbid$/i && $database =~ /expressopdb/ ) {
        # BLAST for Expresso
        $command = "$BLASTMAT; $BLASTDB; $blast_dir_expresso -p blastp -d $database_expresso -i $query_file -F $filter -e $Eval -M $matrix -v $align -b $align";
    }
    else {
        $command = "$blast_dir -p blastp -d $database -i $query_file -v 1 -b 1 -F $filter -e $Eval -M $matrix -v $align -b $align -a $process";
    }

    # Every branch, including gigablast, now fails loudly if the pipe cannot
    # be opened.
    open(my $blast_fh, '-|', $command) or die "can not run '$command': $!\n";

    # Case-insensitive like every other -quiet test ("-quiet ON" is accepted).
    unless ( $quiet =~ /on/i ) { print {*STDERR} "\nrun BLAST..."; }

    my ($name_database, $posted, $version) = ('', '', '');

    open(my $SOR2, '>', 'blast_result.txt')
        or die "can not open blast_result.txt for writing: $!\n";

    # One record per query: the output is split on the 'Query=' marker.
    $/ = 'Query=';
    while ( my $chunk = <$blast_fh> ) {
        if ( $chunk =~ /Database:\s+(\S+)/ )      { $name_database = $1; }
        if ( $chunk =~ /Posted date:\s+(.+?)\n/ ) { $posted        = $1; }
        if ( $chunk =~ /(BLASTP\s+\S+)/ )         { $version       = $1; }
        print {$SOR2} $chunk;
        push(@list_pdb, $chunk);
        # the first word of the record is reported as done
        if ( $chunk =~ /\s*(.+?)\s/ ) {
            print {*STDERR} "\n$1 done";
        }
    }
    close $blast_fh;
    close $SOR2;
    print {*STDERR} "\n";

    # When the reported database name contains a '/', show the name given by
    # the caller instead.
    $name_database = $database if ( $name_database =~ m{/} );
    unless ( $quiet =~ /on/i ) {
        print {*STDOUT} "
             Version:     $version
             Database:    $name_database
             Posted date: $posted\n\n";
    }

    # Drop the first record: it is the text that precedes the first 'Query='.
    shift(@list_pdb);
    return (@list_pdb);
}

#-----------------------------------------------------------------------------------------------------------------------------
544
# PARSING - turn raw BLAST reports into tab-separated hit records.
#
# Arguments (positional):
#   $list_pdb  - array ref; each element is the text of one query's report
#   $locale    - recomputed below from $gigablast / $distant:
#                2 when $gigablast is 'yes', 1 when it is 'no', 0 when $distant is 1
#   $distant   - 1 when the reports use the "Query= ... Length=" layout and the
#                banner information has to be read from 'web_tempo.result'
#   $method    - 'pdbid' or 'geneid' (case-insensitive); anything else dies
#   $quiet     - when it matches /on/i the version/database banner is not printed
#   $database  - database name, used to choose how accessions are extracted
#   $gigablast - 'yes' / 'no'
#
# Returns:
#   geneid: unsorted list of
#           "query\taccession\tidentity\tcoverage\tbits\tevalue\tdatabank"
#   pdbid : list of "query\tpdb_id\tevalue\tidentity\tcoverage\t" sorted by
#           decreasing identity percentage
#
# Side effects: (re)creates 'webblast.log' and writes to it every report that
# has no hit or that cannot be parsed; may print a banner on STDOUT.
#
# Relies on RECOVER() and MULTI_EQUIVALENT(), defined elsewhere in this file.
sub PARSING {
    my ($list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast) = @_;

    my (@list_pdb)        = @$list_pdb;
    my (@result_not_sort) = ();
    my $n = 0;    # becomes 1 once the banner has been printed
    open (my $SOR, '>', 'webblast.log') or die "can not open webblast.log: $!";

    if ( $gigablast=~ /^yes$/i ) { $locale = 2; $distant = 0; }
    if ( $gigablast=~ /^no$/i )  { $locale = 1; }
    if ( $distant==1 )           { $locale = 0; }

    foreach my $pdb_result (@list_pdb){
        my ($query, $length_query, $pdb_id);
        my $comp = 0;    # best bit score seen so far for this query (geneid mode)
        if ( $pdb_result=~/No hits found/m ){
            print {$SOR} $pdb_result;
            next;
        }

        $pdb_result =~ s/ALIGNMENTS//;
        # NOTE(review): $/ is kept exactly as before (undef here, "\n" in the
        # $distant branch) because RECOVER() runs while it is in effect and it
        # is not visible from here whether that helper depends on it.
        local $/ = undef;
        # One chunk per hit; chunk 0 is the report header.
        my (@intra_res) = split(/(?=\n\n>)/s, $pdb_result);

        if ( $distant==1 ){
            ($query, $length_query) = ( $intra_res[0] =~ /Query=\s+(\S+)\s+Length=\s*(\d+)/smo );
            $/ = "\n";
            # The banner is printed once only, so the result file is read only
            # when the banner is actually going to be printed.
            unless ( $quiet=~ /on/i || $n>0 ){
                ++$n;
                my ($version_d, $database_d, $poste_d) = ('', '', '');
                open (my $F3, '<', 'web_tempo.result') or die "can not open web_tempo.result: $!";
                while ( my $line = <$F3> ){
                    if ( $line=~ /(BLASTP\s+\S+)/o )        { $version_d  = $1; }
                    if ( $line=~ /Database:\s+(.+?)$/o )    { $database_d = $1; }
                    if ( $line=~ /Posted date:\s*(.+?)$/o ) { $poste_d    = $1; last; }
                }
                close $F3;
                print {*STDOUT} "
             Version:     $version_d
             Database:    $database_d
             Posted date: $poste_d\n\n";
            }
        }
        else{
            ($query, $length_query) = ( $intra_res[0] =~ /\s*(.+?)\s.+?\(([\d,]+) letters/smo );
        }

        # Drop the header chunk when at least one hit chunk follows it.
        shift(@intra_res) if ( exists($intra_res[1]) );

        foreach my $intra_res (@intra_res){ #look for the different results of the query
            my ($aln_length, $identity) = ( $intra_res=~ /^\sIdentities = \d+\/(\d+)\s\((.+?)\)/im );
            # "recouvrement" is French for coverage; presumably RECOVER returns
            # (coverage, gap) -- confirm against its definition.
            my ($recouvrement, $gap)    = &RECOVER($intra_res, $aln_length, $length_query);
            my ($evalue)                = ( $intra_res=~ /Expect = (.+?)\s/im );
            my ($bits)                  = ( $intra_res=~ /Score =\s+([\d.]+)\s/im );

            # geneid mode: stop at the first hit whose bit score is lower than
            # the best score seen so far for this query.
            if ( $method =~ /^geneid$/i ){
                my $score = defined($bits) ? $bits : 0;
                if ( $comp<=$score ){
                    $comp = $score;
                }
                else{
                    last;
                }
            }

            # A failed capture leaves undef; test definedness explicitly so the
            # check itself does not emit "uninitialized" warnings.
            if ( grep { !defined($_) || $_ eq '' }
                      ($query, $length_query, $aln_length, $identity, $recouvrement, $gap) ){
                print {$SOR} " can't parse $pdb_result";
                next;
            }

            if ( $method =~ /^pdbid$/i ){
                if ( $locale==1 ){
                    ($pdb_id) = ( $intra_res=~ /^>(.{6})/im );
                    $pdb_id   =~ s/_//;
                    $pdb_id   = uc($pdb_id);
                }
                else{
                    ($pdb_id) = ( $intra_res=~ /^>pdb\|(.{6})/im );
                    $pdb_id   =~ s/\|//;
                }

                push (@result_not_sort, "$query\t$pdb_id\t$evalue\t$identity\t$recouvrement\t");
            }
            elsif ( $method =~/^geneid$/i ){
                if ( $database !~ /pdb/i && $database !~ /swiss/i && $locale =~ /1|2/){
                    # One record per databank accession found in the definition line(s).
                    while ( $intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/sg ){
                        my $databank = $1;
                        my $last     = $4;
                        my $refseq   = $2;
                        if ( $databank eq 'pdb' ){
                            $refseq .= $last;    # append the character captured after the id
                        }
                        $refseq =~ s/\.\d+$//;   # strip a trailing ".<digits>" suffix
                        push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
                    }
                }
                elsif ( $database=~ /pdb|pdbaa/i && ($locale==1 || $locale==2) ){
                    my $refseq;
                    if ( $locale==1 ){
                        ($refseq) = ( $intra_res=~ />(.*?)\s/o );
                        $refseq   =~ s/_//;
                    }
                    else{
                        ($refseq) = ( $intra_res=~ /^>pdb\|(.{6})/im );
                        $refseq   =~ s/\|//;
                    }

                    unless ($refseq){
                        print {$SOR} $intra_res;
                        next;
                    }
                    push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tpdb");
                }
                elsif ( $distant==1 ){
                    # MULTI_EQUIVALENT returns one record per accession (possibly
                    # none). Push them all, as the local branch above does,
                    # instead of keeping only the first or pushing undef.
                    push (@result_not_sort,
                          &MULTI_EQUIVALENT($query, $identity, $recouvrement, $bits, $evalue, $intra_res));
                }
                elsif ( $database=~ /swiss/i ){
                    my ($refseq) = ( $intra_res=~ />.*?sp\|(.+?)\|/o );
                    unless ($refseq){
                        print {$SOR} $pdb_result;
                        next;
                    }
                    $refseq =~ s/\.\d+$//;
                    # Same seven columns as every other geneid record; the evalue
                    # column was missing here, which shifted 'swiss_prot' into the
                    # evalue position for AFFICHAGE_REFSEQ_PARSING.
                    push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tswiss_prot");
                }
            }
            else {
                die "PARSING: unknown method '$method'";
            }
        }
    }
    close $SOR or warn "can not close webblast.log: $!";
    undef (@list_pdb);

    if ( $method =~/^geneid$/i ){
        return (@result_not_sort);
    }
    else{
        # Sort by decreasing identity percentage. A record without a "NN%"
        # field sorts as 0 instead of being replaced by undef.
        my (@result_sort) = map  { $_->[1] }
                            sort { $b->[0]<=>$a->[0] }
                            map  { my ($pct) = /\t([\d.]+)%/; [ (defined($pct) ? $pct : 0), $_ ] }
                            @result_not_sort;
        undef(@result_not_sort);
        return (@result_sort);
    }
}

#-------------------------------------------------------------------------------------------------------------------

# MULTI_EQUIVALENT - build one record per databank accession found in a hit.
#
# Arguments: $query, $identity, $recouvrement, $bits, $evalue are copied
# verbatim into every record; $intra_res is the text that is scanned.
#
# Returns a list of
#   "query\taccession\tidentity\trecouvrement\tbits\tevalue\tdatabank"
# strings, one per match, in order of appearance (empty list when nothing
# matches).
sub MULTI_EQUIVALENT
{
    my($query,$identity,$recouvrement,$bits,$evalue,$intra_res)=@_;

    my @records;
    my @shared_columns = ($identity, $recouvrement, $bits, $evalue);

    while ($intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/g)
    {
        my ($bank, $accession, $suffix) = ($1, $2, $4);

        # For pdb entries the character captured after the id is appended to it.
        $accession .= $suffix if $bank eq 'pdb';

        # Strip a trailing ".<digits>" suffix.
        $accession =~ s/\.\d+$//;

        push @records, join("\t", $query, $accession, @shared_columns, $bank);
    }
    return (@records);
}
#--------------------------------------------------------------------------------------------------------------------
# AFFICHAGE_REFSEQ_PARSING - display the geneid-style records that pass the
# identity and coverage thresholds.
#
# Arguments:
#   $result_sort       - array ref of tab-separated records:
#                        name, accession, identity ("NN%"), cover, bits, evalue, databank
#   $cover_tresh       - minimum cover (numeric comparison, >=)
#   $identity_treshold - minimum identity (numeric comparison, >=)
#   $out_file          - optional path; when true, the ">name@bank__accession"
#                        lines are also written to that file
#
# Output: a table (only when at least one record passes), a separator line
# (only when at least one record passes), then "\n" followed by the
# ">name@bank__accession" lines.
# Dies when $out_file cannot be opened or written.
sub AFFICHAGE_REFSEQ_PARSING
{
    my($result_sort,$cover_tresh,$identity_treshold,$out_file)=@_;
    my @name_gid = ();
    my $afficher = "";

    my $entete = sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s","Sequence Name","Accession number","Databank","%Identity","%Cover","BITS","Evalue");

    foreach my $record (@$result_sort)
    {
        my($seq_name,$refseq_name,$identiq,$cover,$bits,$evalue,$bank)= split("\t",$record);
        # To remove an additional comment with new blast release (2.2.17)
        $evalue =~ s/,$// if defined($evalue);
        ($identiq)= split(/%/,$identiq);    # keep only the number before '%'

        next unless ($identiq >= $identity_treshold && $cover >= $cover_tresh);

        push (@name_gid,">$seq_name\@${bank}__$refseq_name\n");
        $afficher .= sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s ",$seq_name,$refseq_name,$bank,$identiq,$cover,$bits,$evalue);
        $afficher .= "\n";
    }

    if ($afficher) { print "\n$entete\n\n"; print $afficher; }

    if (@name_gid) { print {*STDOUT} "\n**********************************************************************\n\n"; }

    # The output file is opened, written and closed only when one was requested;
    # the close is checked because buffered write errors surface there.
    if ($out_file) {
        open (my $SOR, '>', $out_file) or die "can not open $out_file: $!";
        print {$SOR} @name_gid;
        close $SOR or die "can not write $out_file: $!";
    }
    print {*STDOUT} "\n", @name_gid;
    return;
}
#-------------------------------------------------------------------------------------------------------------

# AFFICHAGE_PDB_PARSING - display the pdbid-style records that pass the
# identity and coverage thresholds.
#
# Arguments:
#   $result_sort       - array ref of tab-separated records:
#                        name, pdb id, evalue, identity ("NN%"), cover
#   $cover_tresh       - minimum cover (numeric comparison, >=)
#   $identity_treshold - minimum identity (numeric comparison, >=)
#   $out_file          - optional path; when true, the ">name _P_ pdbid" lines
#                        are also written to that file
#
# Side effects: prints a table and the ">name _P_ pdbid" lines on STDOUT, and
# appends every accepted pdb id to the file-level @pdb_list.
# Dies when $out_file cannot be opened or written.
sub AFFICHAGE_PDB_PARSING
{
    my($result_sort,$cover_tresh,$identity_treshold,$out_file)=@_;
    my @sortie=();

    print {*STDOUT} "\n\n", sprintf("%-40s %-10s %-10s %-12s %-10s","Sequence Name","PDB_id","Evalue","Identity(%)","Cover(%)"), "\n\n";

    foreach my $record (@$result_sort)
    {
        my($seq_name,$pdb_name,$EValue,$identiq,$cover)= split("\t",$record);
        ($identiq)= split(/%/,$identiq);    # keep only the number before '%'

        next unless ($identiq >= $identity_treshold && $cover >= $cover_tresh);

        push (@pdb_list,$pdb_name);
        $EValue =~ s/,$//;                  # drop a trailing comma from the evalue
        print {*STDOUT} sprintf("%-40s %-10s %-10s %-12s %-10s",$seq_name,$pdb_name,$EValue,$identiq,$cover), "\n";
        push (@sortie,">$seq_name _P_ $pdb_name\n");
    }
    print {*STDOUT} "\n**********************************************************************\n\n";

#-- OUTFILE /STDOUT
    # The output file is opened, written and closed only when one was requested;
    # the close is checked because buffered write errors surface there.
    if ($out_file) {
        open (my $SOR, '>', $out_file) or die "can not open $out_file: $!";
        print {$SOR} @sortie;
        close $SOR or die "can not write $out_file: $!";
    }
    print {*STDOUT} @sortie;
    return;
}
#-----------------------------------------------------------------------------------------------------------------------------------
# PROFILE - convert every BLAST report that has hits into a fasta alignment
# file "<i>.profile" by running t_coffee's seq_reformat on it.
#
# Arguments:
#   $list_pdb - array ref; each element is the text of one query's report
#   $out_file - optional path; when true, the ">name _R_ <i>.profile" lines are
#               also written to that file
#   $distant  - 1 when reports use the "Query= ... Length" layout
#
# Side effects: writes and finally removes 'tempo_file_profile' in the current
# directory, runs the external command t_coffee once per report with hits,
# prints a separator on STDERR and the ">name _R_ <i>.profile" lines on STDOUT.
# Dies when a query name cannot be parsed or when a file cannot be written;
# a failing t_coffee run only produces a warning.
sub PROFILE
{
    my($list_pdb,$out_file,$distant)=@_;
    my @sortie = ();
    my $i      = 0;    # counter of reports with hits; names the .profile files

    # Open the output file up front (so an unwritable path fails before any
    # work is done), but only when one was requested: the previous
    # unconditional open died whenever $out_file was empty.
    my $OUT;
    if ($out_file) {
        open ($OUT, '>', $out_file) or die "can not open $out_file: $!";
    }

    foreach my $pdb_result (@$list_pdb)
    {
        next if ($pdb_result =~ /No hits found/i);
        ++$i;

        my $name;
        if ($distant==1) {($name)   =($pdb_result =~ /Query=\s*(.+?)Length/smoi)  or die "\nparse error in distant profile\n";}
        else             {($name)   =($pdb_result =~ /\s*(.+?)\(.*?letters/ismo)  or die "\nparse error in profile\n";}

        # Trim trailing whitespace; fall back to the raw name when there is none.
        my($name1)= ($name=~ /(.+?)\s+$/);
        $name1 = $name unless defined($name1);

        open (my $TMP, '>', 'tempo_file_profile') or die "can not open tempo_file_profile: $!";
        print {$TMP} "Query= $pdb_result";
        close $TMP or die "can not write tempo_file_profile: $!";

        # Run the converter through a write pipe that is closed immediately, as
        # before; a failure is now reported instead of being silently ignored.
        if ( open(my $COM, '|-', "t_coffee -other_pg seq_reformat -input blast_aln -in tempo_file_profile -output fasta_aln -out ${i}.profile") ) {
            close $COM or warn "t_coffee seq_reformat failed for ${i}.profile (status $?)\n";
        }
        else {
            warn "can not run t_coffee: $!\n";
        }
        push (@sortie,">$name1 _R_ ${i}.profile\n");
    }
    unlink("tempo_file_profile");

    print {*STDERR} "\n**********************************************************************\n\n";

#-- OUTFILE /STDOUT
    if ($OUT) {
        print {$OUT} @sortie;
        close $OUT or die "can not write $out_file: $!";
    }
    print {*STDOUT} @sortie;
    return;
}

#--------------------------------------------------------------------------------------------------------------------------------
# ORGN - validate an organism name given on the command line.
#
# Argument: the organism name with "_" standing for the space
# (e.g. "Homo_sapiens"); only the first "_" is converted.
# Returns the name with every space replaced by "+" when it is one of the
# supported organisms. Otherwise prints an error on STDERR and hands over
# to HELP() (defined elsewhere in this file).
sub ORGN
{
    my($organism)=@_;
    $organism=~ s/_/ /;

    my %orgs = map { $_ => '1' } (
        'Homo sapiens',
        'Bos taurus',
        'Gallus gallus',
        'Viruses',
        'Bacteria',
        'Eukaryota',
        'Mammalia',
        'Vertebrata',
        'All organisms',
        'Fungi',
        'Primates',
        'Archaea',
        'Arabidopsis thaliana',
        'Caenorhabditis elegans',
        'Escherichia coli',
        'Mus musculus',
        'Drosophila melanogaster',
    );

    if (exists $orgs{$organism}) {
        $organism =~ tr/ /+/;
        return ($organism);
    }

    print {*STDERR} "organism not valid or syntax error, replace space by \"_\" \n";
    &HELP();
}
#------------------------------------------------------------------------------------------------------------------------------------

# LIST_ORGA - list the supported organism names in their command-line form,
# i.e. with every space replaced by "_".
# Takes no argument. The names are returned in hash order, which is
# unspecified.
sub LIST_ORGA
{
    my %orgs = map { $_ => '1' } (
        'Homo sapiens',
        'Bos taurus',
        'Gallus gallus',
        'Viruses',
        'Bacteria',
        'Eukaryota',
        'Mammalia',
        'Vertebrata',
        'All organisms',
        'Fungi',
        'Primates',
        'Archaea',
        'Arabidopsis thaliana',
        'Caenorhabditis elegans',
        'Escherichia coli',
        'Mus musculus',
        'Drosophila melanogaster',
    );

    my @cle = keys %orgs;
    tr/ /_/ for @cle;
    return (@cle);
}
877