webblast.pl 35.4 KB
Newer Older
Sebastien Moretti's avatar
Sebastien Moretti committed
1
#!/usr/bin/env perl
2

3
#date : 2007/11/19
Sebastien Moretti's avatar
Sebastien Moretti committed
4
5
#prog : webblast.pl
#subj : make a BLAST/WU-BLAST (by HTTP request or locally) against a database with a file containing sequences in fasta format
Sebastien Moretti's avatar
Sebastien Moretti committed
6
####### method genid, pdbid and profile
7

Sebastien Moretti's avatar
Sebastien Moretti committed
8
#############################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
9
use LWP::UserAgent;
10
11
12
13
use HTTP::Request::Common qw(POST);
use Getopt::Long;
use strict;
use warnings;
14
##############################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
15
16


17
18
19
20
21
22
############################  EXPRESSO PARAM  #################################################
my $database_expresso  = 'pdb';                                   #PDB database name          #
my $blast_dir_expresso = '/mnt/local/bin/blastall';               #blastall executable        #
my $BLASTMAT           = 'export BLASTMAT=/mnt/local/ncbi/data/'; #matrix directory for blast #
my $BLASTDB            = 'export BLASTDB=/db/blastnet/database/'; #PDB_seqres directoty       #
###############################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
23

24
my $runblast = 'runblast.pl';
Sebastien Moretti's avatar
Sebastien Moretti committed
25

Sebastien Moretti's avatar
Sebastien Moretti committed
26
my(@list_encoded)=(), my(@list_pdb)=(), my(%deja_vu)=(), my(@pdb_list)=(), my($i)=0, my(@names)=(), my $locale=0, my $distant=0, my $database, my $blast_way;
Sebastien Moretti's avatar
Sebastien Moretti committed
27
28
my($ua)= LWP::UserAgent->new;

29

Sebastien Moretti's avatar
Sebastien Moretti committed
30
31
32
33
##-- Environmental Variables

my ($database_var) = $ENV { 'DATABASE' };
my ($blast_var)    = $ENV { 'BLAST_DIRECTORY' };
Sebastien Moretti's avatar
Sebastien Moretti committed
34

Sebastien Moretti's avatar
Sebastien Moretti committed
35
##-- Get BLAST Options/parameters && check user options
Sebastien Moretti's avatar
Sebastien Moretti committed
36

Sebastien Moretti's avatar
Sebastien Moretti committed
37
38
39
my ($program, $database_line, $blast_line, $query_file,
   $out_file, $identity_treshold, $cover_tresh, $Eval,
   $align, $matrix, $filter, $method, $orgn, $process, $quiet, $gigablast) = &OPTIONS_GET();
Sebastien Moretti's avatar
Sebastien Moretti committed
40

Sebastien Moretti's avatar
Sebastien Moretti committed
41
##-- Define local or remote BLAST && Check database/program
Sebastien Moretti's avatar
Sebastien Moretti committed
42

43
44
unless (-e $query_file ) { print {*STDERR} "\nfile does not exist!\n";exit;}
unless (-s $query_file ) { print {*STDERR} "\nyour file is empty!\n";exit; }
Sebastien Moretti's avatar
Sebastien Moretti committed
45

Sebastien Moretti's avatar
Sebastien Moretti committed
46
47
48
49
50
51
if ( ($database_line || $database_var) && ($blast_line || $blast_var) ){
    if ( $database_line=~/expressopdb/ && $blast_line=~/blastexpresso/ ){
        #special mode for configuration file of Expresso server
        $locale   = 1;
        $database = 'expressopdb';
        unless ( $quiet=~ /on/i ){
52
            print "\nRUN BLAST LOCALLY\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
53
        }
54
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
55
56
57
58
59
60
    else{
        ($database)    = $database_line || $database_var;
        my ($blast_tp) = $blast_var || $blast_line;
        $locale = 1;
        $blast_way = &CONTROLE_DB_PG($database, $blast_tp, $program);
        unless ( $quiet=~ /on/i ){
61
            print "\nRUN BLAST LOCALLY\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
62
        }
63
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
64
}
Sebastien Moretti's avatar
Sebastien Moretti committed
65
66
67
68
69
70
71
else{
    $database = &NCBI_DATABASE($database_line);
    $distant  = 1;
    if ( $gigablast=~ /^yes$/i ){
        $locale  = 2;
        $distant = 0;
        unless ( $quiet=~ /on/i ){
72
            print "\nRUN ON GIGABLASTER\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
73
74
75
76
        }
    }
    else{
        unless ( $quiet=~ /on/i ){
77
            print "\nRUN BLAST AT THE NCBI\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
78
79
80
81
82
        }
    }
}

##- Define parameters through -method flag
83

Sebastien Moretti's avatar
Sebastien Moretti committed
84
85
86
if ( $method=~ /^pdbid$/i ){
    if ( $gigablast=~ /^yes$/i ){
        if ( $database ne 'pdb' ){
87
            print {*STDERR} "\nprovide a valid database name to RUN ON GIGABLASTER: nr, pdb or refseq_protein\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
88
89
90
91
92
93
94
95
96
            exit 1;
        }
        else {
            $database = 'pdbaa';
        }
    }
    elsif ( $gigablast=~ /^no$/i && $distant==1 ){
        $database = 'pdb';
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
97
}
Sebastien Moretti's avatar
Sebastien Moretti committed
98
99
100
101
102
elsif ( $method=~ /^geneid$/i ){
    if ( $distant==1 ){
        unless ( $database eq 'nr' || $database eq 'swissprot' || $database eq 'pdb'){
            $database = 'refseq_protein';
        }
103
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
104
}
Sebastien Moretti's avatar
Sebastien Moretti committed
105
106
107
108
109
110
elsif ( $method=~/^profile$/i ){
    if ( $distant==1 ){
        unless ( $database eq 'nr' || $database eq 'swissprot' || $database eq 'refseq_protein' ){
            $database = 'pdb';
        }
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
111
}
Sebastien Moretti's avatar
Sebastien Moretti committed
112
113
else {
    die "unknown method\n";
114
115
}

Sebastien Moretti's avatar
Sebastien Moretti committed
116
117
118
119
if ( $orgn !~ /All\+organisms/ && $locale=~/1|2/ ){
    print {*STDERR} "-organism option can't be used locally or with -gigablast option!\n";
    exit 1;
}
Sebastien Moretti's avatar
Sebastien Moretti committed
120

Sebastien Moretti's avatar
Sebastien Moretti committed
121
122
123
124
125
##---PRINT option values
unless ( $quiet =~ /on/i ){
    print {*STDERR} "

             Program :  $program
Sebastien Moretti's avatar
Sebastien Moretti committed
126
             Database : $database
Sebastien Moretti's avatar
Sebastien Moretti committed
127
128
             Method :   $method

Sebastien Moretti's avatar
Sebastien Moretti committed
129
             Query_file : $query_file
Sebastien Moretti's avatar
Sebastien Moretti committed
130
131
132
133
134
135
             Out_file :   $out_file
";
    print {*STDOUT} "
             Evalue threshold :         $Eval
             Matrix :                   $matrix
             Filter :                   $filter
Sebastien Moretti's avatar
Sebastien Moretti committed
136
             Blast_identity_threshold : $identity_treshold
Sebastien Moretti's avatar
Sebastien Moretti committed
137
138
             Cover threshold :          $cover_tresh
";
139
    print {*STDERR} "
Sebastien Moretti's avatar
Sebastien Moretti committed
140
141
142
143
144
145
146
147
148
149
150
             Number of hits :            $align
             Number of processors used : $process
";
    if ( $gigablast=~ /^yes$/i ){
        print {*STDERR} "\n             gigablast: yes\n";
    }
    unless ($locale) {
        print {*STDERR} "\n             Organism : $orgn\n";
    }


151
    print "\n***************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
152
153
}

154

Sebastien Moretti's avatar
Sebastien Moretti committed
155
156
157
158
159
160
161
162
163
164
165
166
#-- Local/Remote BLASTP

if ( $locale==1 || $locale==2 ){
    @list_pdb = &LOCAL_BLAST($blast_way, $database, $query_file, $Eval, $align, $method, $matrix, $filter,
                             $process, $gigablast, $database_expresso, $blast_dir_expresso, $runblast);
}
elsif ( $distant==1 ){
    @list_pdb = &WEB_BLAST($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter);
}
else {
    die " Report bug to armougom\@igs.cnrs-mrs.fr\n";
}
Sebastien Moretti's avatar
Sebastien Moretti committed
167
168

#-- PARSE BLAST RESULTS -> MAKE A PDB_ID LIST
Sebastien Moretti's avatar
Sebastien Moretti committed
169
170
171
172
if ( $method =~ /^pdbid$/i ){
    my (@result_sort) = &PARSING(\@list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast);
    &AFFICHAGE_PDB_PARSING(\@result_sort, $cover_tresh, $identity_treshold, $out_file);
    exit 0;
Sebastien Moretti's avatar
Sebastien Moretti committed
173
174
175
}

#-- PARSE BLAST RESULT -> MAKE LIST OF REFSEQ ID
Sebastien Moretti's avatar
Sebastien Moretti committed
176
177
178
179
elsif ( $method =~ /^geneid$/i ){
    my (@result_sort) = &PARSING(\@list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast);
    &AFFICHAGE_REFSEQ_PARSING(\@result_sort, $cover_tresh, $identity_treshold, $out_file);
    exit 0;
Sebastien Moretti's avatar
Sebastien Moretti committed
180
181
182
}

#-- PARSE BLAST RESULT -> MAKE PROFILE
Sebastien Moretti's avatar
Sebastien Moretti committed
183
184
185
186
187
188
189
elsif ( $method=~ /^profile$/i ){
    &PROFILE(\@list_pdb, $out_file, $distant);
    exit 0;
}
else {
    die " \nFATAL ERROR : Method or database error\n";
}
190

Sebastien Moretti's avatar
Sebastien Moretti committed
191
exit 0;
192

Sebastien Moretti's avatar
Sebastien Moretti committed
193
194
195
                                               ##############
###############################################  FONCTIONS  ####################################################################
                                              ##############
Sebastien Moretti's avatar
Sebastien Moretti committed
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
sub CONTROLE_DB_PG {
    my ($database, $blast_dir, $program) = @_;

    if ( !-e "$database" && !-e "$database.pin"){
        die "$database file does not exist\n";
    }
    if ( -d $database ){
        die "$database must be a file, not a directory\n";
    }
    if ( $blast_dir !~ /\/$/ ){
        $blast_dir .= '/';
    }
    my ($blastall) = $blast_dir.'blastall';
    if ( !-e $blastall){
        die "$blastall program not found \n";
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
212

213
    return ($blastall);
Sebastien Moretti's avatar
Sebastien Moretti committed
214
215
216
}

#-------------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
sub NCBI_DATABASE {
    my ($ncbi_db) = @_;

    my (%all_db) =
    (
            'nr'             =>'1',
            'pdb'            =>'1',
            'swissprot'      =>'1',
            'refseq_protein' =>'1',
    );

    if ( exists $all_db{$ncbi_db} ){
        return($ncbi_db);
    }
    elsif ( $ncbi_db eq '' ){
        return ('');
    }
    else{
        return (1);
    }
237
}
Sebastien Moretti's avatar
Sebastien Moretti committed
238
#------------------------------------------------------------------------------------------------------------------------
239
sub HELP {
Sebastien Moretti's avatar
Sebastien Moretti committed
240
241
    my ($org, @orga) = &LIST_ORGA();
    my ($list_orga)  = join(', ', @orga);
Sebastien Moretti's avatar
Sebastien Moretti committed
242

243
244
    print {*STDERR} "
                      usage: $0 -infile <fasta file> -method <pdbid/geneid or profile> options []\n
Sebastien Moretti's avatar
Sebastien Moretti committed
245
246

                            -program ...... Program Name (blastp)
Sebastien Moretti's avatar
Sebastien Moretti committed
247
                                            Default = blastp
248
                            -database ..... Database at NCBI (nr, pdb, swissprot, refseq_protein) or indicate a local fasta file
Sebastien Moretti's avatar
Sebastien Moretti committed
249
250
251
252
                                            Default = pdb at NCBI
                            -infile ....... Query_file = a list of sequences in fasta format
                            -outfile ...... Name the outfile to make a template file for t_coffee
                                            Default = STDOUT or default.profile if method is profile
253
254
                            -evalue ....... Evalue threshold Default = 1
                            -matrix ....... PAM30 PAM70 BLOSUM45 BLOSUM80
Sebastien Moretti's avatar
Sebastien Moretti committed
255
                                            Default BLOSUM62
256
                            -method ....... geneid, pdbid, profile
257
                            -gigablast .... yes/no FASTER REMOTE BLAST with Gigablaster
Sebastien Moretti's avatar
Sebastien Moretti committed
258
                                            (Stephane Audic program: http://www.igs.cnrs-mrs.fr/public_g/remoteblast.cgi)
Sebastien Moretti's avatar
Sebastien Moretti committed
259
                                            Default no
260
                            -filter ....... T or F locally, L or R or M or C or V for distant blast
Sebastien Moretti's avatar
Sebastien Moretti committed
261
                                            Default = Off
262
263
                            -organism ..... $list_orga are available
                                            Default is All_organisms
Sebastien Moretti's avatar
Sebastien Moretti committed
264
                            -identity ..... blast identity threshold = provide a % for view only the results upper or equal to the threshold
265
266
267
                                            Default 50
                            -cover ........ Cover threshold = provide a % : sequence covering Default: 30
                            -hits ........  Number of hits
Sebastien Moretti's avatar
Sebastien Moretti committed
268
                                            Default = 1
269
270
                            -processor .... Number of processors to use
                                            Default = 1
271
                            -blast_dir .... Indicates where your BLAST directory is installed locally
272
273
                            -quiet ........ on : do not display all the default/defined blast parameters
                                            Default off
Sebastien Moretti's avatar
Sebastien Moretti committed
274
275
276

                     Environement Variables
                     These variables can be set from the environement
277
278
           DATABASE......................[Indicates where your database file must be fetched (locally)]
           BLAST_DIRECTORY...............[Indicates where your BLAST directory is installed locally]
279

Sebastien Moretti's avatar
Sebastien Moretti committed
280
";
Sebastien Moretti's avatar
Sebastien Moretti committed
281
282
    exit 1;
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
283
284
}
#-----------------------------------------------------------------------------------------
285
sub OPTIONS_GET {
Sebastien Moretti's avatar
Sebastien Moretti committed
286
    my %opt = ();
Sebastien Moretti's avatar
Sebastien Moretti committed
287
288

    GetOptions
Sebastien Moretti's avatar
Sebastien Moretti committed
289
              (
290
291
292
293
294
295
296
297
298
299
300
301
           'infile=s'    =>\$opt{infile},
           'outfile=s'    =>\$opt{outfile},
           'program=s'     =>\$opt{program},
           'database=s'     =>\$opt{database},
           'blast_dir=s'     =>\$opt{blast_dir},
           'identity=f'       =>\$opt{treshold},
           'cover=f'           =>\$opt{cover},
           'evalue=f'           =>\$opt{evalue},
           'hits=i'              =>\$opt{hits},
           'matrix=s'             =>\$opt{matrix},
           'filter=s'              =>\$opt{filter},
           'method=s'               =>\$opt{method},
302
           'organism=s'              =>\$opt{organism},
303
304
305
306
           'processor=i'              =>\$opt{processor},
           'quiet=s'                   =>\$opt{quiet},
           'gigablast=s'                =>\$opt{gigablast},
           );
Sebastien Moretti's avatar
Sebastien Moretti committed
307
308
309
310

    if ( $ARGV[0] ){
        print "Unprocessed by Getopt::Long\n $ARGV[0]\n";
        &HELP();
311
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333

    my($evalue_tresh) = $opt{'evalue'};       unless ($evalue_tresh)        { $evalue_tresh = 1;};
    my($cover_tresh)  = $opt{'cover'};         unless (defined $cover_tresh) { $cover_tresh = 30;};
    my($query_file)   = $opt{'infile'};         unless ($query_file)          { print {*STDERR} "Flag -infile must be defined\n"; &HELP();};
    my($outfil)       = $opt{'outfile'};         unless ($outfil)              { $outfil = '';};
    my($treshold)     = $opt{'treshold'};         unless (defined $treshold)    { $treshold = 50;};
    my($blast_dir)    = $opt{'blast_dir'};         unless ($blast_dir)           { $blast_dir = '';};
    my($database)     = $opt{'database'};           unless ($database)            { $database = '';};
    my($program)      = $opt{'program'};             unless ($program)             { $program = 'blastp';};
    my($align)        = $opt{'hits'};                 unless (defined $align)       { $align = 1;};
    my($matrix)       = $opt{'matrix'};                unless ($matrix)              { $matrix = 'BLOSUM62';};
    my($filter)       = $opt{'filter'};                 unless ($filter)              { $filter = 'F';};
    my($method)       = $opt{'method'};                  unless ($method)              {print {*STDERR} "Flag -method must be defined\n"; &HELP();};
    my($organism)     = $opt{'organism'};                 unless ($organism)            { $organism = 'All organisms';};
    my($process)      = $opt{'processor'};                 unless ($process)             { $process = 1;};
    my($param)        = $opt{'quiet'};                      unless ($param)               { $param = 'off';};
    my($gigablast)    = $opt{'gigablast'};                   unless ($gigablast)           {$gigablast = 'no';};

    if ( $method !~ /(^geneid$|^pdbid$|^profile$)/i ){
        print {*STDERR} "unknown method for the flag -method\n";
        &HELP();
    }
334
    if ( $treshold<0 || $treshold>100 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
335
336
337
        print {*STDERR} "\nout of range for the option -treshold \n";
        &HELP();
    }
338
    if ( $cover_tresh<0 || $cover_tresh>100 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
339
340
341
        print {*STDERR} "\nout of range for the option -cover \n";
        &HELP();
    }
342
    if ( $align<0 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
343
344
345
        print {*STDERR} "\n error with option align\n";
        &HELP();
    }
346
    if ( $gigablast !~ /^yes$|^no$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
347
348
349
        print {*STDERR} "invalid argument for gigaglast option: yes/no\n";
        exit 1;
    }
350
    if ( $filter !~ /^[TFRLMCV]{1}$|^off$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
351
352
353
        print {*STDERR} "valid values for -filter are T,F,R,L,M,C,or V!\n";
        exit 1;
    }
354
    if ( $matrix !~ /PAM30|PAM70|BLOSUM45|BLOSUM80|BLOSUM62/ ){
Sebastien Moretti's avatar
Sebastien Moretti committed
355
356
357
        print {*STDERR} "valid values for -matrix are PAM30,PAM70,BLOSUM45,BLOSUM80 or BLOSUM62\n";
        exit 1;
    }
358
    if ( $outfil eq '' && $method=~ /^profile$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
359
360
361
        $outfil = 'default_profile.template';
    }

362
    if ( $param!~ /^on$|^off$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
363
364
365
366
367
368
369
        print {*STDERR} "valid values for -quiet is on or off\n";
        exit 1;
    }
    my ($orgn, @all_orgn) = &ORGN($organism);
    return ($program,$database,$blast_dir,$query_file,
            $outfil,$treshold,$cover_tresh,$evalue_tresh,
            $align,$matrix,$filter,$method,$orgn,$process,$param,$gigablast);
Sebastien Moretti's avatar
Sebastien Moretti committed
370
371
372
}

#--------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
373
374
sub RECOVER {
    my ($pdb_result, $aln_length, $length_query) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
375
    my $nb_gap=0;
Sebastien Moretti's avatar
Sebastien Moretti committed
376
377
378
379
380

    if ( $pdb_result=~ /(score.+?\n\n\n).+?score/ism ){#If several HSP, take the 1st
        $pdb_result = $1;
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
381
    $length_query  =~ s/,//g;
Sebastien Moretti's avatar
Sebastien Moretti committed
382
    my ($requete)  =  join('', ($pdb_result=~/^Query(.*)\n/gm) );
Sebastien Moretti's avatar
Sebastien Moretti committed
383
    $requete       =~ s/[^A-Z-]//g;
Sebastien Moretti's avatar
Sebastien Moretti committed
384
385
386
387
388
389
    my (@sequence) =  split('', $requete);

    for (my $i=0; $i<=$#sequence; $i++){
        if ( $sequence[$i] eq '-'){
            ++$nb_gap;
        }
Sebastien Moretti's avatar
Sebastien Moretti committed
390
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
391
    my ($recouvrement) = sprintf("%-3d", (($aln_length-$nb_gap)/$length_query)*100 );
Sebastien Moretti's avatar
Sebastien Moretti committed
392
    undef(@sequence);
Sebastien Moretti's avatar
Sebastien Moretti committed
393
    return ($recouvrement, $nb_gap);
Sebastien Moretti's avatar
Sebastien Moretti committed
394
395
}
#--------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
sub WEB_BLAST {
    open (my $SOR1, '>', 'web_tempo.result') or die;
    my ($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter) = @_;
    my $aln_view;
    my $format = 'Txt';
    my ($description) = $align;
    if ( $method=~/^profile$/i ){
        $aln_view = 'FlatQueryAnchoredNoIdentities';
    }
    else {
        $aln_view = 'Pairwise';
    }

    if ( $filter eq 'F' ){
        $filter = 'off';
    }

    $/ = '>';
    open( my $FIC, '<', $query_file) or die "cannot open $query_file $!\n";
    my (@sequences) = <$FIC>;
    close $FIC;
Sebastien Moretti's avatar
Sebastien Moretti committed
417
418
    shift(@sequences);

Sebastien Moretti's avatar
Sebastien Moretti committed
419
420
421
422
423
424
425
426
427
428
    foreach my $sequence(@sequences){
        $sequence =~ s/>//g;
        $sequence = ">$sequence";

        my ($name) = ($sequence =~ /^>(.+)\n/);
        push(@names, $name);
        my ($encoded_query) = uri_escape($sequence);
        push (@list_encoded, $encoded_query);
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
429
    undef(@sequences);
Sebastien Moretti's avatar
Sebastien Moretti committed
430
431
432
433
434
    if ( scalar (@names != @list_encoded) ){
        die "error $!";
    }
    foreach my $encoded_seq(@list_encoded){
        my $nb = 0;
435
        print "BLAST $names[$i]...";
Sebastien Moretti's avatar
Sebastien Moretti committed
436

Sebastien Moretti's avatar
Sebastien Moretti committed
437
#-- BUILD THE REQUEST
Sebastien Moretti's avatar
Sebastien Moretti committed
438
439
        my ($arguments) = "CMD=Put&ENTREZ_QUERY=$orgn&CDD_SEARCH=off&FILTER=$filter&MATRIX_NAME=$matrix&PROGRAM=$program&DATABASE=$database&QUERY=" . $encoded_seq;

Sebastien Moretti's avatar
Sebastien Moretti committed
440
441
#        my ($req) = new HTTP::Request POST => 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi'; #old URL
        my ($req) = new HTTP::Request POST => 'http://blast.ncbi.nlm.nih.gov/Blast.cgi';
Sebastien Moretti's avatar
Sebastien Moretti committed
442
443
444
        $req -> content_type('application/x-www-form-urlencoded');
        $req -> content($arguments);

Sebastien Moretti's avatar
Sebastien Moretti committed
445
#-- GET THE RESPONSE : PARSE OUT THE REQUEST ID and THE ESTIMATED TIME
Sebastien Moretti's avatar
Sebastien Moretti committed
446
447
448
449
450
451
452
453
454
455
456
457
458
459
        my ($response) = $ua -> request($req);

        if ( $response -> content =~ /Server Error/i ){
            die "Server Error at NCBI!!Sorry try later\n";
        }
        $response -> content =~ /^\s{4}RID = (.*)$/m;
        my ($rid) = $1;

        $response -> content =~ /^\s{4}RTOE = (.*)$/m;
        my ($wait)= $1;
        unless ($rid && $wait){
            die "parse error: $!";
        }
        for (my $j=0; $j<=$wait/2; $j++){
460
            print '.';
Sebastien Moretti's avatar
Sebastien Moretti committed
461
462
463
464
465
466
467
            sleep 2;
        }

        my ($verif) = 0;

        while (){
            for (my $j=0; $j<=5; $j++){
468
                print '.';
Sebastien Moretti's avatar
Sebastien Moretti committed
469
470
471
472
                sleep 1;
            }

            $req = new HTTP::Request GET =>
Sebastien Moretti's avatar
Sebastien Moretti committed
473
474
                "http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=$rid";
#                "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=$rid"; #old url
Sebastien Moretti's avatar
Sebastien Moretti committed
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
            $response = $ua->request($req);
            if ( $response->content =~ /Status=WAITING/im ){
                next;
            }
            elsif ( $response->content =~ /Status=FAILED/im ){
                print {*STDERR} "Search $rid failed\n";
                $verif = 1;
                last;
            }
            elsif ( $response->content =~ /Status=UNKNOWN/im ){
                print {*STDERR} "Search $rid expired\n";
                $verif = 1;
                last;
            }
            elsif ( $response->content =~ /Status=READY/im ){
                if ( $response->content =~ /ThereAreHits=yes/im ){
                    last;
                }
                else {
494
                    print {*STDERR} "No hits found\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
495
496
497
498
499
                    $verif = 1;
                    last;
                }
            }
            elsif ( $response->content =~ /can\'t connect/im ){
Sebastien Moretti's avatar
Sebastien Moretti committed
500
                print {*STDERR} "\nCan't connect to blast.ncbi.nlm.nih.gov:80...new attempt";
Sebastien Moretti's avatar
Sebastien Moretti committed
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
                if ( $nb <3 ){
                    ++$nb;
                    next;
                }
                else {
                    print {*STDERR} "sorry, BLAST $names[$i] failed after 3 attempts!!\n";
                    $verif = 1;
                    last;
                }
            }
            else {
                print {*STDERR} "unknown error\n";
                $verif = 1;
                last;
            }
        }

        if( $verif==1 ){
            ++$i;
            next;
        }

Sebastien Moretti's avatar
Sebastien Moretti committed
523
#-- GET RESULT
Sebastien Moretti's avatar
Sebastien Moretti committed
524
525
526

        while (){
            sleep 3;
Sebastien Moretti's avatar
Sebastien Moretti committed
527
528
#        $req = new HTTP::Request GET => "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_TYPE=$format&FILTER=off&EXPECT=$Eval&ALIGNMENTS=$align&DESCRIPTIONS=$align&ALIGNMENT_VIEW=$aln_view&RID=$rid";
        $req = new HTTP::Request GET => "http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Get&FORMAT_TYPE=$format&FILTER=off&EXPECT=$Eval&ALIGNMENTS=$align&DESCRIPTIONS=$align&ALIGNMENT_VIEW=$aln_view&RID=$rid";
529
530
        $response = $ua -> request($req);

531
        if   ($response->content =~ /Altschul/i) {  print "Search Complete\n"; push(@list_pdb,$response -> content);last; }
532
533
534
535
536
537
        else { next; }
    }
    print {$SOR1} (@list_pdb);
    ++$i;
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
538
    undef (@list_encoded);
539

Sebastien Moretti's avatar
Sebastien Moretti committed
540
    close $SOR1;
Sebastien Moretti's avatar
Sebastien Moretti committed
541
    return (@list_pdb);
542

Sebastien Moretti's avatar
Sebastien Moretti committed
543
544
545
546
}
#-----------------------------------------------------------------------------------------------------------------------
sub LOCAL_BLAST
{
Sebastien Moretti's avatar
Sebastien Moretti committed
547
548
549
550
    my ($blast_dir, $database, $query_file, $Eval, $align,
        $method, $matrix, $filter, $process, $gigablast,
        $database_expresso, $blast_dir_expresso, $runblast) = @_;
    my $n = 0;
551
    if ( $method=~ /^profile$/i && $gigablast=~ /^no$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
552
        open (COM,"$blast_dir -p blastp -d $database -i $query_file -m 6 -M $matrix -v $align -b $align -F $filter -e $Eval -a $process|") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
553
    }
554
555
    elsif ( $method=~ /^geneid$/i && $gigablast=~ /^no$/i )
    {
Sebastien Moretti's avatar
Sebastien Moretti committed
556
        open (COM,"$blast_dir -p blastp -d $database -i $query_file  -v $align  -b $align -F $filter -M $matrix -e $Eval -a $process|") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
557
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
558
    elsif ( $method=~ /^geneid$|^pdbid$/i && $gigablast=~ /^yes$/i )
Sebastien Moretti's avatar
Sebastien Moretti committed
559
    {
560
        unless ( $database eq 'nr' || $database eq 'pdb' || $database eq 'refseq_protein' || $database eq 'pdbaa' ){
Sebastien Moretti's avatar
Sebastien Moretti committed
561
562
563
564
565
566
            print {*STDERR} "\nsorry invalid database for gigablast\n";exit 1;
        }
        if ( $database eq 'pdb')            { $database = 'pdbaa';}
        if ( $database eq 'refseq_protein') { $database = 'refprot';}
        if ( $database eq '')               { print {*STDERR} "provide a valid database!\n"; exit;}
        open (COM,"$runblast -d $database -p blastp  -e $Eval -v $align -F F \<$query_file |");
567
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
568
569
    elsif ( $method=~ /^profile$/i && $gigablast=~ /^yes$/i){
        print {*STDERR} "\nSorry method profile can't be used with -gigablast option\n";exit;
Sebastien Moretti's avatar
Sebastien Moretti committed
570
    }
571
572
573
574
575
    elsif ( $method=~ /^pdbid$/i && $database=~ /expressopdb/ ){
        #BLAST pour Expresso
        open (COM,"$BLASTMAT; $BLASTDB; $blast_dir_expresso -p blastp -d $database_expresso -i $query_file -F $filter -e $Eval -M $matrix -v $align -b $align |") or die;
    }
    else{
Sebastien Moretti's avatar
Sebastien Moretti committed
576
        open (COM,"$blast_dir -p blastp -d $database -i $query_file -v 1 -b 1 -F $filter -e $Eval -M $matrix -v $align -b $align -a $process |") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
577
    }
578

579
    unless ( $quiet=~ /on/ ) { print "\nrun BLAST..."; }
580

Sebastien Moretti's avatar
Sebastien Moretti committed
581
    my ($name_database, $posted, $version) = ('', '', '');
Sebastien Moretti's avatar
Sebastien Moretti committed
582

Sebastien Moretti's avatar
Sebastien Moretti committed
583
    open (my $SOR2, '>', 'blast_result.txt') or die;
584

Sebastien Moretti's avatar
Sebastien Moretti committed
585
586
587
588
589
590
    $/ = 'Query=';
    while (<COM>){
        if ( $_=~ /Database:\s+(\S+)/g )     { $name_database = $1;}
        if ( $_=~ /Posted date:\s+(.+?)\n/ ) { $posted        = $1;}
        if ( $_=~ /(BLASTP\s+\S+)/o )        { $version       = $1;}
        print {$SOR2} $_;
Sebastien Moretti's avatar
Sebastien Moretti committed
591
        push (@list_pdb, $_);
592
#        if ( $_=~ /\s*(.+?)\s/ ){
593
#            print "\n$1 done";
594
#        }
Sebastien Moretti's avatar
Sebastien Moretti committed
595
596
    }
    close COM;
Sebastien Moretti's avatar
Sebastien Moretti committed
597
    close $SOR2;
598
#    print "\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
599

Sebastien Moretti's avatar
Sebastien Moretti committed
600
    $name_database = $database if ( $name_database =~ m{/} );
601
    unless ($quiet=~ /on/i) {
Sebastien Moretti's avatar
Sebastien Moretti committed
602
603
604
        print {*STDOUT} "
             Version:     $version
             Database:    $name_database
Sebastien Moretti's avatar
Sebastien Moretti committed
605
             Posted date: $posted\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
606
    }
607
    shift (@list_pdb);
Sebastien Moretti's avatar
Sebastien Moretti committed
608
609
610
611
    return (@list_pdb);
}

#-----------------------------------------------------------------------------------------------------------------------------
612
sub PARSING {
Sebastien Moretti's avatar
Sebastien Moretti committed
613
    my ($list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
614

615
616
617
618
    my (@list_pdb)        = @$list_pdb;
    my (@result_not_sort) = ();
    my $n = 0;
    open (my $SOR, '>', 'webblast.log') or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
619

620
621
622
    if ( $gigablast=~ /^yes$/i ) { $locale = 2;$distant = 0;}
    if ( $gigablast=~ /^no$/i )  { $locale = 1;}
    if ( $distant==1 )           { $locale = 0;}
Sebastien Moretti's avatar
Sebastien Moretti committed
623

624
625
626
627
628
629
    foreach my $pdb_result(@list_pdb){
        my $query, my $length_query, my($pdb_id), my $comp=0;
        if ( $pdb_result=~/No hits found/m ){
            print {$SOR} $pdb_result;
            next;
        }
630

631
632
633
634
635
636
637
638
639
        $pdb_result =~ s/ALIGNMENTS//;
        local $/ = undef;
        my (@intra_res) = split(/(?=\n\n>)/s, $pdb_result);


        if ( $distant==1 ){
            my $version_d, my $database_d, my $poste_d;
            undef $/; ($query, $length_query) = ( $intra_res[0] =~ /Query=\s+(\S+)\s+Length=\s*(\d+)/smo );
            $/ = "\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
640
            open (F3, '<', 'web_tempo.result') or die;
641
642
643
644
645
646
647
648
649
            while ($_=<F3>){
                if ($_=~ /(BLASTP\s+\S+)/o)        { $version_d  = $1;}
                if ($_=~ /Database:\s+(.+?)$/o)    { $database_d = $1;}
                if ($_=~ /Posted date:\s*(.+?)$/o) { $poste_d    = $1; last;}
            }
            close F3;
#           $database_d = $database if ( $database_d =~ m{/} );
            unless ($quiet=~ /on/i || $n>0){
                ++$n;
650
                print {*STDOUT} "
Sebastien Moretti's avatar
Sebastien Moretti committed
651
652
             Version:     $version_d
             Database:    $database_d
Sebastien Moretti's avatar
Sebastien Moretti committed
653
             Posted date: $poste_d\n\n";
654
655
656
657
658
            }
        }
        else{
            ($query, $length_query) = ( $intra_res[0] =~ /\s*(.+?)\s.+?\(([\d,]+) letters/smo );
        }
659

660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
        shift(@intra_res) if ( exists($intra_res[1]) );

        foreach my $intra_res(@intra_res){ #look for the different results of the query
            my($aln_length, $identity) = ( $intra_res=~ /^\sIdentities = \d+\/(\d+)\s\((.+?)\)/im );
            my($recouvrement, $gap)    = &RECOVER($intra_res, $aln_length, $length_query);
            my($evalue)                = ( $intra_res=~ /Expect = (.+?)\s/im );
            my($bits)                  = ( $intra_res=~ /Score =\s+([\d.]+)\s/im );

            unless ( $method !~ /^geneid$/i ){
                if ( $comp<=$bits ){
                    $comp=$bits;
                }
                else{
                    last;
                }
            }

            if ( $query eq '' || $length_query eq '' || $aln_length eq '' || $identity eq '' || $recouvrement eq '' || $gap eq '' ){
                print {$SOR} " can't parse $pdb_result";
                next;
            }

            if ( $method =~ /^pdbid$/i ){
                if ( $locale==1 ){
                    ($pdb_id) = ( $intra_res=~ /^>(.{6})/im );
                    $pdb_id   =~ s/_//;
                    $pdb_id   = uc($pdb_id);
                }
                else{
                    ($pdb_id) = ( $intra_res=~ /^>pdb\|(.{6})/im );
                     $pdb_id  =~ s/\|//;
                }
                ($evalue) = ( $intra_res=~ /Expect = (.+?)\s/im );

                push (@result_not_sort, "$query\t$pdb_id\t$evalue\t$identity\t$recouvrement\t");
            }
            elsif ( $method =~/^geneid$/i ){
                if ( $database !~ /pdb/i && $database !~ /swiss/i && $locale =~ /1|2/){
                    while ( $intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/sg ){
                        my $databank = $1;
                        my $last     = $4;
                        my $refseq   = $2;
                        if ( $databank eq 'pdb' ){
                            $refseq .= $last;
                        }
                        $refseq =~ s/\.\d+$//;
                        push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
                    }
                }
                elsif ( $database=~ /pdb|pdbaa/i && ($locale==1 || $locale==2) ){
                    my $refseq;
                    if ( $locale==1 ){
                        ($refseq) = ( $intra_res=~ />(.*?)\s/o );
                        $refseq   =~ s/_//;
                    }
                    else{
                        ($refseq) = ( $intra_res=~ /^>pdb\|(.{6})/im );
                        $refseq   =~ s/\|//;
                    }

                    unless ($refseq){
                        print {$SOR} $intra_res;
                        next;
                    }
                    push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tpdb");
                }
                elsif ( $distant==1 ){
                    my ($resul) = &MULTI_EQUIVALENT($query, $identity, $recouvrement, $bits, $evalue, $intra_res);
                    push (@result_not_sort, $resul);
                }
                elsif ( $database=~ /swiss/i ){
                    my ($refseq) = ( $intra_res=~ />.*?sp\|(.+?)\|/o );
                    unless ($refseq){
                        print {$SOR} $pdb_result;
                        next;
                    }
                    $refseq =~ s/\.\d+$//;
                    push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\tswiss_prot");
                }
            }
            else {die;}
        }
742
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
743
    close $SOR;
Sebastien Moretti's avatar
Sebastien Moretti committed
744
    undef (@list_pdb);
745

746
747
748
749
750
751
752
753
754
755
    if ( $method =~/^geneid$/i ){
        return (@result_not_sort);
    }
    else{
        my(@result_sort) = map  { $_->[1] }
                           sort { $b->[0]<=>$a->[0] }
                           map  { [/\t([\d.]+)%/,$_] }
                           @result_not_sort;
        undef(@result_not_sort);
        return (@result_sort);
756
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
757
758
759
760
}

#-------------------------------------------------------------------------------------------------------------------

Sebastien Moretti's avatar
Sebastien Moretti committed
761
762
sub MULTI_EQUIVALENT {
    my($query, $identity, $recouvrement, $bits, $evalue, $intra_res) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
763
764

    my @result=();
Sebastien Moretti's avatar
Sebastien Moretti committed
765
766
767
768
    while ( $intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/g ){
        my $databank = $1;
        my $last     = $4;
        my $refseq   = $2;
769

Sebastien Moretti's avatar
Sebastien Moretti committed
770
771
772
        if ( $databank eq 'pdb' ){ $refseq .= $last; }
        $refseq =~ s/\.\d+$//;
        push (@result, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
Sebastien Moretti's avatar
Sebastien Moretti committed
773
774
775
776
    }
    return (@result);
}
#--------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
sub AFFICHAGE_REFSEQ_PARSING {
    my($result_sort, $cover_tresh, $identity_treshold, $out_file) = @_;
    my(@result_sort) = @$result_sort, my(@name_gid) = ();
    my@resultats = ();
    my $afficher = '';

    (my($entete)= sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s", 'Sequence Name', 'Accession number', 'Databank', "%Identity", "%Cover", 'BITS', 'Evalue'));

    foreach my $result_sort(@result_sort){
        my($seq_name, $refseq_name, $identiq, $cover, $bits, $evalue, $bank) = split("\t", $result_sort);
        $evalue =~ s/,$//; #To remove an additional comment with new blast release (2.2.17)
        ($identiq) = split(/%/,$identiq);

        if ($identiq >= $identity_treshold && $cover >= $cover_tresh){
            push (@name_gid, ">$seq_name\@$bank\_\_$refseq_name\n");
            (($afficher) .=  sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s ", $seq_name, $refseq_name, $bank, $identiq, $cover, $bits, $evalue));
            $afficher .= "\n";
        }
        else {next;}
Sebastien Moretti's avatar
Sebastien Moretti committed
796
797
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
798
    if ($afficher) { print "\n$entete\n\n"; print $afficher; }
Sebastien Moretti's avatar
Sebastien Moretti committed
799
800


Sebastien Moretti's avatar
Sebastien Moretti committed
801
802
803
804
805
    if (@name_gid) { print {*STDOUT} "\n**********************************************************************\n\n"; }
    if ($out_file) { open (SOR,">$out_file") or die "can not open $out_file"; print SOR @name_gid; }
    print {*STDOUT} "\n", @name_gid;
    close SOR;
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
806
807
808
}
#-------------------------------------------------------------------------------------------------------------

Sebastien Moretti's avatar
Sebastien Moretti committed
809
810
sub AFFICHAGE_PDB_PARSING {
    my($result_sort, $cover_tresh, $identity_treshold, $out_file) = @_;
811

Sebastien Moretti's avatar
Sebastien Moretti committed
812
    my(@result_sort) = @$result_sort, my @sortie=();
813

Sebastien Moretti's avatar
Sebastien Moretti committed
814
    print {*STDOUT} "\n\n",(my($en_tete) = sprintf("%-40s %-10s %-10s %-12s %-10s", 'Sequence Name', 'PDB_id', 'Evalue', 'Identity(%)', 'Cover(%)')),"\n\n";
815

Sebastien Moretti's avatar
Sebastien Moretti committed
816
817
818
    foreach my $result_sort(@result_sort){
        my($seq_name, $pdb_name, $EValue, $identiq, $cover) = split("\t", $result_sort);
        ($identiq)                                          = split(/%/, $identiq);
819

Sebastien Moretti's avatar
Sebastien Moretti committed
820
821
822
823
824
825
826
        if ( $identiq >= $identity_treshold && $cover >= $cover_tresh ){
            push (@pdb_list, $pdb_name);
            $EValue =~ s/,$//;
            print {*STDOUT} ((my $afficher) = sprintf("%-40s %-10s %-10s %-12s %-10s", $seq_name, $pdb_name, $EValue, $identiq, $cover)),"\n";
            push (@sortie, ">$seq_name _P_ $pdb_name\n");
        }
        else {next;}
Sebastien Moretti's avatar
Sebastien Moretti committed
827
828
    }
    undef(@result_sort);
829
    print {*STDOUT} "\n**********************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
830
831

#-- OUTFILE /STDOUT
Sebastien Moretti's avatar
Sebastien Moretti committed
832
    if ($out_file){ open (SOR,">$out_file") or die "can not open $out_file"; print SOR @sortie; }
833
    print {*STDOUT} @sortie;
Sebastien Moretti's avatar
Sebastien Moretti committed
834
    close SOR;
Sebastien Moretti's avatar
Sebastien Moretti committed
835
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
836
837
}
#-----------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
838
839
840
841
842
sub PROFILE {
    my($list_pdb, $out_file, $distant) = @_;

    my(@list_pdb) = @$list_pdb, my(@sortie)=();
    my %names = ();   my $i = 0;    my($name) = '';
Sebastien Moretti's avatar
Sebastien Moretti committed
843
844

    open (SOR1,">$out_file") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
845
846
847
848
849
850
    foreach my $pdb_result(@list_pdb){
        if ( $pdb_result =~ /No hits found/i ){ next; }
        else{
            ++$i;
            if ( $distant==1 ){($name) = ($pdb_result =~ /Query=\s*(.+?)Length/smoi) or die "\nparse error in distant profile\n";}
            else              {($name) = ($pdb_result =~ /\s*(.+?)\(.*?letters/ismo) or die "\nparse error in profile\n";}
851

Sebastien Moretti's avatar
Sebastien Moretti committed
852
            my($name1) = ($name=~ /(.+?)\s+$/);
853

Sebastien Moretti's avatar
Sebastien Moretti committed
854
855
856
            open (my $SOR, '>', "tempo_file_profile") or die "can not open tempo_file_profile";
            print {$SOR} "Query= $pdb_result";
            close $SOR;
857

Sebastien Moretti's avatar
Sebastien Moretti committed
858
859
860
            open(COM,"|t_coffee -other_pg seq_reformat -input blast_aln -in tempo_file_profile -output fasta_aln -out ${i}.profile");
            close COM;
            push (@sortie,">$name1 _R_ ${i}.profile\n");
861

Sebastien Moretti's avatar
Sebastien Moretti committed
862
        }
863
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
864
    unlink("tempo_file_profile");
865
866
    undef(@list_pdb);

867
    print "\n**********************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
868
#-- OUTFILE /STDOUT
Sebastien Moretti's avatar
Sebastien Moretti committed
869
    if ($out_file){ open (SOR1,">$out_file") or die "can not open $out_file"; print SOR1 @sortie; }
870
    print {*STDOUT} @sortie;
Sebastien Moretti's avatar
Sebastien Moretti committed
871
    close SOR1;
Sebastien Moretti's avatar
Sebastien Moretti committed
872
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
873
874
875
}

#--------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
876
877
878
879
880
881
882
883
884
885
886
887
888
889
sub ORGN {
    my($organism) = @_;

    $organism =~ s/_/ /;

    my(%orgs) = (

        'Homo sapiens'            =>'1',
        'Bos taurus'              =>'1',
        'Gallus gallus'           =>'1',
        'Viruses'                 =>'1',
        'Bacteria'                =>'1',
        'Eukaryota'               =>'1',
        'Mammalia'                =>'1',
890
        'Vertebrata'              =>'1',
Sebastien Moretti's avatar
Sebastien Moretti committed
891
892
893
894
895
896
897
898
899
        'All organisms'           =>'1',
        'Fungi'                   =>'1',
        'Primates'                =>'1',
        'Archaea'                 =>'1',
        'Arabidopsis thaliana'    =>'1',
        'Caenorhabditis elegans'  =>'1',
        'Escherichia coli'        =>'1',
        'Mus musculus'            =>'1',
        'Drosophila melanogaster' =>'1',
900
        );
Sebastien Moretti's avatar
Sebastien Moretti committed
901

Sebastien Moretti's avatar
Sebastien Moretti committed
902
903
904
905
906
907
908
909
910
    if ( exists $orgs{$organism} ){
        $organism =~ s/ /+/g;
        return ($organism);
    }
    else{
        print {*STDERR} "organism not valid or syntax error, replace space by \"_\" \n";
        &HELP();
        return;
    }
911
}
Sebastien Moretti's avatar
Sebastien Moretti committed
912
913
#------------------------------------------------------------------------------------------------------------------------------------

Sebastien Moretti's avatar
Sebastien Moretti committed
914
sub LIST_ORGA {
Sebastien Moretti's avatar
Sebastien Moretti committed
915
916
917
918
919
920
    my (%orgs) = (

        'Homo sapiens'            =>'1',
        'Bos taurus'              =>'1',
        'Gallus gallus'           =>'1',
        'Viruses'                 =>'1',
921
        'Bacteria'                =>'1',
Sebastien Moretti's avatar
Sebastien Moretti committed
922
923
924
925
926
927
928
929
930
931
932
933
        'Eukaryota'               =>'1',
        'Mammalia'                =>'1',
        'Vertebrata'              =>'1',
        'All organisms'           =>'1',
        'Fungi'                   =>'1',
        'Primates'                =>'1',
        'Archaea'                 =>'1',
        'Arabidopsis thaliana'    =>'1',
        'Caenorhabditis elegans'  =>'1',
        'Escherichia coli'        =>'1',
        'Mus musculus'            =>'1',
        'Drosophila melanogaster' =>'1',
934
935
        );

Sebastien Moretti's avatar
Sebastien Moretti committed
936
937
938
939
    my (@cle) = keys(%orgs);
    foreach my $cle(@cle){
        $cle=~ s/ /_/g;
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
940
941
    return (@cle);
}
942