webblast.pl 36 KB
Newer Older
Sebastien Moretti's avatar
Sebastien Moretti committed
1
2
3
#!/usr/bin/env perl
#
#
4
#date : 2007/11/19
Sebastien Moretti's avatar
Sebastien Moretti committed
5
6
#prog : webblast.pl
#subj : make a BLAST/WU-BLAST (by HTTP request or locally) against a database with a file containing sequences in fasta format
Sebastien Moretti's avatar
Sebastien Moretti committed
7
####### method genid, pdbid and profile
Sebastien Moretti's avatar
Sebastien Moretti committed
8
9
10
11
#
#############################################################################################
#use Env qw(HOME);
#use lib "$HOME/.lib_webblast/";
Sebastien Moretti's avatar
Sebastien Moretti committed
12
13
14
use LWP::UserAgent;
use HTML::Parser;                                            # @@@@@@@ #
use HTTP::Request::Common qw(POST);                         # @/^   ^\@ #
Sebastien Moretti's avatar
Sebastien Moretti committed
15
16
17
18
use URI::Escape;                                           # @/ -   - \@ #
use Getopt::Long;                                         ##  \   ^   /  ##
use strict;                                              ##    |  0  |    ##
use warnings;                                           ####### \ _ / #######
19
##############################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
20
21


22
23
24
25
26
27
############################  EXPRESSO PARAM  #################################################
my $database_expresso  = 'pdb';                                   #PDB database name          #
my $blast_dir_expresso = '/mnt/local/bin/blastall';               #blastall executable        #
my $BLASTMAT           = 'export BLASTMAT=/mnt/local/ncbi/data/'; #matrix directory for blast #
my $BLASTDB            = 'export BLASTDB=/db/blastnet/database/'; #PDB_seqres directoty       #
###############################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
28

29
my $runblast = 'runblast.pl';
Sebastien Moretti's avatar
Sebastien Moretti committed
30

Sebastien Moretti's avatar
Sebastien Moretti committed
31
my(@list_encoded)=(), my(@list_pdb)=(), my(%deja_vu)=(), my(@pdb_list)=(), my($i)=0, my(@names)=(), my $locale=0, my $distant=0, my $database, my $blast_way;
Sebastien Moretti's avatar
Sebastien Moretti committed
32
33
my($ua)= LWP::UserAgent->new;

34

Sebastien Moretti's avatar
Sebastien Moretti committed
35
36
37
38
##-- Environmental Variables

my ($database_var) = $ENV { 'DATABASE' };
my ($blast_var)    = $ENV { 'BLAST_DIRECTORY' };
Sebastien Moretti's avatar
Sebastien Moretti committed
39

Sebastien Moretti's avatar
Sebastien Moretti committed
40
##-- Get BLAST Options/parameters && check user options
Sebastien Moretti's avatar
Sebastien Moretti committed
41

Sebastien Moretti's avatar
Sebastien Moretti committed
42
43
44
my ($program, $database_line, $blast_line, $query_file,
   $out_file, $identity_treshold, $cover_tresh, $Eval,
   $align, $matrix, $filter, $method, $orgn, $process, $quiet, $gigablast) = &OPTIONS_GET();
Sebastien Moretti's avatar
Sebastien Moretti committed
45

Sebastien Moretti's avatar
Sebastien Moretti committed
46
##-- Define local or remote BLAST && Check database/program
Sebastien Moretti's avatar
Sebastien Moretti committed
47

48
49
unless (-e $query_file ) { print {*STDERR} "\nfile does not exist!\n";exit;}
unless (-s $query_file ) { print {*STDERR} "\nyour file is empty!\n";exit; }
Sebastien Moretti's avatar
Sebastien Moretti committed
50

Sebastien Moretti's avatar
Sebastien Moretti committed
51
52
53
54
55
56
if ( ($database_line || $database_var) && ($blast_line || $blast_var) ){
    if ( $database_line=~/expressopdb/ && $blast_line=~/blastexpresso/ ){
        #special mode for configuration file of Expresso server
        $locale   = 1;
        $database = 'expressopdb';
        unless ( $quiet=~ /on/i ){
57
            print {*STDERR} "\nRUN BLAST LOCALLY\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
58
        }
59
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
60
61
62
63
64
65
    else{
        ($database)    = $database_line || $database_var;
        my ($blast_tp) = $blast_var || $blast_line;
        $locale = 1;
        $blast_way = &CONTROLE_DB_PG($database, $blast_tp, $program);
        unless ( $quiet=~ /on/i ){
66
            print {*STDERR} "\nRUN BLAST LOCALLY\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
67
        }
68
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
69
}
Sebastien Moretti's avatar
Sebastien Moretti committed
70
71
72
73
74
75
76
else{
    $database = &NCBI_DATABASE($database_line);
    $distant  = 1;
    if ( $gigablast=~ /^yes$/i ){
        $locale  = 2;
        $distant = 0;
        unless ( $quiet=~ /on/i ){
77
            print {*STDERR} "\nRUN ON GIGABLASTER\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
78
79
80
81
82
83
84
85
86
87
        }
    }
    else{
        unless ( $quiet=~ /on/i ){
            print {*STDERR} "\nRUN BLAST AT THE NCBI\n";
        }
    }
}

##- Define parameters through -method flag
88

Sebastien Moretti's avatar
Sebastien Moretti committed
89
90
91
if ( $method=~ /^pdbid$/i ){
    if ( $gigablast=~ /^yes$/i ){
        if ( $database ne 'pdb' ){
92
            print {*STDERR} "\nprovide a valid database name to RUN ON GIGABLASTER: nr, pdb or refseq_protein\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
93
94
95
96
97
98
99
100
101
            exit 1;
        }
        else {
            $database = 'pdbaa';
        }
    }
    elsif ( $gigablast=~ /^no$/i && $distant==1 ){
        $database = 'pdb';
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
102
}
Sebastien Moretti's avatar
Sebastien Moretti committed
103
104
105
106
107
elsif ( $method=~ /^geneid$/i ){
    if ( $distant==1 ){
        unless ( $database eq 'nr' || $database eq 'swissprot' || $database eq 'pdb'){
            $database = 'refseq_protein';
        }
108
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
109
}
Sebastien Moretti's avatar
Sebastien Moretti committed
110
111
112
113
114
115
elsif ( $method=~/^profile$/i ){
    if ( $distant==1 ){
        unless ( $database eq 'nr' || $database eq 'swissprot' || $database eq 'refseq_protein' ){
            $database = 'pdb';
        }
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
116
}
Sebastien Moretti's avatar
Sebastien Moretti committed
117
118
else {
    die "unknown method\n";
119
120
}

Sebastien Moretti's avatar
Sebastien Moretti committed
121
122
123
124
if ( $orgn !~ /All\+organisms/ && $locale=~/1|2/ ){
    print {*STDERR} "-organism option can't be used locally or with -gigablast option!\n";
    exit 1;
}
Sebastien Moretti's avatar
Sebastien Moretti committed
125

Sebastien Moretti's avatar
Sebastien Moretti committed
126
127
128
129
130
##---PRINT option values
unless ( $quiet =~ /on/i ){
    print {*STDERR} "

             Program :  $program
Sebastien Moretti's avatar
Sebastien Moretti committed
131
             Database : $database
Sebastien Moretti's avatar
Sebastien Moretti committed
132
133
             Method :   $method

Sebastien Moretti's avatar
Sebastien Moretti committed
134
             Query_file : $query_file
Sebastien Moretti's avatar
Sebastien Moretti committed
135
136
137
138
139
140
             Out_file :   $out_file
";
    print {*STDOUT} "
             Evalue threshold :         $Eval
             Matrix :                   $matrix
             Filter :                   $filter
Sebastien Moretti's avatar
Sebastien Moretti committed
141
             Blast_identity_threshold : $identity_treshold
Sebastien Moretti's avatar
Sebastien Moretti committed
142
143
             Cover threshold :          $cover_tresh
";
144
    print {*STDERR} "
Sebastien Moretti's avatar
Sebastien Moretti committed
145
146
147
148
149
150
151
152
153
154
155
156
             Number of hits :            $align
             Number of processors used : $process
";
    if ( $gigablast=~ /^yes$/i ){
        print {*STDERR} "\n             gigablast: yes\n";
    }
    unless ($locale) {
        print {*STDERR} "\n             Organism : $orgn\n";
    }


    print {*STDERR} "\n***************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
157
158
}

159

Sebastien Moretti's avatar
Sebastien Moretti committed
160
161
162
163
164
165
166
167
168
169
170
171
#-- Local/Remote BLASTP

if ( $locale==1 || $locale==2 ){
    @list_pdb = &LOCAL_BLAST($blast_way, $database, $query_file, $Eval, $align, $method, $matrix, $filter,
                             $process, $gigablast, $database_expresso, $blast_dir_expresso, $runblast);
}
elsif ( $distant==1 ){
    @list_pdb = &WEB_BLAST($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter);
}
else {
    die " Report bug to armougom\@igs.cnrs-mrs.fr\n";
}
Sebastien Moretti's avatar
Sebastien Moretti committed
172
173

#-- PARSE BLAST RESULTS -> MAKE A PDB_ID LIST
Sebastien Moretti's avatar
Sebastien Moretti committed
174
175
176
177
if ( $method =~ /^pdbid$/i ){
    my (@result_sort) = &PARSING(\@list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast);
    &AFFICHAGE_PDB_PARSING(\@result_sort, $cover_tresh, $identity_treshold, $out_file);
    exit 0;
Sebastien Moretti's avatar
Sebastien Moretti committed
178
179
180
}

#-- PARSE BLAST RESULT -> MAKE LIST OF REFSEQ ID
Sebastien Moretti's avatar
Sebastien Moretti committed
181
182
183
184
elsif ( $method =~ /^geneid$/i ){
    my (@result_sort) = &PARSING(\@list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast);
    &AFFICHAGE_REFSEQ_PARSING(\@result_sort, $cover_tresh, $identity_treshold, $out_file);
    exit 0;
Sebastien Moretti's avatar
Sebastien Moretti committed
185
186
187
}

#-- PARSE BLAST RESULT -> MAKE PROFILE
Sebastien Moretti's avatar
Sebastien Moretti committed
188
189
190
191
192
193
194
elsif ( $method=~ /^profile$/i ){
    &PROFILE(\@list_pdb, $out_file, $distant);
    exit 0;
}
else {
    die " \nFATAL ERROR : Method or database error\n";
}
195

Sebastien Moretti's avatar
Sebastien Moretti committed
196
exit 0;
197

Sebastien Moretti's avatar
Sebastien Moretti committed
198
199
200
                                               ##############
###############################################  FONCTIONS  ####################################################################
                                              ##############
Sebastien Moretti's avatar
Sebastien Moretti committed
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
sub CONTROLE_DB_PG {
    my ($database, $blast_dir, $program) = @_;

    if ( !-e "$database" && !-e "$database.pin"){
        die "$database file does not exist\n";
    }
    if ( -d $database ){
        die "$database must be a file, not a directory\n";
    }
    if ( $blast_dir !~ /\/$/ ){
        $blast_dir .= '/';
    }
    my ($blastall) = $blast_dir.'blastall';
    if ( !-e $blastall){
        die "$blastall program not found \n";
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
217

218
    return ($blastall);
Sebastien Moretti's avatar
Sebastien Moretti committed
219
220
221
}

#-------------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
sub NCBI_DATABASE {
    my ($ncbi_db) = @_;

    my (%all_db) =
    (
            'nr'             =>'1',
            'pdb'            =>'1',
            'swissprot'      =>'1',
            'refseq_protein' =>'1',
    );

    if ( exists $all_db{$ncbi_db} ){
        return($ncbi_db);
    }
    elsif ( $ncbi_db eq '' ){
        return ('');
    }
    else{
        return (1);
    }
242
}
Sebastien Moretti's avatar
Sebastien Moretti committed
243
#------------------------------------------------------------------------------------------------------------------------
244
sub HELP {
Sebastien Moretti's avatar
Sebastien Moretti committed
245
246
    my ($org, @orga) = &LIST_ORGA();
    my ($list_orga)  = join(', ', @orga);
Sebastien Moretti's avatar
Sebastien Moretti committed
247

248
249
    print {*STDERR} "
                      usage: $0 -infile <fasta file> -method <pdbid/geneid or profile> options []\n
Sebastien Moretti's avatar
Sebastien Moretti committed
250
251

                            -program ...... Program Name (blastp)
Sebastien Moretti's avatar
Sebastien Moretti committed
252
                                            Default = blastp
253
                            -database ..... Database at NCBI (nr, pdb, swissprot, refseq_protein) or indicate a local fasta file
Sebastien Moretti's avatar
Sebastien Moretti committed
254
255
256
257
                                            Default = pdb at NCBI
                            -infile ....... Query_file = a list of sequences in fasta format
                            -outfile ...... Name the outfile to make a template file for t_coffee
                                            Default = STDOUT or default.profile if method is profile
258
259
                            -evalue ....... Evalue threshold Default = 1
                            -matrix ....... PAM30 PAM70 BLOSUM45 BLOSUM80
Sebastien Moretti's avatar
Sebastien Moretti committed
260
                                            Default BLOSUM62
261
                            -method ....... geneid, pdbid, profile
262
                            -gigablast .... yes/no FASTER REMOTE BLAST with Gigablaster
Sebastien Moretti's avatar
Sebastien Moretti committed
263
                                            (Stephane Audic program: http://www.igs.cnrs-mrs.fr/Giga2/~database/remoteblast.cgi)
Sebastien Moretti's avatar
Sebastien Moretti committed
264
                                            Default no
265
                            -filter ....... T or F locally, L or R or M or C or V for distant blast
Sebastien Moretti's avatar
Sebastien Moretti committed
266
                                            Default = Off
267
268
                            -organism ..... $list_orga are available
                                            Default is All_organisms
Sebastien Moretti's avatar
Sebastien Moretti committed
269
                            -identity ..... blast identity threshold = provide a % for view only the results upper or equal to the threshold
270
271
272
                                            Default 50
                            -cover ........ Cover threshold = provide a % : sequence covering Default: 30
                            -hits ........  Number of hits
Sebastien Moretti's avatar
Sebastien Moretti committed
273
                                            Default = 1
274
275
                            -processor .... Number of processors to use
                                            Default = 1
276
                            -blast_dir .... Indicates where your BLAST directory is installed locally
277
278
                            -quiet ........ on : do not display all the default/defined blast parameters
                                            Default off
Sebastien Moretti's avatar
Sebastien Moretti committed
279
280
281

                     Environement Variables
                     These variables can be set from the environement
282
283
           DATABASE......................[Indicates where your database file must be fetched (locally)]
           BLAST_DIRECTORY...............[Indicates where your BLAST directory is installed locally]
284

Sebastien Moretti's avatar
Sebastien Moretti committed
285
";
Sebastien Moretti's avatar
Sebastien Moretti committed
286
287
    exit 1;
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
288
289
}
#-----------------------------------------------------------------------------------------
290
sub OPTIONS_GET {
Sebastien Moretti's avatar
Sebastien Moretti committed
291
    my %opt = ();
Sebastien Moretti's avatar
Sebastien Moretti committed
292
293

    GetOptions
Sebastien Moretti's avatar
Sebastien Moretti committed
294
              (
295
296
297
298
299
300
301
302
303
304
305
306
           'infile=s'    =>\$opt{infile},
           'outfile=s'    =>\$opt{outfile},
           'program=s'     =>\$opt{program},
           'database=s'     =>\$opt{database},
           'blast_dir=s'     =>\$opt{blast_dir},
           'identity=f'       =>\$opt{treshold},
           'cover=f'           =>\$opt{cover},
           'evalue=f'           =>\$opt{evalue},
           'hits=i'              =>\$opt{hits},
           'matrix=s'             =>\$opt{matrix},
           'filter=s'              =>\$opt{filter},
           'method=s'               =>\$opt{method},
307
           'organism=s'              =>\$opt{organism},
308
309
310
311
           'processor=i'              =>\$opt{processor},
           'quiet=s'                   =>\$opt{quiet},
           'gigablast=s'                =>\$opt{gigablast},
           );
Sebastien Moretti's avatar
Sebastien Moretti committed
312
313
314
315

    if ( $ARGV[0] ){
        print "Unprocessed by Getopt::Long\n $ARGV[0]\n";
        &HELP();
316
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338

    my($evalue_tresh) = $opt{'evalue'};       unless ($evalue_tresh)        { $evalue_tresh = 1;};
    my($cover_tresh)  = $opt{'cover'};         unless (defined $cover_tresh) { $cover_tresh = 30;};
    my($query_file)   = $opt{'infile'};         unless ($query_file)          { print {*STDERR} "Flag -infile must be defined\n"; &HELP();};
    my($outfil)       = $opt{'outfile'};         unless ($outfil)              { $outfil = '';};
    my($treshold)     = $opt{'treshold'};         unless (defined $treshold)    { $treshold = 50;};
    my($blast_dir)    = $opt{'blast_dir'};         unless ($blast_dir)           { $blast_dir = '';};
    my($database)     = $opt{'database'};           unless ($database)            { $database = '';};
    my($program)      = $opt{'program'};             unless ($program)             { $program = 'blastp';};
    my($align)        = $opt{'hits'};                 unless (defined $align)       { $align = 1;};
    my($matrix)       = $opt{'matrix'};                unless ($matrix)              { $matrix = 'BLOSUM62';};
    my($filter)       = $opt{'filter'};                 unless ($filter)              { $filter = 'F';};
    my($method)       = $opt{'method'};                  unless ($method)              {print {*STDERR} "Flag -method must be defined\n"; &HELP();};
    my($organism)     = $opt{'organism'};                 unless ($organism)            { $organism = 'All organisms';};
    my($process)      = $opt{'processor'};                 unless ($process)             { $process = 1;};
    my($param)        = $opt{'quiet'};                      unless ($param)               { $param = 'off';};
    my($gigablast)    = $opt{'gigablast'};                   unless ($gigablast)           {$gigablast = 'no';};

    if ( $method !~ /(^geneid$|^pdbid$|^profile$)/i ){
        print {*STDERR} "unknown method for the flag -method\n";
        &HELP();
    }
339
    if ( $treshold<0 || $treshold>100 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
340
341
342
        print {*STDERR} "\nout of range for the option -treshold \n";
        &HELP();
    }
343
    if ( $cover_tresh<0 || $cover_tresh>100 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
344
345
346
        print {*STDERR} "\nout of range for the option -cover \n";
        &HELP();
    }
347
    if ( $align<0 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
348
349
350
        print {*STDERR} "\n error with option align\n";
        &HELP();
    }
351
    if ( $gigablast !~ /^yes$|^no$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
352
353
354
        print {*STDERR} "invalid argument for gigaglast option: yes/no\n";
        exit 1;
    }
355
    if ( $filter !~ /^[TFRLMCV]{1}$|^off$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
356
357
358
        print {*STDERR} "valid values for -filter are T,F,R,L,M,C,or V!\n";
        exit 1;
    }
359
    if ( $matrix !~ /PAM30|PAM70|BLOSUM45|BLOSUM80|BLOSUM62/ ){
Sebastien Moretti's avatar
Sebastien Moretti committed
360
361
362
        print {*STDERR} "valid values for -matrix are PAM30,PAM70,BLOSUM45,BLOSUM80 or BLOSUM62\n";
        exit 1;
    }
363
    if ( $outfil eq '' && $method=~ /^profile$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
364
365
366
        $outfil = 'default_profile.template';
    }

367
    if ( $param!~ /^on$|^off$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
368
369
370
371
372
373
374
        print {*STDERR} "valid values for -quiet is on or off\n";
        exit 1;
    }
    my ($orgn, @all_orgn) = &ORGN($organism);
    return ($program,$database,$blast_dir,$query_file,
            $outfil,$treshold,$cover_tresh,$evalue_tresh,
            $align,$matrix,$filter,$method,$orgn,$process,$param,$gigablast);
Sebastien Moretti's avatar
Sebastien Moretti committed
375
376
377
}

#--------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
378
379
sub RECOVER {
    my ($pdb_result, $aln_length, $length_query) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
380
    my $nb_gap=0;
Sebastien Moretti's avatar
Sebastien Moretti committed
381
382
383
384
385

    if ( $pdb_result=~ /(score.+?\n\n\n).+?score/ism ){#If several HSP, take the 1st
        $pdb_result = $1;
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
386
    $length_query  =~ s/,//g;
Sebastien Moretti's avatar
Sebastien Moretti committed
387
    my ($requete)  =  join('', ($pdb_result=~/^Query(.*)\n/gm) );
Sebastien Moretti's avatar
Sebastien Moretti committed
388
    $requete       =~ s/[^A-Z-]//g;
Sebastien Moretti's avatar
Sebastien Moretti committed
389
390
391
392
393
394
    my (@sequence) =  split('', $requete);

    for (my $i=0; $i<=$#sequence; $i++){
        if ( $sequence[$i] eq '-'){
            ++$nb_gap;
        }
Sebastien Moretti's avatar
Sebastien Moretti committed
395
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
396
    my ($recouvrement) = sprintf("%-3d", (($aln_length-$nb_gap)/$length_query)*100 );
Sebastien Moretti's avatar
Sebastien Moretti committed
397
    undef(@sequence);
Sebastien Moretti's avatar
Sebastien Moretti committed
398
    return ($recouvrement, $nb_gap);
Sebastien Moretti's avatar
Sebastien Moretti committed
399
400
}
#--------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
sub WEB_BLAST {
    open (my $SOR1, '>', 'web_tempo.result') or die;
    my ($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter) = @_;
    my $aln_view;
    my $format = 'Txt';
    my ($description) = $align;
    if ( $method=~/^profile$/i ){
        $aln_view = 'FlatQueryAnchoredNoIdentities';
    }
    else {
        $aln_view = 'Pairwise';
    }

    if ( $filter eq 'F' ){
        $filter = 'off';
    }

    $/ = '>';
    open( my $FIC, '<', $query_file) or die "cannot open $query_file $!\n";
    my (@sequences) = <$FIC>;
    close $FIC;
Sebastien Moretti's avatar
Sebastien Moretti committed
422
423
    shift(@sequences);

Sebastien Moretti's avatar
Sebastien Moretti committed
424
425
426
427
428
429
430
431
432
433
    foreach my $sequence(@sequences){
        $sequence =~ s/>//g;
        $sequence = ">$sequence";

        my ($name) = ($sequence =~ /^>(.+)\n/);
        push(@names, $name);
        my ($encoded_query) = uri_escape($sequence);
        push (@list_encoded, $encoded_query);
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
434
    undef(@sequences);
Sebastien Moretti's avatar
Sebastien Moretti committed
435
436
437
438
439
440
441
    if ( scalar (@names != @list_encoded) ){
        die "error $!";
    }
    foreach my $encoded_seq(@list_encoded){
        my $nb = 0;
        print {*STDERR} "BLAST $names[$i]...";

Sebastien Moretti's avatar
Sebastien Moretti committed
442
#-- BUILD THE REQUEST
Sebastien Moretti's avatar
Sebastien Moretti committed
443
444
        my ($arguments) = "CMD=Put&ENTREZ_QUERY=$orgn&CDD_SEARCH=off&FILTER=$filter&MATRIX_NAME=$matrix&PROGRAM=$program&DATABASE=$database&QUERY=" . $encoded_seq;

Sebastien Moretti's avatar
Sebastien Moretti committed
445
446
#        my ($req) = new HTTP::Request POST => 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi'; #old URL
        my ($req) = new HTTP::Request POST => 'http://blast.ncbi.nlm.nih.gov/Blast.cgi';
Sebastien Moretti's avatar
Sebastien Moretti committed
447
448
449
        $req -> content_type('application/x-www-form-urlencoded');
        $req -> content($arguments);

Sebastien Moretti's avatar
Sebastien Moretti committed
450
#-- GET THE RESPONSE : PARSE OUT THE REQUEST ID and THE ESTIMATED TIME
Sebastien Moretti's avatar
Sebastien Moretti committed
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
        my ($response) = $ua -> request($req);

        if ( $response -> content =~ /Server Error/i ){
            die "Server Error at NCBI!!Sorry try later\n";
        }
        $response -> content =~ /^\s{4}RID = (.*)$/m;
        my ($rid) = $1;

        $response -> content =~ /^\s{4}RTOE = (.*)$/m;
        my ($wait)= $1;
        unless ($rid && $wait){
            die "parse error: $!";
        }
        for (my $j=0; $j<=$wait/2; $j++){
            print {*STDERR} '.';
            sleep 2;
        }

        my ($verif) = 0;

        while (){
            for (my $j=0; $j<=5; $j++){
                print  {*STDERR} '.';
                sleep 1;
            }

            $req = new HTTP::Request GET =>
Sebastien Moretti's avatar
Sebastien Moretti committed
478
479
                "http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=$rid";
#                "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=$rid"; #old url
Sebastien Moretti's avatar
Sebastien Moretti committed
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
            $response = $ua->request($req);
            if ( $response->content =~ /Status=WAITING/im ){
                next;
            }
            elsif ( $response->content =~ /Status=FAILED/im ){
                print {*STDERR} "Search $rid failed\n";
                $verif = 1;
                last;
            }
            elsif ( $response->content =~ /Status=UNKNOWN/im ){
                print {*STDERR} "Search $rid expired\n";
                $verif = 1;
                last;
            }
            elsif ( $response->content =~ /Status=READY/im ){
                if ( $response->content =~ /ThereAreHits=yes/im ){
                    last;
                }
                else {
                    print {*STDERR} "No hits found.\n";
                    $verif = 1;
                    last;
                }
            }
            elsif ( $response->content =~ /can\'t connect/im ){
Sebastien Moretti's avatar
Sebastien Moretti committed
505
                print {*STDERR} "\nCan't connect to blast.ncbi.nlm.nih.gov:80...new attempt";
Sebastien Moretti's avatar
Sebastien Moretti committed
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
                if ( $nb <3 ){
                    ++$nb;
                    next;
                }
                else {
                    print {*STDERR} "sorry, BLAST $names[$i] failed after 3 attempts!!\n";
                    $verif = 1;
                    last;
                }
            }
            else {
                print {*STDERR} "unknown error\n";
                $verif = 1;
                last;
            }
        }

        if( $verif==1 ){
            ++$i;
            next;
        }

Sebastien Moretti's avatar
Sebastien Moretti committed
528
#-- GET RESULT
Sebastien Moretti's avatar
Sebastien Moretti committed
529
530
531

        while (){
            sleep 3;
Sebastien Moretti's avatar
Sebastien Moretti committed
532
533
#        $req = new HTTP::Request GET => "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_TYPE=$format&FILTER=off&EXPECT=$Eval&ALIGNMENTS=$align&DESCRIPTIONS=$align&ALIGNMENT_VIEW=$aln_view&RID=$rid";
        $req = new HTTP::Request GET => "http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Get&FORMAT_TYPE=$format&FILTER=off&EXPECT=$Eval&ALIGNMENTS=$align&DESCRIPTIONS=$align&ALIGNMENT_VIEW=$aln_view&RID=$rid";
534
535
536
537
538
539
540
541
542
        $response = $ua -> request($req);

        if   ($response->content =~ /Altschul/i) {  print {*STDERR} "Search Complete\n"; push(@list_pdb,$response -> content);last; }
        else { next; }
    }
    print {$SOR1} (@list_pdb);
    ++$i;
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
543
    undef (@list_encoded);
544

Sebastien Moretti's avatar
Sebastien Moretti committed
545
    close $SOR1;
Sebastien Moretti's avatar
Sebastien Moretti committed
546
    return (@list_pdb);
547

Sebastien Moretti's avatar
Sebastien Moretti committed
548
549
550
551
}
#-----------------------------------------------------------------------------------------------------------------------
sub LOCAL_BLAST
{
Sebastien Moretti's avatar
Sebastien Moretti committed
552
553
554
555
    my ($blast_dir, $database, $query_file, $Eval, $align,
        $method, $matrix, $filter, $process, $gigablast,
        $database_expresso, $blast_dir_expresso, $runblast) = @_;
    my $n = 0;
556
    if ( $method=~ /^profile$/i && $gigablast=~ /^no$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
557
        open (COM,"$blast_dir -p blastp -d $database -i $query_file -m 6 -M $matrix -v $align -b $align -F $filter -e $Eval -a $process|") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
558
    }
559
560
    elsif ( $method=~ /^geneid$/i && $gigablast=~ /^no$/i )
    {
Sebastien Moretti's avatar
Sebastien Moretti committed
561
        open (COM,"$blast_dir -p blastp -d $database -i $query_file  -v $align  -b $align -F $filter -M $matrix -e $Eval -a $process|") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
562
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
563
    elsif ( $method=~ /^geneid$|^pdbid$/i && $gigablast=~ /^yes$/i )
Sebastien Moretti's avatar
Sebastien Moretti committed
564
    {
565
        unless ( $database eq 'nr' || $database eq 'pdb' || $database eq 'refseq_protein' || $database eq 'pdbaa' ){
Sebastien Moretti's avatar
Sebastien Moretti committed
566
567
568
569
570
571
            print {*STDERR} "\nsorry invalid database for gigablast\n";exit 1;
        }
        if ( $database eq 'pdb')            { $database = 'pdbaa';}
        if ( $database eq 'refseq_protein') { $database = 'refprot';}
        if ( $database eq '')               { print {*STDERR} "provide a valid database!\n"; exit;}
        open (COM,"$runblast -d $database -p blastp  -e $Eval -v $align -F F \<$query_file |");
572
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
573
574
    elsif ( $method=~ /^profile$/i && $gigablast=~ /^yes$/i){
        print {*STDERR} "\nSorry method profile can't be used with -gigablast option\n";exit;
Sebastien Moretti's avatar
Sebastien Moretti committed
575
    }
576
577
578
579
580
    elsif ( $method=~ /^pdbid$/i && $database=~ /expressopdb/ ){
        #BLAST pour Expresso
        open (COM,"$BLASTMAT; $BLASTDB; $blast_dir_expresso -p blastp -d $database_expresso -i $query_file -F $filter -e $Eval -M $matrix -v $align -b $align |") or die;
    }
    else{
Sebastien Moretti's avatar
Sebastien Moretti committed
581
        open (COM,"$blast_dir -p blastp -d $database -i $query_file -v 1 -b 1 -F $filter -e $Eval -M $matrix -v $align -b $align -a $process |") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
582
    }
583
584
585

    unless ( $quiet=~ /on/ ) { print {*STDERR} "\nrun BLAST..."; }

Sebastien Moretti's avatar
Sebastien Moretti committed
586
    my ($name_database, $posted, $version) = ('', '', '');
Sebastien Moretti's avatar
Sebastien Moretti committed
587

Sebastien Moretti's avatar
Sebastien Moretti committed
588
    open (my $SOR2, '>', 'blast_result.txt') or die;
589

Sebastien Moretti's avatar
Sebastien Moretti committed
590
591
592
593
594
595
    $/ = 'Query=';
    while (<COM>){
        if ( $_=~ /Database:\s+(\S+)/g )     { $name_database = $1;}
        if ( $_=~ /Posted date:\s+(.+?)\n/ ) { $posted        = $1;}
        if ( $_=~ /(BLASTP\s+\S+)/o )        { $version       = $1;}
        print {$SOR2} $_;
Sebastien Moretti's avatar
Sebastien Moretti committed
596
        push (@list_pdb, $_);
597
598
599
#        if ( $_=~ /\s*(.+?)\s/ ){
#            print {*STDERR} "\n$1 done";
#        }
Sebastien Moretti's avatar
Sebastien Moretti committed
600
601
    }
    close COM;
Sebastien Moretti's avatar
Sebastien Moretti committed
602
    close $SOR2;
603
#    print {*STDERR} "\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
604

Sebastien Moretti's avatar
Sebastien Moretti committed
605
    $name_database = $database if ( $name_database =~ m{/} );
606
    unless ($quiet=~ /on/i) {
Sebastien Moretti's avatar
Sebastien Moretti committed
607
608
609
        print {*STDOUT} "
             Version:     $version
             Database:    $name_database
Sebastien Moretti's avatar
Sebastien Moretti committed
610
             Posted date: $posted\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
611
    }
612
    shift (@list_pdb);
Sebastien Moretti's avatar
Sebastien Moretti committed
613
614
615
616
    return (@list_pdb);
}

#-----------------------------------------------------------------------------------------------------------------------------
617
sub PARSING {
Sebastien Moretti's avatar
Sebastien Moretti committed
618
    my ($list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
619

620
621
622
623
    my (@list_pdb)        = @$list_pdb;
    my (@result_not_sort) = ();
    my $n = 0;
    open (my $SOR, '>', 'webblast.log') or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
624

625
626
627
    if ( $gigablast=~ /^yes$/i ) { $locale = 2;$distant = 0;}
    if ( $gigablast=~ /^no$/i )  { $locale = 1;}
    if ( $distant==1 )           { $locale = 0;}
Sebastien Moretti's avatar
Sebastien Moretti committed
628

629
630
631
632
633
634
    foreach my $pdb_result(@list_pdb){
        my $query, my $length_query, my($pdb_id), my $comp=0;
        if ( $pdb_result=~/No hits found/m ){
            print {$SOR} $pdb_result;
            next;
        }
635

636
637
638
639
640
641
642
643
644
        $pdb_result =~ s/ALIGNMENTS//;
        local $/ = undef;
        my (@intra_res) = split(/(?=\n\n>)/s, $pdb_result);


        if ( $distant==1 ){
            my $version_d, my $database_d, my $poste_d;
            undef $/; ($query, $length_query) = ( $intra_res[0] =~ /Query=\s+(\S+)\s+Length=\s*(\d+)/smo );
            $/ = "\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
645
            open (F3, '<', 'web_tempo.result') or die;
646
647
648
649
650
651
652
653
654
            while ($_=<F3>){
                if ($_=~ /(BLASTP\s+\S+)/o)        { $version_d  = $1;}
                if ($_=~ /Database:\s+(.+?)$/o)    { $database_d = $1;}
                if ($_=~ /Posted date:\s*(.+?)$/o) { $poste_d    = $1; last;}
            }
            close F3;
#           $database_d = $database if ( $database_d =~ m{/} );
            unless ($quiet=~ /on/i || $n>0){
                ++$n;
655
                print {*STDOUT} "
Sebastien Moretti's avatar
Sebastien Moretti committed
656
657
             Version:     $version_d
             Database:    $database_d
Sebastien Moretti's avatar
Sebastien Moretti committed
658
             Posted date: $poste_d\n\n";
659
660
661
662
663
            }
        }
        else{
            ($query, $length_query) = ( $intra_res[0] =~ /\s*(.+?)\s.+?\(([\d,]+) letters/smo );
        }
664

665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
        shift(@intra_res) if ( exists($intra_res[1]) );

        foreach my $intra_res(@intra_res){ #look for the different results of the query
            my($aln_length, $identity) = ( $intra_res=~ /^\sIdentities = \d+\/(\d+)\s\((.+?)\)/im );
            my($recouvrement, $gap)    = &RECOVER($intra_res, $aln_length, $length_query);
            my($evalue)                = ( $intra_res=~ /Expect = (.+?)\s/im );
            my($bits)                  = ( $intra_res=~ /Score =\s+([\d.]+)\s/im );

            unless ( $method !~ /^geneid$/i ){
                if ( $comp<=$bits ){
                    $comp=$bits;
                }
                else{
                    last;
                }
            }

            if ( $query eq '' || $length_query eq '' || $aln_length eq '' || $identity eq '' || $recouvrement eq '' || $gap eq '' ){
                print {$SOR} " can't parse $pdb_result";
                next;
            }

            if ( $method =~ /^pdbid$/i ){
                if ( $locale==1 ){
                    ($pdb_id) = ( $intra_res=~ /^>(.{6})/im );
                    $pdb_id   =~ s/_//;
                    $pdb_id   = uc($pdb_id);
                }
                else{
                    ($pdb_id) = ( $intra_res=~ /^>pdb\|(.{6})/im );
                     $pdb_id  =~ s/\|//;
                }
                ($evalue) = ( $intra_res=~ /Expect = (.+?)\s/im );

                push (@result_not_sort, "$query\t$pdb_id\t$evalue\t$identity\t$recouvrement\t");
            }
            elsif ( $method =~/^geneid$/i ){
                if ( $database !~ /pdb/i && $database !~ /swiss/i && $locale =~ /1|2/){
                    while ( $intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/sg ){
                        my $databank = $1;
                        my $last     = $4;
                        my $refseq   = $2;
                        if ( $databank eq 'pdb' ){
                            $refseq .= $last;
                        }
                        $refseq =~ s/\.\d+$//;
                        push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
                    }
                }
                elsif ( $database=~ /pdb|pdbaa/i && ($locale==1 || $locale==2) ){
                    my $refseq;
                    if ( $locale==1 ){
                        ($refseq) = ( $intra_res=~ />(.*?)\s/o );
                        $refseq   =~ s/_//;
                    }
                    else{
                        ($refseq) = ( $intra_res=~ /^>pdb\|(.{6})/im );
                        $refseq   =~ s/\|//;
                    }

                    unless ($refseq){
                        print {$SOR} $intra_res;
                        next;
                    }
                    push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tpdb");
                }
                elsif ( $distant==1 ){
                    my ($resul) = &MULTI_EQUIVALENT($query, $identity, $recouvrement, $bits, $evalue, $intra_res);
                    push (@result_not_sort, $resul);
                }
                elsif ( $database=~ /swiss/i ){
                    my ($refseq) = ( $intra_res=~ />.*?sp\|(.+?)\|/o );
                    unless ($refseq){
                        print {$SOR} $pdb_result;
                        next;
                    }
                    $refseq =~ s/\.\d+$//;
                    push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\tswiss_prot");
                }
            }
            else {die;}
        }
747
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
748
    close $SOR;
Sebastien Moretti's avatar
Sebastien Moretti committed
749
    undef (@list_pdb);
750

751
752
753
754
755
756
757
758
759
760
    if ( $method =~/^geneid$/i ){
        return (@result_not_sort);
    }
    else{
        my(@result_sort) = map  { $_->[1] }
                           sort { $b->[0]<=>$a->[0] }
                           map  { [/\t([\d.]+)%/,$_] }
                           @result_not_sort;
        undef(@result_not_sort);
        return (@result_sort);
761
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
762
763
764
765
}

#-------------------------------------------------------------------------------------------------------------------

Sebastien Moretti's avatar
Sebastien Moretti committed
766
767
sub MULTI_EQUIVALENT {
    my($query, $identity, $recouvrement, $bits, $evalue, $intra_res) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
768
769

    my @result=();
Sebastien Moretti's avatar
Sebastien Moretti committed
770
771
772
773
    while ( $intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/g ){
        my $databank = $1;
        my $last     = $4;
        my $refseq   = $2;
774

Sebastien Moretti's avatar
Sebastien Moretti committed
775
776
777
        if ( $databank eq 'pdb' ){ $refseq .= $last; }
        $refseq =~ s/\.\d+$//;
        push (@result, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
Sebastien Moretti's avatar
Sebastien Moretti committed
778
779
780
781
    }
    return (@result);
}
#--------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
sub AFFICHAGE_REFSEQ_PARSING {
    my($result_sort, $cover_tresh, $identity_treshold, $out_file) = @_;
    my(@result_sort) = @$result_sort, my(@name_gid) = ();
    my@resultats = ();
    my $afficher = '';

    (my($entete)= sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s", 'Sequence Name', 'Accession number', 'Databank', "%Identity", "%Cover", 'BITS', 'Evalue'));

    foreach my $result_sort(@result_sort){
        my($seq_name, $refseq_name, $identiq, $cover, $bits, $evalue, $bank) = split("\t", $result_sort);
        $evalue =~ s/,$//; #To remove an additional comment with new blast release (2.2.17)
        ($identiq) = split(/%/,$identiq);

        if ($identiq >= $identity_treshold && $cover >= $cover_tresh){
            push (@name_gid, ">$seq_name\@$bank\_\_$refseq_name\n");
            (($afficher) .=  sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s ", $seq_name, $refseq_name, $bank, $identiq, $cover, $bits, $evalue));
            $afficher .= "\n";
        }
        else {next;}
Sebastien Moretti's avatar
Sebastien Moretti committed
801
802
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
803
    if ($afficher) { print "\n$entete\n\n"; print $afficher; }
Sebastien Moretti's avatar
Sebastien Moretti committed
804
805


Sebastien Moretti's avatar
Sebastien Moretti committed
806
807
808
809
810
    if (@name_gid) { print {*STDOUT} "\n**********************************************************************\n\n"; }
    if ($out_file) { open (SOR,">$out_file") or die "can not open $out_file"; print SOR @name_gid; }
    print {*STDOUT} "\n", @name_gid;
    close SOR;
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
811
812
813
}
#-------------------------------------------------------------------------------------------------------------

Sebastien Moretti's avatar
Sebastien Moretti committed
814
815
sub AFFICHAGE_PDB_PARSING {
    my($result_sort, $cover_tresh, $identity_treshold, $out_file) = @_;
816

Sebastien Moretti's avatar
Sebastien Moretti committed
817
    my(@result_sort) = @$result_sort, my @sortie=();
818

Sebastien Moretti's avatar
Sebastien Moretti committed
819
    print {*STDOUT} "\n\n",(my($en_tete) = sprintf("%-40s %-10s %-10s %-12s %-10s", 'Sequence Name', 'PDB_id', 'Evalue', 'Identity(%)', 'Cover(%)')),"\n\n";
820

Sebastien Moretti's avatar
Sebastien Moretti committed
821
822
823
    foreach my $result_sort(@result_sort){
        my($seq_name, $pdb_name, $EValue, $identiq, $cover) = split("\t", $result_sort);
        ($identiq)                                          = split(/%/, $identiq);
824

Sebastien Moretti's avatar
Sebastien Moretti committed
825
826
827
828
829
830
831
        if ( $identiq >= $identity_treshold && $cover >= $cover_tresh ){
            push (@pdb_list, $pdb_name);
            $EValue =~ s/,$//;
            print {*STDOUT} ((my $afficher) = sprintf("%-40s %-10s %-10s %-12s %-10s", $seq_name, $pdb_name, $EValue, $identiq, $cover)),"\n";
            push (@sortie, ">$seq_name _P_ $pdb_name\n");
        }
        else {next;}
Sebastien Moretti's avatar
Sebastien Moretti committed
832
833
    }
    undef(@result_sort);
834
    print {*STDOUT} "\n**********************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
835
836

#-- OUTFILE /STDOUT
Sebastien Moretti's avatar
Sebastien Moretti committed
837
    if ($out_file){ open (SOR,">$out_file") or die "can not open $out_file"; print SOR @sortie; }
838
    print {*STDOUT} @sortie;
Sebastien Moretti's avatar
Sebastien Moretti committed
839
    close SOR;
Sebastien Moretti's avatar
Sebastien Moretti committed
840
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
841
842
}
#-----------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
843
844
845
846
847
sub PROFILE {
    my($list_pdb, $out_file, $distant) = @_;

    my(@list_pdb) = @$list_pdb, my(@sortie)=();
    my %names = ();   my $i = 0;    my($name) = '';
Sebastien Moretti's avatar
Sebastien Moretti committed
848
849

    open (SOR1,">$out_file") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
850
851
852
853
854
855
    foreach my $pdb_result(@list_pdb){
        if ( $pdb_result =~ /No hits found/i ){ next; }
        else{
            ++$i;
            if ( $distant==1 ){($name) = ($pdb_result =~ /Query=\s*(.+?)Length/smoi) or die "\nparse error in distant profile\n";}
            else              {($name) = ($pdb_result =~ /\s*(.+?)\(.*?letters/ismo) or die "\nparse error in profile\n";}
856

Sebastien Moretti's avatar
Sebastien Moretti committed
857
            my($name1) = ($name=~ /(.+?)\s+$/);
858

Sebastien Moretti's avatar
Sebastien Moretti committed
859
860
861
            open (my $SOR, '>', "tempo_file_profile") or die "can not open tempo_file_profile";
            print {$SOR} "Query= $pdb_result";
            close $SOR;
862

Sebastien Moretti's avatar
Sebastien Moretti committed
863
864
865
            open(COM,"|t_coffee -other_pg seq_reformat -input blast_aln -in tempo_file_profile -output fasta_aln -out ${i}.profile");
            close COM;
            push (@sortie,">$name1 _R_ ${i}.profile\n");
866

Sebastien Moretti's avatar
Sebastien Moretti committed
867
        }
868
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
869
    unlink("tempo_file_profile");
870
871
    undef(@list_pdb);

872
    print {*STDERR} "\n**********************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
873
#-- OUTFILE /STDOUT
Sebastien Moretti's avatar
Sebastien Moretti committed
874
    if ($out_file){ open (SOR1,">$out_file") or die "can not open $out_file"; print SOR1 @sortie; }
875
    print {*STDOUT} @sortie;
Sebastien Moretti's avatar
Sebastien Moretti committed
876
    close SOR1;
Sebastien Moretti's avatar
Sebastien Moretti committed
877
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
878
879
880
}

#--------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
881
882
883
884
885
886
887
888
889
890
891
892
893
894
sub ORGN {
    my($organism) = @_;

    $organism =~ s/_/ /;

    my(%orgs) = (

        'Homo sapiens'            =>'1',
        'Bos taurus'              =>'1',
        'Gallus gallus'           =>'1',
        'Viruses'                 =>'1',
        'Bacteria'                =>'1',
        'Eukaryota'               =>'1',
        'Mammalia'                =>'1',
895
        'Vertebrata'              =>'1',
Sebastien Moretti's avatar
Sebastien Moretti committed
896
897
898
899
900
901
902
903
904
        'All organisms'           =>'1',
        'Fungi'                   =>'1',
        'Primates'                =>'1',
        'Archaea'                 =>'1',
        'Arabidopsis thaliana'    =>'1',
        'Caenorhabditis elegans'  =>'1',
        'Escherichia coli'        =>'1',
        'Mus musculus'            =>'1',
        'Drosophila melanogaster' =>'1',
905
        );
Sebastien Moretti's avatar
Sebastien Moretti committed
906

Sebastien Moretti's avatar
Sebastien Moretti committed
907
908
909
910
911
912
913
914
915
    if ( exists $orgs{$organism} ){
        $organism =~ s/ /+/g;
        return ($organism);
    }
    else{
        print {*STDERR} "organism not valid or syntax error, replace space by \"_\" \n";
        &HELP();
        return;
    }
916
}
Sebastien Moretti's avatar
Sebastien Moretti committed
917
918
#------------------------------------------------------------------------------------------------------------------------------------

Sebastien Moretti's avatar
Sebastien Moretti committed
919
sub LIST_ORGA {
Sebastien Moretti's avatar
Sebastien Moretti committed
920
921
922
923
924
925
    my (%orgs) = (

        'Homo sapiens'            =>'1',
        'Bos taurus'              =>'1',
        'Gallus gallus'           =>'1',
        'Viruses'                 =>'1',
926
        'Bacteria'                =>'1',
Sebastien Moretti's avatar
Sebastien Moretti committed
927
928
929
930
931
932
933
934
935
936
937
938
        'Eukaryota'               =>'1',
        'Mammalia'                =>'1',
        'Vertebrata'              =>'1',
        'All organisms'           =>'1',
        'Fungi'                   =>'1',
        'Primates'                =>'1',
        'Archaea'                 =>'1',
        'Arabidopsis thaliana'    =>'1',
        'Caenorhabditis elegans'  =>'1',
        'Escherichia coli'        =>'1',
        'Mus musculus'            =>'1',
        'Drosophila melanogaster' =>'1',
939
940
        );

Sebastien Moretti's avatar
Sebastien Moretti committed
941
942
943
944
    my (@cle) = keys(%orgs);
    foreach my $cle(@cle){
        $cle=~ s/ /_/g;
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
945
946
    return (@cle);
}
947