webblast.pl 35.2 KB
Newer Older
Sebastien Moretti's avatar
Sebastien Moretti committed
1
#!/usr/bin/env perl
2

3
#date : 2007/11/19
Sebastien Moretti's avatar
Sebastien Moretti committed
4
5
#prog : webblast.pl
#subj : make a BLAST/WU-BLAST (by HTTP request or locally) against a database with a file containing sequences in fasta format
Sebastien Moretti's avatar
Sebastien Moretti committed
6
####### method genid, pdbid and profile
7

Sebastien Moretti's avatar
Sebastien Moretti committed
8
#############################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
9
use LWP::UserAgent;
10
11
12
13
use HTTP::Request::Common qw(POST);
use Getopt::Long;
use strict;
use warnings;
14
##############################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
15
16


17
18
############################  EXPRESSO PARAM  #################################################
my $database_expresso  = 'pdb';                                   #PDB database name          #
19
my $blast_dir_expresso = 'blastall';                              #blastall executable        #
20
###############################################################################################
Sebastien Moretti's avatar
Sebastien Moretti committed
21

22
my $runblast = 'runblast.pl';
Sebastien Moretti's avatar
Sebastien Moretti committed
23

Sebastien Moretti's avatar
Sebastien Moretti committed
24
my(@list_encoded)=(), my(@list_pdb)=(), my(%deja_vu)=(), my(@pdb_list)=(), my($i)=0, my(@names)=(), my $locale=0, my $distant=0, my $database, my $blast_way;
Sebastien Moretti's avatar
Sebastien Moretti committed
25
26
my($ua)= LWP::UserAgent->new;

27

Sebastien Moretti's avatar
Sebastien Moretti committed
28
29
30
31
##-- Environmental Variables

my ($database_var) = $ENV { 'DATABASE' };
my ($blast_var)    = $ENV { 'BLAST_DIRECTORY' };
Sebastien Moretti's avatar
Sebastien Moretti committed
32

Sebastien Moretti's avatar
Sebastien Moretti committed
33
##-- Get BLAST Options/parameters && check user options
Sebastien Moretti's avatar
Sebastien Moretti committed
34

Sebastien Moretti's avatar
Sebastien Moretti committed
35
36
37
my ($program, $database_line, $blast_line, $query_file,
   $out_file, $identity_treshold, $cover_tresh, $Eval,
   $align, $matrix, $filter, $method, $orgn, $process, $quiet, $gigablast) = &OPTIONS_GET();
Sebastien Moretti's avatar
Sebastien Moretti committed
38

Sebastien Moretti's avatar
Sebastien Moretti committed
39
##-- Define local or remote BLAST && Check database/program
Sebastien Moretti's avatar
Sebastien Moretti committed
40

41
42
unless (-e $query_file ) { print {*STDERR} "\nfile does not exist!\n";exit;}
unless (-s $query_file ) { print {*STDERR} "\nyour file is empty!\n";exit; }
Sebastien Moretti's avatar
Sebastien Moretti committed
43

Sebastien Moretti's avatar
Sebastien Moretti committed
44
45
46
47
48
49
if ( ($database_line || $database_var) && ($blast_line || $blast_var) ){
    if ( $database_line=~/expressopdb/ && $blast_line=~/blastexpresso/ ){
        #special mode for configuration file of Expresso server
        $locale   = 1;
        $database = 'expressopdb';
        unless ( $quiet=~ /on/i ){
50
            print "\nRUN BLAST LOCALLY\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
51
        }
52
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
53
54
55
56
57
58
    else{
        ($database)    = $database_line || $database_var;
        my ($blast_tp) = $blast_var || $blast_line;
        $locale = 1;
        $blast_way = &CONTROLE_DB_PG($database, $blast_tp, $program);
        unless ( $quiet=~ /on/i ){
59
            print "\nRUN BLAST LOCALLY\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
60
        }
61
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
62
}
Sebastien Moretti's avatar
Sebastien Moretti committed
63
64
65
66
67
68
69
else{
    $database = &NCBI_DATABASE($database_line);
    $distant  = 1;
    if ( $gigablast=~ /^yes$/i ){
        $locale  = 2;
        $distant = 0;
        unless ( $quiet=~ /on/i ){
70
            print "\nRUN ON GIGABLASTER\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
71
72
73
74
        }
    }
    else{
        unless ( $quiet=~ /on/i ){
75
            print "\nRUN BLAST AT THE NCBI\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
76
77
78
79
80
        }
    }
}

##- Define parameters through -method flag
81

Sebastien Moretti's avatar
Sebastien Moretti committed
82
83
84
if ( $method=~ /^pdbid$/i ){
    if ( $gigablast=~ /^yes$/i ){
        if ( $database ne 'pdb' ){
85
            print {*STDERR} "\nprovide a valid database name to RUN ON GIGABLASTER: nr, pdb or refseq_protein\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
86
87
88
89
90
91
92
93
94
            exit 1;
        }
        else {
            $database = 'pdbaa';
        }
    }
    elsif ( $gigablast=~ /^no$/i && $distant==1 ){
        $database = 'pdb';
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
95
}
Sebastien Moretti's avatar
Sebastien Moretti committed
96
97
98
99
100
elsif ( $method=~ /^geneid$/i ){
    if ( $distant==1 ){
        unless ( $database eq 'nr' || $database eq 'swissprot' || $database eq 'pdb'){
            $database = 'refseq_protein';
        }
101
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
102
}
Sebastien Moretti's avatar
Sebastien Moretti committed
103
104
105
106
107
108
elsif ( $method=~/^profile$/i ){
    if ( $distant==1 ){
        unless ( $database eq 'nr' || $database eq 'swissprot' || $database eq 'refseq_protein' ){
            $database = 'pdb';
        }
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
109
}
Sebastien Moretti's avatar
Sebastien Moretti committed
110
111
else {
    die "unknown method\n";
112
113
}

Sebastien Moretti's avatar
Sebastien Moretti committed
114
115
116
117
if ( $orgn !~ /All\+organisms/ && $locale=~/1|2/ ){
    print {*STDERR} "-organism option can't be used locally or with -gigablast option!\n";
    exit 1;
}
Sebastien Moretti's avatar
Sebastien Moretti committed
118

Sebastien Moretti's avatar
Sebastien Moretti committed
119
120
121
122
123
##---PRINT option values
unless ( $quiet =~ /on/i ){
    print {*STDERR} "

             Program :  $program
Sebastien Moretti's avatar
Sebastien Moretti committed
124
             Database : $database
Sebastien Moretti's avatar
Sebastien Moretti committed
125
126
             Method :   $method

Sebastien Moretti's avatar
Sebastien Moretti committed
127
             Query_file : $query_file
Sebastien Moretti's avatar
Sebastien Moretti committed
128
129
130
131
132
133
             Out_file :   $out_file
";
    print {*STDOUT} "
             Evalue threshold :         $Eval
             Matrix :                   $matrix
             Filter :                   $filter
Sebastien Moretti's avatar
Sebastien Moretti committed
134
             Blast_identity_threshold : $identity_treshold
Sebastien Moretti's avatar
Sebastien Moretti committed
135
136
             Cover threshold :          $cover_tresh
";
137
    print {*STDERR} "
Sebastien Moretti's avatar
Sebastien Moretti committed
138
139
140
141
142
143
144
145
146
147
148
             Number of hits :            $align
             Number of processors used : $process
";
    if ( $gigablast=~ /^yes$/i ){
        print {*STDERR} "\n             gigablast: yes\n";
    }
    unless ($locale) {
        print {*STDERR} "\n             Organism : $orgn\n";
    }


149
    print "\n***************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
150
151
}

152

Sebastien Moretti's avatar
Sebastien Moretti committed
153
154
155
156
157
158
159
160
161
162
163
164
#-- Local/Remote BLASTP

if ( $locale==1 || $locale==2 ){
    @list_pdb = &LOCAL_BLAST($blast_way, $database, $query_file, $Eval, $align, $method, $matrix, $filter,
                             $process, $gigablast, $database_expresso, $blast_dir_expresso, $runblast);
}
elsif ( $distant==1 ){
    @list_pdb = &WEB_BLAST($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter);
}
else {
    die " Report bug to armougom\@igs.cnrs-mrs.fr\n";
}
Sebastien Moretti's avatar
Sebastien Moretti committed
165
166

#-- PARSE BLAST RESULTS -> MAKE A PDB_ID LIST
Sebastien Moretti's avatar
Sebastien Moretti committed
167
168
169
170
if ( $method =~ /^pdbid$/i ){
    my (@result_sort) = &PARSING(\@list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast);
    &AFFICHAGE_PDB_PARSING(\@result_sort, $cover_tresh, $identity_treshold, $out_file);
    exit 0;
Sebastien Moretti's avatar
Sebastien Moretti committed
171
172
173
}

#-- PARSE BLAST RESULT -> MAKE LIST OF REFSEQ ID
Sebastien Moretti's avatar
Sebastien Moretti committed
174
175
176
177
elsif ( $method =~ /^geneid$/i ){
    my (@result_sort) = &PARSING(\@list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast);
    &AFFICHAGE_REFSEQ_PARSING(\@result_sort, $cover_tresh, $identity_treshold, $out_file);
    exit 0;
Sebastien Moretti's avatar
Sebastien Moretti committed
178
179
180
}

#-- PARSE BLAST RESULT -> MAKE PROFILE
Sebastien Moretti's avatar
Sebastien Moretti committed
181
182
183
184
185
186
187
elsif ( $method=~ /^profile$/i ){
    &PROFILE(\@list_pdb, $out_file, $distant);
    exit 0;
}
else {
    die " \nFATAL ERROR : Method or database error\n";
}
188

Sebastien Moretti's avatar
Sebastien Moretti committed
189
exit 0;
190

Sebastien Moretti's avatar
Sebastien Moretti committed
191
192
193
                                               ##############
###############################################  FONCTIONS  ####################################################################
                                              ##############
Sebastien Moretti's avatar
Sebastien Moretti committed
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
sub CONTROLE_DB_PG {
    my ($database, $blast_dir, $program) = @_;

    if ( !-e "$database" && !-e "$database.pin"){
        die "$database file does not exist\n";
    }
    if ( -d $database ){
        die "$database must be a file, not a directory\n";
    }
    if ( $blast_dir !~ /\/$/ ){
        $blast_dir .= '/';
    }
    my ($blastall) = $blast_dir.'blastall';
    if ( !-e $blastall){
        die "$blastall program not found \n";
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
210

211
    return ($blastall);
Sebastien Moretti's avatar
Sebastien Moretti committed
212
213
214
}

#-------------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
sub NCBI_DATABASE {
    my ($ncbi_db) = @_;

    my (%all_db) =
    (
            'nr'             =>'1',
            'pdb'            =>'1',
            'swissprot'      =>'1',
            'refseq_protein' =>'1',
    );

    if ( exists $all_db{$ncbi_db} ){
        return($ncbi_db);
    }
    elsif ( $ncbi_db eq '' ){
        return ('');
    }
    else{
        return (1);
    }
235
}
Sebastien Moretti's avatar
Sebastien Moretti committed
236
#------------------------------------------------------------------------------------------------------------------------
237
sub HELP {
Sebastien Moretti's avatar
Sebastien Moretti committed
238
239
    my ($org, @orga) = &LIST_ORGA();
    my ($list_orga)  = join(', ', @orga);
Sebastien Moretti's avatar
Sebastien Moretti committed
240

241
242
    print {*STDERR} "
                      usage: $0 -infile <fasta file> -method <pdbid/geneid or profile> options []\n
Sebastien Moretti's avatar
Sebastien Moretti committed
243
244

                            -program ...... Program Name (blastp)
Sebastien Moretti's avatar
Sebastien Moretti committed
245
                                            Default = blastp
246
                            -database ..... Database at NCBI (nr, pdb, swissprot, refseq_protein) or indicate a local fasta file
Sebastien Moretti's avatar
Sebastien Moretti committed
247
248
249
250
                                            Default = pdb at NCBI
                            -infile ....... Query_file = a list of sequences in fasta format
                            -outfile ...... Name the outfile to make a template file for t_coffee
                                            Default = STDOUT or default.profile if method is profile
251
252
                            -evalue ....... Evalue threshold Default = 1
                            -matrix ....... PAM30 PAM70 BLOSUM45 BLOSUM80
Sebastien Moretti's avatar
Sebastien Moretti committed
253
                                            Default BLOSUM62
254
                            -method ....... geneid, pdbid, profile
255
                            -gigablast .... yes/no FASTER REMOTE BLAST with Gigablaster
Sebastien Moretti's avatar
Sebastien Moretti committed
256
                                            (Stephane Audic program: http://www.igs.cnrs-mrs.fr/public_g/remoteblast.cgi)
Sebastien Moretti's avatar
Sebastien Moretti committed
257
                                            Default no
258
                            -filter ....... T or F locally, L or R or M or C or V for distant blast
Sebastien Moretti's avatar
Sebastien Moretti committed
259
                                            Default = Off
260
261
                            -organism ..... $list_orga are available
                                            Default is All_organisms
Sebastien Moretti's avatar
Sebastien Moretti committed
262
                            -identity ..... blast identity threshold = provide a % for view only the results upper or equal to the threshold
263
264
265
                                            Default 50
                            -cover ........ Cover threshold = provide a % : sequence covering Default: 30
                            -hits ........  Number of hits
Sebastien Moretti's avatar
Sebastien Moretti committed
266
                                            Default = 1
267
268
                            -processor .... Number of processors to use
                                            Default = 1
269
                            -blast_dir .... Indicates where your BLAST directory is installed locally
270
271
                            -quiet ........ on : do not display all the default/defined blast parameters
                                            Default off
Sebastien Moretti's avatar
Sebastien Moretti committed
272
273
274

                     Environement Variables
                     These variables can be set from the environement
275
276
           DATABASE......................[Indicates where your database file must be fetched (locally)]
           BLAST_DIRECTORY...............[Indicates where your BLAST directory is installed locally]
277

Sebastien Moretti's avatar
Sebastien Moretti committed
278
";
Sebastien Moretti's avatar
Sebastien Moretti committed
279
280
    exit 1;
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
281
282
}
#-----------------------------------------------------------------------------------------
283
sub OPTIONS_GET {
Sebastien Moretti's avatar
Sebastien Moretti committed
284
    my %opt = ();
Sebastien Moretti's avatar
Sebastien Moretti committed
285
286

    GetOptions
Sebastien Moretti's avatar
Sebastien Moretti committed
287
              (
288
289
290
291
292
293
294
295
296
297
298
299
           'infile=s'    =>\$opt{infile},
           'outfile=s'    =>\$opt{outfile},
           'program=s'     =>\$opt{program},
           'database=s'     =>\$opt{database},
           'blast_dir=s'     =>\$opt{blast_dir},
           'identity=f'       =>\$opt{treshold},
           'cover=f'           =>\$opt{cover},
           'evalue=f'           =>\$opt{evalue},
           'hits=i'              =>\$opt{hits},
           'matrix=s'             =>\$opt{matrix},
           'filter=s'              =>\$opt{filter},
           'method=s'               =>\$opt{method},
300
           'organism=s'              =>\$opt{organism},
301
302
303
304
           'processor=i'              =>\$opt{processor},
           'quiet=s'                   =>\$opt{quiet},
           'gigablast=s'                =>\$opt{gigablast},
           );
Sebastien Moretti's avatar
Sebastien Moretti committed
305
306
307
308

    if ( $ARGV[0] ){
        print "Unprocessed by Getopt::Long\n $ARGV[0]\n";
        &HELP();
309
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331

    my($evalue_tresh) = $opt{'evalue'};       unless ($evalue_tresh)        { $evalue_tresh = 1;};
    my($cover_tresh)  = $opt{'cover'};         unless (defined $cover_tresh) { $cover_tresh = 30;};
    my($query_file)   = $opt{'infile'};         unless ($query_file)          { print {*STDERR} "Flag -infile must be defined\n"; &HELP();};
    my($outfil)       = $opt{'outfile'};         unless ($outfil)              { $outfil = '';};
    my($treshold)     = $opt{'treshold'};         unless (defined $treshold)    { $treshold = 50;};
    my($blast_dir)    = $opt{'blast_dir'};         unless ($blast_dir)           { $blast_dir = '';};
    my($database)     = $opt{'database'};           unless ($database)            { $database = '';};
    my($program)      = $opt{'program'};             unless ($program)             { $program = 'blastp';};
    my($align)        = $opt{'hits'};                 unless (defined $align)       { $align = 1;};
    my($matrix)       = $opt{'matrix'};                unless ($matrix)              { $matrix = 'BLOSUM62';};
    my($filter)       = $opt{'filter'};                 unless ($filter)              { $filter = 'F';};
    my($method)       = $opt{'method'};                  unless ($method)              {print {*STDERR} "Flag -method must be defined\n"; &HELP();};
    my($organism)     = $opt{'organism'};                 unless ($organism)            { $organism = 'All organisms';};
    my($process)      = $opt{'processor'};                 unless ($process)             { $process = 1;};
    my($param)        = $opt{'quiet'};                      unless ($param)               { $param = 'off';};
    my($gigablast)    = $opt{'gigablast'};                   unless ($gigablast)           {$gigablast = 'no';};

    if ( $method !~ /(^geneid$|^pdbid$|^profile$)/i ){
        print {*STDERR} "unknown method for the flag -method\n";
        &HELP();
    }
332
    if ( $treshold<0 || $treshold>100 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
333
334
335
        print {*STDERR} "\nout of range for the option -treshold \n";
        &HELP();
    }
336
    if ( $cover_tresh<0 || $cover_tresh>100 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
337
338
339
        print {*STDERR} "\nout of range for the option -cover \n";
        &HELP();
    }
340
    if ( $align<0 ){
Sebastien Moretti's avatar
Sebastien Moretti committed
341
342
343
        print {*STDERR} "\n error with option align\n";
        &HELP();
    }
344
    if ( $gigablast !~ /^yes$|^no$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
345
346
347
        print {*STDERR} "invalid argument for gigaglast option: yes/no\n";
        exit 1;
    }
348
    if ( $filter !~ /^[TFRLMCV]{1}$|^off$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
349
350
351
        print {*STDERR} "valid values for -filter are T,F,R,L,M,C,or V!\n";
        exit 1;
    }
352
    if ( $matrix !~ /PAM30|PAM70|BLOSUM45|BLOSUM80|BLOSUM62/ ){
Sebastien Moretti's avatar
Sebastien Moretti committed
353
354
355
        print {*STDERR} "valid values for -matrix are PAM30,PAM70,BLOSUM45,BLOSUM80 or BLOSUM62\n";
        exit 1;
    }
356
    if ( $outfil eq '' && $method=~ /^profile$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
357
358
359
        $outfil = 'default_profile.template';
    }

360
    if ( $param!~ /^on$|^off$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
361
362
363
364
365
366
367
        print {*STDERR} "valid values for -quiet is on or off\n";
        exit 1;
    }
    my ($orgn, @all_orgn) = &ORGN($organism);
    return ($program,$database,$blast_dir,$query_file,
            $outfil,$treshold,$cover_tresh,$evalue_tresh,
            $align,$matrix,$filter,$method,$orgn,$process,$param,$gigablast);
Sebastien Moretti's avatar
Sebastien Moretti committed
368
369
370
}

#--------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
371
372
sub RECOVER {
    my ($pdb_result, $aln_length, $length_query) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
373
    my $nb_gap=0;
Sebastien Moretti's avatar
Sebastien Moretti committed
374
375
376
377
378

    if ( $pdb_result=~ /(score.+?\n\n\n).+?score/ism ){#If several HSP, take the 1st
        $pdb_result = $1;
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
379
    $length_query  =~ s/,//g;
Sebastien Moretti's avatar
Sebastien Moretti committed
380
    my ($requete)  =  join('', ($pdb_result=~/^Query(.*)\n/gm) );
Sebastien Moretti's avatar
Sebastien Moretti committed
381
    $requete       =~ s/[^A-Z-]//g;
Sebastien Moretti's avatar
Sebastien Moretti committed
382
383
384
385
386
387
    my (@sequence) =  split('', $requete);

    for (my $i=0; $i<=$#sequence; $i++){
        if ( $sequence[$i] eq '-'){
            ++$nb_gap;
        }
Sebastien Moretti's avatar
Sebastien Moretti committed
388
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
389
    my ($recouvrement) = sprintf("%-3d", (($aln_length-$nb_gap)/$length_query)*100 );
Sebastien Moretti's avatar
Sebastien Moretti committed
390
    undef(@sequence);
Sebastien Moretti's avatar
Sebastien Moretti committed
391
    return ($recouvrement, $nb_gap);
Sebastien Moretti's avatar
Sebastien Moretti committed
392
393
}
#--------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
sub WEB_BLAST {
    open (my $SOR1, '>', 'web_tempo.result') or die;
    my ($query_file, $Eval, $program, $database, $matrix, $method, $align, $orgn, $filter) = @_;
    my $aln_view;
    my $format = 'Txt';
    my ($description) = $align;
    if ( $method=~/^profile$/i ){
        $aln_view = 'FlatQueryAnchoredNoIdentities';
    }
    else {
        $aln_view = 'Pairwise';
    }

    if ( $filter eq 'F' ){
        $filter = 'off';
    }

    $/ = '>';
    open( my $FIC, '<', $query_file) or die "cannot open $query_file $!\n";
    my (@sequences) = <$FIC>;
    close $FIC;
Sebastien Moretti's avatar
Sebastien Moretti committed
415
416
    shift(@sequences);

Sebastien Moretti's avatar
Sebastien Moretti committed
417
418
419
420
421
422
423
424
425
426
    foreach my $sequence(@sequences){
        $sequence =~ s/>//g;
        $sequence = ">$sequence";

        my ($name) = ($sequence =~ /^>(.+)\n/);
        push(@names, $name);
        my ($encoded_query) = uri_escape($sequence);
        push (@list_encoded, $encoded_query);
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
427
    undef(@sequences);
Sebastien Moretti's avatar
Sebastien Moretti committed
428
429
430
431
432
    if ( scalar (@names != @list_encoded) ){
        die "error $!";
    }
    foreach my $encoded_seq(@list_encoded){
        my $nb = 0;
433
        print "BLAST $names[$i]...";
Sebastien Moretti's avatar
Sebastien Moretti committed
434

Sebastien Moretti's avatar
Sebastien Moretti committed
435
#-- BUILD THE REQUEST
Sebastien Moretti's avatar
Sebastien Moretti committed
436
437
        my ($arguments) = "CMD=Put&ENTREZ_QUERY=$orgn&CDD_SEARCH=off&FILTER=$filter&MATRIX_NAME=$matrix&PROGRAM=$program&DATABASE=$database&QUERY=" . $encoded_seq;

Sebastien Moretti's avatar
Sebastien Moretti committed
438
439
#        my ($req) = new HTTP::Request POST => 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi'; #old URL
        my ($req) = new HTTP::Request POST => 'http://blast.ncbi.nlm.nih.gov/Blast.cgi';
Sebastien Moretti's avatar
Sebastien Moretti committed
440
441
442
        $req -> content_type('application/x-www-form-urlencoded');
        $req -> content($arguments);

Sebastien Moretti's avatar
Sebastien Moretti committed
443
#-- GET THE RESPONSE : PARSE OUT THE REQUEST ID and THE ESTIMATED TIME
Sebastien Moretti's avatar
Sebastien Moretti committed
444
445
446
447
448
449
450
451
452
453
454
455
456
457
        my ($response) = $ua -> request($req);

        if ( $response -> content =~ /Server Error/i ){
            die "Server Error at NCBI!!Sorry try later\n";
        }
        $response -> content =~ /^\s{4}RID = (.*)$/m;
        my ($rid) = $1;

        $response -> content =~ /^\s{4}RTOE = (.*)$/m;
        my ($wait)= $1;
        unless ($rid && $wait){
            die "parse error: $!";
        }
        for (my $j=0; $j<=$wait/2; $j++){
458
            print '.';
Sebastien Moretti's avatar
Sebastien Moretti committed
459
460
461
462
463
464
465
            sleep 2;
        }

        my ($verif) = 0;

        while (){
            for (my $j=0; $j<=5; $j++){
466
                print '.';
Sebastien Moretti's avatar
Sebastien Moretti committed
467
468
469
470
                sleep 1;
            }

            $req = new HTTP::Request GET =>
Sebastien Moretti's avatar
Sebastien Moretti committed
471
472
                "http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=$rid";
#                "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=$rid"; #old url
Sebastien Moretti's avatar
Sebastien Moretti committed
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
            $response = $ua->request($req);
            if ( $response->content =~ /Status=WAITING/im ){
                next;
            }
            elsif ( $response->content =~ /Status=FAILED/im ){
                print {*STDERR} "Search $rid failed\n";
                $verif = 1;
                last;
            }
            elsif ( $response->content =~ /Status=UNKNOWN/im ){
                print {*STDERR} "Search $rid expired\n";
                $verif = 1;
                last;
            }
            elsif ( $response->content =~ /Status=READY/im ){
                if ( $response->content =~ /ThereAreHits=yes/im ){
                    last;
                }
                else {
492
                    print {*STDERR} "No hits found\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
493
494
495
496
497
                    $verif = 1;
                    last;
                }
            }
            elsif ( $response->content =~ /can\'t connect/im ){
Sebastien Moretti's avatar
Sebastien Moretti committed
498
                print {*STDERR} "\nCan't connect to blast.ncbi.nlm.nih.gov:80...new attempt";
Sebastien Moretti's avatar
Sebastien Moretti committed
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
                if ( $nb <3 ){
                    ++$nb;
                    next;
                }
                else {
                    print {*STDERR} "sorry, BLAST $names[$i] failed after 3 attempts!!\n";
                    $verif = 1;
                    last;
                }
            }
            else {
                print {*STDERR} "unknown error\n";
                $verif = 1;
                last;
            }
        }

        if( $verif==1 ){
            ++$i;
            next;
        }

Sebastien Moretti's avatar
Sebastien Moretti committed
521
#-- GET RESULT
Sebastien Moretti's avatar
Sebastien Moretti committed
522
523
524

        while (){
            sleep 3;
Sebastien Moretti's avatar
Sebastien Moretti committed
525
526
#        $req = new HTTP::Request GET => "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_TYPE=$format&FILTER=off&EXPECT=$Eval&ALIGNMENTS=$align&DESCRIPTIONS=$align&ALIGNMENT_VIEW=$aln_view&RID=$rid";
        $req = new HTTP::Request GET => "http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Get&FORMAT_TYPE=$format&FILTER=off&EXPECT=$Eval&ALIGNMENTS=$align&DESCRIPTIONS=$align&ALIGNMENT_VIEW=$aln_view&RID=$rid";
527
528
        $response = $ua -> request($req);

529
        if   ($response->content =~ /Altschul/i) {  print "Search Complete\n"; push(@list_pdb,$response -> content);last; }
530
531
532
533
534
535
        else { next; }
    }
    print {$SOR1} (@list_pdb);
    ++$i;
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
536
    undef (@list_encoded);
537

Sebastien Moretti's avatar
Sebastien Moretti committed
538
    close $SOR1;
Sebastien Moretti's avatar
Sebastien Moretti committed
539
    return (@list_pdb);
540

Sebastien Moretti's avatar
Sebastien Moretti committed
541
542
543
544
}
#-----------------------------------------------------------------------------------------------------------------------
sub LOCAL_BLAST
{
Sebastien Moretti's avatar
Sebastien Moretti committed
545
546
547
548
    my ($blast_dir, $database, $query_file, $Eval, $align,
        $method, $matrix, $filter, $process, $gigablast,
        $database_expresso, $blast_dir_expresso, $runblast) = @_;
    my $n = 0;
549
    if ( $method=~ /^profile$/i && $gigablast=~ /^no$/i ){
Sebastien Moretti's avatar
Sebastien Moretti committed
550
        open (COM,"$blast_dir -p blastp -d $database -i $query_file -m 6 -M $matrix -v $align -b $align -F $filter -e $Eval -a $process|") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
551
    }
552
553
    elsif ( $method=~ /^geneid$/i && $gigablast=~ /^no$/i )
    {
Sebastien Moretti's avatar
Sebastien Moretti committed
554
        open (COM,"$blast_dir -p blastp -d $database -i $query_file  -v $align  -b $align -F $filter -M $matrix -e $Eval -a $process|") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
555
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
556
    elsif ( $method=~ /^geneid$|^pdbid$/i && $gigablast=~ /^yes$/i )
Sebastien Moretti's avatar
Sebastien Moretti committed
557
    {
558
        unless ( $database eq 'nr' || $database eq 'pdb' || $database eq 'refseq_protein' || $database eq 'pdbaa' ){
Sebastien Moretti's avatar
Sebastien Moretti committed
559
560
561
562
563
564
            print {*STDERR} "\nsorry invalid database for gigablast\n";exit 1;
        }
        if ( $database eq 'pdb')            { $database = 'pdbaa';}
        if ( $database eq 'refseq_protein') { $database = 'refprot';}
        if ( $database eq '')               { print {*STDERR} "provide a valid database!\n"; exit;}
        open (COM,"$runblast -d $database -p blastp  -e $Eval -v $align -F F \<$query_file |");
565
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
566
567
    elsif ( $method=~ /^profile$/i && $gigablast=~ /^yes$/i){
        print {*STDERR} "\nSorry method profile can't be used with -gigablast option\n";exit;
Sebastien Moretti's avatar
Sebastien Moretti committed
568
    }
569
570
571
572
573
    elsif ( $method=~ /^pdbid$/i && $database=~ /expressopdb/ ){
        #BLAST pour Expresso
        open (COM,"$BLASTMAT; $BLASTDB; $blast_dir_expresso -p blastp -d $database_expresso -i $query_file -F $filter -e $Eval -M $matrix -v $align -b $align |") or die;
    }
    else{
Sebastien Moretti's avatar
Sebastien Moretti committed
574
        open (COM,"$blast_dir -p blastp -d $database -i $query_file -v 1 -b 1 -F $filter -e $Eval -M $matrix -v $align -b $align -a $process |") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
575
    }
576

577
    unless ( $quiet=~ /on/ ) { print "\nrun BLAST..."; }
578

Sebastien Moretti's avatar
Sebastien Moretti committed
579
    my ($name_database, $posted, $version) = ('', '', '');
Sebastien Moretti's avatar
Sebastien Moretti committed
580

Sebastien Moretti's avatar
Sebastien Moretti committed
581
    open (my $SOR2, '>', 'blast_result.txt') or die;
582

Sebastien Moretti's avatar
Sebastien Moretti committed
583
584
585
586
587
588
    $/ = 'Query=';
    while (<COM>){
        if ( $_=~ /Database:\s+(\S+)/g )     { $name_database = $1;}
        if ( $_=~ /Posted date:\s+(.+?)\n/ ) { $posted        = $1;}
        if ( $_=~ /(BLASTP\s+\S+)/o )        { $version       = $1;}
        print {$SOR2} $_;
Sebastien Moretti's avatar
Sebastien Moretti committed
589
        push (@list_pdb, $_);
590
#        if ( $_=~ /\s*(.+?)\s/ ){
591
#            print "\n$1 done";
592
#        }
Sebastien Moretti's avatar
Sebastien Moretti committed
593
594
    }
    close COM;
Sebastien Moretti's avatar
Sebastien Moretti committed
595
    close $SOR2;
596
#    print "\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
597

Sebastien Moretti's avatar
Sebastien Moretti committed
598
    $name_database = $database if ( $name_database =~ m{/} );
599
    unless ($quiet=~ /on/i) {
Sebastien Moretti's avatar
Sebastien Moretti committed
600
601
602
        print {*STDOUT} "
             Version:     $version
             Database:    $name_database
Sebastien Moretti's avatar
Sebastien Moretti committed
603
             Posted date: $posted\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
604
    }
605
    shift (@list_pdb);
Sebastien Moretti's avatar
Sebastien Moretti committed
606
607
608
609
    return (@list_pdb);
}

#-----------------------------------------------------------------------------------------------------------------------------
610
sub PARSING {
Sebastien Moretti's avatar
Sebastien Moretti committed
611
    my ($list_pdb, $locale, $distant, $method, $quiet, $database, $gigablast) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
612

613
614
615
616
    my (@list_pdb)        = @$list_pdb;
    my (@result_not_sort) = ();
    my $n = 0;
    open (my $SOR, '>', 'webblast.log') or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
617

618
619
620
    if ( $gigablast=~ /^yes$/i ) { $locale = 2;$distant = 0;}
    if ( $gigablast=~ /^no$/i )  { $locale = 1;}
    if ( $distant==1 )           { $locale = 0;}
Sebastien Moretti's avatar
Sebastien Moretti committed
621

622
623
624
625
626
627
    foreach my $pdb_result(@list_pdb){
        my $query, my $length_query, my($pdb_id), my $comp=0;
        if ( $pdb_result=~/No hits found/m ){
            print {$SOR} $pdb_result;
            next;
        }
628

629
630
631
632
633
634
635
636
637
        $pdb_result =~ s/ALIGNMENTS//;
        local $/ = undef;
        my (@intra_res) = split(/(?=\n\n>)/s, $pdb_result);


        if ( $distant==1 ){
            my $version_d, my $database_d, my $poste_d;
            undef $/; ($query, $length_query) = ( $intra_res[0] =~ /Query=\s+(\S+)\s+Length=\s*(\d+)/smo );
            $/ = "\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
638
            open (F3, '<', 'web_tempo.result') or die;
639
640
641
642
643
644
645
646
647
            while ($_=<F3>){
                if ($_=~ /(BLASTP\s+\S+)/o)        { $version_d  = $1;}
                if ($_=~ /Database:\s+(.+?)$/o)    { $database_d = $1;}
                if ($_=~ /Posted date:\s*(.+?)$/o) { $poste_d    = $1; last;}
            }
            close F3;
#           $database_d = $database if ( $database_d =~ m{/} );
            unless ($quiet=~ /on/i || $n>0){
                ++$n;
648
                print {*STDOUT} "
Sebastien Moretti's avatar
Sebastien Moretti committed
649
650
             Version:     $version_d
             Database:    $database_d
Sebastien Moretti's avatar
Sebastien Moretti committed
651
             Posted date: $poste_d\n\n";
652
653
654
655
656
            }
        }
        else{
            ($query, $length_query) = ( $intra_res[0] =~ /\s*(.+?)\s.+?\(([\d,]+) letters/smo );
        }
657

658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
        shift(@intra_res) if ( exists($intra_res[1]) );

        foreach my $intra_res(@intra_res){ #look for the different results of the query
            my($aln_length, $identity) = ( $intra_res=~ /^\sIdentities = \d+\/(\d+)\s\((.+?)\)/im );
            my($recouvrement, $gap)    = &RECOVER($intra_res, $aln_length, $length_query);
            my($evalue)                = ( $intra_res=~ /Expect = (.+?)\s/im );
            my($bits)                  = ( $intra_res=~ /Score =\s+([\d.]+)\s/im );

            unless ( $method !~ /^geneid$/i ){
                if ( $comp<=$bits ){
                    $comp=$bits;
                }
                else{
                    last;
                }
            }

            if ( $query eq '' || $length_query eq '' || $aln_length eq '' || $identity eq '' || $recouvrement eq '' || $gap eq '' ){
                print {$SOR} " can't parse $pdb_result";
                next;
            }

            if ( $method =~ /^pdbid$/i ){
                if ( $locale==1 ){
                    ($pdb_id) = ( $intra_res=~ /^>(.{6})/im );
                    $pdb_id   =~ s/_//;
                    $pdb_id   = uc($pdb_id);
                }
                else{
                    ($pdb_id) = ( $intra_res=~ /^>pdb\|(.{6})/im );
                     $pdb_id  =~ s/\|//;
                }
                ($evalue) = ( $intra_res=~ /Expect = (.+?)\s/im );

                push (@result_not_sort, "$query\t$pdb_id\t$evalue\t$identity\t$recouvrement\t");
            }
            elsif ( $method =~/^geneid$/i ){
                if ( $database !~ /pdb/i && $database !~ /swiss/i && $locale =~ /1|2/){
                    while ( $intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/sg ){
                        my $databank = $1;
                        my $last     = $4;
                        my $refseq   = $2;
                        if ( $databank eq 'pdb' ){
                            $refseq .= $last;
                        }
                        $refseq =~ s/\.\d+$//;
                        push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
                    }
                }
                elsif ( $database=~ /pdb|pdbaa/i && ($locale==1 || $locale==2) ){
                    my $refseq;
                    if ( $locale==1 ){
                        ($refseq) = ( $intra_res=~ />(.*?)\s/o );
                        $refseq   =~ s/_//;
                    }
                    else{
                        ($refseq) = ( $intra_res=~ /^>pdb\|(.{6})/im );
                        $refseq   =~ s/\|//;
                    }

                    unless ($refseq){
                        print {$SOR} $intra_res;
                        next;
                    }
                    push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\tpdb");
                }
                elsif ( $distant==1 ){
                    my ($resul) = &MULTI_EQUIVALENT($query, $identity, $recouvrement, $bits, $evalue, $intra_res);
                    push (@result_not_sort, $resul);
                }
                elsif ( $database=~ /swiss/i ){
                    my ($refseq) = ( $intra_res=~ />.*?sp\|(.+?)\|/o );
                    unless ($refseq){
                        print {$SOR} $pdb_result;
                        next;
                    }
                    $refseq =~ s/\.\d+$//;
                    push (@result_not_sort, "$query\t$refseq\t$identity\t$recouvrement\t$bits\tswiss_prot");
                }
            }
            else {die;}
        }
740
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
741
    close $SOR;
Sebastien Moretti's avatar
Sebastien Moretti committed
742
    undef (@list_pdb);
743

744
745
746
747
748
749
750
751
752
753
    if ( $method =~/^geneid$/i ){
        return (@result_not_sort);
    }
    else{
        my(@result_sort) = map  { $_->[1] }
                           sort { $b->[0]<=>$a->[0] }
                           map  { [/\t([\d.]+)%/,$_] }
                           @result_not_sort;
        undef(@result_not_sort);
        return (@result_sort);
754
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
755
756
757
758
}

#-------------------------------------------------------------------------------------------------------------------

Sebastien Moretti's avatar
Sebastien Moretti committed
759
760
sub MULTI_EQUIVALENT {
    my($query, $identity, $recouvrement, $bits, $evalue, $intra_res) = @_;
Sebastien Moretti's avatar
Sebastien Moretti committed
761
762

    my @result=();
Sebastien Moretti's avatar
Sebastien Moretti committed
763
764
765
766
    while ( $intra_res=~ />.*?(gb|prf|emb|sp|pir|tpe|ref|prf|dbj|ddbj|pdb)[\|]+([A-Za-z0-9_\.]+?)(\s|\|(.{1}))/g ){
        my $databank = $1;
        my $last     = $4;
        my $refseq   = $2;
767

Sebastien Moretti's avatar
Sebastien Moretti committed
768
769
770
        if ( $databank eq 'pdb' ){ $refseq .= $last; }
        $refseq =~ s/\.\d+$//;
        push (@result, "$query\t$refseq\t$identity\t$recouvrement\t$bits\t$evalue\t$databank");
Sebastien Moretti's avatar
Sebastien Moretti committed
771
772
773
774
    }
    return (@result);
}
#--------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
sub AFFICHAGE_REFSEQ_PARSING {
    my($result_sort, $cover_tresh, $identity_treshold, $out_file) = @_;
    my(@result_sort) = @$result_sort, my(@name_gid) = ();
    my@resultats = ();
    my $afficher = '';

    (my($entete)= sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s", 'Sequence Name', 'Accession number', 'Databank', "%Identity", "%Cover", 'BITS', 'Evalue'));

    foreach my $result_sort(@result_sort){
        my($seq_name, $refseq_name, $identiq, $cover, $bits, $evalue, $bank) = split("\t", $result_sort);
        $evalue =~ s/,$//; #To remove an additional comment with new blast release (2.2.17)
        ($identiq) = split(/%/,$identiq);

        if ($identiq >= $identity_treshold && $cover >= $cover_tresh){
            push (@name_gid, ">$seq_name\@$bank\_\_$refseq_name\n");
            (($afficher) .=  sprintf("%-40s %-25s %-10s %-12s %-10s %-10s %-10s ", $seq_name, $refseq_name, $bank, $identiq, $cover, $bits, $evalue));
            $afficher .= "\n";
        }
        else {next;}
Sebastien Moretti's avatar
Sebastien Moretti committed
794
795
    }

Sebastien Moretti's avatar
Sebastien Moretti committed
796
    if ($afficher) { print "\n$entete\n\n"; print $afficher; }
Sebastien Moretti's avatar
Sebastien Moretti committed
797
798


Sebastien Moretti's avatar
Sebastien Moretti committed
799
800
801
802
803
    if (@name_gid) { print {*STDOUT} "\n**********************************************************************\n\n"; }
    if ($out_file) { open (SOR,">$out_file") or die "can not open $out_file"; print SOR @name_gid; }
    print {*STDOUT} "\n", @name_gid;
    close SOR;
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
804
805
806
}
#-------------------------------------------------------------------------------------------------------------

Sebastien Moretti's avatar
Sebastien Moretti committed
807
808
sub AFFICHAGE_PDB_PARSING {
    my($result_sort, $cover_tresh, $identity_treshold, $out_file) = @_;
809

Sebastien Moretti's avatar
Sebastien Moretti committed
810
    my(@result_sort) = @$result_sort, my @sortie=();
811

Sebastien Moretti's avatar
Sebastien Moretti committed
812
    print {*STDOUT} "\n\n",(my($en_tete) = sprintf("%-40s %-10s %-10s %-12s %-10s", 'Sequence Name', 'PDB_id', 'Evalue', 'Identity(%)', 'Cover(%)')),"\n\n";
813

Sebastien Moretti's avatar
Sebastien Moretti committed
814
815
816
    foreach my $result_sort(@result_sort){
        my($seq_name, $pdb_name, $EValue, $identiq, $cover) = split("\t", $result_sort);
        ($identiq)                                          = split(/%/, $identiq);
817

Sebastien Moretti's avatar
Sebastien Moretti committed
818
819
820
821
822
823
824
        if ( $identiq >= $identity_treshold && $cover >= $cover_tresh ){
            push (@pdb_list, $pdb_name);
            $EValue =~ s/,$//;
            print {*STDOUT} ((my $afficher) = sprintf("%-40s %-10s %-10s %-12s %-10s", $seq_name, $pdb_name, $EValue, $identiq, $cover)),"\n";
            push (@sortie, ">$seq_name _P_ $pdb_name\n");
        }
        else {next;}
Sebastien Moretti's avatar
Sebastien Moretti committed
825
826
    }
    undef(@result_sort);
827
    print {*STDOUT} "\n**********************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
828
829

#-- OUTFILE /STDOUT
Sebastien Moretti's avatar
Sebastien Moretti committed
830
    if ($out_file){ open (SOR,">$out_file") or die "can not open $out_file"; print SOR @sortie; }
831
    print {*STDOUT} @sortie;
Sebastien Moretti's avatar
Sebastien Moretti committed
832
    close SOR;
Sebastien Moretti's avatar
Sebastien Moretti committed
833
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
834
835
}
#-----------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
836
837
838
839
840
sub PROFILE {
    my($list_pdb, $out_file, $distant) = @_;

    my(@list_pdb) = @$list_pdb, my(@sortie)=();
    my %names = ();   my $i = 0;    my($name) = '';
Sebastien Moretti's avatar
Sebastien Moretti committed
841
842

    open (SOR1,">$out_file") or die;
Sebastien Moretti's avatar
Sebastien Moretti committed
843
844
845
846
847
848
    foreach my $pdb_result(@list_pdb){
        if ( $pdb_result =~ /No hits found/i ){ next; }
        else{
            ++$i;
            if ( $distant==1 ){($name) = ($pdb_result =~ /Query=\s*(.+?)Length/smoi) or die "\nparse error in distant profile\n";}
            else              {($name) = ($pdb_result =~ /\s*(.+?)\(.*?letters/ismo) or die "\nparse error in profile\n";}
849

Sebastien Moretti's avatar
Sebastien Moretti committed
850
            my($name1) = ($name=~ /(.+?)\s+$/);
851

Sebastien Moretti's avatar
Sebastien Moretti committed
852
853
854
            open (my $SOR, '>', "tempo_file_profile") or die "can not open tempo_file_profile";
            print {$SOR} "Query= $pdb_result";
            close $SOR;
855

Sebastien Moretti's avatar
Sebastien Moretti committed
856
857
858
            open(COM,"|t_coffee -other_pg seq_reformat -input blast_aln -in tempo_file_profile -output fasta_aln -out ${i}.profile");
            close COM;
            push (@sortie,">$name1 _R_ ${i}.profile\n");
859

Sebastien Moretti's avatar
Sebastien Moretti committed
860
        }
861
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
862
    unlink("tempo_file_profile");
863
864
    undef(@list_pdb);

865
    print "\n**********************************************************************\n\n";
Sebastien Moretti's avatar
Sebastien Moretti committed
866
#-- OUTFILE /STDOUT
Sebastien Moretti's avatar
Sebastien Moretti committed
867
    if ($out_file){ open (SOR1,">$out_file") or die "can not open $out_file"; print SOR1 @sortie; }
868
    print {*STDOUT} @sortie;
Sebastien Moretti's avatar
Sebastien Moretti committed
869
    close SOR1;
Sebastien Moretti's avatar
Sebastien Moretti committed
870
    return;
Sebastien Moretti's avatar
Sebastien Moretti committed
871
872
873
}

#--------------------------------------------------------------------------------------------------------------------------------
Sebastien Moretti's avatar
Sebastien Moretti committed
874
875
876
877
878
879
880
881
882
883
884
885
886
887
sub ORGN {
    my($organism) = @_;

    $organism =~ s/_/ /;

    my(%orgs) = (

        'Homo sapiens'            =>'1',
        'Bos taurus'              =>'1',
        'Gallus gallus'           =>'1',
        'Viruses'                 =>'1',
        'Bacteria'                =>'1',
        'Eukaryota'               =>'1',
        'Mammalia'                =>'1',
888
        'Vertebrata'              =>'1',
Sebastien Moretti's avatar
Sebastien Moretti committed
889
890
891
892
893
894
895
896
897
        'All organisms'           =>'1',
        'Fungi'                   =>'1',
        'Primates'                =>'1',
        'Archaea'                 =>'1',
        'Arabidopsis thaliana'    =>'1',
        'Caenorhabditis elegans'  =>'1',
        'Escherichia coli'        =>'1',
        'Mus musculus'            =>'1',
        'Drosophila melanogaster' =>'1',
898
        );
Sebastien Moretti's avatar
Sebastien Moretti committed
899

Sebastien Moretti's avatar
Sebastien Moretti committed
900
901
902
903
904
905
906
907
908
    if ( exists $orgs{$organism} ){
        $organism =~ s/ /+/g;
        return ($organism);
    }
    else{
        print {*STDERR} "organism not valid or syntax error, replace space by \"_\" \n";
        &HELP();
        return;
    }
909
}
Sebastien Moretti's avatar
Sebastien Moretti committed
910
911
#------------------------------------------------------------------------------------------------------------------------------------

Sebastien Moretti's avatar
Sebastien Moretti committed
912
sub LIST_ORGA {
Sebastien Moretti's avatar
Sebastien Moretti committed
913
914
915
916
917
918
    my (%orgs) = (

        'Homo sapiens'            =>'1',
        'Bos taurus'              =>'1',
        'Gallus gallus'           =>'1',
        'Viruses'                 =>'1',
919
        'Bacteria'                =>'1',
Sebastien Moretti's avatar
Sebastien Moretti committed
920
921
922
923
924
925
926
927
928
929
930
931
        'Eukaryota'               =>'1',
        'Mammalia'                =>'1',
        'Vertebrata'              =>'1',
        'All organisms'           =>'1',
        'Fungi'                   =>'1',
        'Primates'                =>'1',
        'Archaea'                 =>'1',
        'Arabidopsis thaliana'    =>'1',
        'Caenorhabditis elegans'  =>'1',
        'Escherichia coli'        =>'1',
        'Mus musculus'            =>'1',
        'Drosophila melanogaster' =>'1',
932
933
        );

Sebastien Moretti's avatar
Sebastien Moretti committed
934
935
936
937
    my (@cle) = keys(%orgs);
    foreach my $cle(@cle){
        $cle=~ s/ /_/g;
    }
Sebastien Moretti's avatar
Sebastien Moretti committed
938
939
    return (@cle);
}
940