Commit 4dd6fc7d authored by Sebastien Moretti's avatar Sebastien Moretti
Browse files

Fix already-nucleotide detection when sequences contain many U residues (changed to X, for selenocysteine)

parent 7f3fec7a
......@@ -158,7 +158,7 @@ while(<$MSA>){
my $seqq = $_;
$seqq =~ s/\r\n//g;
$seqq =~ s/\./-/g; #for msa with '.' as gap
$seqq =~ s{[BJOUZ]}{X}ig;
$seqq =~ s{[BJOUZ]}{X}ig; #U here for selenocysteine
$seqq =~ s/[^A-Za-z\-\*\n\r]//g; #Remove all the non-gap or non-alphabetic characters from the seq
chomp($seqq);
#fasta sequence on 1 line
......@@ -181,15 +181,14 @@ elsif ( $lim >0 && $fasta_checker > $lim ){
print {*STDERR} "\tThe FASTA file is too large, try with less than $lim sequences\n\tor split your file\n\n";
exit(1);
}
elsif ( exists( $original_seq[0] ) && $original_seq[0] =~ /[acgtu]/i){
elsif ( exists( $original_seq[0] ) && $original_seq[0] =~ /[acgtu]/i ){
my $first_seq = $original_seq[0];
#Check if sequences are amino acids and not nucleotides
my ( $a, $c, $g, $t, $non ) = ( 0, 0, 0, 0, 0 );
$a = ($first_seq =~ s/[aA]//g);
$c = ($first_seq =~ s/[cC]//g);
$g = ($first_seq =~ s/[gG]//g);
$t = ($first_seq =~ s/[tTuU]//g);
$non = ($first_seq =~ s/[^aAcCgGtTuUNn-]//g);
my $a = ($first_seq =~ s/[aA]//g) || 0;
my $c = ($first_seq =~ s/[cC]//g) || 0;
my $g = ($first_seq =~ s/[gG]//g) || 0;
my $t = ($first_seq =~ s/[tTuU]//g) || 0;
my $non = ($first_seq =~ s/[^aAcCgGtTuUXxNn-]//g) || 0;
if ( ($a+$c+$g+$t) >= (($a+$c+$g+$t+$non)*80/100) ){
&failure();
print {*STDERR} "\tYour sequences seem already to be nucleotides\n\tthis program purpose is to turn AMINO ACID alignments into CDS nucleotide alignments\n\n";
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment