Skip to content
Snippets Groups Projects
Commit 5696bd00 authored by Severine Duvaud's avatar Severine Duvaud
Browse files

Revert "Added mm10 as requested by Mikhail"

This reverts commit 22c807c0.
parent 22c807c0
No related branches found
No related tags found
No related merge requests found
......@@ -108,16 +108,6 @@ Rectangle {
Utils.reenable_file_selection()
}
}
RadioButton {
id: mm10
text: "mm10"
exclusiveGroup: genome
onClicked:{
genomeName = "mm10"
Utils.reenable_file_selection()
}
}
}
}
}
......@@ -147,7 +137,6 @@ Rectangle {
/* "normal" content */
ListView {
width: 130
anchors { top: parent.top; bottom: parent.bottom; left: parent.left; margins: 10 }
clip: true;
......
......@@ -209,7 +209,6 @@ function reset_genome_versions() {
hg19.checked = false
hg18.checked = false
mm9.checked = false
mm10.checked = false
}
function update_data_type(type) {
......
""" merge reads """
import argparse
import json
import logging
import math
import os
import re
......@@ -19,7 +17,7 @@ def main():
parser.add_argument('--bed-dir',
dest='bed_dir',
default='sorted_bed',
default='./',
help='Directory with bed files')
parser.add_argument('-d',
help='directory with read files',
......@@ -43,14 +41,10 @@ def main():
help="organism id (hg19 or mm9)",
dest="organism",
required=True)
parser.add_argument('--kallisto',
help="Flag indicating that data is from kallisto processing",
dest="kallisto",
action="store_true",
default=False)
args = parser.parse_args()
genome_length = {'hg19': 3101788170, 'hg18': 3091592211, 'mm9': 2654911517, 'mm10': 2730871774}
genome_length = {'hg19': 3101788170, 'hg18': 3091592211, 'mm9': 2654911517}
# define left, right window coverage if only one number is given
if len(args.window) == 1:
......@@ -69,20 +63,18 @@ def main():
bed_file = read_file.rstrip('.expr')
reads[bed_file] = {}
bed_files.append(bed_file)
for line in fin:
entry = line.rstrip()
if entry == '':
continue
(prom_id, val) = entry.split("\t")
val = float(val)
if val == 0.:
continue
reads[bed_file][prom_id] = val
reads[bed_file][prom_id] = float(val)
proms.add(prom_id)
total_counts = get_total_counts(reads, args.bed_dir, args.kallisto)
logging.warning("merge_reads_chipseq: total counts %s\n" % str(total_counts))
fout = open('counts.tab', 'w')
total_counts = get_total_counts(reads, args.bed_dir)
fout = open(os.path.join(args.read_dir, 'counts.tab'), 'w')
fout.write("\t".join(bed_files) + "\n")
fout.write("\t".join([str(total_counts[bed_file])
for bed_file in bed_files]) + "\n")
......@@ -108,6 +100,7 @@ def main():
/ genome_length[args.organism])
if cluster_id not in table:
table[cluster_id] = []
if expr > 0:
table[cluster_id].append(math.log(expr, 2))
fout = open(args.expr_file, 'w')
......@@ -120,7 +113,7 @@ def main():
fout.close()
# write sample indices
fout = open('sample_indices', 'w')
fout = open(os.path.join(args.read_dir, 'sample_indices'), 'w')
fout.write("\n".join([re.sub("\.(bam|bed)\.?g?z?$", '', x, flags=2)
for x in bed_files]))
fout.write("\n")
......@@ -140,7 +133,7 @@ def get_quantiles(prob, reads):
return quantiles
def get_total_counts(reads, bed_dir, is_kallisto):
def get_total_counts(reads, bed_dir):
""" Returns dict of total counts for samples"""
total_count = {}
......@@ -153,16 +146,10 @@ def get_total_counts(reads, bed_dir, is_kallisto):
out, err = p.communicate()
count = int(out.rstrip())
elif f.lower().endswith(".bed.gz"):
p = subprocess.Popen(["zcat", f], stdout=subprocess.PIPE)
p = subprocess.Popen("zcat", stdin=open(f, 'r'), stdout=subprocess.PIPE)
out = subprocess.check_output(["wc", "-l"], stdin=p.stdout)
count = int(out.rstrip())
elif is_kallisto:
if not os.path.exists("%s/run_info.json" % f):
raise(BaseException("Could not open file %s/run_info.json" % f))
with open("%s/run_info.json" % f) as fin:
count = float(json.loads(fin.read())["n_processed"])
total_count[sample] = count
return total_count
......
......@@ -14,12 +14,6 @@ format-and-index $FILEPATH $FILE
# hardcoded number of this step
json_out "info" 2 "STEP 3: Reading regions" true
if [ "$genome" = "hg19" -o "$genome" = "mm10" ]; then
t_option="$INPUT/$genome/transcript_annotation.gtf.gz"
else
t_option="$INPUT/$genome/refSeqAli.gz $INPUT/$genome/all_mrna.gz"
fi
readRegion_Cmd1="$IPATH/readRegion.py -t $t_option -p $INPUT/$genome/clusters_150_gene_associations.gz -i $TMP/processing/$FILE --expr-dir $TMP/expression"
readRegion_Cmd1="$IPATH/readRegion.py -t $INPUT/$genome/refSeqAli.gz $INPUT/$genome/all_mrna.gz -p $INPUT/$genome/clusters_150_gene_associations.gz -i $TMP/processing/$FILE --expr-dir $TMP/expression"
python $readRegion_Cmd1 || json_out "error" 1 "Script $0 Line $LINENO: Problem with: python $readRegion_Cmd1 Error: $( { python $readRegion_Cmd1; } )" true $?
......@@ -54,8 +54,7 @@ def main():
use_numbers = False
if args.input_file.lower().endswith('bam') or args.input_file.lower().endswith('sam') or args.input_file.lower().endswith('.sam.gz'):
use_numbers = have_numbers(readfile)
elif args.input_file.lower().endswith('.bed') or args.input_file.lower().endswith('.bed.gz'):
use_numbers = have_numbers_bed(readfile)
counter = 0
log.warning("starting read processing")
for chrom in proms.trCluster:
......@@ -145,7 +144,7 @@ def processReads(reads):
return blocks
# process reads depending on type BAM or BED
if isinstance(reads[0], pysam.AlignedRead):
if isinstance(reads[0], pysam.AlignedRead) or isinstance(reads[0], pysam.calignmentfile.AlignedSegment):
for read in reads:
if read.is_paired:
if read.is_proper_pair:
......@@ -241,7 +240,6 @@ class Promoters:
exonTree = {}
trCluster = {}
tr_count = 0
transcripts = {}
def __init__(self, prom_file=None, tr_files=None):
""" Init promoters """
......@@ -250,10 +248,6 @@ class Promoters:
if tr_files:
for tr_file in tr_files:
if tr_file.lower().endswith(".gff") or tr_file.lower().endswith(".gtf") \
or tr_file.lower().endswith(".gff.gz") or tr_file.lower().endswith(".gtf.gz"):
self.read_gtf_transcripts(tr_file)
else:
self.read_all_mrna(tr_file)
def read_promoters(self, prom_file):
......@@ -317,63 +311,6 @@ class Promoters:
return transcript
def read_gtf_transcripts(self, transcript_file):
""" Read and process gtf/gff file (for gtf files from gencode project) """
fin = multi_open(transcript_file)
for line in fin:
if line.strip().startswith('#'):
continue
data = line.rstrip().split('\t')
if data[2] == 'transcript':
chrom = data[0]
strand = data[6]
(tr_start, tr_end) = sorted([int(data[3]), int(data[4])])
#extract transcript ID
transcript_id = None
for pair in data[8].split(";"):
if pair.strip().startswith("transcript_id"):
transcript_id = pair.split()[1].strip('"')
if transcript_id is None:
logging.warning("No transcript id found in line %s" % line)
continue
if transcript_id not in self.transcripts:
self.transcripts[transcript_id] = {(chrom, strand, tr_start, tr_end):[]}
else:
self.transcripts[transcript_id][(chrom, strand, tr_start, tr_end)] = []
logging.warning("Transcript %s is seen multiple times!" % transcript_id)
if data[2] == "exon":
chrom = data[0]
strand = data[6]
(ex_start, ex_end) = sorted([int(data[3]), int(data[4])])
#extract transcript ID
transcript_id = None
for pair in data[8].split(";"):
if pair.strip().startswith("transcript_id"):
transcript_id = pair.split()[1].strip('"')
if transcript_id is None:
logging.warning("No transcript id found in line %s" % line)
continue
if transcript_id not in self.transcripts:
logging.warning("transcript id %s no in annotation! Skipping exon %s"
% (transcript_id, line))
flag = True
for coord in self.transcripts[transcript_id]:
if chrom == coord[0] and strand == coord[1] and ex_start >= coord[2] and ex_end <=coord[3]:
self.transcripts[transcript_id][coord].append((ex_start, ex_end))
flag = False
if flag:
logging.warning("Exon %s is not assigned to any transcripts" % line)
for transcript_id in self.transcripts:
# check if we need this transcript
if transcript_id not in self.tr2proms:
continue
for coord in self.transcripts[transcript_id]:
transcript = (transcript_id, coord[0], coord[1], coord[2], coord[3], self.transcripts[transcript_id][coord])
self.add_transcript(transcript)
def add_transcript(self, transcript):
""" Create refseq transcript from line of refGene file
and add it to clusters """
......@@ -515,12 +452,6 @@ def have_numbers(fin):
return len(chroms) == 0
def have_numbers_bed(fin):
""" Checks if reference sequence are in form of numbers or strings in BED """
chroms = [x for x in fin.contigs if x.startswith('chr')]
return len(chroms) == 0
###################
if __name__ == '__main__':
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment