Skip to content
Snippets Groups Projects

Michael edits

1 file
+ 18
5
Compare changes
  • Side-by-side
  • Inline
+ 18
5
from pymongo import MongoClient
import re
import datetime
# import argparse, sys, json
client = MongoClient()
@@ -8,6 +9,9 @@ samples = db.samples
variants = {}
sampleno = -1
if sampleno > 0:
print 'Test run - only '+str(sampleno)+' samples will be processed!'
i = 1
varid = 1
callno = 0
@@ -46,7 +50,7 @@ for sample in samples.find({}, {'UID': 1, 'BIOSAMPLEID': 1, 'SEGMENTS_HG18': 1})
continue
tag = str(seg['CHRO'])+'_'+str(seg['SEGSTART'])+'_'+str(seg['SEGSTOP'])+'_'+alternate_bases
call = {'call_set_id': str(sample['UID']), 'biosample_id': str(biosample_id)}
call = { 'call_set_id': str(sample['UID']), 'biosample_id': str(biosample_id), 'genotype': ['.', '.'] }
try:
varvalue = float(seg['SEGVALUE'])
@@ -56,10 +60,11 @@ for sample in samples.find({}, {'UID': 1, 'BIOSAMPLEID': 1, 'SEGMENTS_HG18': 1})
call['VALUE'] = float(seg['SEGVALUE'])
if tag in variants:
variants[tag]['updated'] = datetime.datetime.utcnow()
variants[tag]['CALLS'].append(call)
callno += 1
else:
variants[tag] = { 'id': str(varid), 'start': start, 'end': end, 'reference_name': str(seg['CHRO']), 'alternate_bases': str(alternate_bases), 'CALLS':[call]}
variants[tag] = { 'id': str(varid), 'start': start, 'end': end, 'reference_name': str(seg['CHRO']), 'created': datetime.datetime.utcnow(), 'updated': datetime.datetime.utcnow(), 'reference_bases': '.', 'alternate_bases': str(alternate_bases), 'CALLS':[call]}
varid += 1
callno += 1
@@ -75,12 +80,20 @@ for sample in samples.find({}, {'UID': 1, 'BIOSAMPLEID': 1, 'SEGMENTS_HG18': 1})
print str(varid)+' variants were created'
break
db_variants = db.myvariants
print str(callno)+' calls were found for '+str(varid)+' variants'
i = 0
db_variants = db.variants
db_variants.remove()
for k,v in variants.items():
insert_id = db_variants.insert(v)
insert_id = db_variants.insert(v)
i += 1
matchObj = re.search('00000$', str(i))
if matchObj:
print i
print str(callno)+' calls were found for '+str(varid)+' variants'
print str(i)+' variants were loaded into the variants collection.'
# with open('variants.json', 'w') as outfile:
# json.dump(variants, outfile, indent=4, sort_keys=True, separators=(',', ':'))
Loading