Skip to content
Snippets Groups Projects
Commit e6bc9ee6 authored by Bo Gao
Browse files
parents 2b1b8fdb d0c68856
Branches
Tags 1.2.1
No related merge requests found
......@@ -66,6 +66,7 @@ def cli(input_db, input_collection, output_db, output_collection_individuals, ou
callsets = {}
variants = {}
variantset_id = 'AM_VS_HG18'
# counter for demo mode
sampleno = 1
......@@ -88,9 +89,6 @@ def cli(input_db, input_collection, output_db, output_collection_individuals, ou
varid = 1
callno = 0
########################################
# return the number of processed samples
########################################
......@@ -205,9 +203,9 @@ def cli(input_db, input_collection, output_db, output_collection_individuals, ou
###########################################################
# Only samples with enough attributes are assumed to be valid; the threshold was originally set to 50 arbitrarily.
# MODI: reduce threshold to 25
# MODI: reduce threshold to 30
# TODO: check & discuss => ?!
if (len(sample) > 25):
if (len(sample) > 30):
no_validSamples += 1
# generate ids
......@@ -246,6 +244,21 @@ def cli(input_db, input_collection, output_db, output_collection_individuals, ou
# TODO: fixing country names should be in an external cleanup script
country = string.capwords(get_attribute('COUNTRY', sample))
country = re.sub('USA', 'United States', country, flags=re.IGNORECASE)
countryMatchObj = re.search('\w', country)
city = string.capwords(get_attribute('CITY', sample))
cityMatchObj = re.search('\w', city)
geoLabel = ''
geoPrecision = ''
if cityMatchObj:
geoLabel = city
geoPrecision = 'city'
if countryMatchObj:
geoLabel = geoLabel+', '+country
elif countryMatchObj:
geoLabel = country
geoPrecision = 'country'
biosamples[biosample_id] = {
......@@ -285,13 +298,9 @@ def cli(input_db, input_collection, output_db, output_collection_individuals, ou
'individual_id': individual_id,
'individual_age_at_collection': get_attribute('AGEISO', sample),
'external_identifiers': external_ids,
'location': { 'geo_label': geoLabel, 'geo_precision': geoPrecision, 'latitude': get_attribute('GEOLAT', sample, 'float', ''), 'longitude': get_attribute('GEOLONG', sample, 'float', '') },
'attributes': {
'geo_lat': { 'values': [ {'double_value': (get_attribute('GEOLAT', sample, 'float', '')) } ] },
'geo_long': { 'values': [ {'double_value': (get_attribute('GEOLONG', sample, 'float', '')) } ] },
'tnm': { 'values': [ { 'string_value': get_attribute('TNM', sample) } ] },
# 'age': { 'values': [ { 'double_value': (get_attribute('AGE', sample, 'float', '')) } ] },
'city': { 'values': [ { 'string_value': get_attribute('CITY', sample) } ] },
'country': { 'values': [ { 'string_value': country} ] },
'death': { 'values': [ { 'string_value': get_attribute('DEATH', sample) } ] },
'followup_months': { 'values': [ { 'double_value': (get_attribute('FOLLOWUP', sample, 'float', '')) } ] },
'redirected_to': 'null'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment