Commit a632d519 authored by Severine Duvaud's avatar Severine Duvaud
Browse files

Latest implementation of Beacon 0.3 + handling of datasets

parent 37b166f9
......@@ -6,4 +6,8 @@ beacon.checkResultAndGetResponse = require('./beacon-query.js').checkResultAndGe
// Helpers for the allele query endpoint, implemented in beacon-query.js.
beacon.checkPreconditions = require('./beacon-query.js').checkPreconditions
beacon.buildMongoQuery = require('./beacon-query.js').buildMongoQuery
// Helpers for the dataset endpoint, implemented in beacon-dataset.js.
beacon.checkDatasetIdentifier = require('./beacon-dataset.js').checkDatasetIdentifier
beacon.buildMongoDatasetQuery = require('./beacon-dataset.js').buildMongoDatasetQuery
beacon.checkDatasetResultAndGetResponse = require('./beacon-dataset.js').checkDatasetResultAndGetResponse
// Expose the aggregated helper object as this module's single export.
module.exports = beacon;
\ No newline at end of file
/**
* Created by sduvaud on 26/05/16.
*/
var info = require('./beacon-info.js').info;
// Identifiers of every dataset declared in the beacon info document,
// collected once when the module is loaded.
var allDatasetIds = info['info']['info']['datasets'].map(function (entry) {
    return entry.id;
});
/**
 * Validates the query parameters of the dataset endpoint.
 *
 * The identifier is split on commas further down the pipeline, so it has to
 * be a non-empty string. Anything else (missing value, or an array/object
 * produced by a repeated or nested query parameter) is rejected here instead
 * of failing later on `.split`.
 *
 * @param {Object} params - request query parameters; only `id` is read.
 * @returns {{hasError: boolean, msg: (string|undefined)}} `hasError` is true,
 *   together with an explanatory `msg`, when the identifier cannot be used.
 */
function checkDatasetIdentifier(params) {
    if (!params || !params.id) {
        return {
            hasError: true,
            msg: "No identifier specified"
        };
    }
    // Later stages call params.id.split(','), which only exists on strings.
    if (typeof params.id !== 'string') {
        return {
            hasError: true,
            msg: "Identifier must be a single comma separated string"
        };
    }
    return {
        hasError: false
    };
}
/**
 * Builds the aggregation pipeline that lists datasets with their sample count.
 *
 * Samples are grouped by ICDMORPHOLOGYCODE; each group carries the number of
 * samples and the first ICDMORPHOLOGY value seen as its name.
 *
 * @param {Object} params - request query parameters; only `id` is read.
 *   `id` is either 'all' or a comma separated list of dataset identifiers.
 *   Surrounding whitespace and empty list entries are ignored.
 * @returns {Array<Object>} pipeline stages: only the `$group` stage for 'all',
 *   otherwise a `$match` stage restricting the codes followed by `$group`.
 */
function buildMongoDatasetQuery(params)
{
    console.log("ID=" + params.id);

    var groupCondition = {
        $group: {
            _id: "$ICDMORPHOLOGYCODE",
            sampleCount: {$sum: 1},
            name: { $first: "$ICDMORPHOLOGY"}
        }
    };

    var requested = String(params.id);
    if (requested === 'all') {
        return [
            groupCondition
        ];
    }

    // "a, b," must select the same datasets as "a,b".
    var identifiers = requested.split(',')
        .map(function (id) { return id.trim(); })
        .filter(function (id) { return id !== ''; });

    return [
        {
            // $in is equivalent to an $or over equality tests and, unlike
            // $or, remains a valid query when the list is empty.
            $match: {
                ICDMORPHOLOGYCODE: {$in: identifiers}
            }
        },
        groupCondition
    ];
}
/**
 * Completes the aggregation result with a placeholder entry for every
 * requested dataset that the database did not return.
 *
 * @param {Object} params - request query parameters; only `id` is read.
 *   'all' means every identifier in the module-level `allDatasetIds`,
 *   otherwise a comma separated list (whitespace and empty entries ignored).
 * @param {Array<Object>} datasets - aggregation result; each entry's `_id`
 *   is the dataset identifier.
 * @returns {Array<Object>} a new array: the given entries followed by one
 *   `{_id, sampleCount: 0, name: "Dataset not found"}` entry per distinct
 *   missing identifier. The input array is left untouched.
 */
function checkDatasetResultAndGetResponse(params, datasets) {
    var found = datasets || [];

    // Lookup table of identifiers the database returned.
    var seen = Object.create(null);
    found.forEach(function (dataset) {
        if (dataset._id != null) {
            seen[String(dataset._id)] = true;
        }
    });

    var requested = params.id == null ? '' : String(params.id);
    var submittedIdentifiers;
    if (requested === 'all') {
        submittedIdentifiers = allDatasetIds;
    }
    else {
        submittedIdentifiers = requested.split(',')
            .map(function (id) { return id.trim(); })
            .filter(function (id) { return id !== ''; });
    }

    // Always compare identifier by identifier: equal list lengths do not
    // guarantee that every requested dataset was matched.
    var missing = [];
    submittedIdentifiers.forEach(function (submitted) {
        var key = String(submitted);
        if (!seen[key]) {
            seen[key] = true; // report a repeated identifier only once
            missing.push({
                "_id": submitted,
                "sampleCount": 0,
                "name": "Dataset not found"
            });
        }
    });

    return found.concat(missing);
}
// Public API of the dataset module: validation, query building and
// post-processing of the aggregation result.
module.exports.checkDatasetIdentifier = checkDatasetIdentifier;
module.exports.buildMongoDatasetQuery = buildMongoDatasetQuery;
module.exports.checkDatasetResultAndGetResponse = checkDatasetResultAndGetResponse;
\ No newline at end of file
var info = require('./beacon-info.js').info
//check https://genome.ucsc.edu/FAQ/FAQreleases.html
/*var referenceMap = {
'GRCh38': 'SEGMENTS_HG38',
'GRCh37': 'SEGMENTS_HG19'
};*/
var info = require('./beacon-info.js').info;
// store all the dataset identifiers
// not used when a list of datasets is entered
// find a better place for this?
var allDatasetIds = [];
info['info']['info']['datasets'].forEach(function(dataset){
allDatasetIds.push(dataset.id);
});
var referenceMap = {
'GRCh38': 'SEGMENTS_HG38',
......@@ -69,6 +70,16 @@ function checkPreconditions(params) {
};
}
if (params.datasetIds != 'all') {
var error = checkDatasetIdentifiers(params.datasetIds, allDatasetIds);
if (error.length > 0) {
return {
hasError: true,
msg: "Incorrect dataset(s): " + error.toString()
};
}
}
if (!params.alternateBases) {
return {
hasError: true,
......@@ -83,14 +94,14 @@ function checkPreconditions(params) {
};
}
if (params.length && !Number(params.length)) {
if (params.minlength && !Number(params.minlength)) {
return {
hasError: true,
msg: "min. length not a number"
};
}
if (params.maxLength && !Number(params.maxLength)) {
if (params.maxlength && !Number(params.maxlength)) {
return {
hasError: true,
msg: "max length not a number"
......@@ -104,6 +115,29 @@ function checkPreconditions(params) {
};
}
/*
* Latest version of API v-0.3
*
* BeaconAlleleRequest:
* referenceName
* start
* alternateBases
* assemblyId
* datasetIds
*
* BeaconAlleleResponse
* beaconId
* beaconAlleleRequest
* beacondatasetAlleleResponse[]
* beaconError
*
* BeaconDatasetAlleleResponse
* datasetId
* exists
* beaconError
* note
*
* */
function buildMongoQuery(params) {
var position = parseInt(params.start);
......@@ -146,7 +180,6 @@ function buildMongoQuery(params) {
//elem match element
var condElemMatch = condition[convertedReference]['$elemMatch'];
if (minLength > 0) { //There is a minLength
if (maxLength > 0) { //There is always a minLenght, if there is a max length specified (see code above)
condElemMatch.SEGSIZE = { $gte: minLength,$lte: maxLength } // min. and max lengths
......@@ -155,7 +188,7 @@ function buildMongoQuery(params) {
condElemMatch.SEGSIZE = { $gte: minLength }; // min. length
condElemMatch.SEGSTART = position; // exact position
}
}else { //If there is a position specifiec
}else { //If there is a position specific
condElemMatch.SEGSTOP = { $gte: position };
condElemMatch.SEGSTART = { $lte: position };
}
......@@ -165,66 +198,106 @@ function buildMongoQuery(params) {
andConditions.push({$or: orConditions});
}
return {
$and: andConditions
};
return [
{
"$match": {
"$and": andConditions
}
},
{
"$group": {
_id: "$ICDMORPHOLOGYCODE",
// Number of samples with at least one segment
// Datasets with no sample are not shown
// This is of an issue when the user asks for
// all datasets.
observed : { $sum : 1 }
}
}
];
}
function checkResultAndGetResponse(params, samples, countTotal) {
function checkResultAndGetResponse(params, datasets) {
var responseResource = {
"exists": null,
"error": null,
"note": null
var length = 0;
if (typeof params.minlength != undefined && params.minlength != null && params.minlength != '')
{
length = parseInt(params.minlength);
if (params.alternateBases == 'DUP') {
responseResource.note = "Length provided but not considered when querying DUP";
}
}
if (samples && samples.length > 0) { // a value was found by mongodb
var maxLength = 0;
if (typeof params.maxlength != undefined && params.maxlength != null && params.maxlength != '')
{
maxLength = parseInt(params.maxlength);
}
var responses = [];
// SD, May 2016: according to the specs, observed parameter was removed from BeaconDatasetAlleleResponse.
// We could divide the sample.length by the total number of samples in order to fill the frequency parameters
// for instance (and if of interest for arrayMap).
// responseResource.observed = samples.length;
responseResource.NOT_BEACON_numberMatchedSamples = samples.length;
// The query returns only datasets for which there is at least one sample with at least one matching SEGMENT...
// What should we do with "unmatched" datasets?
// Should we show them, as well with an exists: false?
// what about cases where all the datasets are requested?
var matchedSegments = samples.map(function (s) { return checkResult(params, s)});
if (datasets && datasets.length > 0)
{
// var matchedDatasets = datasets.map(function (s) { return checkResult(params, s)}); // checks to be added!
var matchedDatasets = checkResult(params, datasets);
var response;
if (!matchedSegments || !matchedSegments[0] || !matchedSegments[0].matchedSegment) {
responseResource.exists = null;
// BeaconError instead of simple string
// (error is not null if and only of exists is null)
responseResource.error = {
if (typeof matchedDatasets == undefined && matchedDatasets == null) {
response = {
"exists": null,
"datasetId": null,
"sampleCount": 0,
"error": {
errorCode: 500,
message: "Internal error, DB returned a value but post check is not valid."
},
"note": null
};
responses.push(response);
}else {
matchedDatasets.forEach(function (dataset) {
if (dataset.observed == 0) {
response = {
"exists": false,
"error": null,
"note": null,
"datasetId": dataset._id,
"sampleCount": 0
};
}else {
response = {
"exists": true,
"error": null,
"note": null,
"datasetId": dataset._id,
"sampleCount": dataset.observed
};
} else {
responseResource.exists = true;
responseResource.NOT_BEACON_ARRAYMAP_DEBUG_INFO = {"matchedSegments" : matchedSegments};
}
responses.push(response);
})
}
else {
}else {
response = {
"exists": false,
"datasetId": null,
"sampleCount": 0,
"error": null,
};
if (!alternateBasesMap[params.alternateBases]) {
responseResource.exists = false;
responseResource.note = "Type of variant not supported by arrayMap.";
response.note = "Type of variant not supported by arrayMap.";
}
else {
responseResource.exists = false;
responseResource.note = "No result from arrayMap.";
}
response.note = "No result from arrayMap.";
}
var length = 0;
if (typeof params.length != undefined && params.length != null && params.length != '')
{
length = parseInt(params.length);
if (params.alternateBases == 'DUP') {
responseResource.note = "Length provided but not considered when querying DUP";
}
}
var maxLength = 0;
if (typeof params.maxLength != undefined && params.maxLength != null && params.maxLength != '')
{
maxLength = parseInt(params.maxLength);
responses.push(response);
}
var queryResource = {
......@@ -234,45 +307,64 @@ function checkResultAndGetResponse(params, samples, countTotal) {
"datasetIds": params.datasetIds,
"alternateBases": params.alternateBases,
"length": length,
"maxLength": maxLength
"maxlength": maxLength
};
// BeaconAlleleResponse
return {
"beacon_id": info.id,
"response": responseResource,
"query": queryResource
"beaconId": info['info']['info']['id'],
"datasetAlleleResponses": responses,
"alleleRequest": queryResource,
"error": null
};
}
function checkResult(params, sample) {
var convertedReference = referenceMap[params.assemblyId || defaultReference];
var expectedSegType = alternateBasesMap[params.alternateBases]
var position = parseInt(params.start);
// checks to be added!
// Pending for now!
function checkResult(params, datasets) {
//TODO should check for datasets and genome reference as well
var foundSegment;
var allDatasets = datasets;
sample[convertedReference].forEach(function (segment) {
if (segment.CHRO === params.referenceName) {
var matchedIdentifiers = [];
datasets.forEach(function(dataset){
matchedIdentifiers.push(dataset._id);
});
var posStart = parseInt(segment.SEGSTART);
var posStop = parseInt(segment.SEGSTOP);
var segType = parseInt(segment.SEGTYPE);
var submittedIdentifiers = allDatasetIds;
if (params.datasetIds != 'all') {
submittedIdentifiers = params.datasetIds.split(',');
}
if ((segType == expectedSegType) && (posStart <= position) && (position <= posStop)) {
foundSegment = segment;
return; //exit loop
var missedIdentifiers = checkDatasetIdentifiers(submittedIdentifiers.toString(), matchedIdentifiers);
if (missedIdentifiers != null) {
missedIdentifiers.forEach(function (missed) {
var missedObj = {
"_id": missed,
"observed": 0
}
allDatasets.push(missedObj);
});
}
})
return allDatasets;
}
var result = {
matchedSampleUID: sample.UID,
matchedDataSet: sample.ICDMORPHOLOGYCODE,
matchedSegment: foundSegment
/**
 * Returns the identifiers of a comma separated list that are not part of a
 * reference list.
 *
 * @param {string} userDatasetIdentifiers - comma separated identifiers.
 * @param {Array} allDatasetIdentifiers - identifiers considered known.
 * @returns {Array<string>} the submitted identifiers (in order, repetitions
 *   kept) that do not occur in `allDatasetIdentifiers`; empty when all match.
 */
function checkDatasetIdentifiers(userDatasetIdentifiers, allDatasetIdentifiers) {
    // Lookup table so each submitted identifier is checked in constant time.
    var known = Object.create(null);
    allDatasetIdentifiers.forEach(function (id) {
        if (id != null) {
            known[String(id)] = true;
        }
    });

    var identifiers = userDatasetIdentifiers.split(','); // comma separated list of datasets
    return identifiers.filter(function (id) {
        return !known[id];
    });
}
module.exports.checkPreconditions = checkPreconditions;
......
......@@ -25,7 +25,12 @@ arrayMap.controller('BeaconController', ['$scope', '$location', function ($scope
"alternateBases": "DEL",
"minlength": '',
"maxlength": ''
}
};
// Parameters of the dataset URL; `id` is the value appended as the "id"
// query parameter and starts out as "all".
$scope.datasetConfig = {
"id": "all"
};
$scope.getNewApiUrl = function () {
......@@ -45,6 +50,13 @@ arrayMap.controller('BeaconController', ['$scope', '$location', function ($scope
return $location.absUrl() + "info"
};
// Builds the link to the dataset endpoint from the current page URL and the
// identifier held in $scope.datasetConfig.
$scope.getDatasetUrl = function () {
    var datasetSettings = $scope.datasetConfig;
    var endpoint = $location.absUrl() + "v0.3/dataset?";
    return endpoint + "id=" + datasetSettings.id;
};
$scope.getApiUrl = function () {
var conf = $scope.config;
......
......@@ -3,7 +3,6 @@
<h1>Beacon ArrayMap</h1>
<p>First Prototype of a <a target="_blank" href="https://genomicsandhealth.org/work-products-demonstration-projects/beacon-project-0">Beacon</a>
<a target="_blank" href="https://docs.google.com/document/d/1n7qtCBFwsExP_k2GPfWS_PN53Xeh8YxeCBOgyZ5itOw/edit#"> v0.3</a> implementation for <a target="_blank" href="http://arraymap.org/">ArrayMap</a>.</p>
<p>See <a href="documentation">documentation and open questions (OUTDATED)</a>.</p>
</div>
<form class="form-horizontal">
<div class="form-group">
......@@ -86,5 +85,10 @@
</div>
</div>
<div class="form-group">
<div class="col-sm-offset-2 col-sm-8">
<a target="beacon" ng-href="{{getDatasetUrl()}}">{{getDatasetUrl()}}</a>
</div>
</div>
</form>
</div>
\ No newline at end of file
......@@ -73,26 +73,36 @@ router.get('/v0.3/query/', function (req, res) {
res.json(preconditions.msg); //Gets error messages
return;
}
var mongoQuery = beacon.buildMongoQuery(req.query);
console.log("Building MongoDB query params: " + JSON.stringify(mongoQuery));
// Count: list of datasets (and not 1 dataset)
var orConditions = [];
var identifiers = req.query.datasetIds.split(','); // comma separated list of datasets
identifiers.forEach(function (id) {
orConditions.push({ICDMORPHOLOGYCODE: id});
});
req.db.samples.count({$or: orConditions}, function(err, count){
req.db.samples.find(mongoQuery, {}, function (err, docs){
var response = beacon.checkResultAndGetResponse(req.query, docs, count);
response.NOT_BEACON_totalInDataSet = count;
req.db.samples.aggregate(mongoQuery, function(err, docs) {
var response = beacon.checkResultAndGetResponse(req.query, docs);
res.json(response);
});
});
/**
 * GET /v0.3/dataset?id=<'all' | comma separated dataset identifiers>
 *
 * Responds with the datasets matching the identifier(s), each with its
 * sample count. Validation failures are answered with the validation
 * message, an empty result with "No dataset found", and a failing database
 * call with HTTP status 500.
 */
router.get('/v0.3/dataset', function (req, res) {

    var preconditions = beacon.checkDatasetIdentifier(req.query);
    if (preconditions.hasError) {
        res.json(preconditions.msg);
        return;
    }

    var mongoQuery = beacon.buildMongoDatasetQuery(req.query);
    console.log("Building MongoDB query params: " + JSON.stringify(mongoQuery));

    req.db.samples.aggregate(mongoQuery, function(err, docs){
        // A database failure must not be reported as "No dataset found".
        if (err) {
            console.error("Dataset aggregation failed: " + err);
            res.status(500).json("Internal error while querying datasets");
            return;
        }

        if (docs && docs.length > 0) {
            var response = beacon.checkDatasetResultAndGetResponse(req.query, docs);
            res.json(response);
        }
        else {
            res.json("No dataset found");
        }
    });
});
module.exports = router;
\ No newline at end of file
/**
* Created by sduvaud on 25/05/16.
*/
// Scratch aggregation, presumably meant to be run in the mongo shell (`db` is
// not defined in this file — TODO confirm): counts, per ICDMORPHOLOGYCODE, the
// samples that have a SEGMENTS_HG38 segment on chromosome 11 with SEGTYPE -1
// covering position 34439881, restricted to the two listed codes.
var query = db.samples.aggregate(
[
{
"$match": {
"$and":
[
{
"SEGMENTS_HG38":{
"$elemMatch":{"CHRO":"11","SEGTYPE":-1,"SEGSTOP":{"$gte":34439881},"SEGSTART":{"$lte":34439881}}
}
},
{
"$or":[
{
"ICDMORPHOLOGYCODE":"8041/3"
},
{
"ICDMORPHOLOGYCODE":"8070/3"
}
]
}
]
}
},
{
"$group": {
_id: "$ICDMORPHOLOGYCODE",
// Number of samples with at least one matching segment.
// Datasets without any matching sample produce no group,
// which is an issue when the user asks for all datasets.
total : { $sum : 1 }
}
}
]);
// Iterates over the result groups; the printing call is currently disabled,
// so the callback does nothing.
query.forEach(function (match) {
//prettyPrint(match);
})
var query2 = db.samples.aggregate(
[
{