Commit a632d519 authored by Severine Duvaud
Browse files

Latest implementation of Beacon 0.3 + handling of datasets

parent 37b166f9
...@@ -6,4 +6,8 @@ beacon.checkResultAndGetResponse = require('./beacon-query.js').checkResultAndGe ...@@ -6,4 +6,8 @@ beacon.checkResultAndGetResponse = require('./beacon-query.js').checkResultAndGe
beacon.checkPreconditions = require('./beacon-query.js').checkPreconditions beacon.checkPreconditions = require('./beacon-query.js').checkPreconditions
beacon.buildMongoQuery = require('./beacon-query.js').buildMongoQuery beacon.buildMongoQuery = require('./beacon-query.js').buildMongoQuery
beacon.checkDatasetIdentifier = require('./beacon-dataset.js').checkDatasetIdentifier
beacon.buildMongoDatasetQuery = require('./beacon-dataset.js').buildMongoDatasetQuery
beacon.checkDatasetResultAndGetResponse = require('./beacon-dataset.js').checkDatasetResultAndGetResponse
module.exports = beacon; module.exports = beacon;
\ No newline at end of file
/**
* Created by sduvaud on 26/05/16.
*/
var info = require('./beacon-info.js').info;
// Identifiers of every dataset listed under info.info.datasets of the
// beacon info document; used as the expected list when 'all' is requested.
var allDatasetIds = info['info']['info']['datasets'].map(function (dataset) {
    return dataset.id;
});
/**
 * Validates the `id` parameter of a dataset request.
 *
 * buildMongoDatasetQuery splits the identifier on commas, so it must be a
 * non-empty string. Query-string parsers may deliver an array or an object
 * instead (e.g. `?id=a&id=b`); such values are rejected here rather than
 * being allowed to fail on `.split` later.
 *
 * @param {Object} params - request parameters; only `params.id` is read.
 * @returns {{hasError: boolean, msg: (string|undefined)}} `hasError` is true,
 *   with an explanatory `msg`, when the identifier is missing or not a string.
 */
function checkDatasetIdentifier(params) {
    // Tolerate a missing params object instead of throwing on property access.
    var id = params ? params.id : undefined;

    if (!id) {
        return {
            hasError: true,
            msg: "No identifier specified"
        };
    }
    if (typeof id !== 'string') {
        return {
            hasError: true,
            msg: "Identifier must be a comma separated string"
        };
    }
    return {
        hasError: false
    };
}
/**
 * Builds the MongoDB aggregation pipeline for a dataset request.
 *
 * @param {Object} params - request parameters; `params.id` is either 'all'
 *   or a comma separated list of ICDMORPHOLOGYCODE values.
 * @returns {Array<Object>} pipeline stages: a `$group` stage on its own for
 *   'all', otherwise a `$match` stage restricting to the listed codes
 *   followed by the same `$group` stage.
 */
function buildMongoDatasetQuery(params) {
    console.log("ID=" + params.id);

    // One output document per ICDMORPHOLOGYCODE: the number of grouped
    // documents and the first ICDMORPHOLOGY value seen for that code.
    var groupStage = {
        $group: {
            _id: "$ICDMORPHOLOGYCODE",
            sampleCount: {$sum: 1},
            name: {$first: "$ICDMORPHOLOGY"}
        }
    };

    if (params.id == 'all') {
        return [groupStage];
    }

    // One equality condition per requested code, combined with $or.
    var matchStage = {
        $match: {
            $or: params.id.split(',').map(function (code) {
                return {ICDMORPHOLOGYCODE: code};
            })
        }
    };

    return [matchStage, groupStage];
}
/**
 * Completes an aggregation result with a placeholder entry for every
 * requested dataset that the database did not return.
 *
 * The requested identifiers are `allDatasetIds` when `params.id` is 'all',
 * otherwise the comma separated values of `params.id`. Each requested
 * identifier without a matching `_id` in `datasets` is appended once as
 * `{_id, sampleCount: 0, name: "Dataset not found"}`.
 *
 * Every requested identifier is checked individually. Comparing only the
 * number of requested and returned datasets is not sufficient, because for
 * 'all' the database may return codes that are not in `allDatasetIds`.
 *
 * @param {Object} params - request parameters; only `params.id` is read.
 * @param {Array<Object>} datasets - aggregation output, one object per
 *   dataset with an `_id` property. This array is extended in place; a
 *   missing value is treated as an empty result.
 * @returns {Array<Object>} `datasets` (the same array instance when one was
 *   given) including the appended "not found" entries.
 */
function checkDatasetResultAndGetResponse(params, datasets) {
    var result = datasets || [];

    // Lookup table of identifiers already present in the result.
    // Object.create(null) avoids clashes with Object.prototype keys.
    var known = Object.create(null);
    result.forEach(function (dataset) {
        if (dataset._id != null) {
            known[String(dataset._id)] = true;
        }
    });

    // Loose comparison kept on purpose so 'all' is recognised exactly as in
    // buildMongoDatasetQuery.
    var requested = (params.id != 'all') ? params.id.split(',') : allDatasetIds;

    requested.forEach(function (id) {
        var key = String(id);
        if (known[key]) {
            return;
        }
        // Mark as handled so a repeated identifier is reported only once.
        known[key] = true;
        result.push({
            "_id": id,
            "sampleCount": 0,
            "name": "Dataset not found"
        });
    });

    return result;
}
// Public API of this module: request validation, aggregation pipeline
// construction and post-processing of the aggregation result.
module.exports.checkDatasetIdentifier = checkDatasetIdentifier;
module.exports.buildMongoDatasetQuery = buildMongoDatasetQuery;
module.exports.checkDatasetResultAndGetResponse = checkDatasetResultAndGetResponse;
\ No newline at end of file
var info = require('./beacon-info.js').info var info = require('./beacon-info.js').info;
//check https://genome.ucsc.edu/FAQ/FAQreleases.html
/*var referenceMap = {
'GRCh38': 'SEGMENTS_HG38',
'GRCh37': 'SEGMENTS_HG19'
};*/
// store all the dataset identifiers
// not used when a list of datasets is entered
// find a better place for this?
var allDatasetIds = [];
info['info']['info']['datasets'].forEach(function(dataset){
allDatasetIds.push(dataset.id);
});
var referenceMap = { var referenceMap = {
'GRCh38': 'SEGMENTS_HG38', 'GRCh38': 'SEGMENTS_HG38',
...@@ -69,6 +70,16 @@ function checkPreconditions(params) { ...@@ -69,6 +70,16 @@ function checkPreconditions(params) {
}; };
} }
if (params.datasetIds != 'all') {
var error = checkDatasetIdentifiers(params.datasetIds, allDatasetIds);
if (error.length > 0) {
return {
hasError: true,
msg: "Incorrect dataset(s): " + error.toString()
};
}
}
if (!params.alternateBases) { if (!params.alternateBases) {
return { return {
hasError: true, hasError: true,
...@@ -83,14 +94,14 @@ function checkPreconditions(params) { ...@@ -83,14 +94,14 @@ function checkPreconditions(params) {
}; };
} }
if (params.length && !Number(params.length)) { if (params.minlength && !Number(params.minlength)) {
return { return {
hasError: true, hasError: true,
msg: "min. length not a number" msg: "min. length not a number"
}; };
} }
if (params.maxLength && !Number(params.maxLength)) { if (params.maxlength && !Number(params.maxlength)) {
return { return {
hasError: true, hasError: true,
msg: "max length not a number" msg: "max length not a number"
...@@ -104,6 +115,29 @@ function checkPreconditions(params) { ...@@ -104,6 +115,29 @@ function checkPreconditions(params) {
}; };
} }
/*
* Latest version of API v-0.3
*
* BeaconAlleleRequest:
* referenceName
* start
* alternateBases
* assemblyId
* datasetIds
*
* BeaconAlleleResponse
* beaconId
* beaconAlleleRequest
* beacondatasetAlleleResponse[]
* beaconError
*
* BeaconDatasetAlleleResponse
* datasetId
* exists
* beaconError
* note
*
* */
function buildMongoQuery(params) { function buildMongoQuery(params) {
var position = parseInt(params.start); var position = parseInt(params.start);
...@@ -146,7 +180,6 @@ function buildMongoQuery(params) { ...@@ -146,7 +180,6 @@ function buildMongoQuery(params) {
//elem match element //elem match element
var condElemMatch = condition[convertedReference]['$elemMatch']; var condElemMatch = condition[convertedReference]['$elemMatch'];
if (minLength > 0) { //There is a minLength if (minLength > 0) { //There is a minLength
if (maxLength > 0) { //There is always a minLenght, if there is a max length specified (see code above) if (maxLength > 0) { //There is always a minLenght, if there is a max length specified (see code above)
condElemMatch.SEGSIZE = { $gte: minLength,$lte: maxLength } // min. and max lengths condElemMatch.SEGSIZE = { $gte: minLength,$lte: maxLength } // min. and max lengths
...@@ -155,7 +188,7 @@ function buildMongoQuery(params) { ...@@ -155,7 +188,7 @@ function buildMongoQuery(params) {
condElemMatch.SEGSIZE = { $gte: minLength }; // min. length condElemMatch.SEGSIZE = { $gte: minLength }; // min. length
condElemMatch.SEGSTART = position; // exact position condElemMatch.SEGSTART = position; // exact position
} }
}else { //If there is a position specifiec }else { //If there is a position specific
condElemMatch.SEGSTOP = { $gte: position }; condElemMatch.SEGSTOP = { $gte: position };
condElemMatch.SEGSTART = { $lte: position }; condElemMatch.SEGSTART = { $lte: position };
} }
...@@ -165,66 +198,106 @@ function buildMongoQuery(params) { ...@@ -165,66 +198,106 @@ function buildMongoQuery(params) {
andConditions.push({$or: orConditions}); andConditions.push({$or: orConditions});
} }
return { return [
$and: andConditions {
}; "$match": {
"$and": andConditions
}
},
{
"$group": {
_id: "$ICDMORPHOLOGYCODE",
// Number of samples with at least one segment
// Datasets with no sample are not shown
// This is of an issue when the user asks for
// all datasets.
observed : { $sum : 1 }
}
}
];
} }
function checkResultAndGetResponse(params, samples, countTotal) { function checkResultAndGetResponse(params, datasets) {
var responseResource = { var length = 0;
"exists": null, if (typeof params.minlength != undefined && params.minlength != null && params.minlength != '')
"error": null, {
"note": null length = parseInt(params.minlength);
if (params.alternateBases == 'DUP') {
responseResource.note = "Length provided but not considered when querying DUP";
}
} }
if (samples && samples.length > 0) { // a value was found by mongodb var maxLength = 0;
if (typeof params.maxlength != undefined && params.maxlength != null && params.maxlength != '')
{
maxLength = parseInt(params.maxlength);
}
// SD, May 2016: according to the specs, observed parameter was removed from BeaconDatasetAlleleResponse. var responses = [];
// We could divide the sample.length by the total number of samples in order to fill the frequency parameters
// for instance (and if of interest for arrayMap).
// responseResource.observed = samples.length;
responseResource.NOT_BEACON_numberMatchedSamples = samples.length;
var matchedSegments = samples.map(function (s) { return checkResult(params, s)}); // The query returns only datasets for which there is at least one sample with at least one matching SEGMENT...
// What should we do with "unmatched" datasets?
// Should we show them, as well with an exists: false?
// what about cases where all the datasets are requested?
if (!matchedSegments || !matchedSegments[0] || !matchedSegments[0].matchedSegment) { if (datasets && datasets.length > 0)
responseResource.exists = null; {
// BeaconError instead of simple string // var matchedDatasets = datasets.map(function (s) { return checkResult(params, s)}); // checks to be added!
// (error is not null if and only of exists is null) var matchedDatasets = checkResult(params, datasets);
responseResource.error = { var response;
errorCode: 500,
message: "Internal error, DB returned a value but post check is not valid." if (typeof matchedDatasets == undefined && matchedDatasets == null) {
response = {
"exists": null,
"datasetId": null,
"sampleCount": 0,
"error": {
errorCode: 500,
message: "Internal error, DB returned a value but post check is not valid."
},
"note": null
}; };
} else { responses.push(response);
responseResource.exists = true; }else {
responseResource.NOT_BEACON_ARRAYMAP_DEBUG_INFO = {"matchedSegments" : matchedSegments}; matchedDatasets.forEach(function (dataset) {
if (dataset.observed == 0) {
response = {
"exists": false,
"error": null,
"note": null,
"datasetId": dataset._id,
"sampleCount": 0
};
}else {
response = {
"exists": true,
"error": null,
"note": null,
"datasetId": dataset._id,
"sampleCount": dataset.observed
};
}
responses.push(response);
})
} }
} }else {
else {
response = {
"exists": false,
"datasetId": null,
"sampleCount": 0,
"error": null,
};
if (!alternateBasesMap[params.alternateBases]) { if (!alternateBasesMap[params.alternateBases]) {
responseResource.exists = false; response.note = "Type of variant not supported by arrayMap.";
responseResource.note = "Type of variant not supported by arrayMap.";
} }
else { else {
responseResource.exists = false; response.note = "No result from arrayMap.";
responseResource.note = "No result from arrayMap.";
} }
}
var length = 0; responses.push(response);
if (typeof params.length != undefined && params.length != null && params.length != '')
{
length = parseInt(params.length);
if (params.alternateBases == 'DUP') {
responseResource.note = "Length provided but not considered when querying DUP";
}
}
var maxLength = 0;
if (typeof params.maxLength != undefined && params.maxLength != null && params.maxLength != '')
{
maxLength = parseInt(params.maxLength);
} }
var queryResource = { var queryResource = {
...@@ -234,45 +307,64 @@ function checkResultAndGetResponse(params, samples, countTotal) { ...@@ -234,45 +307,64 @@ function checkResultAndGetResponse(params, samples, countTotal) {
"datasetIds": params.datasetIds, "datasetIds": params.datasetIds,
"alternateBases": params.alternateBases, "alternateBases": params.alternateBases,
"length": length, "length": length,
"maxLength": maxLength "maxlength": maxLength
}; };
// BeaconAlleleResponse
return { return {
"beacon_id": info.id, "beaconId": info['info']['info']['id'],
"response": responseResource, "datasetAlleleResponses": responses,
"query": queryResource "alleleRequest": queryResource,
"error": null
}; };
} }
function checkResult(params, sample) { // checks to be added!
// Pending for now!
function checkResult(params, datasets) {
var convertedReference = referenceMap[params.assemblyId || defaultReference]; var allDatasets = datasets;
var expectedSegType = alternateBasesMap[params.alternateBases]
var position = parseInt(params.start);
//TODO should check for datasets and genome reference as well var matchedIdentifiers = [];
var foundSegment; datasets.forEach(function(dataset){
matchedIdentifiers.push(dataset._id);
});
sample[convertedReference].forEach(function (segment) { var submittedIdentifiers = allDatasetIds;
if (segment.CHRO === params.referenceName) { if (params.datasetIds != 'all') {
submittedIdentifiers = params.datasetIds.split(',');
}
var posStart = parseInt(segment.SEGSTART); var missedIdentifiers = checkDatasetIdentifiers(submittedIdentifiers.toString(), matchedIdentifiers);
var posStop = parseInt(segment.SEGSTOP); if (missedIdentifiers != null) {
var segType = parseInt(segment.SEGTYPE); missedIdentifiers.forEach(function (missed) {
var missedObj = {
"_id": missed,
"observed": 0
}
allDatasets.push(missedObj);
});
}
return allDatasets;
}
if ((segType == expectedSegType) && (posStart <= position) && (position <= posStop)) { function checkDatasetIdentifiers(userDatasetIdentifiers, allDatasetIdentifiers) {
foundSegment = segment; var identifiers = userDatasetIdentifiers.split(','); // comma separated list of datasets
return; //exit loop var found;
var error = [];
identifiers.forEach(function (id) {
found = false;
allDatasetIdentifiers.forEach(function (all) {
if (all == id) {
found = true;
} }
})
if (!found) {
error.push(id);
} }
}) });
var result = { return error;
matchedSampleUID: sample.UID,
matchedDataSet: sample.ICDMORPHOLOGYCODE,
matchedSegment: foundSegment
}
return result;
} }
module.exports.checkPreconditions = checkPreconditions; module.exports.checkPreconditions = checkPreconditions;
......
...@@ -25,7 +25,12 @@ arrayMap.controller('BeaconController', ['$scope', '$location', function ($scope ...@@ -25,7 +25,12 @@ arrayMap.controller('BeaconController', ['$scope', '$location', function ($scope
"alternateBases": "DEL", "alternateBases": "DEL",
"minlength": '', "minlength": '',
"maxlength": '' "maxlength": ''
} };
$scope.datasetConfig = {
"id": "all"
};
$scope.getNewApiUrl = function () { $scope.getNewApiUrl = function () {
...@@ -45,6 +50,13 @@ arrayMap.controller('BeaconController', ['$scope', '$location', function ($scope ...@@ -45,6 +50,13 @@ arrayMap.controller('BeaconController', ['$scope', '$location', function ($scope
return $location.absUrl() + "info" return $location.absUrl() + "info"
}; };
$scope.getDatasetUrl = function () {
var conf = $scope.datasetConfig;
return $location.absUrl() + "v0.3/dataset?" +
"id=" + conf.id;
};
$scope.getApiUrl = function () { $scope.getApiUrl = function () {
var conf = $scope.config; var conf = $scope.config;
......
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
<h1>Beacon ArrayMap</h1> <h1>Beacon ArrayMap</h1>
<p>First Prototype of a <a target="_blank" href="https://genomicsandhealth.org/work-products-demonstration-projects/beacon-project-0">Beacon</a> <p>First Prototype of a <a target="_blank" href="https://genomicsandhealth.org/work-products-demonstration-projects/beacon-project-0">Beacon</a>
<a target="_blank" href="https://docs.google.com/document/d/1n7qtCBFwsExP_k2GPfWS_PN53Xeh8YxeCBOgyZ5itOw/edit#"> v0.3</a> implementation for <a target="_blank" href="http://arraymap.org/">ArrayMap</a>.</p> <a target="_blank" href="https://docs.google.com/document/d/1n7qtCBFwsExP_k2GPfWS_PN53Xeh8YxeCBOgyZ5itOw/edit#"> v0.3</a> implementation for <a target="_blank" href="http://arraymap.org/">ArrayMap</a>.</p>
<p>See <a href="documentation">documentation and open questions (OUTDATED)</a>.</p>
</div> </div>
<form class="form-horizontal"> <form class="form-horizontal">
<div class="form-group"> <div class="form-group">
...@@ -86,5 +85,10 @@ ...@@ -86,5 +85,10 @@
</div> </div>
</div> </div>
<div class="form-group">
<div class="col-sm-offset-2 col-sm-8">
<a target="beacon" ng-href="{{getDatasetUrl()}}">{{getDatasetUrl()}}</a>
</div>
</div>
</form> </form>
</div> </div>
\ No newline at end of file
...@@ -73,26 +73,36 @@ router.get('/v0.3/query/', function (req, res) { ...@@ -73,26 +73,36 @@ router.get('/v0.3/query/', function (req, res) {
res.json(preconditions.msg); //Gets error messages res.json(preconditions.msg); //Gets error messages
return; return;
} }
var mongoQuery = beacon.buildMongoQuery(req.query); var mongoQuery = beacon.buildMongoQuery(req.query);
console.log("Building MongoDB query params: " + JSON.stringify(mongoQuery)); console.log("Building MongoDB query params: " + JSON.stringify(mongoQuery));
// Count: list of datasets (and not 1 dataset) req.db.samples.aggregate(mongoQuery, function(err, docs) {
var orConditions = []; var response = beacon.checkResultAndGetResponse(req.query, docs);
var identifiers = req.query.datasetIds.split(','); // comma separated list of datasets res.json(response);
identifiers.forEach(function (id) {