Commit 40dadf5f authored by Robin Engler's avatar Robin Engler
Browse files

Add support for scored markers

parent bb2cd129
...@@ -18,11 +18,13 @@ AUTHORIZED_TISSUES <<- c('stroma', 'tumor', 'dermis', 'epidermis', 'melano ...@@ -18,11 +18,13 @@ AUTHORIZED_TISSUES <<- c('stroma', 'tumor', 'dermis', 'epidermis', 'melano
AUTHORIZED_COMPARTMENTS <<- c('nucleus', 'membrane', 'cytoplasm', 'entire_cell') AUTHORIZED_COMPARTMENTS <<- c('nucleus', 'membrane', 'cytoplasm', 'entire_cell')
AUTHORIZED_STROMA_VALUES <<- c('DAPI', 'stroma', 'other') AUTHORIZED_STROMA_VALUES <<- c('DAPI', 'stroma', 'other')
AUTHORIZED_TUMOR_VALUES <<- c('CK', 'tumor') AUTHORIZED_TUMOR_VALUES <<- c('CK', 'tumor')
AUTHORIZED_MARKERS <<- c('CAL', 'CD3', 'CD4', 'CD8', 'CD11C', 'CD15', 'CD20', 'CD56', 'CD68', AUTHORIZED_MARKERS <<- c('CAL', 'CD3', 'CD4', 'CD8', 'CD11c', 'CD15', 'CD20', 'CD56', 'CD68',
'CD103', 'CD163', 'CD206', 'FOXP3', 'GB', 'gH2AX', 'gH2AXN', 'IDO', 'CD103', 'CD163', 'CD206', 'FOXP3', 'GB', 'gH2AX', 'gH2AXN', 'IDO',
'IL10R', 'Keratin', 'KI67', 'PD1', 'PDL1', 'PERFORIN', 'SOX10', 'IL10R', 'Keratin', 'KI67', 'PD1', 'PDL1', 'PERFORIN', 'SOX10',
'WT1', 'CK', 'VISTA') 'WT1', 'CK', 'VISTA')
IGNORED_PHENOTYPES <<- c('DAPIp', 'MISSING') IGNORED_PHENOTYPES <<- c('DAPIp', 'MISSING')
NO_PHENOTYPE <<- 'MISSING'
NO_PHENOTYPE_SYNONYMS <<- c('ARTEFACT', 'ARTEFACTS', 'OTHER')
DATAREDUCE_SCRIPT <<- file.path(dirname(dirname(sys.frame(1)$ofile)), DATAREDUCE_SCRIPT <<- file.path(dirname(dirname(sys.frame(1)$ofile)),
'inst/bash/reduce_file_size.sh') 'inst/bash/reduce_file_size.sh')
......
This diff is collapsed.
...@@ -131,19 +131,19 @@ merge_cell_data_files <- function(files_to_merge){ ...@@ -131,19 +131,19 @@ merge_cell_data_files <- function(files_to_merge){
if(as.numeric(value_frequency[1]) > as.numeric(value_frequency[2])){ if(as.numeric(value_frequency[1]) > as.numeric(value_frequency[2])){
tissue_cat_df[x,] = names(value_frequency)[1] tissue_cat_df[x,] = names(value_frequency)[1]
if(SHOW_TISSUE_CATEGORY_MISMATCH_WARNING) raise_error( if(SHOW_TISSUE_CATEGORY_MISMATCH_WARNING) raise_error(
msg = c(paste0('Tissue_category values differ across files. ', msg = c(paste0('[tissue_category] values differ across files. ',
'Values were reconciled based on majority ruling.'), 'Values were reconciled based on majority ruling.'),
paste0('Offending row: ', x)), paste0('Offending row: ', x)),
file=files_to_merge[1], file = files_to_merge[1],
type = 'warning') type = 'warning')
# Case 2: majority ruling is not possible # Case 2: majority ruling is not possible.
} else{ } else{
raise_error( raise_error(
msg = c('Could not merge individual marker files.', msg = c('Could not merge individual marker files.',
'Reason: tissue_category values differ across files with no majority.', 'Reason: tissue_category values differ across files with no majority.',
paste0('Offending row: ', x), paste0('Offending row: ', x),
paste0('Offending values:', paste(tissue_cat_df[x,], collapse=' '))), paste0('Offending values: ', paste(tissue_cat_df[x,], collapse=' '))),
file = files_to_merge[1]) file = files_to_merge[1])
} }
} }
......
...@@ -36,6 +36,8 @@ inputdir_check <- function(input_dir, output_dir){ ...@@ -36,6 +36,8 @@ inputdir_check <- function(input_dir, output_dir){
# Verify that input parameters and sample rename files are present, and if needed rename them. # Verify that input parameters and sample rename files are present, and if needed rename them.
rename_file_by_pattern(file_name=PARAMETERS_FILE, pattern='param', dir_name=input_dir, rename_file_by_pattern(file_name=PARAMETERS_FILE, pattern='param', dir_name=input_dir,
out_dir=output_dir, raise_error_if_absent=TRUE) out_dir=output_dir, raise_error_if_absent=TRUE)
rename_file_by_pattern(file_name=THRESHOLDS_FILE, pattern='threshold', dir_name=input_dir,
out_dir=output_dir, raise_error_if_absent=FALSE)
rename_file_by_pattern(file_name=SAMPLE_RENAME_FILE, pattern='rename', dir_name=input_dir, rename_file_by_pattern(file_name=SAMPLE_RENAME_FILE, pattern='rename', dir_name=input_dir,
out_dir=output_dir, raise_error_if_absent=FALSE) out_dir=output_dir, raise_error_if_absent=FALSE)
return(invisible(NULL)) return(invisible(NULL))
...@@ -104,7 +106,6 @@ standardize_and_split_cell_data <- function(input_file, ...@@ -104,7 +106,6 @@ standardize_and_split_cell_data <- function(input_file,
phenotype_confidence_threshold, phenotype_confidence_threshold,
delete_input_file = FALSE){ delete_input_file = FALSE){
# ******************************************************************************************** # ********************************************************************************************
#
# Differences between inForm versions: # Differences between inForm versions:
# version 2.2 # version 2.2
# - *_tissue_seg_data_summary.txt files contain a column named "Region Area (pixels)". # - *_tissue_seg_data_summary.txt files contain a column named "Region Area (pixels)".
...@@ -113,7 +114,7 @@ standardize_and_split_cell_data <- function(input_file, ...@@ -113,7 +114,7 @@ standardize_and_split_cell_data <- function(input_file,
# - a new column named "Annotation ID" is added to *_cell_seg_data.txt. # - a new column named "Annotation ID" is added to *_cell_seg_data.txt.
# - *_tissue_seg_data_summary.txt files contain column named "Region Area (square microns)" # - *_tissue_seg_data_summary.txt files contain column named "Region Area (square microns)"
# - in addition the "Sample ID" column of the *_cell_seg_data.txt files no longer contains # - in addition the "Sample ID" column of the *_cell_seg_data.txt files no longer contains
# the imageID values. These are now present in the "Annotation ID" column. # the image ID values. These are now present in the "Annotation ID" column.
# #
# #
# Input arguments: # Input arguments:
...@@ -125,7 +126,7 @@ standardize_and_split_cell_data <- function(input_file, ...@@ -125,7 +126,7 @@ standardize_and_split_cell_data <- function(input_file,
# delete_input_file: if TRUE, the input_file is deleted after it was split by samples. # delete_input_file: if TRUE, the input_file is deleted after it was split by samples.
# ******************************************************************************************** # ********************************************************************************************
# Load input table. Verifty it is not empty and standardize the column names. # Load input table. Verify it is not empty and standardize the column names.
input_table = read.table(input_file, sep='\t', as.is=T, header=T, input_table = read.table(input_file, sep='\t', as.is=T, header=T,
colClasses='character', check.names=T, strip.white=T) colClasses='character', check.names=T, strip.white=T)
if(nrow(input_table) == 0) raise_error('Input file has zero rows.', file = input_file) if(nrow(input_table) == 0) raise_error('Input file has zero rows.', file = input_file)
...@@ -148,7 +149,10 @@ standardize_and_split_cell_data <- function(input_file, ...@@ -148,7 +149,10 @@ standardize_and_split_cell_data <- function(input_file,
sample_names = extract_sample_name(input_table[,'sample_name'], input_file=input_file) sample_names = extract_sample_name(input_table[,'sample_name'], input_file=input_file)
input_table = input_table[sample_names %in% samples,] input_table = input_table[sample_names %in% samples,]
sample_names = sample_names[sample_names %in% samples] sample_names = sample_names[sample_names %in% samples]
if(nrow(input_table) == 0) raise_error('Input file has zero rows.', file = input_file) if(nrow(input_table) == 0) raise_error(
msg = 'No matching values found in [sample_name] column for any of the input samples',
file = input_file,
items_to_list = samples)
# Extract image ID values. If the "annotation_id" column is present (inForm 2.4), extract # Extract image ID values. If the "annotation_id" column is present (inForm 2.4), extract
...@@ -173,7 +177,7 @@ standardize_and_split_cell_data <- function(input_file, ...@@ -173,7 +177,7 @@ standardize_and_split_cell_data <- function(input_file,
file_values_are_from = input_file) file_values_are_from = input_file)
# Reclass 'phenotype_values' for rows where confidence < phenotype_confidence_threshold # Re-class 'phenotype_values' for rows where confidence < phenotype_confidence_threshold
# to the value of 'MISSING'. # to the value of 'MISSING'.
phenotype_values[which(confidence_values < phenotype_confidence_threshold)] = 'MISSING' phenotype_values[which(confidence_values < phenotype_confidence_threshold)] = 'MISSING'
...@@ -352,17 +356,17 @@ standardize_and_split_tissue_data <- function(input_file, ...@@ -352,17 +356,17 @@ standardize_and_split_tissue_data <- function(input_file,
#################################################################################################### ####################################################################################################
#' Standardize column names of input files.
#'
#' @param column_names [string vector] Names of columns to standardize.
#' @param input_file [string] Path and name of file from which the columns were taken. Only used to
#' display an error message.
#' @return Standardized column names.
standardize_column_names = function(column_names, input_file){ standardize_column_names = function(column_names, input_file){
# ********************************************************************************************
#
# Input arguments:
# - column_names: string vector. Names of columns to standardize.
# - input_file: file name from which the columns were taken. Only used to display an error
# message.
# ********************************************************************************************
# Replace any '.' in column names by an '_'. The '.' are generally introduced in column names # Replace any '.' in column names by an '_'. The '.' are generally introduced in column names
# by R as a replacement of a non-authorized character such as a blank space or a bracket. # by R as a replacement of a non-authorized character such as a blank space or a bracket.
# For readability, multiple '.' are replaced by a single '_'.
column_names = gsub(pattern='\\.+', replacement='_', x=column_names) column_names = gsub(pattern='\\.+', replacement='_', x=column_names)
column_names = gsub(pattern='_+$', replacement='', x=column_names) column_names = gsub(pattern='_+$', replacement='', x=column_names)
...@@ -382,10 +386,18 @@ standardize_column_names = function(column_names, input_file){ ...@@ -382,10 +386,18 @@ standardize_column_names = function(column_names, input_file){
for(i in grep(paste0(col_start_regexp, '.*_mean_.*'), x=column_names)){ for(i in grep(paste0(col_start_regexp, '.*_mean_.*'), x=column_names)){
marker_name = sub(col_start_regexp, '', column_names[i]) marker_name = sub(col_start_regexp, '', column_names[i])
marker_name = sub('_.*$', '', marker_name) marker_name = sub('_.*$', '', marker_name)
# If the marker is present in the AUTHORIZED_MARKERS list, correct its capitalization if
# needed.
x = which(toupper(marker_name) == toupper(AUTHORIZED_MARKERS))
stopifnot(length(x) <= 1)
marker_name = ifelse(length(x) == 0, marker_name, AUTHORIZED_MARKERS[x])
# Rename column.
column_names[i] = paste0(marker_name, '_mean') column_names[i] = paste0(marker_name, '_mean')
} }
# Verify there is no duplicate column. # Verify there are no duplicate columns.
duplicated_columns = which(duplicated(column_names)) duplicated_columns = which(duplicated(column_names))
if(length(duplicated_columns) > 0) raise_error( if(length(duplicated_columns) > 0) raise_error(
msg = 'Duplicated column names found in input file:', msg = 'Duplicated column names found in input file:',
...@@ -454,6 +466,8 @@ check_and_fix_phenotype_values <- function(phenotype_values, ...@@ -454,6 +466,8 @@ check_and_fix_phenotype_values <- function(phenotype_values,
type = 'warning') type = 'warning')
} }
# Replace
# Substitute '-' with '_' in Phenotype values. This is for the case where a '-' was used as # Substitute '-' with '_' in Phenotype values. This is for the case where a '-' was used as
# separator value instead of a '_'. # separator value instead of a '_'.
phenotype_values = gsub(pattern='-', replacement='_', phenotype_values) phenotype_values = gsub(pattern='-', replacement='_', phenotype_values)
...@@ -473,11 +487,15 @@ check_and_fix_phenotype_values <- function(phenotype_values, ...@@ -473,11 +487,15 @@ check_and_fix_phenotype_values <- function(phenotype_values,
ignored = IGNORED_PHENOTYPES)) == 0) return(phenotype_values) ignored = IGNORED_PHENOTYPES)) == 0) return(phenotype_values)
# Replace accepted NO_PHENOTYPE synonym values with NO_PHENOTYPE.
phenotype_values[which(toupper(phenotype_values) %in%
toupper(c(NO_PHENOTYPE, NO_PHENOTYPE_SYNONYMS)))] = NO_PHENOTYPE
# Replace accepted stroma and tumor synonyms with 'DAPIp' and 'CKp' respectively. # Replace accepted stroma and tumor synonyms with 'DAPIp' and 'CKp' respectively.
for(x in 1:2) phenotype_values = gsub( for(x in 1:2) phenotype_values = gsub(
pattern=paste0(rep(switch(x, AUTHORIZED_STROMA_VALUES, AUTHORIZED_TUMOR_VALUES), each=2), pattern=paste0(rep(switch(x, AUTHORIZED_STROMA_VALUES, AUTHORIZED_TUMOR_VALUES), each=2),
c('', 'p'), collapse='|'), c('', 'p'), collapse='|'),
replacement=switch(x,'DAPIp','CKp'), x=phenotype_values, ignore.case=TRUE) replacement=switch(x, 'DAPIp', 'CKp'), x=phenotype_values, ignore.case=TRUE)
# Correct capitalization and add a 'p' suffix (for 'positive') to any marker missing it. # Correct capitalization and add a 'p' suffix (for 'positive') to any marker missing it.
...@@ -678,15 +696,13 @@ check_marker_is_authorized <- function(marker_list, marker_type){ ...@@ -678,15 +696,13 @@ check_marker_is_authorized <- function(marker_list, marker_type){
#################################################################################################### ####################################################################################################
#' Test whether a file contains "individual marker" values. This is simply done by looking at the
#' file name. The convention is that individual marker files will contain at least one marker name
#' in their file name.
#' The function returns the list of markers in the file name, or character(0) if none is found.
markers_in_file_name <- function(file_name){ markers_in_file_name <- function(file_name){
# ********************************************************************************************
# Test whether a file contains "individual marker" values. This is simply done by looking at
# the file name. The convention is that individual marker files will contain at least one
# marker name in their file name.
# The function returns the list of markers in the file name, or character(0) if none is found.
# ********************************************************************************************
return(sort(as.character(names(unlist( return(sort(as.character(names(unlist(
sapply(AUTHORIZED_MARKERS, FUN=function(x) grep(x, file_name, ignore.case=T))))))) sapply(AUTHORIZED_MARKERS, FUN=function(x) grep(x, basename(file_name), ignore.case=T)))))))
} }
#################################################################################################### ####################################################################################################
......
...@@ -53,12 +53,16 @@ generate_parameter_file_from_legacy_input <- function(session_root_dir){ ...@@ -53,12 +53,16 @@ generate_parameter_file_from_legacy_input <- function(session_root_dir){
} else scored_markers = c(scored_markers, marker) } else scored_markers = c(scored_markers, marker)
} }
# Rename thresholds file. # Rename thresholds file, or delete it if there are no scored markers.
file.rename(threshold_file, file.path(session_root_dir, THRESHOLDS_FILE)) if(length(scored_markers) > 0){
file.rename(threshold_file, file.path(session_root_dir, THRESHOLDS_FILE))
} else{
file.remove(threshold_file)
}
# Create new "parameters.txt" file. # Create new "parameters.txt" file.
file_connection = file(file.path(session_root_dir, "parameters.txt"), open="w") file_connection = file(file.path(session_root_dir, "parameters.txt"), open='w')
writeLines(paste0("# Auto-generated parameter file for ", basename(session_root_dir), "."), writeLines(paste0("# Auto-generated parameter file for ", basename(session_root_dir), '.'),
con=file_connection) con=file_connection)
writeLines("samples:", con=file_connection) writeLines("samples:", con=file_connection)
for(x in samples) writeLines(x, con=file_connection) for(x in samples) writeLines(x, con=file_connection)
......
...@@ -31,7 +31,7 @@ load_session_parameters <- function(session_root_dir){ ...@@ -31,7 +31,7 @@ load_session_parameters <- function(session_root_dir){
# ************ # ************
# Check that at least one tissue type was provided and that all tissues are part of the # Check that at least one tissue type was provided and that all tissues are part of the
# authorized list. # authorized list.
if(! 'tissues' %in% names(arg_values) ) raise_error(msg="Parameter 'tissues' is missing.", if(! 'tissues' %in% names(arg_values)) raise_error(msg="Parameter 'tissues' is missing.",
file = PARAMETERS_FILE) file = PARAMETERS_FILE)
arg_values[['tissues']] = tolower(arg_values[['tissues']]) arg_values[['tissues']] = tolower(arg_values[['tissues']])
tissues = unique(arg_values[['tissues']]) tissues = unique(arg_values[['tissues']])
......
...@@ -26,7 +26,6 @@ ...@@ -26,7 +26,6 @@
#' @return nothing. #' @return nothing.
#' @examples #' @examples
#' postinform(input_file_or_dir=input_file, delete_input=FALSE, immucan_output=FALSE) #' postinform(input_file_or_dir=input_file, delete_input=FALSE, immucan_output=FALSE)
#'
postinform <- function(input_file_or_dir, postinform <- function(input_file_or_dir,
command = 'process', command = 'process',
output_suffix = '', output_suffix = '',
...@@ -106,8 +105,8 @@ postinform <- function(input_file_or_dir, ...@@ -106,8 +105,8 @@ postinform <- function(input_file_or_dir,
# Run the Post-inForm pipeline # Run the post-inForm pipeline
# ************************** # ****************************
log_message(paste(rep('#', 80), collapse=''), padding='') log_message(paste(rep('#', 80), collapse=''), padding='')
log_message(paste0('Starting Post-inForm - version ', POSTINFORM_VERSION)) log_message(paste0('Starting Post-inForm - version ', POSTINFORM_VERSION))
log_message('**********************************') log_message('**********************************')
...@@ -178,8 +177,6 @@ postinform_pipeline <- function(input_dir, ...@@ -178,8 +177,6 @@ postinform_pipeline <- function(input_dir,
# - search for cell and tissue segmentation files in sub-directories or the session root. # - search for cell and tissue segmentation files in sub-directories or the session root.
log_message('Input data check:') log_message('Input data check:')
log_message(paste('input directory:', input_dir), level=2) log_message(paste('input directory:', input_dir), level=2)
#delete_unnecessary_files(input_dir)
inputdir_check(input_dir, output_dir) inputdir_check(input_dir, output_dir)
log_message('input dir check: OK', level=2) log_message('input dir check: OK', level=2)
...@@ -406,9 +403,9 @@ run_sample <- function(sample_name, input_parameters){ ...@@ -406,9 +403,9 @@ run_sample <- function(sample_name, input_parameters){
thresholds = input_parameters$thresholds thresholds = input_parameters$thresholds
# Keep only the thresholds for the current sample. # Keep only the thresholds for the current sample.
thresholds = thresholds[which(thresholds[,'sample_name'] == sample_name), ] thresholds = thresholds[which(thresholds[,'sample_name'] == sample_name), ]
thresholds_subset = as.matrix(thresholds[sample_name,,]) #thresholds_subset = as.matrix(thresholds[sample_name,,])
rownames(thresholds_subset) = dimnames(thresholds)[[2]] #rownames(thresholds_subset) = dimnames(thresholds)[[2]]
colnames(thresholds_subset) = dimnames(thresholds)[[3]] #colnames(thresholds_subset) = dimnames(thresholds)[[3]]
} else thresholds = NULL } else thresholds = NULL
...@@ -434,7 +431,7 @@ run_sample <- function(sample_name, input_parameters){ ...@@ -434,7 +431,7 @@ run_sample <- function(sample_name, input_parameters){
# Load cell segmentation data for current sample. # Load cell segmentation data for current sample.
# ********************************************** # **********************************************
# Load data and convert marker intensity colums to numeric values. # Load data and convert marker intensity columns to numeric values.
cell_table = read.table(file=file.path(session_root_dir, sample_name, cell_table = read.table(file=file.path(session_root_dir, sample_name,
paste0(sample_name, CELL_FILES_EXTENSION)), paste0(sample_name, CELL_FILES_EXTENSION)),
sep='\t', as.is=T, h=T, sep='\t', as.is=T, h=T,
...@@ -462,17 +459,25 @@ run_sample <- function(sample_name, input_parameters){ ...@@ -462,17 +459,25 @@ run_sample <- function(sample_name, input_parameters){
# Reclassify cells based on marker intensity (scoring) or phenotype data (phenotyping). # Reclassify cells based on marker intensity (scoring) or phenotype data (phenotyping).
# ************************************************************************************ # ************************************************************************************
# For each marker, a cell is reclassifed as either 1 (positive for marker) or 0 (negative). # For each marker, a cell is reclassified as either 1 (positive for marker) or 0 (negative).
# Reclassification is done through phenotyping for markers part of the 'markers_phenotyped' # Reclassification is done through phenotyping for markers in the 'markers_phenotyped' list,
# list, and by scoring for markers part of the 'markers_scored' list. # and by scoring for markers in the 'markers_scored' list.
tmp = sapply(c(markers_phenotyped, markers_scored), tmp = NULL
function(x) reclass_cells_by_marker( # Reclass phenotyped markers, if any.
marker = x, if(length(markers_phenotyped) > 0) tmp = sapply(
cell_values = switch(ifelse(x %in% markers_phenotyped, 1, 2), markers_phenotyped,
cell_table[,'phenotype'], FUN = function(marker) reclass_cells_by_marker_phenotyped(marker, cell_table[,'phenotype']))
cell_table[,paste0(x,'_mean')]),
thresholds = thresholds)) # Reclass scored markers, if any.
colnames(tmp) = paste(colnames(tmp), '_reclassified', sep='') if(length(markers_scored) > 0) tmp = cbind(tmp, sapply(
markers_scored,
FUN = function(marker) reclass_cells_by_marker_scored(marker,
cell_table[,paste0(marker,'_mean')],
cell_table[,'tissue_category'],
thresholds)))
stopifnot(ncol(tmp) == length(c(markers_phenotyped, markers_scored)))
colnames(tmp) = paste0(colnames(tmp), '_reclassified')
cell_table = cbind(cell_table, tmp) cell_table = cbind(cell_table, tmp)
...@@ -493,12 +498,9 @@ run_sample <- function(sample_name, input_parameters){ ...@@ -493,12 +498,9 @@ run_sample <- function(sample_name, input_parameters){
# and standard deviation values, for each tissue type, cell type and image ID combination. # and standard deviation values, for each tissue type, cell type and image ID combination.
# Note: I timed this loop with both for() and lapply(), and for() was a few seconds faster. # Note: I timed this loop with both for() and lapply(), and for() was a few seconds faster.
stat_table = NULL stat_table = NULL
for(image_id in c('Total',image_ids)){ for(image_id in c('Total', image_ids)){
for(tissue_type in tissue_list){ for(tissue_type in tissue_list) stat_table = rbind(
stat_table = rbind(stat_table, stat_table, cell_type_statistics(image_id, tissue_type, cell_types, cell_table))
cell_type_statistics(cell_type=cell_types, tissue_type=tissue_type,
image_id=image_id, cell_table=cell_table))
}
} }
stopifnot(nrow(stat_table) == nrow(summary_table)) stopifnot(nrow(stat_table) == nrow(summary_table))
stopifnot(stat_table$ImageID == summary_table$ImageID) stopifnot(stat_table$ImageID == summary_table$ImageID)
...@@ -514,7 +516,8 @@ run_sample <- function(sample_name, input_parameters){ ...@@ -514,7 +516,8 @@ run_sample <- function(sample_name, input_parameters){
# "CD11cp_total". # "CD11cp_total".
row_id = which(summary_table[,'ImageID'] == summary_table[x,'ImageID'] & row_id = which(summary_table[,'ImageID'] == summary_table[x,'ImageID'] &
summary_table[,'TissueType'] == summary_table[x,'TissueType'] & summary_table[,'TissueType'] == summary_table[x,'TissueType'] &
summary_table[,'CellType'] == paste0(summary_table[x,'CellType'], '_total')) summary_table[,'CellType'] == paste0(summary_table[x,'CellType'],
'_total'))
# exactly one match should be found. # exactly one match should be found.
stopifnot(length(row_id) == 1) stopifnot(length(row_id) == 1)
# "_total" cell type must be >= the cell count of the regular cell type. # "_total" cell type must be >= the cell count of the regular cell type.
...@@ -530,23 +533,23 @@ run_sample <- function(sample_name, input_parameters){ ...@@ -530,23 +533,23 @@ run_sample <- function(sample_name, input_parameters){
# millimeters. # millimeters.
for(image_id in c('Total', image_ids)){ for(image_id in c('Total', image_ids)){
for(tissue in tissue_list){ for(tissue in tissue_list){
row_id = which(summary_table$ImageID == image_id & summary_table$TissueType == tissue) row_id = which(summary_table$ImageID == image_id & summary_table$TissueType == tissue)
row_total = which(summary_table$ImageID == image_id & row_total = which(summary_table$ImageID == image_id &
summary_table$TissueType == tissue & summary_table$TissueType == tissue &
summary_table$CellType == 'Total') summary_table$CellType == 'Total')
# Compute surface in pixels for each cell type. # Compute surface in pixels for each cell type.
cell_type_surface_as_proportion = switch((summary_table[row_total, 'CellCount'] == 0) + 1, cell_type_surface_as_proportion = switch((summary_table[row_total, 'CellCount'] == 0)+1,
summary_table[row_id,'CellCount'] / summary_table[row_id,'CellCount'] /
summary_table[row_total,'CellCount'], 0) summary_table[row_total,'CellCount'], 0)
total_tissue_surface = summary_table[row_total, 'SurfaceMM2'] total_tissue_surface = summary_table[row_total, 'SurfaceMM2']
summary_table[row_id, 'SurfaceMM2'] = round(cell_type_surface_as_proportion * summary_table[row_id, 'SurfaceMM2'] = round(cell_type_surface_as_proportion *
total_tissue_surface) total_tissue_surface)
# Compute cell density for the entire tissue for each cell type. # Compute cell density for the entire tissue for each cell type.
summary_table[row_id, 'CellDensity'] = switch((total_tissue_surface == 0) + 1, summary_table[row_id, 'CellDensity'] = switch((total_tissue_surface == 0) + 1,
round(summary_table[row_id,'CellCount'] / round(summary_table[row_id,'CellCount'] /
total_tissue_surface * 1e+06, 3), 0) total_tissue_surface*1e+06, 3), 0)
} }
} }
# Verify no NA value is left in the surface and cell density columns of the summary table. # Verify no NA value is left in the surface and cell density columns of the summary table.
...@@ -560,14 +563,14 @@ run_sample <- function(sample_name, input_parameters){ ...@@ -560,14 +563,14 @@ run_sample <- function(sample_name, input_parameters){
# Compute cell count statistics. # Compute cell count statistics.
# ****************************** # ******************************
# Compute a number of statistics for the cell counts of each cell type accross the image # Compute a number of statistics for the cell counts of each cell type across the image
# subsets of the sample. The statistics we compute are the following: # subsets of the sample. The statistics we compute are the following:
# - Total cell count accross all image subsets (this is also available in the summary_table). # - Total cell count across all image subsets (this is also available in the summary_table).
# - Mean value of cell counts accross all image subsets. # - Mean value of cell counts across all image subsets.
# - Median " " " # - Median " " "
# - Min " " " # - Min " " "
# - Max " " " # - Max " " "
# - Standard deviation " " " # - Standard deviation " " "
count_stat_table = summary_table[summary_table$ImageID=='Total', count_stat_table = summary_table[summary_table$ImageID=='Total',
c('SampleName','CellType','TissueType','CellCount')] c('SampleName','CellType','TissueType','CellCount')]
names(count_stat_table)[4] = 'TotalCount' names(count_stat_table)[4] = 'TotalCount'
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment