Commit 1a25c8c7 authored by Robin Engler
Browse files

Remove 'cell_id' from list of key fields used to merge individual marker files

parent a2f08b97
......@@ -51,7 +51,7 @@ merge_cell_data_files <- function(files_to_merge){
stopifnot(length(files_to_merge) > 1)
# Define columns that will be used as key fields during the merge.
key_fields = c('sample_name', 'image_id', 'cell_id', 'cell_x_position', 'cell_y_position')
key_fields = c('sample_name', 'image_id', 'cell_x_position', 'cell_y_position')
# Loop through all files that match the prefix + suffix combination. Load and check the
# content of each file, then add it to the merged data frame.
......@@ -112,7 +112,7 @@ merge_cell_data_files <- function(files_to_merge){
for(x in 1:ncol(marker_int_df)) marker_int_df[,x] = as.numeric(marker_int_df[,x])
# Remove the extracted columns from the merged data frame.
merged_df = merged_df[,1:length(key_fields)]
merged_df = merged_df[,which(colnames(merged_df) %in% c(key_fields, 'cell_id'))]
# Combine data from all "Tissue Category" columns.
......@@ -120,8 +120,8 @@ merge_cell_data_files <- function(files_to_merge){
# In principle, the "Tissue Category" columns of all individual markers should contain the
# same value. Here we verify that this is the case and then keep only one copy of them.
if(any(tissue_cat_df != tissue_cat_df[,1])){
diff_rows = unlist(sapply(2:ncol(tissue_cat_df),
FUN=function(x) which(tissue_cat_df[x] != tissue_cat_df[,1])))
diff_rows = sort(unique(unlist(sapply(2:ncol(tissue_cat_df),
FUN=function(x) which(tissue_cat_df[x] != tissue_cat_df[,1])))))
for(x in diff_rows){
value_frequency = sort(table(as.character(tissue_cat_df[x,])), decreasing=T)
stopifnot(length(value_frequency) >= 2)
......@@ -148,9 +148,7 @@ merge_cell_data_files <- function(files_to_merge){
}
stopifnot(all(tissue_cat_df == tissue_cat_df[,1]))
}
# Add tissue category values to merged dataframe.
# Add tissue category values to the merged data frame.
merged_df[,'tissue_category'] = tissue_cat_df[,1]
......@@ -164,7 +162,7 @@ merge_cell_data_files <- function(files_to_merge){
merged_df[,'phenotype'] = apply(phenotype_df, MARGIN=1,
FUN=function(x) gsub(pattern = regexp,
replacement = '',
x = paste(unique(x), collapse='_')) )
x = paste(unique(x), collapse='_')))
# Combine data from all marker intensity columns.
......@@ -185,7 +183,7 @@ merge_cell_data_files <- function(files_to_merge){
msg=c(paste0('Values for column [', col_name, '] differ accross individual files'),
paste0('to merge by more than ', tolerance_limit, ' at ',
length(differing_values), ' occurences [',
length(differing_values)/nrow(marker_int_df)*100, ' %].'),
round(length(differing_values)/nrow(marker_int_df)*100, 3), '%].'),
'Values from the first file (alphabetically) will be used.'),
file=dirname(files_to_merge[1]),
type = 'warning')
......
......@@ -207,9 +207,9 @@ standardize_and_split_cell_data <- function(input_file,
out_file_exists = file.exists(out_file)
write.table(data.frame('sample_name' = sample_names[rows_to_keep],
'image_id' = image_ids[rows_to_keep],
'cell_id' = input_table[rows_to_keep, 'cell_id'],
'cell_x_position' = input_table[rows_to_keep, 'cell_x_position'],
'cell_y_position' = input_table[rows_to_keep, 'cell_y_position'],
'cell_id' = input_table[rows_to_keep, 'cell_id'],
'tissue_category' = tissue_type_values[rows_to_keep],
'phenotype' = phenotype_values[rows_to_keep],
input_table[rows_to_keep, marker_column_list],
......
......@@ -93,7 +93,7 @@ postinform <- function(input_file_or_dir,
# ********************************************************
# If the input is a compressed .zip or .tar.gz file, decompress it.
if(!file.info(input_file_or_dir)$isdir){
message("Decompressing ", input_file_or_dir, " ...")
message("Decompressing ", input_file_or_dir, "...")
input_dir = decompress_file(input_file_or_dir)
}
......@@ -178,9 +178,11 @@ postinform_pipeline <- function(input_dir,
# - search for cell and tissue segmentation files in sub-directories or the session root.
log_message('Input data check:')
log_message(paste('input directory:', input_dir), level=2)
#delete_unnecessary_files(input_dir)
inputdir_check(input_dir, output_dir)
log_message('input dir check: OK', level=2)
input_parameters = load_session_parameters(output_dir)
log_message('session parameters: OK', level=2)
log_message('completed', level=2, add_empty_line=TRUE)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment