Commit 1a25c8c7 authored by Robin Engler
Browse files

Remove 'cell_id' from list of key fields used to merge individual marker files

parent a2f08b97
...@@ -51,7 +51,7 @@ merge_cell_data_files <- function(files_to_merge){ ...@@ -51,7 +51,7 @@ merge_cell_data_files <- function(files_to_merge){
stopifnot(length(files_to_merge) > 1) stopifnot(length(files_to_merge) > 1)
# Define columns that will be used as key fields during the merge. # Define columns that will be used as key fields during the merge.
key_fields = c('sample_name', 'image_id', 'cell_id', 'cell_x_position', 'cell_y_position') key_fields = c('sample_name', 'image_id', 'cell_x_position', 'cell_y_position')
# Loop through all files that match the prefix + suffix combination. Load and check the # Loop through all files that match the prefix + suffix combination. Load and check the
# content of each file, then add it to the merged data frame. # content of each file, then add it to the merged data frame.
...@@ -112,7 +112,7 @@ merge_cell_data_files <- function(files_to_merge){ ...@@ -112,7 +112,7 @@ merge_cell_data_files <- function(files_to_merge){
for(x in 1:ncol(marker_int_df)) marker_int_df[,x] = as.numeric(marker_int_df[,x]) for(x in 1:ncol(marker_int_df)) marker_int_df[,x] = as.numeric(marker_int_df[,x])
# Remove the extracted columns from the merged data frame. # Remove the extracted columns from the merged data frame.
merged_df = merged_df[,1:length(key_fields)] merged_df = merged_df[,which(colnames(merged_df) %in% c(key_fields, 'cell_id'))]
# Combine data from all "Tissue Category" columns. # Combine data from all "Tissue Category" columns.
...@@ -120,8 +120,8 @@ merge_cell_data_files <- function(files_to_merge){ ...@@ -120,8 +120,8 @@ merge_cell_data_files <- function(files_to_merge){
# In principle, the "Tissue Category" columns of all individual markers should contain the # In principle, the "Tissue Category" columns of all individual markers should contain the
# same value. Here we verify that this is the case and then keep only one copy of them. # same value. Here we verify that this is the case and then keep only one copy of them.
if(any(tissue_cat_df != tissue_cat_df[,1])){ if(any(tissue_cat_df != tissue_cat_df[,1])){
diff_rows = unlist(sapply(2:ncol(tissue_cat_df), diff_rows = sort(unique(unlist(sapply(2:ncol(tissue_cat_df),
FUN=function(x) which(tissue_cat_df[x] != tissue_cat_df[,1]))) FUN=function(x) which(tissue_cat_df[x] != tissue_cat_df[,1])))))
for(x in diff_rows){ for(x in diff_rows){
value_frequency = sort(table(as.character(tissue_cat_df[x,])), decreasing=T) value_frequency = sort(table(as.character(tissue_cat_df[x,])), decreasing=T)
stopifnot(length(value_frequency) >= 2) stopifnot(length(value_frequency) >= 2)
...@@ -148,9 +148,7 @@ merge_cell_data_files <- function(files_to_merge){ ...@@ -148,9 +148,7 @@ merge_cell_data_files <- function(files_to_merge){
} }
stopifnot(all(tissue_cat_df == tissue_cat_df[,1])) stopifnot(all(tissue_cat_df == tissue_cat_df[,1]))
} }
# Add tissue category values to the merged data frame.
# Add tissue category values to merged dataframe.
merged_df[,'tissue_category'] = tissue_cat_df[,1] merged_df[,'tissue_category'] = tissue_cat_df[,1]
...@@ -164,7 +162,7 @@ merge_cell_data_files <- function(files_to_merge){ ...@@ -164,7 +162,7 @@ merge_cell_data_files <- function(files_to_merge){
merged_df[,'phenotype'] = apply(phenotype_df, MARGIN=1, merged_df[,'phenotype'] = apply(phenotype_df, MARGIN=1,
FUN=function(x) gsub(pattern = regexp, FUN=function(x) gsub(pattern = regexp,
replacement = '', replacement = '',
x = paste(unique(x), collapse='_')) ) x = paste(unique(x), collapse='_')))
# Combine data from all marker intensity columns. # Combine data from all marker intensity columns.
...@@ -185,7 +183,7 @@ merge_cell_data_files <- function(files_to_merge){ ...@@ -185,7 +183,7 @@ merge_cell_data_files <- function(files_to_merge){
msg=c(paste0('Values for column [', col_name, '] differ accross individual files'), msg=c(paste0('Values for column [', col_name, '] differ accross individual files'),
paste0('to merge by more than ', tolerance_limit, ' at ', paste0('to merge by more than ', tolerance_limit, ' at ',
length(differing_values), ' occurences [', length(differing_values), ' occurences [',
length(differing_values)/nrow(marker_int_df)*100, ' %].'), round(length(differing_values)/nrow(marker_int_df)*100, 3), '%].'),
'Values from the first file (alphabetically) will be used.'), 'Values from the first file (alphabetically) will be used.'),
file=dirname(files_to_merge[1]), file=dirname(files_to_merge[1]),
type = 'warning') type = 'warning')
......
...@@ -207,9 +207,9 @@ standardize_and_split_cell_data <- function(input_file, ...@@ -207,9 +207,9 @@ standardize_and_split_cell_data <- function(input_file,
out_file_exists = file.exists(out_file) out_file_exists = file.exists(out_file)
write.table(data.frame('sample_name' = sample_names[rows_to_keep], write.table(data.frame('sample_name' = sample_names[rows_to_keep],
'image_id' = image_ids[rows_to_keep], 'image_id' = image_ids[rows_to_keep],
'cell_id' = input_table[rows_to_keep, 'cell_id'],
'cell_x_position' = input_table[rows_to_keep, 'cell_x_position'], 'cell_x_position' = input_table[rows_to_keep, 'cell_x_position'],
'cell_y_position' = input_table[rows_to_keep, 'cell_y_position'], 'cell_y_position' = input_table[rows_to_keep, 'cell_y_position'],
'cell_id' = input_table[rows_to_keep, 'cell_id'],
'tissue_category' = tissue_type_values[rows_to_keep], 'tissue_category' = tissue_type_values[rows_to_keep],
'phenotype' = phenotype_values[rows_to_keep], 'phenotype' = phenotype_values[rows_to_keep],
input_table[rows_to_keep, marker_column_list], input_table[rows_to_keep, marker_column_list],
......
...@@ -93,7 +93,7 @@ postinform <- function(input_file_or_dir, ...@@ -93,7 +93,7 @@ postinform <- function(input_file_or_dir,
# ******************************************************** # ********************************************************
# If the input is a compressed .zip or .tar.gz file, decompress it. # If the input is a compressed .zip or .tar.gz file, decompress it.
if(!file.info(input_file_or_dir)$isdir){ if(!file.info(input_file_or_dir)$isdir){
message("Decompressing ", input_file_or_dir, " ...") message("Decompressing ", input_file_or_dir, "...")
input_dir = decompress_file(input_file_or_dir) input_dir = decompress_file(input_file_or_dir)
} }
...@@ -178,9 +178,11 @@ postinform_pipeline <- function(input_dir, ...@@ -178,9 +178,11 @@ postinform_pipeline <- function(input_dir,
# - search for cell and tissue segmentation files in sub-directories or the session root. # - search for cell and tissue segmentation files in sub-directories or the session root.
log_message('Input data check:') log_message('Input data check:')
log_message(paste('input directory:', input_dir), level=2) log_message(paste('input directory:', input_dir), level=2)
#delete_unnecessary_files(input_dir) #delete_unnecessary_files(input_dir)
inputdir_check(input_dir, output_dir) inputdir_check(input_dir, output_dir)
log_message('input dir check: OK', level=2) log_message('input dir check: OK', level=2)
input_parameters = load_session_parameters(output_dir) input_parameters = load_session_parameters(output_dir)
log_message('session parameters: OK', level=2) log_message('session parameters: OK', level=2)
log_message('completed', level=2, add_empty_line=TRUE) log_message('completed', level=2, add_empty_line=TRUE)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment