Commit 325ce288 authored by Robin Engler's avatar Robin Engler
Browse files

Fix file encoding format detection under Windows OS

parent 72925589
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
# declaration is made at the top level. # declaration is made at the top level.
# Set application version. # Set application version.
POSTINFORM_VERSION <<- '0.2.1' POSTINFORM_VERSION <<- '0.2.2'
# Define default parameter values. # Define default parameter values.
DEFAULT_CELL_COMPARTMENT = 'nucleus' DEFAULT_CELL_COMPARTMENT = 'nucleus'
...@@ -20,7 +20,7 @@ AUTHORIZED_STROMA_VALUES <<- c('DAPI', 'stroma', 'other') ...@@ -20,7 +20,7 @@ AUTHORIZED_STROMA_VALUES <<- c('DAPI', 'stroma', 'other')
AUTHORIZED_TUMOR_VALUES <<- c('CK', 'tumor') AUTHORIZED_TUMOR_VALUES <<- c('CK', 'tumor')
AUTHORIZED_MARKERS <<- c('CAL', 'CD3', 'CD4', 'CD8', 'CD11c', 'CD15', 'CD20', 'CD56', 'CD68', AUTHORIZED_MARKERS <<- c('CAL', 'CD3', 'CD4', 'CD8', 'CD11c', 'CD15', 'CD20', 'CD56', 'CD68',
'CD103', 'CD163', 'CD206', 'FOXP3', 'GB', 'gH2AX', 'gH2AXN', 'IDO', 'CD103', 'CD163', 'CD206', 'FOXP3', 'GB', 'gH2AX', 'gH2AXN', 'IDO',
'IL10R', 'Keratin', 'KI67', 'PD1', 'PDL1', 'PERFORIN', 'SOX10', 'IL10R', 'Keratin', 'Ki67', 'PD1', 'PDL1', 'PERFORIN', 'SOX10',
'WT1', 'CK', 'VISTA') 'WT1', 'CK', 'VISTA')
IGNORED_PHENOTYPES <<- c('DAPIp', 'MISSING') IGNORED_PHENOTYPES <<- c('DAPIp', 'MISSING')
NO_PHENOTYPE <<- 'MISSING' NO_PHENOTYPE <<- 'MISSING'
......
#################################################################################################### ####################################################################################################
#' Checks for duplicated rows in the input data frame and raises and error if duplicates are found.
#'
#' @param data_frame [data frame] Data frame for which to check for duplicated rows.
#'
check_for_duplicated_rows <- function(data_frame){ check_for_duplicated_rows <- function(data_frame){
# ********************************************************************************************
# Checks for duplicated rows in the input data frame and raises and error if duplicates are
# found.
# ********************************************************************************************
duplicated_rows = which(duplicated(data_frame, MARGIN=1)) duplicated_rows = which(duplicated(data_frame, MARGIN=1))
if(length(duplicated_rows) > 0) raise_error( if(length(duplicated_rows) > 0) raise_error(
msg = "Duplicated rows found in input data:", msg = "Duplicated rows found in input data:",
...@@ -309,15 +309,12 @@ cell_type_statistics <- function(image_id, tissue_type, cell_types, cell_table){ ...@@ -309,15 +309,12 @@ cell_type_statistics <- function(image_id, tissue_type, cell_types, cell_table){
#################################################################################################### ####################################################################################################
#' Find-out host operating system running the function.
#'
guess_host_os = function(){ guess_host_os = function(){
# ******************************************************************************************** # Case 1: name of operating system can be retrieved with "Sys.info()".
# Determine what is the operating system of the machine executing this function. # This is the usual case.
#
# ********************************************************************************************
# Get name of host operating system.
sysinf = Sys.info() sysinf = Sys.info()
# Case 1: name of opperating system can be retrieved with "Sys.info()". This is the usual case.
if(!is.null(sysinf)){ if(!is.null(sysinf)){
os <- sysinf['sysname'] os <- sysinf['sysname']
if(os == 'Darwin'){ if(os == 'Darwin'){
...@@ -348,6 +345,7 @@ guess_host_os = function(){ ...@@ -348,6 +345,7 @@ guess_host_os = function(){
#' #'
#' @param input_file #' @param input_file
#' @return [string] file encoding. #' @return [string] file encoding.
#'
guess_file_encoding = function(input_file){ guess_file_encoding = function(input_file){
# Make a system call to identify the encoding of input_file. Since the system call depends on # Make a system call to identify the encoding of input_file. Since the system call depends on
...@@ -361,7 +359,7 @@ guess_file_encoding = function(input_file){ ...@@ -361,7 +359,7 @@ guess_file_encoding = function(input_file){
file_start = paste(readBin(con=file_connection, what='raw', n=2), collapse='') file_start = paste(readBin(con=file_connection, what='raw', n=2), collapse='')
close(file_connection) close(file_connection)
# UTF-8 with BOM starts with hexadecimal characters "ef bb". UTF-16 LE with "ff fe". # UTF-8 with BOM starts with hexadecimal characters "ef bb". UTF-16 LE with "ff fe".
if(file_start == 'efbb') return("utf-8-bom") if(file_start == 'efbb') return("utf-8")
if(file_start == 'fffe') return("utf-16le") if(file_start == 'fffe') return("utf-16le")
return("utf-8") return("utf-8")
} }
...@@ -379,18 +377,17 @@ guess_file_encoding = function(input_file){ ...@@ -379,18 +377,17 @@ guess_file_encoding = function(input_file){
#################################################################################################### ####################################################################################################
generate_global_summary_table = function(sample_output_list, #' Generate a global summary table for cell counts or cell densities for all samples. The user can
cell_measurement, #' select to have columns ordered by cell type or by tissue type.
column_order){ #'
# ******************************************************************************************** #' @param sample_output_list [list] List of "sample output" objects, as produced by the
# Generate a global summary table for cell counts or cell densities for all samples. #' "run_sample()" function.
# The user can select to have columns ordered by cell type or by tissue type. #' @param cell_measurement [str] Either 'count' or 'density'.
# #' @param column_order [str] Either 'by_cell_type' or 'by_tissue'.
# Input arguments: #'
# sample_output_list: list of "sample output" objects, as produced by "run_sample" function. generate_global_summary_table = function(sample_output_list, cell_measurement, column_order){
# cell_measurement : either 'count' or 'density'.
# column_order : either 'by_cell_type' or 'by_tissue'. # Input checks.
# ********************************************************************************************
stopifnot(length(sample_output_list) > 0) stopifnot(length(sample_output_list) > 0)
required_items = c('summary_count','summary_density', 'summary_table') required_items = c('summary_count','summary_density', 'summary_table')
stopifnot(sapply(sample_output_list, FUN=function(x) all(required_items %in% names(x)) )) stopifnot(sapply(sample_output_list, FUN=function(x) all(required_items %in% names(x)) ))
...@@ -424,13 +421,13 @@ generate_global_summary_table = function(sample_output_list, ...@@ -424,13 +421,13 @@ generate_global_summary_table = function(sample_output_list,
#################################################################################################### ####################################################################################################
#' Creates a summary file in Microsoft Excel format. The excel file contains the 'summary' and
#' 'count' tables for each sample in separate worksheets. It also contains global summary tables
#' for cell counts and densities.
#'
generate_excel_summary_file = function(sample_output_list, output_file_name){ generate_excel_summary_file = function(sample_output_list, output_file_name){
# ********************************************************************************************
# Create a summary file in Microsoft Excel format. # Input checks.
# The excel file contains the 'summary' and 'count' tables for each sample in separate
# worksheets. It also contains global summary tables for cell counts and densities.
#
# ********************************************************************************************
stopifnot(length(sample_output_list) > 0) stopifnot(length(sample_output_list) > 0)
# Add global summary tables to the Excel file (i.e. workbook). # Add global summary tables to the Excel file (i.e. workbook).
...@@ -589,10 +586,9 @@ decompress_file <- function(input_file, allow_overwrite=FALSE, dry_run=FALSE){ ...@@ -589,10 +586,9 @@ decompress_file <- function(input_file, allow_overwrite=FALSE, dry_run=FALSE){
#################################################################################################### ####################################################################################################
#' Computes time elapsed between "start" and "end" times.
#'
time_difference <- function(start, end){ time_difference <- function(start, end){
# ********************************************************************************************
#
# ********************************************************************************************
diff_in_seconds = round(as.numeric(end) - as.numeric(start)) diff_in_seconds = round(as.numeric(end) - as.numeric(start))
hours = floor(diff_in_seconds / 3600) hours = floor(diff_in_seconds / 3600)
minutes = floor((diff_in_seconds - (hours * 3600)) / 60) minutes = floor((diff_in_seconds - (hours * 3600)) / 60)
...@@ -606,14 +602,12 @@ time_difference <- function(start, end){ ...@@ -606,14 +602,12 @@ time_difference <- function(start, end){
#################################################################################################### ####################################################################################################
#' Print message to stdout and/or a log file.
#'
log_message <- function(message, level=1, padding='### ', log_message <- function(message, level=1, padding='### ',
add_empty_line = FALSE, log_to_file=TRUE, print_to_stdout=TRUE){ add_empty_line = FALSE, log_to_file=TRUE, print_to_stdout=TRUE){
# ********************************************************************************************
#
# ********************************************************************************************
message = paste0(padding, switch(level,'','-> '), message) message = paste0(padding, switch(level,'','-> '), message)
if(add_empty_line) message = c(message, padding) if(add_empty_line) message = c(message, padding)
if(log_to_file) write(message, LOG_FILE, append=T) if(log_to_file) write(message, LOG_FILE, append=T)
if(print_to_stdout) cat(message, sep='\n') if(print_to_stdout) cat(message, sep='\n')
} }
...@@ -622,16 +616,16 @@ log_message <- function(message, level=1, padding='### ', ...@@ -622,16 +616,16 @@ log_message <- function(message, level=1, padding='### ',
#################################################################################################### ####################################################################################################
#' Displays am error or warning message.
#'
#' @param msg [string vector] One or more error messages to display.
#' @param file [string] Optional, name of file in which the error occurred.
#' @param items_to_list [string vector] Optional, items to list in the error message.
#' @param type [string] Either 'error' or 'warning'.
#'
raise_error <- function(msg, file='', items_to_list=NULL, type='error'){ raise_error <- function(msg, file='', items_to_list=NULL, type='error'){
# ********************************************************************************************
# Displays am error or warning message. # Set printed message prefix (i.e. padding) based on the type of error.
#
# Input arguments:
# msg : string vector. One or more error messages to display.
# file : string. Optional file in which the error occurred.
# items_to_list: string vector. Items to list in the error message.
# type : either 'error' or 'warning'.
# ********************************************************************************************
if(type == 'error'){ if(type == 'error'){
prefix = 'ERROR: ' prefix = 'ERROR: '
log_padding = '' log_padding = ''
......
...@@ -241,19 +241,22 @@ string_to_vector <- function(input_string){ ...@@ -241,19 +241,22 @@ string_to_vector <- function(input_string){
#################################################################################################### ####################################################################################################
#' Read a "thresholds.txt" file from disk. These files contain the threshold values for each
#' sample and each scored marker.
#'
#' @param input_file [str] Path of threshold file to load.
#' @param markers_scored [str vector] List of markers that should be found in the input file.
#' @param sample_names [str vector] List of samples that should be found in the input file.
#' @param tissue_types [str vector] Optional. List of tissue categories samples that should be
#' found in the input file.
#' @param rewrite_input [bool] If TRUE, the original input file is replaced with the curated
# content produced by the function.
#'
load_thresholds_file <- function(input_file, load_thresholds_file <- function(input_file,
markers_scored, markers_scored,
sample_names, sample_names,
tissue_types, tissue_types,
rewrite_input=FALSE){ rewrite_input=FALSE){
# ********************************************************************************************
# Input arguments:
# input_file : string. Path + name of threshold file to load.
# markers_scored: string vector. List of markers that should be found in the input file.
# sample_names : string vector. List of samples that should be found in the input file.
# rewrite_input : If TRUE, the original input file is replaced with the (possibly) curated
# content produced by the function.
# ********************************************************************************************
# Load input file and standardize column names. # Load input file and standardize column names.
# ******************************************** # ********************************************
...@@ -271,8 +274,8 @@ load_thresholds_file <- function(input_file, ...@@ -271,8 +274,8 @@ load_thresholds_file <- function(input_file,
# Standardize column names. # Standardize column names.
col_names = file_content[[1]] col_names = file_content[[1]]
col_names[grep('^sample', col_names, ignore.case=T)] = 'sample_name' col_names[grep('sample', col_names, ignore.case=T)] = 'sample_name'
col_names[grep('^tissue', col_names, ignore.case=T)] = 'tissue_type' col_names[grep('tissue', col_names, ignore.case=T)] = 'tissue_type'
if(col_names[1] != 'sample_name') raise_error( if(col_names[1] != 'sample_name') raise_error(
msg = "The first column of the threshold file must be 'sample_name'.", msg = "The first column of the threshold file must be 'sample_name'.",
file = input_file) file = input_file)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment