Commit 325ce288 authored by Robin Engler's avatar Robin Engler
Browse files

Fix file encoding format detection under Windows OS

parent 72925589
......@@ -3,7 +3,7 @@
# declaration is made at the top level.
# Set application version.
POSTINFORM_VERSION <<- '0.2.1'
POSTINFORM_VERSION <<- '0.2.2'
# Define default parameter values.
DEFAULT_CELL_COMPARTMENT = 'nucleus'
......@@ -20,7 +20,7 @@ AUTHORIZED_STROMA_VALUES <<- c('DAPI', 'stroma', 'other')
AUTHORIZED_TUMOR_VALUES <<- c('CK', 'tumor')
AUTHORIZED_MARKERS <<- c('CAL', 'CD3', 'CD4', 'CD8', 'CD11c', 'CD15', 'CD20', 'CD56', 'CD68',
'CD103', 'CD163', 'CD206', 'FOXP3', 'GB', 'gH2AX', 'gH2AXN', 'IDO',
'IL10R', 'Keratin', 'KI67', 'PD1', 'PDL1', 'PERFORIN', 'SOX10',
'IL10R', 'Keratin', 'Ki67', 'PD1', 'PDL1', 'PERFORIN', 'SOX10',
'WT1', 'CK', 'VISTA')
IGNORED_PHENOTYPES <<- c('DAPIp', 'MISSING')
NO_PHENOTYPE <<- 'MISSING'
......
####################################################################################################
#' Checks for duplicated rows in the input data frame and raises and error if duplicates are found.
#'
#' @param data_frame [data frame] Data frame for which to check for duplicated rows.
#'
check_for_duplicated_rows <- function(data_frame){
# ********************************************************************************************
# Checks for duplicated rows in the input data frame and raises and error if duplicates are
# found.
# ********************************************************************************************
duplicated_rows = which(duplicated(data_frame, MARGIN=1))
if(length(duplicated_rows) > 0) raise_error(
msg = "Duplicated rows found in input data:",
......@@ -309,15 +309,12 @@ cell_type_statistics <- function(image_id, tissue_type, cell_types, cell_table){
####################################################################################################
#' Find-out host operating system running the function.
#'
guess_host_os = function(){
# ********************************************************************************************
# Determine what is the operating system of the machine executing this function.
#
# ********************************************************************************************
# Get name of host operating system.
# Case 1: name of operating system can be retrieved with "Sys.info()".
# This is the usual case.
sysinf = Sys.info()
# Case 1: name of opperating system can be retrieved with "Sys.info()". This is the usual case.
if(!is.null(sysinf)){
os <- sysinf['sysname']
if(os == 'Darwin'){
......@@ -348,6 +345,7 @@ guess_host_os = function(){
#'
#' @param input_file
#' @return [string] file encoding.
#'
guess_file_encoding = function(input_file){
# Make a system call to identify the encoding of input_file. Since the system call depends on
......@@ -361,7 +359,7 @@ guess_file_encoding = function(input_file){
file_start = paste(readBin(con=file_connection, what='raw', n=2), collapse='')
close(file_connection)
# UTF-8 with BOM starts with hexadecimal characters "ef bb". UTF-16 LE with "ff fe".
if(file_start == 'efbb') return("utf-8-bom")
if(file_start == 'efbb') return("utf-8")
if(file_start == 'fffe') return("utf-16le")
return("utf-8")
}
......@@ -379,18 +377,17 @@ guess_file_encoding = function(input_file){
####################################################################################################
generate_global_summary_table = function(sample_output_list,
cell_measurement,
column_order){
# ********************************************************************************************
# Generate a global summary table for cell counts or cell densities for all samples.
# The user can select to have columns ordered by cell type or by tissue type.
#
# Input arguments:
# sample_output_list: list of "sample output" objects, as produced by "run_sample" function.
# cell_measurement : either 'count' or 'density'.
# column_order : either 'by_cell_type' or 'by_tissue'.
# ********************************************************************************************
#' Generate a global summary table for cell counts or cell densities for all samples. The user can
#' select to have columns ordered by cell type or by tissue type.
#'
#' @param sample_output_list [list] List of "sample output" objects, as produced by the
#' "run_sample()" function.
#' @param cell_measurement [str] Either 'count' or 'density'.
#' @param column_order [str] Either 'by_cell_type' or 'by_tissue'.
#'
generate_global_summary_table = function(sample_output_list, cell_measurement, column_order){
# Input checks.
stopifnot(length(sample_output_list) > 0)
required_items = c('summary_count','summary_density', 'summary_table')
stopifnot(sapply(sample_output_list, FUN=function(x) all(required_items %in% names(x)) ))
......@@ -424,13 +421,13 @@ generate_global_summary_table = function(sample_output_list,
####################################################################################################
#' Creates a summary file in Microsoft Excel format. The excel file contains the 'summary' and
#' 'count' tables for each sample in separate worksheets. It also contains global summary tables
#' for cell counts and densities.
#'
generate_excel_summary_file = function(sample_output_list, output_file_name){
# ********************************************************************************************
# Create a summary file in Microsoft Excel format.
# The excel file contains the 'summary' and 'count' tables for each sample in separate
# worksheets. It also contains global summary tables for cell counts and densities.
#
# ********************************************************************************************
# Input checks.
stopifnot(length(sample_output_list) > 0)
# Add global summary tables to the Excel file (i.e. workbook).
......@@ -589,10 +586,9 @@ decompress_file <- function(input_file, allow_overwrite=FALSE, dry_run=FALSE){
####################################################################################################
#' Computes time elapsed between "start" and "end" times.
#'
time_difference <- function(start, end){
# ********************************************************************************************
#
# ********************************************************************************************
diff_in_seconds = round(as.numeric(end) - as.numeric(start))
hours = floor(diff_in_seconds / 3600)
minutes = floor((diff_in_seconds - (hours * 3600)) / 60)
......@@ -606,14 +602,12 @@ time_difference <- function(start, end){
####################################################################################################
#' Print message to stdout and/or a log file.
#'
log_message <- function(message, level=1, padding='### ',
add_empty_line = FALSE, log_to_file=TRUE, print_to_stdout=TRUE){
# ********************************************************************************************
#
# ********************************************************************************************
message = paste0(padding, switch(level,'','-> '), message)
if(add_empty_line) message = c(message, padding)
if(log_to_file) write(message, LOG_FILE, append=T)
if(print_to_stdout) cat(message, sep='\n')
}
......@@ -622,16 +616,16 @@ log_message <- function(message, level=1, padding='### ',
####################################################################################################
#' Displays am error or warning message.
#'
#' @param msg [string vector] One or more error messages to display.
#' @param file [string] Optional, name of file in which the error occurred.
#' @param items_to_list [string vector] Optional, items to list in the error message.
#' @param type [string] Either 'error' or 'warning'.
#'
raise_error <- function(msg, file='', items_to_list=NULL, type='error'){
# ********************************************************************************************
# Displays am error or warning message.
#
# Input arguments:
# msg : string vector. One or more error messages to display.
# file : string. Optional file in which the error occurred.
# items_to_list: string vector. Items to list in the error message.
# type : either 'error' or 'warning'.
# ********************************************************************************************
# Set printed message prefix (i.e. padding) based on the type of error.
if(type == 'error'){
prefix = 'ERROR: '
log_padding = ''
......
......@@ -241,19 +241,22 @@ string_to_vector <- function(input_string){
####################################################################################################
#' Read a "thresholds.txt" file from disk. These files contain the threshold values for each
#' sample and each scored marker.
#'
#' @param input_file [str] Path of threshold file to load.
#' @param markers_scored [str vector] List of markers that should be found in the input file.
#' @param sample_names [str vector] List of samples that should be found in the input file.
#' @param tissue_types [str vector] Optional. List of tissue categories samples that should be
#' found in the input file.
#' @param rewrite_input [bool] If TRUE, the original input file is replaced with the curated
# content produced by the function.
#'
load_thresholds_file <- function(input_file,
markers_scored,
sample_names,
tissue_types,
rewrite_input=FALSE){
# ********************************************************************************************
# Input arguments:
# input_file : string. Path + name of threshold file to load.
# markers_scored: string vector. List of markers that should be found in the input file.
# sample_names : string vector. List of samples that should be found in the input file.
# rewrite_input : If TRUE, the original input file is replaced with the (possibly) curated
# content produced by the function.
# ********************************************************************************************
# Load input file and standardize column names.
# ********************************************
......@@ -271,8 +274,8 @@ load_thresholds_file <- function(input_file,
# Standardize column names.
col_names = file_content[[1]]
col_names[grep('^sample', col_names, ignore.case=T)] = 'sample_name'
col_names[grep('^tissue', col_names, ignore.case=T)] = 'tissue_type'
col_names[grep('sample', col_names, ignore.case=T)] = 'sample_name'
col_names[grep('tissue', col_names, ignore.case=T)] = 'tissue_type'
if(col_names[1] != 'sample_name') raise_error(
msg = "The first column of the threshold file must be 'sample_name'.",
file = input_file)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment