Commit ac64adfc authored by Robin Engler
Browse files

Fix error when reading Windows UTF-16LE files. Auto-remove quotes from input parameter files.

parent 3f30bb06
...@@ -361,9 +361,9 @@ guess_file_encoding = function(input_file){ ...@@ -361,9 +361,9 @@ guess_file_encoding = function(input_file){
file_start = paste(readBin(con=file_connection, what='raw', n=2), collapse='') file_start = paste(readBin(con=file_connection, what='raw', n=2), collapse='')
close(file_connection) close(file_connection)
# UTF-8 with BOM starts with hexadecimal characters "ef bb". UTF-16 LE with "ff fe". # UTF-8 with BOM starts with hexadecimal characters "ef bb". UTF-16 LE with "ff fe".
if(file_start == 'efbb') return("UTF-8-BOM") if(file_start == 'efbb') return("utf-8-bom")
if(file_start == 'fffe') return("UTF-16LE") if(file_start == 'fffe') return("utf-16le")
return("UTF-8") return("utf-8")
} }
if(host_os == 'unknown') raise_error("Unable to detect host OS.") if(host_os == 'unknown') raise_error("Unable to detect host OS.")
encoding_type = sub(pattern='^.* charset=', replacement='', x=tmp) encoding_type = sub(pattern='^.* charset=', replacement='', x=tmp)
......
...@@ -148,7 +148,8 @@ load_session_parameters <- function(session_root_dir){ ...@@ -148,7 +148,8 @@ load_session_parameters <- function(session_root_dir){
read_parameters_file <- function(input_file){ read_parameters_file <- function(input_file){
# Load file content. Lines starting with a '#' are ignored. # Load file content. Lines starting with a '#' are ignored.
file_content = read_file_as_vector(input_file) file_content = read_file_as_vector(input_file, ignore_comments=TRUE,
ignore_empty_line=TRUE, remove_quotes=TRUE)
# Parse file content. # Parse file content.
# ****************** # ******************
...@@ -196,20 +197,32 @@ read_parameters_file <- function(input_file){ ...@@ -196,20 +197,32 @@ read_parameters_file <- function(input_file){
#################################################################################################### ####################################################################################################
read_file_as_vector <- function(input_file, ignore_comments=TRUE, ignore_empty_line=TRUE){ read_file_as_vector <- function(input_file, ignore_comments=TRUE,
ignore_empty_line=TRUE, remove_quotes=FALSE){
# ******************************************************************************************** # ********************************************************************************************
# Read a text file from disk and return its content as vector of strings, where each element # Read a text file from disk and return its content as vector of strings, where each element
# corresponds to a line in the file. # corresponds to a line in the file.
# In addition, white spaces are trimmed, and lines starting with a # character are ignored. # In addition, white spaces are trimmed, and lines starting with a # character are ignored.
# ******************************************************************************************** # ********************************************************************************************
stopifnot(file.exists(input_file)) stopifnot(file.exists(input_file))
file_connection = file(input_file, open='r', encoding=guess_file_encoding(input_file))
file_content = readLines(con=file_connection)
close(file_connection)
file_content = trimws(file_content)
# Read file as binary values (hexadecimals) and convert them to a UTF-8 string.
file_encoding = guess_file_encoding(input_file)
file_content = stringi::stri_encode(readBin(con=input_file,
what='raw',
n=file.info(input_file)$size),
from=file_encoding, to='utf-8')
# Split the input string by lines, and trim any leading/trailing white spaces. Note that
# Windows files have `\r\n` end-of-line characters, and therefore, if present, we convert those
# to just simple "\n" characters.
file_content = trimws(unlist(strsplit(gsub(pattern='\r\n', replacement='\n', x=file_content),
split='\n')))
# Remove empty lines and comments, if asked for.
if(remove_quotes) file_content = gsub('"', '', file_content)
if(ignore_empty_line) file_content = file_content[which(file_content != '')] if(ignore_empty_line) file_content = file_content[which(file_content != '')]
if(ignore_comments) file_content = file_content[which(!startsWith(file_content, '#'))] if(ignore_comments) file_content = file_content[which(!startsWith(file_content, '#'))]
return(file_content) return(file_content)
} }
#################################################################################################### ####################################################################################################
......
...@@ -81,7 +81,8 @@ rename_samples <- function(sample_rename, root_dir){ ...@@ -81,7 +81,8 @@ rename_samples <- function(sample_rename, root_dir){
load_sample_rename_file <- function(input_file){ load_sample_rename_file <- function(input_file){
# Load file content by line. Lines starting with # are ignored. # Load file content by line. Lines starting with # are ignored.
file_content = read_file_as_vector(input_file, ignore_comments=TRUE, ignore_empty_line=TRUE) file_content = read_file_as_vector(input_file, ignore_comments=TRUE,
ignore_empty_line=TRUE, remove_quotes=TRUE)
if(length(file_content) < 2) raise_error( if(length(file_content) < 2) raise_error(
msg = 'Sample renaming files must contain at least 2 lines: header + one sample.', msg = 'Sample renaming files must contain at least 2 lines: header + one sample.',
file = input_file) file = input_file)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment