Commit ac64adfc authored by Robin Engler
Browse files

Fix error when reading windows utf-16le files. Auto-remove quotes from input parameter files

parent 3f30bb06
......@@ -361,9 +361,9 @@ guess_file_encoding = function(input_file){
file_start = paste(readBin(con=file_connection, what='raw', n=2), collapse='')
close(file_connection)
# A UTF-8 file with a BOM starts with the bytes "ef bb" (the full BOM is "ef bb bf").
# A UTF-16 LE file starts with the bytes "ff fe".
if(file_start == 'efbb') return("UTF-8-BOM")
if(file_start == 'fffe') return("UTF-16LE")
return("UTF-8")
if(file_start == 'efbb') return("utf-8-bom")
if(file_start == 'fffe') return("utf-16le")
return("utf-8")
}
if(host_os == 'unknown') raise_error("Unable to detect host OS.")
encoding_type = sub(pattern='^.* charset=', replacement='', x=tmp)
......
......@@ -148,7 +148,8 @@ load_session_parameters <- function(session_root_dir){
read_parameters_file <- function(input_file){
# Load file content. Comment lines (starting with '#') and empty lines are ignored, and double
# quotes are removed.
file_content = read_file_as_vector(input_file)
file_content = read_file_as_vector(input_file, ignore_comments=TRUE,
ignore_empty_line=TRUE, remove_quotes=TRUE)
# Parse file content.
# ******************
......@@ -196,20 +197,32 @@ read_parameters_file <- function(input_file){
####################################################################################################
read_file_as_vector <- function(input_file, ignore_comments=TRUE, ignore_empty_line=TRUE){
# NOTE(review): the signature above and the two-line signature below both appear in this view.
# Presumably the one above is the pre-commit version (it has no `remove_quotes`) -- confirm.
read_file_as_vector <- function(input_file, ignore_comments=TRUE,
ignore_empty_line=TRUE, remove_quotes=FALSE){
# ********************************************************************************************
# Read a text file from disk and return its content as a vector of strings, where each element
# corresponds to one line of the file, with leading/trailing white spaces trimmed.
#
# Arguments:
#   input_file:        path of the file to read. It must exist (checked with stopifnot).
#   ignore_comments:   if TRUE, lines that start with a # character (after trimming) are dropped.
#   ignore_empty_line: if TRUE, lines that are empty (after trimming) are dropped.
#   remove_quotes:     if TRUE, every double quote character is deleted from every line.
#
# Returns: a character vector with the lines that remain after the optional filters.
# ********************************************************************************************
stopifnot(file.exists(input_file))
# NOTE(review): the next four lines read the file through a text-mode connection. They look like
# the pre-commit implementation that the raw-bytes read further down replaces -- confirm. Their
# result is overwritten by the assignment to `file_content` below.
file_connection = file(input_file, open='r', encoding=guess_file_encoding(input_file))
file_content = readLines(con=file_connection)
close(file_connection)
file_content = trimws(file_content)
# Read the whole file as raw bytes (its size on disk gives the number of bytes to read) and
# convert them from the guessed encoding to a single UTF-8 string.
# NOTE(review): the name returned by guess_file_encoding() must be one that stri_encode()
# recognises. Also confirm whether a leading byte-order mark survives the conversion: if it does,
# the first line would not start with '#' and the comment filter below would miss it.
file_encoding = guess_file_encoding(input_file)
file_content = stringi::stri_encode(readBin(con=input_file,
what='raw',
n=file.info(input_file)$size),
from=file_encoding, to='utf-8')
# Split the input string by lines, and trim any leading/trailing white spaces. Note that
# Windows files have `\r\n` end-of-line characters, and therefore, if present, we convert those
# to just simple "\n" characters before splitting.
file_content = trimws(unlist(strsplit(gsub(pattern='\r\n', replacement='\n', x=file_content),
split='\n')))
# Remove quotes, empty lines and comments, if asked for. Quotes are removed first, so a line
# made only of double quotes becomes empty and is then dropped by the empty-line filter.
if(remove_quotes) file_content = gsub('"', '', file_content)
if(ignore_empty_line) file_content = file_content[which(file_content != '')]
# NOTE(review): the comment filter appears twice with identical text. Applying it a second time
# changes nothing; presumably these are the before/after versions of one line -- confirm.
if(ignore_comments) file_content = file_content[which(!startsWith(file_content, '#'))]
if(ignore_comments) file_content = file_content[which(!startsWith(file_content, '#'))]
return(file_content)
}
####################################################################################################
......
......@@ -81,7 +81,8 @@ rename_samples <- function(sample_rename, root_dir){
load_sample_rename_file <- function(input_file){
# Load file content by line. Comment lines (starting with #) and empty lines are ignored, and
# double quotes are removed.
file_content = read_file_as_vector(input_file, ignore_comments=TRUE, ignore_empty_line=TRUE)
file_content = read_file_as_vector(input_file, ignore_comments=TRUE,
ignore_empty_line=TRUE, remove_quotes=TRUE)
if(length(file_content) < 2) raise_error(
msg = 'Sample renaming files must contain at least 2 lines: header + one sample.',
file = input_file)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment