Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Vital-IT
post-inForm
Commits
ac64adfc
Commit
ac64adfc
authored
Nov 16, 2020
by
Robin Engler
Browse files
Fix error when reading Windows UTF-16LE files. Auto-remove quotes from input parameter files.
parent
3f30bb06
Changes
3
Hide whitespace changes
Inline
Side-by-side
R/functions.R
View file @
ac64adfc
...
...
@@ -361,9 +361,9 @@ guess_file_encoding = function(input_file){
file_start
=
paste
(
readBin
(
con
=
file_connection
,
what
=
'raw'
,
n
=
2
),
collapse
=
''
)
close
(
file_connection
)
# Only the first 2 bytes of the file are inspected. A UTF-8 file with BOM starts with the bytes "ef bb" (full BOM: "ef bb bf"), a UTF-16 LE file starts with "ff fe".
if
(
file_start
==
'efbb'
)
return
(
"
UTF-8-BOM
"
)
if
(
file_start
==
'fffe'
)
return
(
"
UTF
-16
LE
"
)
return
(
"
UTF
-8"
)
if
(
file_start
==
'efbb'
)
return
(
"
utf-8-bom
"
)
if
(
file_start
==
'fffe'
)
return
(
"
utf
-16
le
"
)
return
(
"
utf
-8"
)
}
if
(
host_os
==
'unknown'
)
raise_error
(
"Unable to detect host OS."
)
encoding_type
=
sub
(
pattern
=
'^.* charset='
,
replacement
=
''
,
x
=
tmp
)
...
...
R/load_data.R
View file @
ac64adfc
...
...
@@ -148,7 +148,8 @@ load_session_parameters <- function(session_root_dir){
read_parameters_file
<-
function
(
input_file
){
# Load file content. Lines starting with a '#' are ignored.
file_content
=
read_file_as_vector
(
input_file
)
file_content
=
read_file_as_vector
(
input_file
,
ignore_comments
=
TRUE
,
ignore_empty_line
=
TRUE
,
remove_quotes
=
TRUE
)
# Parse file content.
# ******************
...
...
@@ -196,20 +197,32 @@ read_parameters_file <- function(input_file){
####################################################################################################
read_file_as_vector
<-
function
(
input_file
,
ignore_comments
=
TRUE
,
ignore_empty_line
=
TRUE
){
read_file_as_vector
<-
function
(
input_file
,
ignore_comments
=
TRUE
,
ignore_empty_line
=
TRUE
,
remove_quotes
=
FALSE
){
# ********************************************************************************************
# Read a text file from disk and return its content as vector of strings, where each element
# corresponds to a line in the file.
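# In addition, leading/trailing white spaces are trimmed. Optionally, double quotes are removed
# (remove_quotes), empty lines are dropped (ignore_empty_line), and lines starting with a #
# character are dropped (ignore_comments).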
# ********************************************************************************************
stopifnot
(
file.exists
(
input_file
))
file_connection
=
file
(
input_file
,
open
=
'r'
,
encoding
=
guess_file_encoding
(
input_file
))
file_content
=
readLines
(
con
=
file_connection
)
close
(
file_connection
)
file_content
=
trimws
(
file_content
)
# Read the whole file as raw bytes and convert them from the detected encoding to a UTF-8 string.
file_encoding
=
guess_file_encoding
(
input_file
)
file_content
=
stringi
::
stri_encode
(
readBin
(
con
=
input_file
,
what
=
'raw'
,
n
=
file.info
(
input_file
)
$
size
),
from
=
file_encoding
,
to
=
'utf-8'
)
# Split the input string by lines, and trim any leading/trailing white spaces. Note that
# Windows files have `\r\n` end-of-line characters, and therefore, if present, we convert those
# to just simple "\n" characters.
file_content
=
trimws
(
unlist
(
strsplit
(
gsub
(
pattern
=
'\r\n'
,
replacement
=
'\n'
,
x
=
file_content
),
split
=
'\n'
)))
# Remove double quotes, empty lines and comments, if asked for.
if
(
remove_quotes
)
file_content
=
gsub
(
'"'
,
''
,
file_content
)
if
(
ignore_empty_line
)
file_content
=
file_content
[
which
(
file_content
!=
''
)]
if
(
ignore_comments
)
file_content
=
file_content
[
which
(
!
startsWith
(
file_content
,
'#'
))]
if
(
ignore_comments
)
file_content
=
file_content
[
which
(
!
startsWith
(
file_content
,
'#'
))]
return
(
file_content
)
}
####################################################################################################
...
...
R/rename_samples.R
View file @
ac64adfc
...
...
@@ -81,7 +81,8 @@ rename_samples <- function(sample_rename, root_dir){
load_sample_rename_file
<-
function
(
input_file
){
# Load file content by line. Lines starting with # are ignored.
file_content
=
read_file_as_vector
(
input_file
,
ignore_comments
=
TRUE
,
ignore_empty_line
=
TRUE
)
file_content
=
read_file_as_vector
(
input_file
,
ignore_comments
=
TRUE
,
ignore_empty_line
=
TRUE
,
remove_quotes
=
TRUE
)
if
(
length
(
file_content
)
<
2
)
raise_error
(
msg
=
'Sample renaming files must contain at least 2 lines: header + one sample.'
,
file
=
input_file
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment