Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Vital-IT
post-inForm
Commits
76908cd4
Commit
76908cd4
authored
Sep 11, 2020
by
Robin Engler
Browse files
Modify behavior of tissue surface merging for individual file: use min instead of median
parent
1a25c8c7
Changes
6
Hide whitespace changes
Inline
Side-by-side
R/config.R
View file @
76908cd4
...
...
@@ -20,7 +20,8 @@ AUTHORIZED_STROMA_VALUES <<- c('DAPI', 'stroma', 'other')
AUTHORIZED_TUMOR_VALUES
<<-
c
(
'CK'
,
'tumor'
)
AUTHORIZED_MARKERS
<<-
c
(
'CAL'
,
'CD3'
,
'CD4'
,
'CD8'
,
'CD11C'
,
'CD15'
,
'CD20'
,
'CD56'
,
'CD68'
,
'CD103'
,
'CD163'
,
'CD206'
,
'FOXP3'
,
'GB'
,
'gH2AX'
,
'gH2AXN'
,
'IDO'
,
'Keratin'
,
'KI67'
,
'PD1'
,
'PDL1'
,
'PERFORIN'
,
'WT1'
,
'CK'
,
'VISTA'
)
'Keratin'
,
'KI67'
,
'PD1'
,
'PDL1'
,
'PERFORIN'
,
'SOX10'
,
'WT1'
,
'CK'
,
'VISTA'
)
IGNORED_PHENOTYPES
<<-
c
(
'DAPIp'
,
'MISSING'
)
DATAREDUCE_SCRIPT
<<-
file.path
(
dirname
(
dirname
(
sys.frame
(
1
)
$
ofile
)),
...
...
R/functions.R
View file @
76908cd4
...
...
@@ -542,34 +542,44 @@ extract_imageid <- function(input_vector, input_file='file not specified'){
####################################################################################################
decompress_file
<-
function
(
input_file
,
dry_run
=
FALSE
){
decompress_file
<-
function
(
input_file
,
allow_overwrite
=
FALSE
,
dry_run
=
FALSE
){
# ********************************************************************************************
# Decompress .zip and .tar.gz input files and return the directory containing the uncompressed
# data.
#
# Input parameters:
# -> input_file: input .tar.gz or .zip file to
un
compress.
# -> input_file: input .tar.gz or .zip file to
de
compress.
# ********************************************************************************************
root_dir
=
dirname
(
input_file
)
#
Input file is tarball.
#
Determine archive type:
if
(
endsWith
(
input_file
,
'.tar.gz'
)){
data_dir
=
file.path
(
root_dir
,
unlist
(
strsplit
(
untar
(
input_file
,
list
=
TRUE
)[
1
],
'/'
))[
1
])
if
(
dry_run
)
return
(
data_dir
)
if
(
dir.exists
(
data_dir
))
unlink
(
data_dir
,
recursive
=
TRUE
)
untar
(
input_file
,
list
=
FALSE
,
exdir
=
root_dir
)
# Input file is zip archive.
type
=
'tar'
}
else
if
(
endsWith
(
input_file
,
'.zip'
)){
type
=
'zip'
}
else
stop
(
'Unsupported compression format. Only [.tar.gz] and [.zip] files are supported.'
)
# Determine output directory:
if
(
type
==
'tar'
){
data_dir
=
file.path
(
root_dir
,
unlist
(
strsplit
(
untar
(
input_file
,
list
=
TRUE
)[
1
],
'/'
))[
1
])
}
else
if
(
type
==
'zip'
){
data_dir
=
file.path
(
root_dir
,
unlist
(
strsplit
(
zip
::
zip_list
(
zipfile
=
input_file
)[
1
,
1
],
'/'
))[
1
])
if
(
dry_run
)
return
(
data_dir
)
if
(
dir.exists
(
data_dir
))
unlink
(
data_dir
,
recursive
=
TRUE
)
zip
::
unzip
(
input_file
,
exdir
=
root_dir
,
overwrite
=
TRUE
)
}
if
(
dry_run
)
return
(
data_dir
)
# Unsupported compression format.
}
else
stop
(
'Unsupported compression format.'
)
# Test whether output already exists, and if yes delete it if allowed by user.
if
(
dir.exists
(
data_dir
)){
if
(
!
allow_overwrite
)
stop
(
'File decompression failed: output already exists ['
,
data_dir
,
']'
)
unlink
(
data_dir
,
recursive
=
TRUE
)
}
# Decompress file.
if
(
type
==
'tar'
){
untar
(
input_file
,
list
=
FALSE
,
exdir
=
root_dir
)
}
else
if
(
type
==
'zip'
){
zip
::
unzip
(
input_file
,
exdir
=
root_dir
,
overwrite
=
TRUE
)
}
# Return dirname of extracted files.
stopifnot
(
file.info
(
data_dir
)
$
isdir
)
...
...
R/individual_markers.R
View file @
76908cd4
...
...
@@ -229,7 +229,7 @@ merge_tissue_data_files <- function(files_to_merge){
file_name
=
f
)
stopifnot
(
all
(
colnames
(
input_df
)
==
c
(
key_fields
,
non_key_fields
)))
# Merge data frame for the current marker with the global dataframe 'merged_df'.
# Merge data frame for the current marker with the global data
frame 'merged_df'.
if
(
is.null
(
merged_df
)){
merged_df
=
input_df
}
else
{
...
...
@@ -240,34 +240,22 @@ merge_tissue_data_files <- function(files_to_merge){
}
}
# Search for mismatches among values of non-key fields. If some are detected, a warning
# is displayed. For rows with mismatches, if any, compute the median of surface values. In
# this way, if one of the input files has a different values it gets excluded (provided there
# are at least 3 files).
mismatches
=
NULL
for
(
col_name
in
non_key_fields
){
col_index
=
grep
(
col_name
,
colnames
(
merged_df
))
difference
=
abs
(
merged_df
[,
col_index
]
-
merged_df
[,
col_index
[
1
]])
mismatches
=
unique
(
c
(
mismatches
,
which
(
apply
(
difference
,
MARGIN
=
1
,
sum
)
>
0
)))
}
if
(
length
(
mismatches
)
>
0
){
percentage
=
round
(
length
(
mismatches
)
/
nrow
(
merged_df
)
*
100
,
2
)
raise_error
(
msg
=
c
(
'Mismatches in tissue surface among tissue seg files were found'
,
'Median value of tissue surface will be used for the following rows:'
,
paste0
(
paste
(
mismatches
,
collapse
=
', '
),
' ['
,
percentage
,
'%]'
)),
file
=
dirname
(
files_to_merge
[
1
]),
type
=
'warning'
)
# Compute median values to reconciliate mismatches.
for
(
col_name
in
non_key_fields
){
col_index
=
grep
(
col_name
,
colnames
(
merged_df
))
merged_df
[
mismatches
,
col_index
[
1
]]
=
apply
(
merged_df
[
mismatches
,
col_index
],
1
,
median
)
}
# Merge tissue surface values (absolute value or percentage).
# The merge is made by keeping the smallest surface value from the individual files. This is
# because, when merging cells value, we keep the intersection of all individual files, which,
# in terms of surface, corresponds (roughly) to the smallest surface value in the tissue
#surface files.
for
(
i
in
1
:
nrow
(
merged_df
)){
# Identify column with the smallest surface value.
col_index
=
grep
(
'region_area_surface'
,
colnames
(
merged_df
))
min_index
=
which
(
merged_df
[
i
,
col_index
]
==
min
(
merged_df
[
i
,
col_index
]))[
1
]
# Replace tissue surface values with the minimum value across all files.
merged_df
[
i
,
'region_area_surface'
]
=
merged_df
[
i
,
col_index
[
min_index
]]
merged_df
[
i
,
'region_area_percent'
]
=
merged_df
[
i
,
col_index
[
min_index
]
+
1
]
}
# Remove duplicated columns.
merged_df
=
merged_df
[,
1
:
5
]
return
(
merged_df
)
return
(
merged_df
[,
1
:
5
])
}
####################################################################################################
...
...
R/postinform.R
View file @
76908cd4
...
...
@@ -94,7 +94,7 @@ postinform <- function(input_file_or_dir,
# If the input is a compressed .zip or .tar.gz file, decompress it.
if
(
!
file.info
(
input_file_or_dir
)
$
isdir
){
message
(
"Decompressing "
,
input_file_or_dir
,
"..."
)
input_dir
=
decompress_file
(
input_file_or_dir
)
input_dir
=
decompress_file
(
input_file_or_dir
,
allow_overwrite
=
allow_overwrite
)
}
# Create output directory and log file.
...
...
README.md
View file @
76908cd4
...
...
@@ -62,3 +62,47 @@ This command will produce an output file named "Test_session_random_suffix.zip".
postinform(input_file_or_dir="Test_session.zip", command='process', output_suffix="random_suffix",
compress_output=TRUE, immucan_output=TRUE, allow_overwrite=FALSE)
```
### Post-inForm input parameter file format.
The list of samples, tissues, markers and marker combinations to process is passed to post-inForm
via a single plain text file that must be named
`parameters.txt`
and be located at the root of the
input directory.
**Important:**
for
**Windows users**
, the
`parameters.txt`
file should be encoded in
`UTF-8`.
The parameters input file must contain the following 5 sections. For each section, values can be
passed on the same line separated by a
`,`
, or on multiple lines (one value per line). Any line
starting with a
`#`
character is ignored (this allows comments to be added to the file).
For the
`marker_combinations:`
section, the special value
`all`
can be passed to process all
possible combinations of markers (to avoid having to enter all combinations manually).
```
samples:
tissues:
phenotyped_markers:
scored_markers:
marker_combinations:
```
A template file can be downloaded
[
here
](
tests/parameters.txt
)
.
`parameters.txt`
file example:
```
# List of samples to process.
samples:
SAMPLE_1
SAMPLE_2
SAMPLE_3
# List of tissues to process.
tissues: stroma, tumor
# List of markers.
phenotyped_markers: CD20, CD3, CD68
scored_markers:
# Marker combinations to test
marker_combinations: all
```
tests/parameters.txt
0 → 100644
View file @
76908cd4
# Post-inForm input parameter template.
# List of samples to process.
samples:
# List of tissues to process.
tissues: stroma, tumor
# List of markers.
phenotyped_markers:
scored_markers:
# Marker combinations to test
marker_combinations: all
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment