# config.yml

# This is the Snakemake configuration file that specifies paths and
# options for the pipeline. Anybody wishing to use
# the provided snakemake pipeline should first fill out this file with paths to
# their own data, as the Snakefile requires it.
# Every config option has reasonable defaults unless it is labeled as "required."
# All paths are relative to the directory that Snakemake is executed in.
# Note: this file is written in the YAML syntax (https://learnxinyminutes.com/docs/yaml/)


# The path to a text file specifying where to find images (e.g. with a .JPG ending) for each sample
# Each row in the sample file should represent a different sample.
# The sample file should have up to 3 columns (each separated by a single tab):
#       <unique_sample_name> <path_to_images> <image_ext>
# Here's an example:
#       test	data/test	.JPG
# Note that <image_ext> is optional. If it isn't provided (i.e. there is no tab
# after <path_to_images>), the ext will be inferred based on the most common
# file extension among the files in the <path_to_images> dir. If <image_ext>
# is blank (i.e. there is a trailing tab but no ext), all of the files in the
# <path_to_images> directory will be assumed to be images.
# required!
sample_file: data/samples.tsv

# Which samples should we execute the pipeline on?
# Leave this line commented out if you want to run all samples in the sample file
# SAMP_NAMES: [region1, region2]

# Whether to perform the default strategy (false) or the experimental one (true).
# The default strategy performs segmentation on the stitched orthomosaic, while
# the experimental strategy parallelizes the segmentation step by running it on
# each of the original images.
# If this option is set to false or commented out,
# the default strategy will be used.
parallel: true

# Whether to perform the stitching step with low quality settings (true)
# instead of high quality ones (false). The high quality settings are usually
# only helpful if you plan to perform segmentation and classification on the
# orthomosaic. Using low quality settings can significantly improve the running
# time and memory usage of the stitching step.
# If this line is commented out or the value is set to null, it will default to
# the value of the 'parallel' config option (see above).
# It is left as null here, so it follows the 'parallel' option.
low_qual_ortho: null

# Truth data for training/testing.
# This is a dictionary keyed by sample name.
# Each value is itself a dictionary containing the following key-value pairs:
# path (required) - The path to a TSV file which should have two columns:
#     1) the IDs of each segment containing plant
#     2) the species labels (currently: 0 or 1 for whether the plant is buckwheat)
#     The TSV file should not have a header line.
# train_all (optional) - Whether to train on all of the data in the truth set or use half
#     of the data for training and half for testing. If this line is commented out
#     or set to a falsey value, the truth set will be split in half. Otherwise,
#     the testing steps will be skipped.
# truth:
#    test2:
#        path: data/test2/truth.tsv
#        train_all: false

# If you already have a trained model, provide its path here. Otherwise, comment
# out this line or set it to a falsy value.
# If you already have a trained model, any truth sets you provide (see "truth"
# above) will be used only for testing.
# required! (unless truth sets are provided above)
model: data/models/test-all-exp.rda

# The path to the directory in which to place all of the output files.
# It is defined relative to whatever directory you execute the snakemake command in.
# Defaults to 'out' if not provided.
out: out

# FOR THE RULE extract_images (only uncomment if using that rule)
# The list of segment labels that you want to extract the
# source images for.
# NOTE(review): this option is currently uncommented even though the note above
# says to enable it only when using extract_images -- confirm whether it should
# be commented out by default.
extracted_labels: [515, 520, 571, 560]