# qag.ini
[main]
quiet: False
ignoreWarnings: True
# model type: QA|QG|AE|E2E
type: QA
# text|chat
baseType: chat
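# model size in billions of parameters (used as "${main:modelSize}b" in paths)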
modelSize: 7
# training modes: test|norm
mode: norm
[paths]
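# the ${section:option} references below assume Python configparser's
# ExtendedInterpolation; with the [main] values above, base resolves to
# /models/llama-hf/7b-chat and output to /models/output/norm/7b-chatQA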
base: /models/llama-hf/7b-${main:baseType}
output: /models/output/${main:mode}/${main:modelSize}b-${main:baseType}${main:type}
data: /data/pbe/${main:type}
log: /data/logs
# dataProcessor paths (point to files, not directories)
dpSource: /data/pbe/clean/contextQuestions.csv
dpDest: /data/pbe/QA/data.jsonl
[data]
# manual|generate
sampleMode: generate
evalToTrainRatio: 0.07
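# e.g. 0.07 holds out roughly 7 eval examples for every 100 training examples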
qualityThreshold: 7
# tunable hyperparameters
[hyperparameters]
# General hyperparameters
learningRate: 1e-4
weightDecay: 1e-4
# more than one epoch degrades performance due to overfitting
epochs: 1
# LORA
# scaling factor relative to r: the LoRA update is scaled by alpha/r, so
# alpha = r weights your data about as strongly as the base weights; higher
# multiples weight it more heavily. lower values still need testing
loraAlpha: 256
# r = rank of the low-rank adapter matrices trained alongside the frozen
# weights; it determines how many parameters get fine-tuned
r: 64
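# with loraAlpha = 256 and r = 64, the adapter update is scaled by 256/64 = 4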
loraDropout: 0.01
# which projection layers to adapt: query, value, key, output (o_proj); use first letters
loraLayers: qvko
# none|all|lora_only
bias: none
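# these presumably feed a peft LoraConfig, roughly as below (a sketch; the
# exact mapping is assumed, not confirmed by this file):
#   LoraConfig(r=64, lora_alpha=256, lora_dropout=0.01, bias="none",
#              target_modules=["q_proj", "v_proj", "k_proj", "o_proj"])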
# general settings for training
[train]
# controls how often training pauses to evaluate, save, and log the model
# no|steps|epoch
saveStrategy: epoch
evalStrategy: steps
stepSize: 50
testSteps: 2
# number of most recent checkpoints to save
saveTotalLimit: 1
# whether to reload the best checkpoint at the end of training
loadBestModelAtEnd: False
# max sequence length (in tokens) during training
maxSeqLength: 512
# batch size per GPU
perDeviceTrainBatchSize: 2
# number of batches to accumulate gradients over before each optimizer step;
# raises the effective batch size without using more GPU memory
gradientAccumulationSteps: 10
# how many prediction steps to accumulate the output tensors for on the GPU
# before moving the results to the CPU. if None, all predictions are
# accumulated on GPU. higher numbers are faster but can throw 'Out of Memory' errors
evalAccumulationSteps: 10
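# e.g. perDeviceTrainBatchSize = 2 with gradientAccumulationSteps = 10 gives
# an effective batch size of 2 * 10 = 20 per GPU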
# compute the loss only on the model completion (answers/questions), not the prompt
# see https://github.com/huggingface/trl/issues/426 && .../trl/pull/445
optimizeCompletion: True
# packs several examples into one training input; SFTTrainer uses an EOS
# token to separate examples. forced to False when optimizeCompletion = True
packing: False
addCustomTokens: False
[eval]
# bleu|meteor|rouge
evalMetric: bleu
[generate]
# max number of new tokens to generate
maxLength: 256
# penalizes token repetition. 1.0 for no penalty, 1.2 recommended
repetitionPenalty: 1.0
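
# minimal loading sketch (an assumption: the ${section:option} syntax implies
# this file is read with Python configparser's ExtendedInterpolation):
#   from configparser import ConfigParser, ExtendedInterpolation
#   cfg = ConfigParser(interpolation=ExtendedInterpolation())
#   cfg.read('qag.ini')
#   cfg['paths']['output']                           # '/models/output/norm/7b-chatQA'
#   cfg['hyperparameters'].getfloat('learningRate')  # 0.0001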