model:
  name: "answerdotai/ModernBERT-base"
  loss_function:
    name: "SentimentWeightedLoss" # Options: "SentimentWeightedLoss", "SentimentFocalLoss"
    # Parameters for the chosen loss function.
    # For SentimentFocalLoss, common params are:
    # gamma_focal: 1.0 # (e.g., 2.0 for standard, -2.0 for reversed, 0 for none)
    # label_smoothing_epsilon: 0.05 # (e.g., 0.0 to 0.1)
    # For SentimentWeightedLoss, params is empty:
    params:
      gamma_focal: 1.0
      label_smoothing_epsilon: 0.05
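    # Selection sketch (illustrative Python; assumes both loss classes are
    # importable with these signatures, which is an assumption about the
    # repo's wiring, not a confirmed API):
    #   lf = cfg["model"]["loss_function"]
    #   if lf["name"] == "SentimentFocalLoss":
    #       loss_fn = SentimentFocalLoss(**lf["params"])
    #   else:
    #       loss_fn = SentimentWeightedLoss()  # takes no params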
  output_dir: "checkpoints"
  max_length: 880 # previously 256
  dropout: 0.1
  # --- Pooling Strategy --- #
  # Options: "cls", "mean", "cls_mean_concat", "weighted_layer", "cls_weighted_concat"
  # "cls" uses just the [CLS] token for classification
  # "mean" uses mean pooling over final hidden states for classification
  # "cls_mean_concat" uses both [CLS] and mean pooling over final hidden states for classification
  # "weighted_layer" uses a weighted combination of the final hidden states from the top N layers for classification
  # "cls_weighted_concat" uses a weighted combination of the final hidden states from the top N layers and the [CLS] token for classification
  
  pooling_strategy: "mean" # Current default, change as needed
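
  # Mean-pooling sketch (illustrative PyTorch; assumes standard Hugging Face
  # encoder outputs, and the variable names are assumptions, not the repo's code):
  #   mask = attention_mask.unsqueeze(-1).float()        # (B, T, 1)
  #   summed = (last_hidden_state * mask).sum(dim=1)     # (B, H)
  #   pooled = summed / mask.sum(dim=1).clamp(min=1e-9)  # (B, H)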

  num_weighted_layers: 6 # Number of top encoder layers used by the "weighted_layer" and "cls_weighted_concat" strategies (e.g., 1 to 12 for BERT-base)
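
  # "weighted_layer" sketch (illustrative PyTorch; assumes the encoder is run
  # with output_hidden_states=True and that layer_weights is a learned (N,)
  # parameter, which is an assumption about this repo's implementation):
  #   top = torch.stack(hidden_states[-num_weighted_layers:])  # (N, B, T, H)
  #   w = torch.softmax(layer_weights, dim=0)                  # (N,)
  #   combined = (w[:, None, None, None] * top).sum(dim=0)     # (B, T, H)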

data:
  # No data paths are needed at the moment; datasets are loaded via Hugging Face datasets.

training:
  epochs: 6
  batch_size: 16
  lr: 1e-5 # previously 2.0e-5
  weight_decay_rate: 0.02 # previously 0.01
  resume_from_checkpoint: "" # "checkpoints/mean_epoch2_0.9361acc_0.9355f1.pt" # Path to checkpoint file, or empty to not resume
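
  # Resume sketch (illustrative; the checkpoint keys below are assumptions,
  # since only the .pt filename format is known from this config):
  #   ckpt = torch.load(cfg["training"]["resume_from_checkpoint"], map_location="cpu")
  #   model.load_state_dict(ckpt["model_state_dict"])
  #   optimizer.load_state_dict(ckpt["optimizer_state_dict"])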

inference:
  # Default path, can be overridden
  model_path: "checkpoints/mean_epoch5_0.9575acc_0.9575f1.pt" 
  # Using the same max_length as training for consistency
  max_length: 880 # previously 256
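
  # Tokenization sketch for inference (illustrative; AutoTokenizer is the
  # standard transformers API, the other names are assumptions):
  #   tok = AutoTokenizer.from_pretrained(cfg["model"]["name"])
  #   enc = tok(texts, truncation=True, padding=True,
  #             max_length=cfg["inference"]["max_length"], return_tensors="pt")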


# "answerdotai/ModernBERT-base"
# "answerdotai/ModernBERT-large"