{ "lr_encoder": "1e-5", "lr_others": "1e-4", "weight_decay_encoder": 0.01, "weight_decay_other": 0.01, "num_steps": 100000, "warmup_ratio": 0.1, "train_batch_size": 1, "eval_every": 3000, "gradient_accumulation": 8, "eval_batch_size": 32, "num_layers_freeze": null, "early_stopping_patience": null, "early_stopping_delta": 0.0, "save_at": [ 200, 6000, 12000, 20000, 70000 ], "max_saves": 6, "max_width": 12, "model_name": "microsoft/deberta-v3-large", "fine_tune": true, "subtoken_pooling": "first", "hidden_size": 768, "scorer": "dot", "rel_mode": "marker", "span_marker_mode": "markerv1", "refine_prompt": false, "refine_relation": false, "ffn_mul": 4, "dropout": 0.4, "scheduler": "cosine_with_warmup", "loss_func": "binary_cross_entropy_loss", "alpha": 0.6, "gamma": 3, "label_embed_strategy": "both", "coref_classifier": false, "coref_loss_weight": 10.0, "dataset_name": "zero_rel", "root_dir": "ablation_backbone", "train_data": [ "data/zero_rel_all.jsonl" ], "prev_path": "none", "size_sup": -1, "num_train_rel_types": 25, "num_unseen_rel_types": 15, "top_k": 1, "random_drop": true, "max_len": 512, "eval_threshold": [ 0.01, 0.1, 0.2, 0.3, 0.5 ], "max_entity_pair_distance": null, "fixed_relation_types": true, "name": "large", "log_dir": "logs/zero_rel/zero_rel-2024-11-02__10-23-22", "eval_data": "data/wiki_zsl_all.jsonl", "coreference_label": "SELF", "entity_start_token": "[E]", "entity_end_token": "[/E]" }