Riddhi Bhagwat committed · Commit 4b82d89
1 Parent(s): 5df30d7
minor changes for debugging
ml/eval/evaluate_arguments.py CHANGED
@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
 @dataclass
 class EvalArguments:
     model_name_or_path: str = field(
-        default="
+        default="CohereForAI/aya-expanse-8b", metadata={"help": "Name to a huggingface native pretrained model or path to a model on disk."})
     model_pretrained_lora_weights: str = field(
         default=None, metadata={"help": "Path to a checkpoint directory."})
     output_filepath: str = field(
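For context, a minimal standalone sketch of how a dataclass like EvalArguments is typically wired to the command line via HfArgumentParser; only the fields visible in this diff are reproduced, and the output_filepath default and help text below are placeholders rather than the repo's actual values.

# Standalone sketch: only the fields visible in the diff are reproduced here,
# so the repo's real EvalArguments almost certainly defines more options.
from dataclasses import dataclass, field
from typing import Optional

from transformers import HfArgumentParser


@dataclass
class EvalArguments:
    model_name_or_path: str = field(
        default="CohereForAI/aya-expanse-8b",
        metadata={"help": "Name of a huggingface native pretrained model or path to a model on disk."})
    model_pretrained_lora_weights: Optional[str] = field(
        default=None, metadata={"help": "Path to a checkpoint directory."})
    output_filepath: Optional[str] = field(
        default=None, metadata={"help": "Where to write evaluation results (placeholder help text)."})


if __name__ == "__main__":
    # HfArgumentParser turns the dataclass fields into --flags, e.g.
    #   python evaluate_arguments.py --model_name_or_path CohereForAI/aya-expanse-8b
    parser = HfArgumentParser(EvalArguments)
    (args,) = parser.parse_args_into_dataclasses()
    print(args)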
ml/eval/evaluation_pipeline.py CHANGED
@@ -5,6 +5,8 @@ from reward_eval import process_evaluation
 from generate import generate_files
 from alpaca import alpaca_evaluator
 from bt import bradley_terry_comparison, save_results, print_metrics
+from evaluate_arguments import EvalArguments
+
 
 ##################
 # M-REWARD BENCH #
@@ -30,14 +32,13 @@ def evaluator_master_fn(eval_dataset: list[dict],
                         model="CohereForAI/aya-23-8B"):
 
     # 1. Reward score evaluation:
-    args =
-
-
-
-
-
-
-    }
+    args = EvalArguments(bfloat16=True,
+                         reward_output_fmt='1-0',
+                         apply_sigmoid_to_reward=False,
+                         per_device_batch_size=8,
+                         output_filepath= '/path/to/your/data.json',
+                         result_filename=None,
+                         model_name_or_path="CohereForAI/aya-expanse-8b")
     process_evaluation(args, model_name=model, eval_data_list_dict=eval_dataset)
 
 # 2.
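A hedged sketch of what the updated first step amounts to when invoked outside evaluator_master_fn, assuming the repo's modules are importable; the eval_dataset record shape is an assumption, while the EvalArguments values and the process_evaluation call mirror the lines added above.

# Sketch of invoking the reward-score step directly; the record fields in
# eval_dataset are assumptions, the EvalArguments values mirror the diff above.
from evaluate_arguments import EvalArguments
from reward_eval import process_evaluation

eval_dataset = [
    {"prompt": "Translate to French: good morning",  # assumed record shape
     "output_1": "bonjour",
     "output_2": "bonsoir"},
]

args = EvalArguments(bfloat16=True,
                     reward_output_fmt='1-0',
                     apply_sigmoid_to_reward=False,
                     per_device_batch_size=8,
                     output_filepath='/path/to/your/data.json',
                     result_filename=None,
                     model_name_or_path="CohereForAI/aya-expanse-8b")

process_evaluation(args, model_name="CohereForAI/aya-23-8B",
                   eval_data_list_dict=eval_dataset)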
ml/eval/generate_sanity_check.py CHANGED
@@ -45,7 +45,7 @@ ref_model = AutoModelForCausalLM.from_pretrained(
 ).to("cuda")
 print(f'loaded reference model')
 
-# load a
+# load a tokenizer
 ref_tokenizer = AutoTokenizer.from_pretrained(
     ref_model_args.model_name_or_path, trust_remote_code=ref_model_args.trust_remote_code
 )
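For reference, a self-contained version of the load-model-then-load-tokenizer pattern this file follows; the checkpoint name, dtype, and the final generate() round-trip are illustrative assumptions rather than the script's actual settings.

# Self-contained version of the load pattern in generate_sanity_check.py;
# model name, dtype, and the generate() call are illustrative assumptions.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "CohereForAI/aya-expanse-8b"  # assumed checkpoint

# load the reference model onto the GPU
ref_model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.bfloat16
).to("cuda")
print("loaded reference model")

# load a tokenizer that matches the reference checkpoint
ref_tokenizer = AutoTokenizer.from_pretrained(model_name)

# quick sanity check: one short generation round-trip
inputs = ref_tokenizer("Hello, world!", return_tensors="pt").to("cuda")
outputs = ref_model.generate(**inputs, max_new_tokens=20)
print(ref_tokenizer.decode(outputs[0], skip_special_tokens=True))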