Riddhi Bhagwat committed
Commit 4b82d89 · Parent: 5df30d7

minor changes for debugging

ml/eval/evaluate_arguments.py CHANGED
@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
 @dataclass
 class EvalArguments:
     model_name_or_path: str = field(
-        default="mistralai/Mistral-7B-v0.1", metadata={"help": "Name to a huggingface native pretrained model or path to a model on disk."})
+        default="CohereForAI/aya-expanse-8b", metadata={"help": "Name to a huggingface native pretrained model or path to a model on disk."})
     model_pretrained_lora_weights: str = field(
         default=None, metadata={"help": "Path to a checkpoint directory."})
     output_filepath: str = field(
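
The new default only takes effect when nothing overrides model_name_or_path. A minimal sketch of how a field(default=..., metadata={"help": ...}) dataclass like this is typically consumed, assuming transformers.HfArgumentParser (the diff does not show the parsing side):

from transformers import HfArgumentParser
from evaluate_arguments import EvalArguments

# Parse an empty CLI argument list so every field falls back to its default;
# model_name_or_path then resolves to "CohereForAI/aya-expanse-8b".
parser = HfArgumentParser(EvalArguments)
(eval_args,) = parser.parse_args_into_dataclasses(args=[])
print(eval_args.model_name_or_path)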
ml/eval/evaluation_pipeline.py CHANGED
@@ -5,6 +5,8 @@ from reward_eval import process_evaluation
 from generate import generate_files
 from alpaca import alpaca_evaluator
 from bt import bradley_terry_comparison, save_results, print_metrics
+from evaluate_arguments import EvalArguments
+
 
 ##################
 # M-REWARD BENCH #
@@ -30,14 +32,13 @@ def evaluator_master_fn(eval_dataset: list[dict],
                         model="CohereForAI/aya-23-8B"):
 
     # 1. Reward score evaluation:
-    args = {
-        'bfloat16': False,
-        'reward_output_fmt': '1-0',
-        'apply_sigmoid_to_reward': False,
-        'per_device_batch_size': 8,
-        'output_filepath': reward_output_filepath + '.json',
-        'result_filename': None,
-    }
+    args = EvalArguments(bfloat16=True,
+                         reward_output_fmt='1-0',
+                         apply_sigmoid_to_reward=False,
+                         per_device_batch_size=8,
+                         output_filepath='/path/to/your/data.json',
+                         result_filename=None,
+                         model_name_or_path="CohereForAI/aya-expanse-8b")
 
     process_evaluation(args, model_name=model, eval_data_list_dict=eval_dataset)
 
     # 2.
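
Swapping the plain dict for an EvalArguments instance matters if process_evaluation reads settings as attributes (args.bfloat16) rather than keys (args['bfloat16']); the diff does not show its body, so that is an assumption. A minimal sketch of the difference under that assumption, using only fields the new call already passes:

from evaluate_arguments import EvalArguments

old_args = {'bfloat16': False, 'per_device_batch_size': 8}        # pre-commit style
new_args = EvalArguments(bfloat16=True, per_device_batch_size=8)  # post-commit style

# Attribute access works on the dataclass but raises on the dict,
# which is the kind of mismatch this debugging change removes.
print(new_args.per_device_batch_size)  # 8
try:
    print(old_args.per_device_batch_size)
except AttributeError as err:
    print(f"dict settings need key access: {err}")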
ml/eval/generate_sanity_check.py CHANGED
@@ -45,7 +45,7 @@ ref_model = AutoModelForCausalLM.from_pretrained(
 ).to("cuda")
 print(f'loaded reference model')
 
-# load a tokenaizer
+# load a tokenizer
 ref_tokenizer = AutoTokenizer.from_pretrained(
     ref_model_args.model_name_or_path, trust_remote_code=ref_model_args.trust_remote_code
 )
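
For context, the surrounding sanity check loads a reference model and its tokenizer with trust_remote_code. A standalone sketch of that pattern; the model name below is illustrative (it matches the commit's new default) rather than whatever ref_model_args actually supplies:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "CohereForAI/aya-expanse-8b"  # illustrative; the script takes it from ref_model_args
ref_model = AutoModelForCausalLM.from_pretrained(
    model_name, trust_remote_code=True
).to("cuda")
ref_tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
print("loaded reference model and tokenizer")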