Skip to content

Commit

Permalink
[Update] Update LiveMathBench Hard Configs (#1826)
Browse files Browse the repository at this point in the history
* support G-Pass@k and livemathbench

* fix bugs

* fix comments of GPassKEvaluator

* update saved details of GPassKEvaluator

* update saved details of GPassKEvaluator

* fix eval api configs & update openai_api for ease of debugging

* update huggingface path

* fix method name of G-Pass@k

* fix default value of eval_model_name

* refactor G-Pass@k evaluator

* log generation params for each backend

* fix evaluation resume

* add NotImplementedError

* update livemathbench-hard configs

* remove max_out_len from livemathbench_hard_greedy_gen_9befbf.py

* remove max_out_len from livemathbench_hard_gen_9befbf.py

* rename livemathbench_hard_gen_9befbf.py to livemathbench_hard_gen_353ae7.py

* rename livemathbench_hard_greedy_gen_9befbf.py to livemathbench_hard_greedy_gen_353ae7.py

* update livemathbench_gen_9befbf.py

* remove whitespace

* upload livemathbench hard configs
  • Loading branch information
jnanliu authored Feb 25, 2025
1 parent 465e93e commit 22a33d8
Show file tree
Hide file tree
Showing 7 changed files with 103 additions and 54 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,4 @@
)
)
)
livemathbench_datasets = [livemathbench_dataset]
livemathbench_datasets = [livemathbench_dataset]

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from mmengine.config import read_base

with read_base():
from .livemathbench_greedy_gen_efb20d import livemathbench_datasets # noqa: F401, F403
from .livemathbench_greedy_gen_9befbf import livemathbench_datasets # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer

from opencompass.datasets.livemathbench import LiveMathBenchDataset, LiveMathBenchEvaluator


# LiveMathBench (version 202412), 'hard' split, 'cn' and 'en' languages,
# with cot enabled. This variant requests k=16 with replication=3 on the
# dataset side and evaluates at k in {4, 8, 16} over five thresholds.
# NOTE(review): `path` is left empty; presumably the loader then falls back
# to a non-local source -- confirm against LiveMathBenchDataset.load.
livemathbench_dataset = {
    'type': LiveMathBenchDataset,
    'path': '',
    'k': 16,
    'replication': 3,
    'dataset_splits': ['hard'],
    'dataset_languages': ['cn', 'en'],
    'cot': True,
    'version': '202412',
    'abbr': 'LiveMathBench-v202412-Hard',
    # The reader feeds the 'prompt' column in and treats 'answer' as the
    # reference output.
    'reader_cfg': {
        'input_columns': ['prompt'],
        'output_column': 'answer',
    },
    # Zero-shot generation: a single HUMAN turn carrying the raw prompt.
    'infer_cfg': {
        'prompt_template': {
            'type': PromptTemplate,
            'template': {
                'round': [
                    {'role': 'HUMAN', 'prompt': '{prompt}'},
                ],
            },
        },
        'retriever': {'type': ZeroRetriever},
        'inferencer': {'type': GenInferencer},
    },
    # Evaluator endpoints (model_name / url / extract_*) are intentionally
    # blank here and are expected to be filled in by the user -- TODO confirm.
    'eval_cfg': {
        'evaluator': {
            'type': LiveMathBenchEvaluator,
            'model_name': '',
            'url': [],
            'use_extract_model': False,
            'extract_url': [],
            'extract_model_name': '',
            'k': [4, 8, 16],
            'replication': 3,
            'thresholds': [0.0, 0.25, 0.5, 0.75, 1.0],
        },
    },
}

# Name imported by downstream configs via `read_base()`.
livemathbench_datasets = [livemathbench_dataset]
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer

from opencompass.datasets.livemathbench import LiveMathBenchDataset, LiveMathBenchEvaluator


# LiveMathBench (version 202412), 'hard' split, 'cn' and 'en' languages,
# with cot enabled. Greedy variant: a single sample per question (k=1,
# replication=1), evaluated at k=[1] with the single threshold 0.0.
# NOTE(review): `path` is left empty; presumably the loader then falls back
# to a non-local source -- confirm against LiveMathBenchDataset.load.
livemathbench_dataset = {
    'type': LiveMathBenchDataset,
    'path': '',
    'k': 1,
    'replication': 1,
    'dataset_splits': ['hard'],
    'dataset_languages': ['cn', 'en'],
    'cot': True,
    'version': '202412',
    'abbr': 'LiveMathBench-v202412-Hard',
    # The reader feeds the 'prompt' column in and treats 'answer' as the
    # reference output.
    'reader_cfg': {
        'input_columns': ['prompt'],
        'output_column': 'answer',
    },
    # Zero-shot generation: a single HUMAN turn carrying the raw prompt.
    'infer_cfg': {
        'prompt_template': {
            'type': PromptTemplate,
            'template': {
                'round': [
                    {'role': 'HUMAN', 'prompt': '{prompt}'},
                ],
            },
        },
        'retriever': {'type': ZeroRetriever},
        'inferencer': {'type': GenInferencer},
    },
    # Evaluator endpoints (model_name / url / extract_*) are intentionally
    # blank here and are expected to be filled in by the user -- TODO confirm.
    'eval_cfg': {
        'evaluator': {
            'type': LiveMathBenchEvaluator,
            'model_name': '',
            'url': [],
            'use_extract_model': False,
            'extract_url': [],
            'extract_model_name': '',
            'k': [1],
            'replication': 1,
            'thresholds': [0.0],
        },
    },
}

# Name imported by downstream configs via `read_base()`.
livemathbench_datasets = [livemathbench_dataset]
2 changes: 1 addition & 1 deletion opencompass/datasets/livemathbench/livemathbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def load(path: str,
if path != '':
path = get_data_path(path)
path = os.path.join(path, version)

for split, language in product(dataset_splits, dataset_languages):
dataset_info[f'{split}_{language}'] = {
'single-choice': 0,
Expand All @@ -64,7 +65,6 @@ def load(path: str,

if path != '':
file_path = os.path.join(path, f'{split}_{language}.jsonl')

if not os.path.exists(file_path):
raise FileNotFoundError(
f'File {file_path} does not exist, please check the '
Expand Down
2 changes: 0 additions & 2 deletions opencompass/models/turbomind_with_tf_above_v4_33.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,6 @@ def generate(self,
self.logger.info('Generation Config of LMdeploy: ')
self.logger.info(gen_config)



results = []
outputs = self.pipe(messages, gen_config=gen_config, do_preprocess=False)
for output in outputs:
Expand Down

0 comments on commit 22a33d8

Please sign in to comment.