test llama_factory

This commit is contained in:
2024-12-29 10:25:38 +00:00
parent fd4d12d88c
commit 5b5fcda5e5
263 changed files with 88555 additions and 0 deletions
@@ -0,0 +1,161 @@
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import datasets
import pandas as pd
_CITATION = """\
@article{huang2023ceval,
title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models},
author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},
journal={arXiv preprint arXiv:2305.08322},
year={2023}
}
"""
_DESCRIPTION = """\
C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.
"""
_HOMEPAGE = "https://cevalbenchmark.com"
_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"
_URL = "ceval.zip"
task_list = [
"computer_network",
"operating_system",
"computer_architecture",
"college_programming",
"college_physics",
"college_chemistry",
"advanced_mathematics",
"probability_and_statistics",
"discrete_mathematics",
"electrical_engineer",
"metrology_engineer",
"high_school_mathematics",
"high_school_physics",
"high_school_chemistry",
"high_school_biology",
"middle_school_mathematics",
"middle_school_biology",
"middle_school_physics",
"middle_school_chemistry",
"veterinary_medicine",
"college_economics",
"business_administration",
"marxism",
"mao_zedong_thought",
"education_science",
"teacher_qualification",
"high_school_politics",
"high_school_geography",
"middle_school_politics",
"middle_school_geography",
"modern_chinese_history",
"ideological_and_moral_cultivation",
"logic",
"law",
"chinese_language_and_literature",
"art_studies",
"professional_tour_guide",
"legal_professional",
"high_school_chinese",
"high_school_history",
"middle_school_history",
"civil_servant",
"sports_science",
"plant_protection",
"basic_medicine",
"clinical_medicine",
"urban_and_rural_planner",
"accountant",
"fire_engineer",
"environmental_impact_assessment_engineer",
"tax_accountant",
"physician",
]
class CevalConfig(datasets.BuilderConfig):
def __init__(self, **kwargs):
super().__init__(version=datasets.Version("1.0.0"), **kwargs)
class Ceval(datasets.GeneratorBasedBuilder):
BUILDER_CONFIGS = [
CevalConfig(
name=task_name,
)
for task_name in task_list
]
def _info(self):
features = datasets.Features(
{
"id": datasets.Value("int32"),
"question": datasets.Value("string"),
"A": datasets.Value("string"),
"B": datasets.Value("string"),
"C": datasets.Value("string"),
"D": datasets.Value("string"),
"answer": datasets.Value("string"),
"explanation": datasets.Value("string"),
}
)
return datasets.DatasetInfo(
description=_DESCRIPTION,
features=features,
homepage=_HOMEPAGE,
license=_LICENSE,
citation=_CITATION,
)
def _split_generators(self, dl_manager):
data_dir = dl_manager.download_and_extract(_URL)
task_name = self.config.name
return [
datasets.SplitGenerator(
name=datasets.Split.TEST,
gen_kwargs={
"filepath": os.path.join(data_dir, "test", f"{task_name}_test.csv"),
},
),
datasets.SplitGenerator(
name=datasets.Split.VALIDATION,
gen_kwargs={
"filepath": os.path.join(data_dir, "val", f"{task_name}_val.csv"),
},
),
datasets.SplitGenerator(
name=datasets.Split.TRAIN,
gen_kwargs={
"filepath": os.path.join(data_dir, "dev", f"{task_name}_dev.csv"),
},
),
]
def _generate_examples(self, filepath):
df = pd.read_csv(filepath, encoding="utf-8")
for i, instance in enumerate(df.to_dict(orient="records")):
if "answer" not in instance.keys():
instance["answer"] = ""
if "explanation" not in instance.keys():
instance["explanation"] = ""
yield i, instance
@@ -0,0 +1,210 @@
{
"accountant": {
"name": "注册会计师",
"category": "Other"
},
"advanced_mathematics": {
"name": "高等数学",
"category": "STEM"
},
"art_studies": {
"name": "艺术学",
"category": "Humanities"
},
"basic_medicine": {
"name": "基础医学",
"category": "Other"
},
"business_administration": {
"name": "工商管理",
"category": "Social Sciences"
},
"chinese_language_and_literature": {
"name": "中国语言文学",
"category": "Humanities"
},
"civil_servant": {
"name": "公务员",
"category": "Other"
},
"clinical_medicine": {
"name": "临床医学",
"category": "Other"
},
"college_chemistry": {
"name": "大学化学",
"category": "STEM"
},
"college_economics": {
"name": "大学经济学",
"category": "Social Sciences"
},
"college_physics": {
"name": "大学物理",
"category": "STEM"
},
"college_programming": {
"name": "大学编程",
"category": "STEM"
},
"computer_architecture": {
"name": "计算机组成",
"category": "STEM"
},
"computer_network": {
"name": "计算机网络",
"category": "STEM"
},
"discrete_mathematics": {
"name": "离散数学",
"category": "STEM"
},
"education_science": {
"name": "教育学",
"category": "Social Sciences"
},
"electrical_engineer": {
"name": "注册电气工程师",
"category": "STEM"
},
"environmental_impact_assessment_engineer": {
"name": "环境影响评价工程师",
"category": "Other"
},
"fire_engineer": {
"name": "注册消防工程师",
"category": "Other"
},
"high_school_biology": {
"name": "高中生物",
"category": "STEM"
},
"high_school_chemistry": {
"name": "高中化学",
"category": "STEM"
},
"high_school_chinese": {
"name": "高中语文",
"category": "Humanities"
},
"high_school_geography": {
"name": "高中地理",
"category": "Social Sciences"
},
"high_school_history": {
"name": "高中历史",
"category": "Humanities"
},
"high_school_mathematics": {
"name": "高中数学",
"category": "STEM"
},
"high_school_physics": {
"name": "高中物理",
"category": "STEM"
},
"high_school_politics": {
"name": "高中政治",
"category": "Social Sciences"
},
"ideological_and_moral_cultivation": {
"name": "思想道德修养与法律基础",
"category": "Humanities"
},
"law": {
"name": "法学",
"category": "Humanities"
},
"legal_professional": {
"name": "法律职业资格",
"category": "Humanities"
},
"logic": {
"name": "逻辑学",
"category": "Humanities"
},
"mao_zedong_thought": {
"name": "毛泽东思想和中国特色社会主义理论体系概论",
"category": "Social Sciences"
},
"marxism": {
"name": "马克思主义基本原理",
"category": "Social Sciences"
},
"metrology_engineer": {
"name": "注册计量师",
"category": "STEM"
},
"middle_school_biology": {
"name": "初中生物",
"category": "STEM"
},
"middle_school_chemistry": {
"name": "初中化学",
"category": "STEM"
},
"middle_school_geography": {
"name": "初中地理",
"category": "Social Sciences"
},
"middle_school_history": {
"name": "初中历史",
"category": "Humanities"
},
"middle_school_mathematics": {
"name": "初中数学",
"category": "STEM"
},
"middle_school_physics": {
"name": "初中物理",
"category": "STEM"
},
"middle_school_politics": {
"name": "初中政治",
"category": "Social Sciences"
},
"modern_chinese_history": {
"name": "近代史纲要",
"category": "Humanities"
},
"operating_system": {
"name": "操作系统",
"category": "STEM"
},
"physician": {
"name": "医师资格",
"category": "Other"
},
"plant_protection": {
"name": "植物保护",
"category": "Other"
},
"probability_and_statistics": {
"name": "概率统计",
"category": "STEM"
},
"professional_tour_guide": {
"name": "导游资格",
"category": "Humanities"
},
"sports_science": {
"name": "体育学",
"category": "Other"
},
"tax_accountant": {
"name": "税务师",
"category": "Other"
},
"teacher_qualification": {
"name": "教师资格",
"category": "Social Sciences"
},
"urban_and_rural_planner": {
"name": "注册城乡规划师",
"category": "Other"
},
"veterinary_medicine": {
"name": "兽医学",
"category": "STEM"
}
}
@@ -0,0 +1,168 @@
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import datasets
import pandas as pd
_CITATION = """\
@article{li2023cmmlu,
title={CMMLU: Measuring massive multitask language understanding in Chinese},
author={Haonan Li and Yixuan Zhang and Fajri Koto and Yifei Yang and Hai Zhao and Yeyun Gong and Nan Duan and Timothy Baldwin},
journal={arXiv preprint arXiv:2306.09212},
year={2023}
}
"""
_DESCRIPTION = """\
CMMLU is a comprehensive Chinese assessment suite specifically designed to evaluate the advanced knowledge and reasoning abilities of LLMs within the Chinese language and cultural context.
"""
_HOMEPAGE = "https://github.com/haonan-li/CMMLU"
_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"
_URL = "cmmlu.zip"
task_list = [
"agronomy",
"anatomy",
"ancient_chinese",
"arts",
"astronomy",
"business_ethics",
"chinese_civil_service_exam",
"chinese_driving_rule",
"chinese_food_culture",
"chinese_foreign_policy",
"chinese_history",
"chinese_literature",
"chinese_teacher_qualification",
"clinical_knowledge",
"college_actuarial_science",
"college_education",
"college_engineering_hydrology",
"college_law",
"college_mathematics",
"college_medical_statistics",
"college_medicine",
"computer_science",
"computer_security",
"conceptual_physics",
"construction_project_management",
"economics",
"education",
"electrical_engineering",
"elementary_chinese",
"elementary_commonsense",
"elementary_information_and_technology",
"elementary_mathematics",
"ethnology",
"food_science",
"genetics",
"global_facts",
"high_school_biology",
"high_school_chemistry",
"high_school_geography",
"high_school_mathematics",
"high_school_physics",
"high_school_politics",
"human_sexuality",
"international_law",
"journalism",
"jurisprudence",
"legal_and_moral_basis",
"logical",
"machine_learning",
"management",
"marketing",
"marxist_theory",
"modern_chinese",
"nutrition",
"philosophy",
"professional_accounting",
"professional_law",
"professional_medicine",
"professional_psychology",
"public_relations",
"security_study",
"sociology",
"sports_science",
"traditional_chinese_medicine",
"virology",
"world_history",
"world_religions",
]
class CMMLUConfig(datasets.BuilderConfig):
def __init__(self, **kwargs):
super().__init__(version=datasets.Version("1.0.1"), **kwargs)
class CMMLU(datasets.GeneratorBasedBuilder):
BUILDER_CONFIGS = [
CMMLUConfig(
name=task_name,
)
for task_name in task_list
]
def _info(self):
features = datasets.Features(
{
"question": datasets.Value("string"),
"A": datasets.Value("string"),
"B": datasets.Value("string"),
"C": datasets.Value("string"),
"D": datasets.Value("string"),
"answer": datasets.Value("string"),
}
)
return datasets.DatasetInfo(
description=_DESCRIPTION,
features=features,
homepage=_HOMEPAGE,
license=_LICENSE,
citation=_CITATION,
)
def _split_generators(self, dl_manager):
data_dir = dl_manager.download_and_extract(_URL)
task_name = self.config.name
return [
datasets.SplitGenerator(
name=datasets.Split.TEST,
gen_kwargs={
"filepath": os.path.join(data_dir, f"test/{task_name}.csv"),
},
),
datasets.SplitGenerator(
name=datasets.Split.TRAIN,
gen_kwargs={
"filepath": os.path.join(data_dir, f"dev/{task_name}.csv"),
},
),
]
def _generate_examples(self, filepath):
df = pd.read_csv(filepath, header=0, index_col=0, encoding="utf-8")
for i, instance in enumerate(df.to_dict(orient="records")):
question = instance.pop("Question", "")
answer = instance.pop("Answer", "")
instance["question"] = question
instance["answer"] = answer
yield i, instance
@@ -0,0 +1,270 @@
{
"agronomy": {
"name": "农学",
"category": "Other"
},
"anatomy": {
"name": "解剖学",
"category": "STEM"
},
"ancient_chinese": {
"name": "古汉语",
"category": "Social Sciences"
},
"arts": {
"name": "艺术学",
"category": "Humanities"
},
"astronomy": {
"name": "天文学",
"category": "STEM"
},
"business_ethics": {
"name": "商业伦理",
"category": "Social Sciences"
},
"chinese_civil_service_exam": {
"name": "中国公务员考试",
"category": "Social Sciences"
},
"chinese_driving_rule": {
"name": "中国驾驶规则",
"category": "Other"
},
"chinese_food_culture": {
"name": "中国饮食文化",
"category": "Social Sciences"
},
"chinese_foreign_policy": {
"name": "中国外交政策",
"category": "Social Sciences"
},
"chinese_history": {
"name": "中国历史",
"category": "Humanities"
},
"chinese_literature": {
"name": "中国文学",
"category": "Humanities"
},
"chinese_teacher_qualification": {
"name": "中国教师资格",
"category": "Social Sciences"
},
"college_actuarial_science": {
"name": "大学精算学",
"category": "STEM"
},
"college_education": {
"name": "大学教育学",
"category": "Social Sciences"
},
"college_engineering_hydrology": {
"name": "大学工程水文学",
"category": "STEM"
},
"college_law": {
"name": "大学法律",
"category": "Humanities"
},
"college_mathematics": {
"name": "大学数学",
"category": "STEM"
},
"college_medical_statistics": {
"name": "大学医学统计",
"category": "STEM"
},
"clinical_knowledge": {
"name": "临床知识",
"category": "Other"
},
"college_medicine": {
"name": "大学医学",
"category": "Other"
},
"computer_science": {
"name": "计算机科学",
"category": "STEM"
},
"computer_security": {
"name": "计算机安全",
"category": "Other"
},
"conceptual_physics": {
"name": "概念物理学",
"category": "STEM"
},
"construction_project_management": {
"name": "建设工程管理",
"category": "Other"
},
"economics": {
"name": "经济学",
"category": "Social Sciences"
},
"education": {
"name": "教育学",
"category": "Social Sciences"
},
"elementary_chinese": {
"name": "小学语文",
"category": "Social Sciences"
},
"elementary_commonsense": {
"name": "小学常识",
"category": "Other"
},
"elementary_information_and_technology": {
"name": "小学信息技术",
"category": "Other"
},
"electrical_engineering": {
"name": "电气工程",
"category": "STEM"
},
"elementary_mathematics": {
"name": "初等数学",
"category": "STEM"
},
"ethnology": {
"name": "民族学",
"category": "Social Sciences"
},
"food_science": {
"name": "食品科学",
"category": "Other"
},
"genetics": {
"name": "遗传学",
"category": "STEM"
},
"global_facts": {
"name": "全球事实",
"category": "Humanities"
},
"high_school_biology": {
"name": "高中生物",
"category": "STEM"
},
"high_school_chemistry": {
"name": "高中化学",
"category": "STEM"
},
"high_school_geography": {
"name": "高中地理",
"category": "Social Sciences"
},
"high_school_mathematics": {
"name": "高中数学",
"category": "STEM"
},
"high_school_physics": {
"name": "高中物理学",
"category": "STEM"
},
"high_school_politics": {
"name": "高中政治",
"category": "Social Sciences"
},
"human_sexuality": {
"name": "人类性行为",
"category": "Other"
},
"international_law": {
"name": "国际法学",
"category": "Humanities"
},
"journalism": {
"name": "新闻学",
"category": "Social Sciences"
},
"jurisprudence": {
"name": "法理学",
"category": "Humanities"
},
"legal_and_moral_basis": {
"name": "法律与道德基础",
"category": "Other"
},
"logical": {
"name": "逻辑学",
"category": "Humanities"
},
"machine_learning": {
"name": "机器学习",
"category": "STEM"
},
"management": {
"name": "管理学",
"category": "Social Sciences"
},
"marketing": {
"name": "市场营销",
"category": "Social Sciences"
},
"marxist_theory": {
"name": "马克思主义理论",
"category": "Humanities"
},
"modern_chinese": {
"name": "现代汉语",
"category": "Social Sciences"
},
"nutrition": {
"name": "营养学",
"category": "Other"
},
"philosophy": {
"name": "哲学",
"category": "Humanities"
},
"professional_accounting": {
"name": "专业会计",
"category": "Social Sciences"
},
"professional_law": {
"name": "专业法学",
"category": "Humanities"
},
"professional_medicine": {
"name": "专业医学",
"category": "Other"
},
"professional_psychology": {
"name": "专业心理学",
"category": "Social Sciences"
},
"public_relations": {
"name": "公共关系",
"category": "Social Sciences"
},
"security_study": {
"name": "安全研究",
"category": "Social Sciences"
},
"sociology": {
"name": "社会学",
"category": "Social Sciences"
},
"sports_science": {
"name": "体育学",
"category": "Other"
},
"traditional_chinese_medicine": {
"name": "中医中药",
"category": "Other"
},
"virology": {
"name": "病毒学",
"category": "STEM"
},
"world_history": {
"name": "世界历史",
"category": "Humanities"
},
"world_religions": {
"name": "世界宗教",
"category": "Humanities"
}
}
@@ -0,0 +1,230 @@
{
"abstract_algebra": {
"name": "abstract algebra",
"category": "STEM"
},
"anatomy": {
"name": "anatomy",
"category": "Other"
},
"astronomy": {
"name": "astronomy",
"category": "STEM"
},
"business_ethics": {
"name": "business ethics",
"category": "Other"
},
"clinical_knowledge": {
"name": "clinical knowledge",
"category": "Other"
},
"college_biology": {
"name": "college biology",
"category": "STEM"
},
"college_chemistry": {
"name": "college chemistry",
"category": "STEM"
},
"college_computer_science": {
"name": "college computer science",
"category": "STEM"
},
"college_mathematics": {
"name": "college mathematics",
"category": "STEM"
},
"college_medicine": {
"name": "college medicine",
"category": "Other"
},
"college_physics": {
"name": "college physics",
"category": "STEM"
},
"computer_security": {
"name": "computer security",
"category": "STEM"
},
"conceptual_physics": {
"name": "conceptual physics",
"category": "STEM"
},
"econometrics": {
"name": "econometrics",
"category": "Social Sciences"
},
"electrical_engineering": {
"name": "electrical engineering",
"category": "STEM"
},
"elementary_mathematics": {
"name": "elementary mathematics",
"category": "STEM"
},
"formal_logic": {
"name": "formal logic",
"category": "Humanities"
},
"global_facts": {
"name": "global facts",
"category": "Other"
},
"high_school_biology": {
"name": "high school biology",
"category": "STEM"
},
"high_school_chemistry": {
"name": "high school chemistry",
"category": "STEM"
},
"high_school_computer_science": {
"name": "high school computer science",
"category": "STEM"
},
"high_school_european_history": {
"name": "high school european history",
"category": "Humanities"
},
"high_school_geography": {
"name": "high school geography",
"category": "Social Sciences"
},
"high_school_government_and_politics": {
"name": "high school government and politics",
"category": "Social Sciences"
},
"high_school_macroeconomics": {
"name": "high school macroeconomics",
"category": "Social Sciences"
},
"high_school_mathematics": {
"name": "high school mathematics",
"category": "STEM"
},
"high_school_microeconomics": {
"name": "high school microeconomics",
"category": "Social Sciences"
},
"high_school_physics": {
"name": "high school physics",
"category": "STEM"
},
"high_school_psychology": {
"name": "high school psychology",
"category": "Social Sciences"
},
"high_school_statistics": {
"name": "high school statistics",
"category": "STEM"
},
"high_school_us_history": {
"name": "high school us history",
"category": "Humanities"
},
"high_school_world_history": {
"name": "high school world history",
"category": "Humanities"
},
"human_aging": {
"name": "human aging",
"category": "Other"
},
"human_sexuality": {
"name": "human sexuality",
"category": "Social Sciences"
},
"international_law": {
"name": "international law",
"category": "Humanities"
},
"jurisprudence": {
"name": "jurisprudence",
"category": "Humanities"
},
"logical_fallacies": {
"name": "logical fallacies",
"category": "Humanities"
},
"machine_learning": {
"name": "machine learning",
"category": "STEM"
},
"management": {
"name": "management",
"category": "Other"
},
"marketing": {
"name": "marketing",
"category": "Other"
},
"medical_genetics": {
"name": "medical genetics",
"category": "Other"
},
"miscellaneous": {
"name": "miscellaneous",
"category": "Other"
},
"moral_disputes": {
"name": "moral disputes",
"category": "Humanities"
},
"moral_scenarios": {
"name": "moral scenarios",
"category": "Humanities"
},
"nutrition": {
"name": "nutrition",
"category": "Other"
},
"philosophy": {
"name": "philosophy",
"category": "Humanities"
},
"prehistory": {
"name": "prehistory",
"category": "Humanities"
},
"professional_accounting": {
"name": "professional accounting",
"category": "Other"
},
"professional_law": {
"name": "professional law",
"category": "Humanities"
},
"professional_medicine": {
"name": "professional medicine",
"category": "Other"
},
"professional_psychology": {
"name": "professional psychology",
"category": "Social Sciences"
},
"public_relations": {
"name": "public relations",
"category": "Social Sciences"
},
"security_studies": {
"name": "security studies",
"category": "Social Sciences"
},
"sociology": {
"name": "sociology",
"category": "Social Sciences"
},
"us_foreign_policy": {
"name": "us foreign policy",
"category": "Social Sciences"
},
"virology": {
"name": "virology",
"category": "Other"
},
"world_religions": {
"name": "world religions",
"category": "Humanities"
}
}
@@ -0,0 +1,161 @@
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import datasets
import pandas as pd
_CITATION = """\
@article{hendryckstest2021,
title={Measuring Massive Multitask Language Understanding},
author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
journal={Proceedings of the International Conference on Learning Representations (ICLR)},
year={2021}
}
"""
_DESCRIPTION = """\
Measuring Massive Multitask Language Understanding by Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt (ICLR 2021).
"""
_HOMEPAGE = "https://github.com/hendrycks/test"
_LICENSE = "MIT"
_URL = "mmlu.zip"
task_list = [
"high_school_european_history",
"business_ethics",
"clinical_knowledge",
"medical_genetics",
"high_school_us_history",
"high_school_physics",
"high_school_world_history",
"virology",
"high_school_microeconomics",
"econometrics",
"college_computer_science",
"high_school_biology",
"abstract_algebra",
"professional_accounting",
"philosophy",
"professional_medicine",
"nutrition",
"global_facts",
"machine_learning",
"security_studies",
"public_relations",
"professional_psychology",
"prehistory",
"anatomy",
"human_sexuality",
"college_medicine",
"high_school_government_and_politics",
"college_chemistry",
"logical_fallacies",
"high_school_geography",
"elementary_mathematics",
"human_aging",
"college_mathematics",
"high_school_psychology",
"formal_logic",
"high_school_statistics",
"international_law",
"high_school_mathematics",
"high_school_computer_science",
"conceptual_physics",
"miscellaneous",
"high_school_chemistry",
"marketing",
"professional_law",
"management",
"college_physics",
"jurisprudence",
"world_religions",
"sociology",
"us_foreign_policy",
"high_school_macroeconomics",
"computer_security",
"moral_scenarios",
"moral_disputes",
"electrical_engineering",
"astronomy",
"college_biology",
]
class MMLUConfig(datasets.BuilderConfig):
def __init__(self, **kwargs):
super().__init__(version=datasets.Version("1.0.0"), **kwargs)
class MMLU(datasets.GeneratorBasedBuilder):
BUILDER_CONFIGS = [
MMLUConfig(
name=task_name,
)
for task_name in task_list
]
def _info(self):
features = datasets.Features(
{
"question": datasets.Value("string"),
"A": datasets.Value("string"),
"B": datasets.Value("string"),
"C": datasets.Value("string"),
"D": datasets.Value("string"),
"answer": datasets.Value("string"),
}
)
return datasets.DatasetInfo(
description=_DESCRIPTION,
features=features,
homepage=_HOMEPAGE,
license=_LICENSE,
citation=_CITATION,
)
def _split_generators(self, dl_manager):
data_dir = dl_manager.download_and_extract(_URL)
task_name = self.config.name
return [
datasets.SplitGenerator(
name=datasets.Split.TEST,
gen_kwargs={
"filepath": os.path.join(data_dir, "data", "test", f"{task_name}_test.csv"),
},
),
datasets.SplitGenerator(
name=datasets.Split.VALIDATION,
gen_kwargs={
"filepath": os.path.join(data_dir, "data", "val", f"{task_name}_val.csv"),
},
),
datasets.SplitGenerator(
name=datasets.Split.TRAIN,
gen_kwargs={
"filepath": os.path.join(data_dir, "data", "dev", f"{task_name}_dev.csv"),
},
),
]
def _generate_examples(self, filepath):
df = pd.read_csv(filepath, header=None)
df.columns = ["question", "A", "B", "C", "D", "answer"]
yield from enumerate(df.to_dict(orient="records"))
Binary file not shown.