|
- import os
- import sys
- import json
- from model_url import get_model_resp, get_url_tokenizer, process_inputstr, json_out_stream
- import pandas as pd
-
-
# Dataset-name groupings for the AGIEval benchmark files. Each name is the
# stem of a ``<name>.jsonl`` data file.

# Chinese college-entrance-exam (gaokao) multiple-choice subjects.
agieval_gaokao_sets = [
    'gaokao-chinese',
    'gaokao-english',
    'gaokao-geography',
    'gaokao-history',
    'gaokao-biology',
    'gaokao-chemistry',
    'gaokao-physics',
    'gaokao-mathqa',
]

# Datasets whose label may list several options.
agieval_multiple_choices_sets = ['jec-qa-kd', 'jec-qa-ca']

# Fill-in-the-blank datasets (no option list in the prompt).
agieval_cloze_sets = ['gaokao-mathcloze', 'math']

# Datasets with exactly one correct option per question.
agieval_single_choice_sets = agieval_gaokao_sets + [
    'logiqa-zh',
    'lsat-ar',
    'lsat-lr',
    'lsat-rc',
    'logiqa-en',
    'sat-math',
    'sat-en',
    'sat-en-without-passage',
    'aqua-rat',
]

# Datasets written in Chinese (prompted with a Chinese answer hint).
agieval_chinese_sets = agieval_gaokao_sets + [
    'logiqa-zh',
    'gaokao-mathcloze',
    *agieval_multiple_choices_sets,
]

# Datasets written in English (prompted with an English answer hint).
agieval_english_sets = [
    'lsat-ar',
    'lsat-lr',
    'lsat-rc',
    'logiqa-en',
    'sat-math',
    'sat-en',
    'sat-en-without-passage',
    'aqua-rat',
    'math',
]
-
- _intro = {
- 'gaokao-chinese':
- '以下是一道中国高考语文选择题,请选择正确的答案。',
- 'gaokao-english':
- '以下是一道中国高考英语选择题,请选择正确的答案。',
- 'gaokao-geography':
- '以下是一道中国高考地理选择题,请选择正确的答案。',
- 'gaokao-history':
- '以下是一道中国高考历史选择题,请选择正确的答案。',
- 'gaokao-biology':
- '以下是一道中国高考生物选择题,请选择正确的答案。',
- 'gaokao-chemistry':
- '以下是一道中国高考化学选择题,请选择正确的答案。',
- 'gaokao-physics':
- '以下是一道中国高考物理选择题,请选择正确的答案。',
- 'gaokao-mathqa':
- '以下是一道中国高考数学选择题,请选择正确的答案。',
- 'logiqa-zh':
- '以下是一道中国公务员考试题,请选择正确的答案。',
- 'lsat-ar':
- 'The following is a LSAT Analytical Reasoning question. Please select the correct answer.',
- 'lsat-lr':
- 'The following is a LSAT Logical Reasoning question. Please select the correct answer.',
- 'lsat-rc':
- 'The following is a LSAT Reading Comprehension question. Please select the correct answer.',
- 'logiqa-en':
- 'The following is a Logic Reasoning question. Please select the correct answer.',
- 'sat-math':
- 'The following is a SAT Math question. Please select the correct answer.',
- 'sat-en':
- 'The following is a SAT English question. Please select the correct answer.',
- 'sat-en-without-passage':
- 'The following is a SAT English question. Please select the correct answer.',
- 'aqua-rat':
- 'The following is a AQUA-RAT question. Please select the correct answer.',
- 'jec-qa-kd':
- '以下是一道中国司法考试基础知识题,请选择正确的答案。',
- 'jec-qa-ca':
- '以下是一道中国司法考试案例分析题,请选择正确的答案。',
- 'gaokao-mathcloze':
- '以下是一道中国高考数学填空题,请填入正确的答案。',
- 'math':
- 'The following is a Math question. Please select the correct answer.',
- }
-
-
def read_data(file):
    """Load a JSON-lines dataset file into a list of flat prompt records.

    Every non-blank line is parsed as one JSON object. Blank lines are
    skipped, and the ``passage``, ``options``, ``label`` and ``answer``
    fields may be missing or null.

    Args:
        file: Path of the UTF-8 encoded ``.jsonl`` file.

    Returns:
        A list of dicts, one per record, with the keys:
        ``question`` (the passage, if any, directly followed by the question
        text), ``options`` (the option strings joined with newlines, or
        ``''`` when there are none) and ``label`` (the record's ``label``,
        falling back to ``answer`` when ``label`` is missing or falsy).

    Raises:
        KeyError: If a record has no ``question`` field.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    with open(file, encoding='utf-8') as f:
        # Stream line by line so the raw parsed file is never held in full.
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank separator/trailing lines.
                continue
            record = json.loads(line)
            passage = record.get('passage') or ''
            options = '\n'.join(record.get('options') or [])
            label = record.get('label') or record.get('answer')
            data.append({
                'question': passage + record['question'],
                'options': options,
                'label': label,
            })
    return data
def _format_examples(name, hint, with_options, data_dir):
    """Return the prompt records for one dataset, each ending in hint + label."""
    intro = _intro[name]
    examples = []
    for item in read_data(os.path.join(data_dir, name + '.jsonl')):
        question, options, label = item['question'], item['options'], item['label']
        if with_options:
            text = f"{intro}\n{question}\n{options}\n{hint}{label}"
        else:
            text = f"{intro}\n{question}\n{hint}{label}"
        examples.append({"text": text, "src": "agieval"})
    return examples


def run_predict(url, log_path, few_shot = True):
    """Build answered AGIEval prompts and write them out, split by language.

    For every dataset listed in ``agieval_single_choice_sets``,
    ``agieval_multiple_choices_sets`` and ``agieval_cloze_sets`` (in that
    order), ``<name>.jsonl`` is read from ``task_dataset/AGIEval/data/v1``
    and each record is rendered as: intro sentence, question, option list
    (omitted for cloze sets), then an answer hint immediately followed by
    the label. Datasets in ``agieval_chinese_sets`` go to the Chinese
    output; all others go to the English output.

    Args:
        url: Unused by this function; kept for interface compatibility.
        log_path: Directory prefix for the output files. The results are
            handed to ``json_out_stream`` with the paths
            ``log_path + '/agieval-zh.json'`` and
            ``log_path + '/agieval-en.json'``.
        few_shot: Unused by this function; kept for interface compatibility.
    """
    # NOTE(review): the returned tokenizer was never used here; the call is
    # kept only in case it has side effects - confirm and drop if not.
    get_url_tokenizer()

    data_dir = "task_dataset/AGIEval/data/v1"
    en_hint = 'The answer is '
    # (dataset names, Chinese answer hint, include the option list?)
    # The multiple-choice Chinese hint carries a trailing space, unlike the
    # other two groups; this is preserved as-is.
    task_groups = (
        (agieval_single_choice_sets, '答案是:', True),
        (agieval_multiple_choices_sets, '答案是: ', True),
        (agieval_cloze_sets, '答案是:', False),
    )

    results_zh = []
    results_en = []
    for names, zh_hint, with_options in task_groups:
        for name in names:
            if name in agieval_chinese_sets:
                results_zh.extend(
                    _format_examples(name, zh_hint, with_options, data_dir))
            else:
                results_en.extend(
                    _format_examples(name, en_hint, with_options, data_dir))

    json_out_stream(log_path + '/agieval-zh.json', results_zh)
    json_out_stream(log_path + '/agieval-en.json', results_en)
|