# hanjr/mind7b-eval

 
			
							import os
import sys
import json
from model_url import get_model_resp, get_url_tokenizer, process_inputstr, json_out_stream
import pandas as pd


# AGIEval dataset-name groups. Each name doubles as the stem of a
# ``<name>.jsonl`` data file and as a key into ``_intro``.

# The eight multiple-choice Gaokao subjects; reused below so the names are
# spelled out only once.
agieval_gaokao_sets = [
    'gaokao-chinese', 'gaokao-english', 'gaokao-geography', 'gaokao-history',
    'gaokao-biology', 'gaokao-chemistry', 'gaokao-physics', 'gaokao-mathqa',
]

# Groups by question format.
agieval_single_choice_sets = agieval_gaokao_sets + [
    'logiqa-zh',
    'lsat-ar', 'lsat-lr', 'lsat-rc',
    'logiqa-en',
    'sat-math', 'sat-en', 'sat-en-without-passage',
    'aqua-rat',
]
agieval_multiple_choices_sets = ['jec-qa-kd', 'jec-qa-ca']
agieval_cloze_sets = ['gaokao-mathcloze', 'math']

# Groups by prompt language: members of the Chinese group get the Chinese
# answer hint in run_predict, everything else gets the English one.
agieval_chinese_sets = agieval_gaokao_sets + [
    'logiqa-zh',
    'gaokao-mathcloze',
    'jec-qa-kd', 'jec-qa-ca',
]
agieval_english_sets = [
    'lsat-ar', 'lsat-lr', 'lsat-rc',
    'logiqa-en',
    'sat-math', 'sat-en', 'sat-en-without-passage',
    'aqua-rat',
    'math',
]

_intro = {
    'gaokao-chinese':
        '以下是一道中国高考语文选择题，请选择正确的答案。',
    'gaokao-english':
        '以下是一道中国高考英语选择题，请选择正确的答案。',
    'gaokao-geography':
        '以下是一道中国高考地理选择题，请选择正确的答案。',
    'gaokao-history':
        '以下是一道中国高考历史选择题，请选择正确的答案。',
    'gaokao-biology':
        '以下是一道中国高考生物选择题，请选择正确的答案。',
    'gaokao-chemistry':
        '以下是一道中国高考化学选择题，请选择正确的答案。',
    'gaokao-physics':
        '以下是一道中国高考物理选择题，请选择正确的答案。',
    'gaokao-mathqa':
        '以下是一道中国高考数学选择题，请选择正确的答案。',
    'logiqa-zh':
        '以下是一道中国公务员考试题，请选择正确的答案。',
    'lsat-ar':
        'The following is a LSAT Analytical Reasoning question. Please select the correct answer.',
    'lsat-lr':
        'The following is a LSAT Logical Reasoning question. Please select the correct answer.',
    'lsat-rc':
        'The following is a LSAT Reading Comprehension question. Please select the correct answer.',
    'logiqa-en':
        'The following is a Logic Reasoning question. Please select the correct answer.',
    'sat-math':
        'The following is a SAT Math question. Please select the correct answer.',
    'sat-en':
        'The following is a SAT English question. Please select the correct answer.',
    'sat-en-without-passage':
        'The following is a SAT English question. Please select the correct answer.',
    'aqua-rat':
        'The following is a AQUA-RAT question. Please select the correct answer.',
    'jec-qa-kd':
        '以下是一道中国司法考试基础知识题，请选择正确的答案。',
    'jec-qa-ca':
        '以下是一道中国司法考试案例分析题，请选择正确的答案。',
    'gaokao-mathcloze':
        '以下是一道中国高考数学填空题，请填入正确的答案。',
    'math':
        'The following is a Math question. Please select the correct answer.',
}


def read_data(file):
    """Load a JSON-Lines dataset file into a list of flat example dicts.

    Each non-blank line of ``file`` is parsed as one JSON object. The
    ``passage`` (if any) is prepended to ``question``, the ``options`` list
    (if any) is joined with newlines, and the gold answer is taken from
    ``label``, falling back to ``answer`` when ``label`` is missing or empty.

    Args:
        file: Path of the UTF-8 encoded ``.jsonl`` file to read.

    Returns:
        A list of ``{'question': str, 'options': str, 'label': ...}`` dicts
        in file order. ``label`` keeps whatever JSON type the file stored.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``question``, or has neither a usable
            ``label`` nor an ``answer`` key.
    """
    data = []
    with open(file, encoding='utf-8') as f:
        # Stream line by line instead of materializing every parsed record
        # in an intermediate list first.
        for line in f:
            line = line.strip()
            if not line:
                # Blank or trailing empty lines are common in hand-edited
                # JSONL files and would otherwise make json.loads fail.
                continue
            record = json.loads(line)
            # passage/options/label are optional: treat an absent key the
            # same way as an explicit null or empty value.
            passage = record.get('passage') or ''
            raw_options = record.get('options')
            options = '\n'.join(raw_options) if raw_options else ''
            label = record.get('label') or record['answer']
            data.append({
                'question': passage + record['question'],
                'options': options,
                'label': label,
            })
    return data
def _load_examples(data_dir, name, hint, include_options):
    """Format every record of one dataset as an answered prompt.

    Args:
        data_dir: Directory that contains ``<name>.jsonl``.
        name: Dataset name; also used as the key into ``_intro``.
        hint: Answer prefix written directly before the label.
        include_options: When true, the options text gets its own line
            between the question and the hint; when false it is left out.

    Returns:
        A list of ``{"text": ..., "src": "agieval"}`` dicts, one per record
        returned by ``read_data``, in the same order.
    """
    records = read_data(os.path.join(data_dir, name + '.jsonl'))
    intro = _intro[name]
    examples = []
    for record in records:
        question, label = record['question'], record['label']
        if include_options:
            text = f"{intro}\n{question}\n{record['options']}\n{hint}{label}"
        else:
            text = f"{intro}\n{question}\n{hint}{label}"
        examples.append({"text": text, "src": "agieval"})
    return examples


def run_predict(url, log_path, few_shot=True,
                data_dir="task_dataset/AGIEval/data/v1"):
    """Build answered AGIEval prompts and write them out per language.

    Every dataset named in the single-choice, multiple-choice and cloze
    groups is read from ``data_dir`` and each record is rendered as
    ``intro / question / [options] / hint + label``. Examples from datasets
    listed in ``agieval_chinese_sets`` are collected separately from all
    others, and the two collections are handed to ``json_out_stream`` with
    the paths ``<log_path>/agieval-zh.json`` and ``<log_path>/agieval-en.json``.

    Args:
        url: Not used by this function; kept so the signature is unchanged.
        log_path: Directory the two output files are written under.
        few_shot: Not used by this function; kept so the signature is
            unchanged.
        data_dir: Directory holding the ``<dataset>.jsonl`` input files.

    Returns:
        None.
    """
    # NOTE(review): the returned tokenizer was never used here. The call is
    # kept in case get_url_tokenizer has side effects other code relies on —
    # confirm and drop it if it has none.
    get_url_tokenizer()

    # One row per question format:
    # (dataset names, Chinese answer hint, whether the options line is shown)
    groups = (
        (agieval_single_choice_sets, '答案是：', True),
        # NOTE(review): this hint carries a trailing space that the other two
        # Chinese hints lack; preserved as-is — confirm which is intended.
        (agieval_multiple_choices_sets, '答案是： ', True),
        # Cloze questions have no options to display.
        (agieval_cloze_sets, '答案是：', False),
    )

    results_zh = []
    results_en = []
    for names, zh_hint, include_options in groups:
        for name in names:
            if name in agieval_chinese_sets:
                hint, bucket = zh_hint, results_zh
            else:
                hint, bucket = 'The answer is ', results_en
            bucket.extend(
                _load_examples(data_dir, name, hint, include_options))

    json_out_stream(os.path.join(log_path, 'agieval-zh.json'), results_zh)
    json_out_stream(os.path.join(log_path, 'agieval-en.json'), results_en)