import os
import json

import numpy as np
import pandas as pd

from model_url import get_model_resp, get_url_tokenizer, process_inputstr


def run_predict(url, log_path, few_shot=True):
    tokenizer = get_url_tokenizer()
    MAIN_DIR = os.path.dirname(os.path.abspath(__file__))
    # Maps the argmax index over the four scored choices back to a letter.
    id_label = {0: "A", 1: "B", 2: "C", 3: "D"}
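
    # Each test question is scored with one logprob query per answer letter:
    # the letter whose appended tokens receive the highest mean logprob from
    # the served model is taken as the prediction.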

    data_dir = os.path.join(MAIN_DIR, "task_dataset", "mmlu")
    test_dir = os.path.join(data_dir, "test")
    test_files = sorted(
        os.path.join(test_dir, name)
        for name in os.listdir(test_dir)
        if name.lower().endswith(".csv")
    )
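    # Assumes the standard MMLU layout: one "<subject>_test.csv" per subject
    # with six columns (question, A, B, C, D, answer) and a matching
    # "<subject>_dev.csv" under dev/. pd.read_csv keeps its default header
    # handling here, so the first row of each file is consumed as a header;
    # the dev files are assumed to retain four usable rows after that.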
    results, acc_all = {}, {}
    for test_file in test_files:
        count = 0
        correct_num = 0
        acc = 0
        # File names follow "<subject>_test.csv"; strip the 9-char suffix.
        class_val = os.path.basename(test_file)[:-9]
        pre = ""
        if few_shot:
            dev_file = os.path.join(data_dir, "dev", class_val + "_dev.csv")
            dev_data = pd.read_csv(dev_file)
            dev_index = [0, 1, 2, 3]
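            # Drawing 4 of 4 without replacement is effectively a shuffle of
            # the dev examples, not a subsample.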
            sample_index = np.random.choice(dev_index, size=4, replace=False)

            example = f"The following are single choice questions (with answers) about {class_val}.\n"
            for i in sample_index:
                question, A, B, C, D, answer = dev_data.iloc[i]
                pre += f"{example}{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:{answer}\n"
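            # Each dev example contributes one block to the prefix:
            #   <header line>
            #   <question>
            #   A. <option> ... D. <option> (one per line)
            #   Answer:<letter>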

        data = pd.read_csv(test_file)
        info = {}

        for idx in range(len(data)):
            count += 1
            example = f"The following are single choice questions (with answers) about {class_val}.\n"
            question, A, B, C, D, answers = data.iloc[idx]
            example = f"{pre}{example}{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:"
            input_str = []
            example_list = []
            for choice in ("A", "B", "C", "D"):
                full_str, context = process_inputstr(choice, example, tokenizer)
                input_str.append(full_str)
                example_list.append(context)

            mask_length_list = []
            input_length_list = []
            for pred, context in zip(input_str, example_list):
                input_length_list.append(len(tokenizer.encode(pred)))
                mask_length_list.append(len(tokenizer.encode(context)))
            model_resp = get_model_resp(url=url, input_str=input_str, tokens_to_generate=0, top_k=1, logprobs=True)
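
            # Each resp_item is expected to hold one logprob per input token
            # after the first (resp_item[j] scores token j + 1), so the -1
            # offsets select exactly the appended answer-letter tokens.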
            return_resp = []
            for resp_item, input_length, mask_length in zip(model_resp, input_length_list, mask_length_list):
                return_resp.append(resp_item[mask_length - 1:input_length - 1])
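
            # Rank the four choices by mean (length-normalized) answer-token
            # logprob and take the best-scoring letter as the prediction.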
            pred_list = [sum(logprobs) / len(logprobs) for logprobs in return_resp]
            answers_pred = id_label[int(np.argmax(pred_list))]
            info[str(idx)] = answers_pred

            if answers_pred == str(answers):
                correct_num += 1
            acc = correct_num / count * 100
            print("=================== acc ========================")
            print("class_val", class_val, "acc", acc)

        results[class_val] = info
        acc_all[class_val] = acc

    print("=================== acc ========================")
    print("mmlu", "fewshot" if few_shot else "zeroshot", "all-acc", acc_all)

    suffix = "fewshot" if few_shot else "zeroshot"
    with open(os.path.join(log_path, f"mmlu_{suffix}_acc.json"), "w", encoding="utf-8") as file:
        json.dump(acc_all, file, ensure_ascii=False)
    with open(os.path.join(log_path, f"mmlu_{suffix}.json"), "w", encoding="utf-8") as file:
        json.dump(results, file, ensure_ascii=False)
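

# Minimal invocation sketch; the endpoint URL and log directory below are
# placeholder assumptions, not values defined by this repo.
if __name__ == "__main__":
    run_predict(url="http://127.0.0.1:5000/api", log_path="./logs", few_shot=True)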