|
- import json
- import csv
- from culIndex import cal_multi
-
# Input attack log (CSV) and the intermediate JSON files written by each step.
filePath = "./log-bert-base-uncased-RTE-BAEGarg2019.csv"
tempPath = "./JSON/bert-base-uncased-RTE-BAEGarg2019-temp.json"
temp1Path = "./JSON/bert-base-uncased-RTE-BAEGarg2019-temp1.json"
temp2Path = "./JSON/bert-base-uncased-RTE-BAEGarg2019-temp2.json"
temp3Path = "./JSON/bert-base-uncased-RTE-BAEGarg2019-temp3.json"
# outputPath = "./JSON/distilbert-base-cased-MRPC-CheckList2020.json"

# Module-level accumulators. Step 1 does not touch them; the (currently
# disabled) step 2 appends to Ori_Sample and Adv_Sample, so they stay defined.
Index = ['Index']
Ori_Sample_TrueLabel = []
Ori_Sample_PredictLabel = []
Ori_Sample_Prediction_TOP1 = []
Ori_Sample = []
Adv_Sample_PredictLabel = []
Adv_Sample_Prediction_TOP1 = []
Adv_Sample = []
result_type = []

# Display names for each binary label: (raw name, English name, Chinese name).
# These are the entailment-task names. Earlier runs swapped in other pairs by
# hand: "Similar"/"Dissimilar" ("相似"/"不相似") and "Positive"/"Negative"
# ("积极"/"消极").
_LABEL_NAMES = {
    1: ("1", "Imply", "蕴含"),
    0: ("0", "Don'tImply", "不蕴含"),
}

# Current_FLAG value for each result-type cell; any other text maps to 2.
_RESULT_FLAGS = {"Failed": 0, "Successful": 1}
_OTHER_RESULT_FLAG = 2


def _label_of(cell):
    """Return 1 when the CSV cell is exactly "1.0", otherwise 0."""
    return 1 if cell == "1.0" else 0


def _top1_entries(label, value):
    """Build the raw, English and Chinese TOP1 lists for one prediction.

    Each list holds a single {"name": ..., "value": ...} dict. The three
    dicts are separate objects that share the same numeric value.
    """
    return tuple([{"name": name, "value": value}] for name in _LABEL_NAMES[label])


def build_record(index, row):
    """Convert one CSV data row into the JSON record written by step 1.

    Cells read from ``row``: 0 = true label, 2 = predicted label on the
    original sample, 3 = score of the original sample, 4 = original text,
    5 = predicted label on the adversarial sample, 6 = score of the
    adversarial sample, 7 = adversarial text, 8 = result type.
    Cell 1 is not used.

    Args:
        index: Value stored under "Index".
        row: Sequence of at least nine string cells.

    Returns:
        A dict whose keys are inserted in the order the output JSON expects.

    Raises:
        IndexError: If ``row`` has fewer than nine cells.
        ValueError: If a score cell cannot be parsed as a float.
    """
    result = row[8]
    true_label = _label_of(row[0])
    ori_label = _label_of(row[2])
    adv_label = _label_of(row[5])

    # The stored value is 1 - score when the original prediction cell equals
    # the true-label cell, and the score itself otherwise.
    # NOTE(review): presumably the logged score is the probability assigned
    # away from the true label - confirm against the tool producing the log.
    ori_score = float(row[3])
    ori_value = 1 - ori_score if row[0] == row[2] else ori_score

    # Same rule for the adversarial sample, keyed on the attack having failed.
    adv_score = float(row[6])
    adv_value = 1 - adv_score if result == "Failed" else adv_score

    _, true_en, true_cn = _LABEL_NAMES[true_label]
    ori_raw, ori_en, ori_cn = _top1_entries(ori_label, ori_value)
    adv_raw, adv_en, adv_cn = _top1_entries(adv_label, adv_value)

    return {
        "Index": index,
        "Current_FLAG": _RESULT_FLAGS.get(result, _OTHER_RESULT_FLAG),
        "Ori_Sample": row[4],
        "Ori_Sample_TrueLabel": true_label,
        "Ori_Sample_TrueLabel_EN": true_en,
        "Ori_Sample_TrueLabel_CN": true_cn,
        "Ori_Sample_PredictLabel": ori_label,
        "Ori_Sample_Prediction_TOP1": ori_raw,
        "Ori_Sample_Prediction_TOP1_EN": ori_en,
        "Ori_Sample_Prediction_TOP1_CN": ori_cn,
        "Adv_Sample": row[7],
        "Adv_Sample_PredictLabel": adv_label,
        "Adv_Sample_Prediction_TOP1": adv_raw,
        "Adv_Sample_Prediction_TOP1_EN": adv_en,
        "Adv_Sample_Prediction_TOP1_CN": adv_cn,
    }


def convert_log(csv_path, json_path):
    """Run step 1: read the CSV log and dump its rows as a JSON list.

    The first CSV row is treated as a header and skipped. Completely empty
    rows are ignored and do not consume an index; every other row becomes
    one record, numbered from 0 in file order.

    Args:
        csv_path: Path of the CSV log to read.
        json_path: Path of the JSON file to write (its directory must exist).

    Returns:
        The list of records that was written to ``json_path``.
    """
    # newline="" is what the csv module asks for; the encoding matches the
    # UTF-8 used for the JSON output. The with-block closes the handle.
    with open(csv_path, "r", encoding="utf-8", newline="") as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # drop the header row, if there is one
        data_rows = (row for row in reader if row)
        records = [build_record(i, row) for i, row in enumerate(data_rows)]

    with open(json_path, 'w', encoding='utf-8') as json_file:
        json.dump(records, json_file, ensure_ascii=False)
    return records


# =======================step1===========================
if __name__ == "__main__":
    jsonList = convert_log(filePath, tempPath)
- # =======================step1===========================
-
-
- # =======================step2===========================
- # clean_Ori_Sample = []
- #
- # with open(tempPath, 'r', encoding='utf8')as fp:
- # jsonData = json.load(fp)
- # for eachJsonDict in jsonData:
- # Ori_Sample.append(eachJsonDict["Ori_Sample"])
- # Adv_Sample.append(eachJsonDict["Adv_Sample"])
- # for each_Ori_Sample in Ori_Sample:
- # each_Ori_Sample = each_Ori_Sample.replace("[[", "")
- # each_Ori_Sample = each_Ori_Sample.replace("]]", "")
- # clean_Ori_Sample.append(each_Ori_Sample)
- # count = 0
- # for each_Ori_Sample in clean_Ori_Sample:
- # jsonData[count]["Ori_Sample"] = each_Ori_Sample
- # count += 1
- # count = 0
- # for each_Adv_Sample in Adv_Sample:
- # Red_mark_position = []
- # mark_position_start = []
- # mark_position_end = []
- # n = each_Adv_Sample.find("[[")
- # m = each_Adv_Sample.find("]]")
- # while n != -1:
- # mark_position_start.append(n)
- # mark_position_end.append(m)
- # n = each_Adv_Sample.find("[[", n + 1)
- # m = each_Adv_Sample.find("]]", m + 1)
- # each_Adv_Sample = each_Adv_Sample.replace("[[", "")
- # each_Adv_Sample = each_Adv_Sample.replace("]]", "")
- # for i in range(len(mark_position_start)):
- # mark_position_start[i] = mark_position_start[i] - i * 4
- # mark_position_end[i] = mark_position_end[i] - i * 4 - 2
- # for j in range(mark_position_start[i], mark_position_end[i]):
- # Red_mark_position.append(j)
- # jsonData[count]["Red_mark_position"] = Red_mark_position
- # jsonData[count]["Adv_Sample"] = each_Adv_Sample
- # count += 1
- #
- # with open(temp1Path, 'w', encoding='utf-8') as json_file:
- # json.dump(jsonData, json_file, ensure_ascii=False)
- # =======================step2===========================
-
-
- # =======================step3===========================
- # with open(temp1Path, 'r', encoding='utf8')as fp:
- # jsonData = json.load(fp)
- # charList = ["~", "`", "!", "@", "#", "$", "%", "^", "&", "*",
- # "(", ")", "-", "_", "=", "+", "[", "{", "]", "}",
- # ";", ":", ",", "<", ".", ">", "?", "|"]
- # oriLength = len(jsonData)
- # charLength = len(charList)
- # for j in range(1, charLength + 1):
- # for i in range(oriLength):
- # jsonData.append(jsonData[i].copy())
- # jsonData[-1]["Red_mark_position"] = jsonData[-1]["Red_mark_position"].copy()
- # Index = i + j * oriLength
- # sampleLength = len(jsonData[-1]["Adv_Sample"])
- # jsonData[-1]["Index"] = Index
- # jsonData[-1]["Adv_Sample"] = jsonData[-1]["Adv_Sample"] + charList[j - 1]
- # jsonData[-1]["Red_mark_position"].append(sampleLength)
- #
- # startIndex = jsonData[-1]["Index"] - 3
- # for j in range(1, charLength + 1):
- # for i in range(oriLength):
- # jsonData.append(jsonData[i].copy())
- # jsonData[-1]["Red_mark_position"] = jsonData[-1]["Red_mark_position"].copy()
- # Index = i + j * oriLength + startIndex
- # jsonData[-1]["Index"] = Index
- # jsonData[-1]["Adv_Sample"] = charList[j - 1] + jsonData[-1]["Adv_Sample"]
- # if jsonData[-1]["Red_mark_position"]:
- # jsonData[-1]["Red_mark_position"].append(-1)
- # for i in range(len(jsonData[-1]["Red_mark_position"])):
- # jsonData[-1]["Red_mark_position"][i] += 1
- #
- # with open(temp2Path, 'w', encoding='utf-8') as json_file:
- # json.dump(jsonData, json_file, ensure_ascii=False)
- # =======================step3===========================
-
-
- # =======================step4===========================
- # y_true_ori = []
- # y_pred_ori = []
- # y_true_adv = []
- # y_pred_adv = []
- # with open(temp2Path, 'r', encoding='utf8')as fp:
- # jsonData = json.load(fp)
- # TPR = FNR = FPR = TNR = ACC = Prcesion = Recall = F1 = 0.0
- # for i in range(len(jsonData)):
- # if i == 0:
- # y_true_ori.append(jsonData[i]["Ori_Sample_TrueLabel"])
- # y_pred_ori.append(jsonData[i]["Ori_Sample_PredictLabel"])
- # y_true_adv.append(jsonData[i]["Ori_Sample_TrueLabel"])
- # y_pred_adv.append(jsonData[i]["Adv_Sample_PredictLabel"])
- # tempDict = {}
- # tempDict["TPR"] = TPR
- # tempDict["FNR"] = FNR
- # tempDict["FPR"] = FPR
- # tempDict["TNR"] = TNR
- # tempDict["ACC"] = ACC
- # tempDict["Prcesion"] = Prcesion
- # tempDict["Recall"] = Recall
- # tempDict["F1"] = F1
- # jsonData[i]["Ori_Eval"] = tempDict.copy()
- # continue
- # else:
- # y_true_ori.append(jsonData[i]["Ori_Sample_TrueLabel"])
- # y_pred_ori.append(jsonData[i]["Ori_Sample_PredictLabel"])
- # y_true_adv.append(jsonData[i]["Ori_Sample_TrueLabel"])
- # y_pred_adv.append(jsonData[i]["Adv_Sample_PredictLabel"])
- #
- # FPR, FNR, TNR, TPR, ACC, Prcesion, Recall, F1 = cal_multi(y_true_ori, y_pred_ori)
- # tempDict["TPR"] = TPR
- # tempDict["FNR"] = FNR
- # tempDict["FPR"] = FPR
- # tempDict["TNR"] = TNR
- # tempDict["ACC"] = ACC
- # tempDict["Prcesion"] = Prcesion
- # tempDict["Recall"] = Recall
- # tempDict["F1"] = F1
- # jsonData[i]["Ori_Eval"] = tempDict.copy()
- #
- # FPR, FNR, TNR, TPR, ACC, Prcesion, Recall, F1 = cal_multi(y_true_adv, y_pred_adv)
- # tempDict["TPR"] = TPR
- # tempDict["FNR"] = FNR
- # tempDict["FPR"] = FPR
- # tempDict["TNR"] = TNR
- # tempDict["ACC"] = ACC
- # tempDict["Prcesion"] = Prcesion
- # tempDict["Recall"] = Recall
- # tempDict["F1"] = F1
- # jsonData[i]["Adv_Eval"] = tempDict.copy()
- #
- #
- # with open(temp3Path, 'w', encoding='utf-8') as json_file:
- # json.dump(jsonData, json_file, ensure_ascii=False)
- # =======================step4===========================
|