kenan976431
/
TextAttack

 
			
							import json
import csv
from culIndex import cal_multi

# All paths for one run share the same "<model>-<dataset>-<recipe>" stem:
# the attack log CSV is the input, and each processing step writes its own
# intermediate JSON file under ./JSON/.
_RUN_NAME = "bert-base-uncased-RTE-BAEGarg2019"
_JSON_STEM = "./JSON/" + _RUN_NAME

filePath = "./log-" + _RUN_NAME + ".csv"
tempPath, temp1Path, temp2Path, temp3Path = (
    _JSON_STEM + suffix + ".json"
    for suffix in ("-temp", "-temp1", "-temp2", "-temp3")
)
# outputPath = "./JSON/distilbert-base-cased-MRPC-CheckList2020.json"

# Accumulators kept for the later (currently commented-out) steps, some of
# which append to them; step 1 itself does not use them.
Index = ['Index']
Ori_Sample_TrueLabel = []
Ori_Sample_PredictLabel = []
Ori_Sample_Prediction_TOP1 = []
Ori_Sample = []
Adv_Sample_PredictLabel = []
Adv_Sample_Prediction_TOP1 = []
Adv_Sample = []
result_type = []

# =======================step1===========================
# Convert every data row of the attack log CSV into one JSON record and
# write the list of records to tempPath.

# Positions of the CSV columns read by this step.
_COL_TRUE_LABEL = 0
_COL_ORI_LABEL = 2
_COL_ORI_SCORE = 3
_COL_ORI_TEXT = 4
_COL_ADV_LABEL = 5
_COL_ADV_SCORE = 6
_COL_ADV_TEXT = 7
_COL_RESULT = 8

# (plain, English, Chinese) display names per numeric label for this run.
# For other tasks swap in e.g. "Similar"/"Dissimilar" ("相似"/"不相似") or
# "Positive"/"Negative" ("积极"/"消极").
_LABEL_NAMES = {
    1: ("1", "Imply", "蕴含"),
    0: ("0", "Don'tImply", "不蕴含"),
}

# Attack outcome -> Current_FLAG; any other outcome string maps to 2.
_RESULT_FLAGS = {"Failed": 0, "Successful": 1}


def _parse_label(cell):
    """Return 1 if the CSV cell is exactly "1.0", otherwise 0."""
    return 1 if cell == "1.0" else 0


def _top1_entries(label, value):
    """Return the plain, English and Chinese single-entry TOP1 lists.

    Each list holds one ``{"name": ..., "value": value}`` dict; the three
    dicts are distinct objects that share the same numeric value.
    """
    return tuple([{"name": name, "value": value}] for name in _LABEL_NAMES[label])


def _row_to_record(index, row):
    """Build the JSON record for one CSV data row.

    Args:
        index: zero-based position of the row among the data rows.
        row: list of cell strings as produced by ``csv.reader``.

    Returns:
        A dict whose keys are inserted in the order they appear in the
        output JSON.

    Raises:
        IndexError: if the row has fewer than nine columns.
        ValueError: if a score cell is not a valid float.
    """
    result = row[_COL_RESULT]
    true_label = _parse_label(row[_COL_TRUE_LABEL])
    ori_label = _parse_label(row[_COL_ORI_LABEL])
    adv_label = _parse_label(row[_COL_ADV_LABEL])

    # NOTE(review): the logged score is complemented when the original
    # prediction matches the true label (raw cell strings compared), and
    # when the attack failed -- presumably the log stores the score of the
    # non-true class; confirm against the log format.
    ori_value = float(row[_COL_ORI_SCORE])
    if row[_COL_TRUE_LABEL] == row[_COL_ORI_LABEL]:
        ori_value = 1 - ori_value
    adv_value = float(row[_COL_ADV_SCORE])
    if result == "Failed":
        adv_value = 1 - adv_value

    _, true_en, true_cn = _LABEL_NAMES[true_label]
    ori_top1, ori_top1_en, ori_top1_cn = _top1_entries(ori_label, ori_value)
    adv_top1, adv_top1_en, adv_top1_cn = _top1_entries(adv_label, adv_value)

    return {
        "Index": index,
        "Current_FLAG": _RESULT_FLAGS.get(result, 2),
        "Ori_Sample": row[_COL_ORI_TEXT],
        "Ori_Sample_TrueLabel": true_label,
        "Ori_Sample_TrueLabel_EN": true_en,
        "Ori_Sample_TrueLabel_CN": true_cn,
        "Ori_Sample_PredictLabel": ori_label,
        "Ori_Sample_Prediction_TOP1": ori_top1,
        "Ori_Sample_Prediction_TOP1_EN": ori_top1_en,
        "Ori_Sample_Prediction_TOP1_CN": ori_top1_cn,
        "Adv_Sample": row[_COL_ADV_TEXT],
        "Adv_Sample_PredictLabel": adv_label,
        "Adv_Sample_Prediction_TOP1": adv_top1,
        "Adv_Sample_Prediction_TOP1_EN": adv_top1_en,
        "Adv_Sample_Prediction_TOP1_CN": adv_top1_cn,
    }


jsonList = []

# newline="" is what the csv module requires so quoted fields containing line
# breaks are parsed correctly. The log is read as UTF-8 to match the UTF-8
# JSON written below rather than depending on the platform default encoding.
# The with-block guarantees the handle is closed.
with open(filePath, "r", newline="", encoding="utf-8") as csvFile:
    reader = csv.reader(csvFile)
    next(reader, None)  # skip the header row (no-op on an empty file)
    for row in reader:
        if not row:
            # Blank lines come back as [] and carry no sample; skip them
            # instead of failing on the column lookups.
            continue
        # len(jsonList) is the running zero-based index of this record.
        jsonList.append(_row_to_record(len(jsonList), row))

with open(tempPath, 'w', encoding='utf-8') as json_file:
    json.dump(jsonList, json_file, ensure_ascii=False)
# =======================step1===========================


# =======================step2===========================
# clean_Ori_Sample = []
#
# with open(tempPath, 'r', encoding='utf8')as fp:
#     jsonData = json.load(fp)
#     for eachJsonDict in jsonData:
#         Ori_Sample.append(eachJsonDict["Ori_Sample"])
#         Adv_Sample.append(eachJsonDict["Adv_Sample"])
#     for each_Ori_Sample in Ori_Sample:
#         each_Ori_Sample = each_Ori_Sample.replace("[[", "")
#         each_Ori_Sample = each_Ori_Sample.replace("]]", "")
#         clean_Ori_Sample.append(each_Ori_Sample)
#     count = 0
#     for each_Ori_Sample in clean_Ori_Sample:
#         jsonData[count]["Ori_Sample"] = each_Ori_Sample
#         count += 1
#     count = 0
#     for each_Adv_Sample in Adv_Sample:
#         Red_mark_position = []
#         mark_position_start = []
#         mark_position_end = []
#         n = each_Adv_Sample.find("[[")
#         m = each_Adv_Sample.find("]]")
#         while n != -1:
#             mark_position_start.append(n)
#             mark_position_end.append(m)
#             n = each_Adv_Sample.find("[[", n + 1)
#             m = each_Adv_Sample.find("]]", m + 1)
#         each_Adv_Sample = each_Adv_Sample.replace("[[", "")
#         each_Adv_Sample = each_Adv_Sample.replace("]]", "")
#         for i in range(len(mark_position_start)):
#             mark_position_start[i] = mark_position_start[i] - i * 4
#             mark_position_end[i] = mark_position_end[i] - i * 4 - 2
#             for j in range(mark_position_start[i], mark_position_end[i]):
#                 Red_mark_position.append(j)
#         jsonData[count]["Red_mark_position"] = Red_mark_position
#         jsonData[count]["Adv_Sample"] = each_Adv_Sample
#         count += 1
#
# with open(temp1Path, 'w', encoding='utf-8') as json_file:
#     json.dump(jsonData, json_file, ensure_ascii=False)
# =======================step2===========================


# =======================step3===========================
# with open(temp1Path, 'r', encoding='utf8')as fp:
#     jsonData = json.load(fp)
#     charList = ["~", "`", "!", "@", "#", "$", "%", "^", "&", "*",
#                 "(", ")", "-", "_", "=", "+", "[", "{", "]", "}",
#                 ";", ":", ",", "<", ".", ">", "?", "|"]
#     oriLength = len(jsonData)
#     charLength = len(charList)
#     for j in range(1, charLength + 1):
#         for i in range(oriLength):
#             jsonData.append(jsonData[i].copy())
#             jsonData[-1]["Red_mark_position"] = jsonData[-1]["Red_mark_position"].copy()
#             Index = i + j * oriLength
#             sampleLength = len(jsonData[-1]["Adv_Sample"])
#             jsonData[-1]["Index"] = Index
#             jsonData[-1]["Adv_Sample"] = jsonData[-1]["Adv_Sample"] + charList[j - 1]
#             jsonData[-1]["Red_mark_position"].append(sampleLength)
#
#     startIndex = jsonData[-1]["Index"] - 3
#     for j in range(1, charLength + 1):
#         for i in range(oriLength):
#             jsonData.append(jsonData[i].copy())
#             jsonData[-1]["Red_mark_position"] = jsonData[-1]["Red_mark_position"].copy()
#             Index = i + j * oriLength + startIndex
#             jsonData[-1]["Index"] = Index
#             jsonData[-1]["Adv_Sample"] = charList[j - 1] + jsonData[-1]["Adv_Sample"]
#             if jsonData[-1]["Red_mark_position"]:
#                 jsonData[-1]["Red_mark_position"].append(-1)
#                 for i in range(len(jsonData[-1]["Red_mark_position"])):
#                     jsonData[-1]["Red_mark_position"][i] += 1
#
# with open(temp2Path, 'w', encoding='utf-8') as json_file:
#     json.dump(jsonData, json_file, ensure_ascii=False)
# =======================step3===========================


# =======================step4===========================
# y_true_ori = []
# y_pred_ori = []
# y_true_adv = []
# y_pred_adv = []
# with open(temp2Path, 'r', encoding='utf8')as fp:
#     jsonData = json.load(fp)
#     TPR = FNR = FPR = TNR = ACC = Prcesion = Recall = F1 = 0.0
#     for i in range(len(jsonData)):
#         if i == 0:
#             y_true_ori.append(jsonData[i]["Ori_Sample_TrueLabel"])
#             y_pred_ori.append(jsonData[i]["Ori_Sample_PredictLabel"])
#             y_true_adv.append(jsonData[i]["Ori_Sample_TrueLabel"])
#             y_pred_adv.append(jsonData[i]["Adv_Sample_PredictLabel"])
#             tempDict = {}
#             tempDict["TPR"] = TPR
#             tempDict["FNR"] = FNR
#             tempDict["FPR"] = FPR
#             tempDict["TNR"] = TNR
#             tempDict["ACC"] = ACC
#             tempDict["Prcesion"] = Prcesion
#             tempDict["Recall"] = Recall
#             tempDict["F1"] = F1
#             jsonData[i]["Ori_Eval"] = tempDict.copy()
#             continue
#         else:
#             y_true_ori.append(jsonData[i]["Ori_Sample_TrueLabel"])
#             y_pred_ori.append(jsonData[i]["Ori_Sample_PredictLabel"])
#             y_true_adv.append(jsonData[i]["Ori_Sample_TrueLabel"])
#             y_pred_adv.append(jsonData[i]["Adv_Sample_PredictLabel"])
#
#             FPR, FNR, TNR, TPR, ACC, Prcesion, Recall, F1 = cal_multi(y_true_ori, y_pred_ori)
#             tempDict["TPR"] = TPR
#             tempDict["FNR"] = FNR
#             tempDict["FPR"] = FPR
#             tempDict["TNR"] = TNR
#             tempDict["ACC"] = ACC
#             tempDict["Prcesion"] = Prcesion
#             tempDict["Recall"] = Recall
#             tempDict["F1"] = F1
#             jsonData[i]["Ori_Eval"] = tempDict.copy()
#
#             FPR, FNR, TNR, TPR, ACC, Prcesion, Recall, F1 = cal_multi(y_true_adv, y_pred_adv)
#             tempDict["TPR"] = TPR
#             tempDict["FNR"] = FNR
#             tempDict["FPR"] = FPR
#             tempDict["TNR"] = TNR
#             tempDict["ACC"] = ACC
#             tempDict["Prcesion"] = Prcesion
#             tempDict["Recall"] = Recall
#             tempDict["F1"] = F1
#             jsonData[i]["Adv_Eval"] = tempDict.copy()
#
#
# with open(temp3Path, 'w', encoding='utf-8') as json_file:
#     json.dump(jsonData, json_file, ensure_ascii=False)
# =======================step4===========================