import os
import torch
import json
import argparse
from tqdm import tqdm
from lavis.tasks.clef_captioning import coco_caption_eval
from bleurt_pytorch import BleurtConfig, BleurtForSequenceClassification, BleurtTokenizer

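# Redirect all HuggingFace / Torch cache directories to shared user storage.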
for key in ['HF_HOME', 'TORCH_HOME', 'TRANSFORMERS_CACHE', 'HUGGINGFACE_HUB_CACHE']:
    os.environ[key] = '/userhome/.cache'

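# Force fully offline mode: every model and dataset must already be cached locally.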
os.environ['HF_DATASETS_OFFLINE'] = '1'
os.environ['TRANSFORMERS_OFFLINE'] = '1'

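# --pred points at the prediction JSON; the remaining flags toggle the extra
# (slower) metrics on top of the default COCO-style ones.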
parser = argparse.ArgumentParser()
parser.add_argument("--pred", type=str)
parser.add_argument("--gt", type=str, default='/userhome/MedicalDatasets/ImageCLEFmedical_2023/Task1-Caption/val_gt_rm_num.json')
# parser.add_argument("--gt", type=str, default='/userhome/MedicalDatasets/mimic_cxr/val_gt.json')
parser.add_argument("-bs", "--bert_score", action='store_true')
parser.add_argument("-nr", "--no_replace", action='store_true')
parser.add_argument('-ss', '--save_scores', action='store_true')
parser.add_argument('-cs', '--clip_score', action='store_true')
parser.add_argument('-bleurt', '--bleurt', action='store_true')
args = parser.parse_args()

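# Standard captioning metrics (BLEU-1..4, METEOR, CIDEr, ROUGE). replace_numbers
# (disabled with -nr) presumably normalizes numerals in the hypotheses to match
# the number-stripped ground truth (val_gt_rm_num.json).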
coco_eval = coco_caption_eval(args.gt, args.pred, replace_numbers=not args.no_replace)
for k, v in coco_eval.eval.items():
    print(k, v)

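# Optionally dump the per-image metric breakdown for later inspection.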
if args.save_scores:
    detailed_scores = coco_eval.evalImgs
    with open('detailed_scores.json', 'w') as wf:
        json.dump(detailed_scores, wf)

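# BERTScore: the bert-score CLI compares line-aligned candidate/reference files,
# so write one caption per line to hyps.txt and refs.txt.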
if args.bert_score:
    with open('hyps.txt', 'w') as wf:
        wf.write('\n'.join(coco_eval.HYPS))

    with open('refs.txt', 'w') as wf:
        wf.write('\n'.join(coco_eval.GTS))

    cmd = 'bert-score -r refs.txt -c hyps.txt --model microsoft/deberta-xlarge-mnli'
    os.system(cmd)

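# BLEURT: score each (reference, candidate) pair. Since offline mode is forced
# above, 'BLEURT-20' is expected to resolve to a locally cached checkpoint.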
if args.bleurt:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_name = 'BLEURT-20'
    batch_size = 16
    config = BleurtConfig.from_pretrained(model_name)
    model = BleurtForSequenceClassification.from_pretrained(model_name).to(device)
    tokenizer = BleurtTokenizer.from_pretrained(model_name)

    references = coco_eval.GTS
    candidates = coco_eval.HYPS

    model.eval()

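    # Ceiling division so the final partial batch is still scored.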
    n_batches = (len(references) + batch_size - 1) // batch_size

    results = []
    with torch.no_grad():
        for i in tqdm(range(n_batches)):
            refs = references[i * batch_size:(i + 1) * batch_size]
            hyps = candidates[i * batch_size:(i + 1) * batch_size]
            inputs = tokenizer(refs, hyps, padding=True, return_tensors='pt', truncation=True, max_length=512)
            inputs = {k: v.to(device) for k, v in inputs.items()}
            res = model(**inputs).logits.cpu().flatten().tolist()
            results.extend(res)

    print('BLEURT:', sum(results) / len(results))

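# CLIPScore: rebuild {image, caption, image_id} records so the external
# clipscore script can pair each predicted caption with its image file.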
if args.clip_score:
    with open(os.path.join(os.path.dirname(args.gt), 'val_caption.json')) as rf:
        val_data = json.load(rf)
    id2image = {}
    for line in val_data:
        id2image[line['image_id']] = line['image']
    out_data = []
    for k, v in coco_eval.RES.items():
        line = {'image': id2image[k], 'caption': v[0], 'image_id': k}
        out_data.append(line)
    print(out_data[0])
    with open('res.json', 'w') as wf:
        json.dump(out_data, wf)

    cmd = 'cd ../clipscore && python clipscore.py ../lavis/res.json /userhome/MedicalDatasets/ImageCLEFmedical_2023/Task1-Caption/'
    os.system(cmd)

'''
python eval_file.py --pred output/ViTg_chatglm_clef/backup_base_prompt_none_CD/result/val_epoch9.json -nr --bert_score --bleurt --clip_score
python eval_file.py --pred output/blip2_chatglm/364/init1_ftQformer_t4/result/val_epoch9.json -nr --bert_score --bleurt --clip_score

python eval_file.py --pred lavis/output/medical/eval/caption_2_ViTg_BERT_OPT2.7_e9/20230423073/result/val_epoch0.json
Bleu_1 0.17022799449448414
Bleu_2 0.09047780581103165
Bleu_3 0.0448445706598185
Bleu_4 0.023619988501716826
METEOR 0.06690504317163476
CIDEr 0.1171865418513422
R-1 0.22655176416923273
R-2 0.06699622400240016
R-L 0.20397622393130813
microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.26.1)_fast-tokenizer P: 0.573597 R: 0.547268 F1: 0.555139

python eval_file.py --pred lavis/output/medical/3_ft_ViTg_BERT_OPT2.7/20230424142/result/val_epoch3.json -bs
Bleu_1 0.12991497464970717
Bleu_2 0.07269334078241776
Bleu_3 0.03785799634475896
Bleu_4 0.020704353504403816
METEOR 0.06976497859511133
CIDEr 0.16143937576701228
R-1 0.2333936540372558
R-2 0.07177419668665155
R-L 0.20605666513544613
microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.26.1)_fast-tokenizer P: 0.642242 R: 0.568178 F1: 0.599041
'''