import os
import torch
import json
import argparse
from tqdm import tqdm
from lavis.tasks.clef_captioning import coco_caption_eval
from bleurt_pytorch import BleurtConfig, BleurtForSequenceClassification, BleurtTokenizer

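# Redirect all HuggingFace / Torch cache directories to shared user storage.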
for key in ['HF_HOME', 'TORCH_HOME', 'TRANSFORMERS_CACHE', 'HUGGINGFACE_HUB_CACHE']:
    os.environ[key] = '/userhome/.cache'

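# Force fully offline mode: every model and dataset must already be cached locally.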
os.environ['HF_DATASETS_OFFLINE'] = '1'
os.environ['TRANSFORMERS_OFFLINE'] = '1'

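# --pred points at the prediction JSON; the remaining flags toggle the extra
# (slower) metrics on top of the default COCO-style ones.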
parser = argparse.ArgumentParser()
parser.add_argument("--pred", type=str)
parser.add_argument("--gt", type=str, default='/userhome/MedicalDatasets/ImageCLEFmedical_2023/Task1-Caption/val_gt_rm_num.json')
# parser.add_argument("--gt", type=str, default='/userhome/MedicalDatasets/mimic_cxr/val_gt.json')
parser.add_argument("-bs", "--bert_score", action='store_true')
parser.add_argument("-nr", "--no_replace", action='store_true')
parser.add_argument('-ss', '--save_scores', action='store_true')
parser.add_argument('-cs', '--clip_score', action='store_true')
parser.add_argument('-bleurt', '--bleurt', action='store_true')
args = parser.parse_args()

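# Standard captioning metrics (BLEU-1..4, METEOR, CIDEr, ROUGE). replace_numbers
# (disabled with -nr) presumably normalizes numerals in the hypotheses to match
# the number-stripped ground truth (val_gt_rm_num.json).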
coco_eval = coco_caption_eval(args.gt, args.pred, replace_numbers=not args.no_replace)
for k, v in coco_eval.eval.items():
    print(k, v)

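# Optionally dump the per-image metric breakdown for later inspection.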
if args.save_scores:
    detailed_scores = coco_eval.evalImgs
    with open('detailed_scores.json', 'w') as wf:
        json.dump(detailed_scores, wf)

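# BERTScore: the bert-score CLI compares line-aligned candidate/reference files,
# so write one caption per line to hyps.txt and refs.txt.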
if args.bert_score:
    with open('hyps.txt', 'w') as wf:
        wf.write('\n'.join(coco_eval.HYPS))

    with open('refs.txt', 'w') as wf:
        wf.write('\n'.join(coco_eval.GTS))

    cmd = 'bert-score -r refs.txt -c hyps.txt --model microsoft/deberta-xlarge-mnli'
    os.system(cmd)

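# BLEURT: score each (reference, candidate) pair. Since offline mode is forced
# above, 'BLEURT-20' is expected to resolve to a locally cached checkpoint.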
if args.bleurt:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_name = 'BLEURT-20'
    batch_size = 16
    config = BleurtConfig.from_pretrained(model_name)
    model = BleurtForSequenceClassification.from_pretrained(model_name).to(device)
    tokenizer = BleurtTokenizer.from_pretrained(model_name)

    references = coco_eval.GTS
    candidates = coco_eval.HYPS

    model.eval()

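    # Ceiling division so the final partial batch is still scored.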
    n_batches = (len(references) + batch_size - 1) // batch_size

    results = []
    with torch.no_grad():
        for i in tqdm(range(n_batches)):
            refs = references[i * batch_size:(i + 1) * batch_size]
            hyps = candidates[i * batch_size:(i + 1) * batch_size]
            inputs = tokenizer(refs, hyps, padding=True, return_tensors='pt', truncation=True, max_length=512)
            inputs = {k: v.to(device) for k, v in inputs.items()}
            res = model(**inputs).logits.cpu().flatten().tolist()
            results.extend(res)

    print('BLEURT:', sum(results) / len(results))

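# CLIPScore: rebuild {image, caption, image_id} records so the external
# clipscore script can pair each predicted caption with its image file.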
if args.clip_score:
    with open(os.path.join(os.path.dirname(args.gt), 'val_caption.json')) as rf:
        val_data = json.load(rf)
    id2image = {}
    for line in val_data:
        id2image[line['image_id']] = line['image']
    out_data = []
    for k, v in coco_eval.RES.items():
        line = {'image': id2image[k], 'caption': v[0], 'image_id': k}
        out_data.append(line)
    print(out_data[0])
    with open('res.json', 'w') as wf:
        json.dump(out_data, wf)

    cmd = 'cd ../clipscore && python clipscore.py ../lavis/res.json /userhome/MedicalDatasets/ImageCLEFmedical_2023/Task1-Caption/'
    os.system(cmd)

'''
python eval_file.py --pred output/ViTg_chatglm_clef/backup_base_prompt_none_CD/result/val_epoch9.json -nr --bert_score --bleurt --clip_score
python eval_file.py --pred output/blip2_chatglm/364/init1_ftQformer_t4/result/val_epoch9.json -nr --bert_score --bleurt --clip_score

python eval_file.py --pred lavis/output/medical/eval/caption_2_ViTg_BERT_OPT2.7_e9/20230423073/result/val_epoch0.json
Bleu_1 0.17022799449448414
Bleu_2 0.09047780581103165
Bleu_3 0.0448445706598185
Bleu_4 0.023619988501716826
METEOR 0.06690504317163476
CIDEr 0.1171865418513422
R-1 0.22655176416923273
R-2 0.06699622400240016
R-L 0.20397622393130813
microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.26.1)_fast-tokenizer P: 0.573597 R: 0.547268 F1: 0.555139

python eval_file.py --pred lavis/output/medical/3_ft_ViTg_BERT_OPT2.7/20230424142/result/val_epoch3.json -bs
Bleu_1 0.12991497464970717
Bleu_2 0.07269334078241776
Bleu_3 0.03785799634475896
Bleu_4 0.020704353504403816
METEOR 0.06976497859511133
CIDEr 0.16143937576701228
R-1 0.2333936540372558
R-2 0.07177419668665155
R-L 0.20605666513544613
microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.12(hug_trans=4.26.1)_fast-tokenizer P: 0.642242 R: 0.568178 F1: 0.599041
'''