'''
Automatic generation evaluation metrics wrapper.

The most useful function here is

    get_all_metrics(refs, cands)
'''
from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.meteor.meteor import Meteor
from pycocoevalcap.rouge.rouge import Rouge
from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.spice.spice import Spice


def get_all_metrics(refs, cands, return_per_cap=False):
    '''
    Computes BLEU-1..4, METEOR, ROUGE-L, CIDEr, and SPICE.

    refs: list of lists of reference strings (one list per candidate)
    cands: list of candidate strings
    return_per_cap: if True, return per-caption scores instead of corpus-level scores

    Returns a dict mapping metric name to score(s); 'bleu' maps to a list of
    four values (BLEU-1 through BLEU-4).
    '''
    metrics = []
    names = []

    pycoco_eval_cap_scorers = [(Bleu(4), 'bleu'),
                               (Meteor(), 'meteor'),
                               (Rouge(), 'rouge'),
                               (Cider(), 'cider'),
                               (Spice(), 'spice')]

    for scorer, name in pycoco_eval_cap_scorers:
        overall, per_cap = pycoco_eval(scorer, refs, cands)
        if return_per_cap:
            metrics.append(per_cap)
        else:
            metrics.append(overall)
        names.append(name)

    metrics = dict(zip(names, metrics))
    return metrics


def tokenize(refs, cands, no_op=False):
    # no_op is a debug option for checking how much skipping the PTB tokenizer
    # affects the scores
    tokenizer = PTBTokenizer()

    if no_op:
        refs = {idx: [r for r in c_refs] for idx, c_refs in enumerate(refs)}
        cands = {idx: [c] for idx, c in enumerate(cands)}

    else:
        # PTBTokenizer expects {id: [{'caption': str}, ...]} dicts as input
        refs = {idx: [{'caption': r} for r in c_refs] for idx, c_refs in enumerate(refs)}
        cands = {idx: [{'caption': c}] for idx, c in enumerate(cands)}

        refs = tokenizer.tokenize(refs)
        cands = tokenizer.tokenize(cands)

    return refs, cands


def pycoco_eval(scorer, refs, cands):
    '''
    scorer is assumed to have a compute_score function.
    refs is a list of lists of reference strings.
    cands is a list of candidate strings (one prediction per instance).

    Returns (corpus-level score, per-instance scores).
    '''
    refs, cands = tokenize(refs, cands)
    average_score, scores = scorer.compute_score(refs, cands)
    return average_score, scores
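

# A minimal usage sketch (not part of the original module). The captions below
# are made-up examples; running this requires pycocoevalcap and a working Java
# install for the PTB tokenizer, METEOR, and SPICE.
if __name__ == '__main__':
    example_refs = [
        ['a dog runs across the grass', 'a brown dog runs through a field'],
        ['a man rides a bicycle down the street'],
    ]
    example_cands = ['a dog running on grass', 'a person riding a bike']

    scores = get_all_metrics(example_refs, example_cands)
    for name, value in scores.items():
        # 'bleu' maps to a list of BLEU-1..4 values; the rest are single scores
        print(name, value)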