# OpenI / TLXZoo - mirror of https://github.com/tensorlayer/TLXZoo.git

 
			
							import numpy as np
import sacrebleu
from rouge_score import rouge_scorer
from rouge_score import scoring
from tensorlayerx import logging


def bleu(targets, predictions):
    """Score predictions against references with sacrebleu's corpus BLEU.

    Args:
      targets: either a flat list of reference strings, or a list of lists of
        strings when several references are available. The nested form is
        copied and handed to ``sacrebleu.corpus_bleu`` with its layout
        unchanged; the flat form is wrapped in one outer list first.
      predictions: list of hypothesis strings.
    Returns:
      dict with the single key "bleu" mapped to the ``score`` attribute of
      the sacrebleu result.
    """
    # Only the first element is inspected to decide which form was passed.
    nested = isinstance(targets[0], list)
    if nested:
        references = [list(refs) for refs in targets]
    else:
        # corpus_bleu takes a list of reference lists, so a flat list of
        # strings becomes the one and only inner list.
        references = [targets]

    options = {
        "smooth_method": "exp",
        "smooth_value": 0.0,
        "force": False,
        "lowercase": False,
        "tokenize": "intl",
        "use_effective_order": False,
    }
    result = sacrebleu.corpus_bleu(predictions, references, **options)
    return {"bleu": result.score}


def rouge(targets, predictions, score_keys=None):
    """Computes rouge score.

    Each prediction is scored against the target at the same position, the
    per-pair scores are fed to a ``scoring.BootstrapAggregator``, and the
    aggregated F-measures are logged and returned.

    Args:
      targets: list of strings. ``bytes`` items are decoded as UTF-8.
      predictions: list of strings. ``bytes`` items are decoded as UTF-8.
      score_keys: list of strings with the keys to compute. Defaults to
        ["rouge1", "rouge2", "rougeLsum"] when None.
    Returns:
      dict with score_key: the aggregate's ``mid.fmeasure`` multiplied by 100,
      across all target/prediction pairs.
    """

    if score_keys is None:
        score_keys = ["rouge1", "rouge2", "rougeLsum"]
    scorer = rouge_scorer.RougeScorer(score_keys)
    aggregator = scoring.BootstrapAggregator()

    def _prepare_summary(summary):
        # Make sure the summary is not bytes-type: str.replace below would
        # raise TypeError on bytes, so decode it to text first.
        if isinstance(summary, bytes):
            summary = summary.decode("utf-8")
        # Add newlines between sentences so that rougeLsum is computed correctly.
        return summary.replace(" . ", " .\n")

    # zip pairs items by position and stops at the shorter of the two lists.
    for prediction, target in zip(predictions, targets):
        target = _prepare_summary(target)
        prediction = _prepare_summary(prediction)
        aggregator.add_scores(scorer.score(target=target, prediction=prediction))
    result = aggregator.aggregate()
    for key in score_keys:
        logging.info(
            "%s = %.2f, 95%% confidence [%.2f, %.2f]",
            key,
            result[key].mid.fmeasure * 100,
            result[key].low.fmeasure * 100,
            result[key].high.fmeasure * 100,
        )
    return {key: result[key].mid.fmeasure * 100 for key in score_keys}