Smart_City_Model_Zoo
/
AnomalyDetection

 
			
							"""
Temporal-spatial matrix processing for anomaly detection.
Most of the processing outside of object detection is done here.

"""


import numpy as np
import pandas as pd
import types

import utils
from reid.extractor import ReidExtractor
from utils import ResultsDict, VideoReader
import matplotlib.pyplot as plt
import cv2 as cv


def add_boxes(bboxes, ignore_matrix):
    """
    Rasterises detection boxes into per-pixel score and detection maps (tmp_score, tmp_detect).

    bboxes: iterable of bounding boxes and scores [x1, y1, x2, y2, score], in pixel coordinates.
        Coordinates are truncated to int; negative coordinates are clamped to 0 so that boxes
        overhanging the top/left image border are clipped instead of dropped or wrapped.
    ignore_matrix: 2D boolean mask with the image's (height, width) shape. Pixels where the mask
        is False are cleared from the detection map; the score map is filtered with utils.mask
        using the same mask.

    returns: (tmp_score, tmp_detect)
        tmp_score: float array (h, w), highest score of any box covering each pixel (0 where none).
        tmp_detect: bool array (h, w), True where at least one box covers the pixel and the
            mask is True.
    """
    h, w = ignore_matrix.shape

    tmp_score = np.zeros((h, w))
    tmp_detect = np.zeros((h, w), dtype=bool)

    for x1, y1, x2, y2, score in bboxes:  # for each box
        x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))

        # A negative slice bound means "offset from the end" in numpy, which would silently
        # drop (or misplace) boxes that stick out past the top/left border. Upper bounds
        # need no clamping: slices already stop at the array edge.
        x1, x2 = max(x1, 0), max(x2, 0)
        y1, y2 = max(y1, 0), max(y2, 0)

        # Overlapping boxes keep the highest score per pixel.
        tmp_score[y1:y2, x1:x2] = np.maximum(score, tmp_score[y1:y2, x1:x2])  # add box
        tmp_detect[y1:y2, x1:x2] = True

    # get rid of stuff in ignore regions
    # NOTE(review): utils.mask presumably zeroes entries where the mask is False - confirm in utils.
    tmp_score = utils.mask(tmp_score, ignore_matrix)
    tmp_detect &= ignore_matrix

    return tmp_score, tmp_detect


def get_anomalies_preprocessed(video_path, reid_model_path, frame_by_frame_results_path, static_results_path,
                               ignore_matrix_path=None, reid_model_name="resnet50", start_frame=1, frame_interval=20,
                               abnormal_duration_thresh=60, detect_thresh=5, undetect_thresh=8, score_thresh=0.3,
                               light_thresh=0.8, anomaly_score_thresh=0.7, similarity_thresh=0.95,
                               suspicious_time_thresh=18, verbose=False):
    """
    Runs anomaly detection from files on disk. Detection results and background modelling
    must already have been produced; this only loads them and hands over to
    get_anomalies_sequential, whose result is returned unchanged.

    video_path: path to raw video
    reid_model_path: path to re-ID model checkpoint
    frame_by_frame_results_path: path to CSV of object detection results on the raw video
    static_results_path: path to CSV of object detection results on the background images
    ignore_matrix_path: path to ignore region mask (None to not ignore anything)
    reid_model_name: backbone used for reid model
    start_frame: video frame to start from
    frame_interval: interval between frames to do calculations on
    abnormal_duration_thresh: duration (in seconds) to consider an object abnormal
    detect_thresh: duration (in frames) to consider an object for tracking
    undetect_thresh: duration (in frames) to stop considering an object for tracking
    score_thresh: detection score threshold for bounding boxes
    light_thresh: brightness threshold, forwarded to get_anomalies_sequential
    anomaly_score_thresh: threshold to consider an object an anomaly
    similarity_thresh: threshold for object re-ID
    suspicious_time_thresh: duration (in seconds) for an object to be considered suspicious
    verbose: verbose printing
    """

    # Load both detection tables first, then wrap them for per-frame lookup.
    raw_detections = pd.read_csv(frame_by_frame_results_path)
    background_detections = pd.read_csv(static_results_path)

    raw_results = ResultsDict.from_df(raw_detections)
    background_results = ResultsDict.from_df(background_detections)

    reader = VideoReader(video_path)

    # Forward everything by name so the mapping onto the sequential API is explicit.
    return get_anomalies_sequential(
        video_reader=reader,
        reid_model_path=reid_model_path,
        fbf_results_dict=raw_results,
        static_results_dict=background_results,
        ignore_matrix_gen=ignore_matrix_path,
        reid_model_name=reid_model_name,
        start_frame=start_frame,
        frame_interval=frame_interval,
        abnormal_duration_thresh=abnormal_duration_thresh,
        detect_thresh=detect_thresh,
        undetect_thresh=undetect_thresh,
        score_thresh=score_thresh,
        light_thresh=light_thresh,
        anomaly_score_thresh=anomaly_score_thresh,
        similarity_thresh=similarity_thresh,
        suspicious_time_thresh=suspicious_time_thresh,
        verbose=verbose,
    )


def get_anomalies_sequential(video_reader, reid_model_path, fbf_results_dict, static_results_dict,
                             ignore_matrix_gen=None, reid_model_name="resnet50", start_frame=1, frame_interval=20,
                             abnormal_duration_thresh=60, detect_thresh=5, undetect_thresh=8, score_thresh=0.3,
                             light_thresh=0.8, anomaly_score_thresh=0.7, similarity_thresh=0.95,
                             suspicious_time_thresh=18, verbose=False, anomaly_nms_thresh=0.8):
    """
    Performs the anomaly detection. Sequential version

    Walks the video every `frame_interval` frames, accumulating per-pixel detection counts,
    scores and first/last detection times from the static (background) detections. When the
    longest-lived candidate pixel exceeds the duration thresholds, its region is extracted,
    optionally merged with the previous anomaly via re-ID, and its start time is backtracked
    using the frame-by-frame detections and the region brightness.

    video_reader: VideoReader object for raw video
    reid_model_path: path to re-ID model checkpoint
    fbf_results_dict: ResultsDict object for frame-by-frame/raw video detection results
    static_results_dict: ResultsDict object for static/background detection results
    ignore_matrix_gen: generator yielding ignore matrix, must have the same interval as frame_interval.
        Or single numpy array, or path to .npy file, or None to not ignore anything.
        True entries mark pixels that are kept.
    reid_model_name: backbone used for reid model
    start_frame: video frame to start from
    frame_interval: interval between frames to do calculations on
    abnormal_duration_thresh: duration (in seconds) to consider an object abnormal
    detect_thresh: number of processed frames a pixel must be detected in (exclusive) before it
        becomes a candidate
    undetect_thresh: number of consecutive processed frames without a detection (exclusive) after
        which a pixel stops being a candidate
    score_thresh: detection score threshold for bounding boxes
    light_thresh: brightness threshold; start-time backtracking stops at the first frame where
        the region brightness is at or below this value
    anomaly_score_thresh: threshold on the mean detection score to consider an object an anomaly
    similarity_thresh: threshold for object re-ID
    suspicious_time_thresh: duration (in seconds) for an object to be considered suspicious
    verbose: verbose printing (all progress/debug output is gated on this)
    anomaly_nms_thresh: IoU threshold for anomaly NMS.

    returns: DataFrame with columns x1, y1, x2, y2, score, start_time, end_time (times in
        seconds) built from the NMS output, or None if no anomaly was found.
    raises: TypeError if ignore_matrix_gen is not a generator, numpy array, str or None.
    """

    def get_ignore_gen(ign_matrix):
        """
        Handles different inputs for ignore matrix

        :param ign_matrix: generator, numpy array, path to .npy file, or None
        :return: generator yielding the ignore matrix for each processed frame
        """

        if isinstance(ign_matrix, types.GeneratorType):
            return ign_matrix

        # load/create matrix
        if ign_matrix is None:
            matrix = np.ones((h, w), dtype=bool)  # Dont ignore anything

        elif isinstance(ign_matrix, np.ndarray):  # ready-made mask
            matrix = ign_matrix.astype(bool)

        elif isinstance(ign_matrix, str):  # filename
            matrix = np.load(ign_matrix).astype(bool)

        else:
            raise TypeError("Invalid ignore matrix type:", type(ign_matrix))

        # infinite generator: int() returns 0 and never hits the sentinel 1
        return (matrix for _ in iter(int, 1))

    # Get video data
    num_frames, framerate, image_shape = video_reader.nframes, video_reader.framerate, video_reader.img_shape

    # load model
    reid_model = ReidExtractor(reid_model_name, reid_model_path)

    # Set up information matrices
    h, w, _ = image_shape

    ignore_matrix_gen = get_ignore_gen(ignore_matrix_gen)

    detect_count_matrix = np.zeros((h, w))  # processed frames in which each pixel was detected
    undetect_count_matrix = np.zeros((h, w))  # consecutive processed frames without a detection
    start_time_matrix = np.zeros((h, w))  # frame of first detection
    end_time_matrix = np.zeros((h, w))  # frame of latest detection
    score_matrix = np.zeros((h, w))  # running sum of detection scores
    state_matrix = np.zeros((h, w), dtype=bool)  # State matrix, 0/1 distinguishes suspicious candidate states

    if verbose:
        print(f"total frames: {num_frames}, framerate: {framerate}, height: {h}, width: {w}")
        print("-------------------------")

    ### Main loop
    start = False  # an abnormal (long-duration) anomaly is currently open
    tmp_start = False  # a suspicious (shorter-duration) candidate is currently open
    all_results = []
    anomaly_now = {}

    # Defaults so that nothing below is unbound when the frame range is empty or the
    # ignore-matrix generator is exhausted before yielding anything.
    ignore_matrix = np.ones((h, w), dtype=bool)
    time_delay = np.zeros((h, w))
    delay_max_idx = (0, 0)

    cmap = plt.get_cmap("viridis")

    for frame in range(start_frame, num_frames, frame_interval):
        try:
            ignore_matrix = next(ignore_matrix_gen)
        except StopIteration:
            pass  # keep same ignore matrix

        # Comment out if not using crop boxes, not needed
        # if fbf_results_dict.max_frame < static_results_dict.max_frame:
        #     fbf_results_dict.gen_next()

        # create tmp_score, tmp_detect
        static_results = static_results_dict[frame]
        if static_results is not None:
            boxes = static_results.loc[static_results["score"] > score_thresh,
                                       ["x1", "y1", "x2", "y2", "score"]].values
        else:
            boxes = []

        tmp_score, tmp_detect = add_boxes(boxes, ignore_matrix)

        # ## plotting
        # NOTE(review): the annotated image is never shown or written, so this reads a frame
        # and draws on it for no visible effect. Left in place because VideoReader.get_frame
        # is not visible here - confirm it has no caching/state before deleting.
        img = video_reader.get_frame(frame)
        for x1, y1, x2, y2, score in boxes:
            x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
            col = tuple(int(c * 255) for c in cmap(score)[:3])
            cv.rectangle(img, (x1, y1), (x2, y2), col, thickness=2)

        if verbose and frame % 500 == 0:
            print(f"frame: {frame}")
            if len(boxes) > 0:
                print("\tboxes:", len(boxes))

        score_matrix += tmp_score  # add running totals
        detect_count_matrix += tmp_detect

        # Update detection matrices
        undetect_count_matrix += ~ tmp_detect
        undetect_count_matrix[tmp_detect] = 0

        # Update time matrices
        start_time_matrix[detect_count_matrix == 1] = -600 if frame == 1 else frame  # why -600 for frame 1?
        end_time_matrix[detect_count_matrix > 0] = frame

        # Update state matrices
        state_matrix[detect_count_matrix > detect_thresh] = True

        # Detect anomaly: per-pixel lifetime (in frames) of candidate pixels, and the longest one
        time_delay = utils.mask(end_time_matrix - start_time_matrix, state_matrix)
        delay_max_idx = np.unravel_index(time_delay.argmax(), time_delay.shape)

        if not start and time_delay.max() / framerate > abnormal_duration_thresh:  # and score_matrix[delay_max_idx]/detect_count_matrix[delay_max_idx]>0.8:

            # backtrack the start time (rounded down to a multiple of 5 frames)
            time_frame = int(start_time_matrix[delay_max_idx] / 5) * 5  # + 1  # why 5s and 1?

            # G: 1 where the detection count is within 2 of the count at the max-delay pixel
            # NOTE(review): utils.search_region presumably grows a box around delay_max_idx in G - confirm. why -2?
            G = np.where(detect_count_matrix < detect_count_matrix[delay_max_idx] - 2, 0, 1)
            region = utils.search_region(G, delay_max_idx)

            # vehicle reid: if the previous anomaly ended recently, check whether this is the same object
            if 'start_time' in anomaly_now and (time_frame / framerate - anomaly_now['end_time']) < 30:  # why 30?
                f1_frame_num = max(1, anomaly_now['start_time'] * framerate)
                f2_frame_num = max(1, time_frame)

                similarity = reid_model.similarity(video_reader.get_frame(f1_frame_num),
                                                   video_reader.get_frame(f2_frame_num),
                                                   anomaly_now["region"], region)

                if similarity > similarity_thresh:
                    # same object: continue from the previous anomaly's start and keep its region
                    time_frame = int(anomaly_now['start_time'] * framerate / 5) * 5  # + 1  # why 5s and 1?
                else:
                    anomaly_now['region'] = region

            else:
                anomaly_now['region'] = region

            # IoU stuff: step backwards 5 frames at a time through the frame-by-frame detections
            max_iou = 1
            count = 1  # steps with IoU > 0.3
            start_time = time_frame
            tmp_len = 1  # total steps taken
            ratio = 1
            while (max_iou > 0.1 or tmp_len < 40 or ratio > 0.6) and time_frame > 1:  # why 0.1, 40, 0.6?
                ratio = count / tmp_len

                if verbose:
                    print("time frame:", time_frame)
                fbf_results = fbf_results_dict[time_frame]
                if fbf_results is not None:
                    bboxes = fbf_results[["x1", "y1", "x2", "y2", "score"]].values
                    max_iou = utils.compute_iou(anomaly_now['region'], bboxes)

                else:
                    max_iou = 0

                time_frame -= 5  # why 5?
                if max_iou > 0.3:  # why 0.3?
                    count += 1
                    if max_iou > 0.5:  # why 0.5?  # they mention 0.5 IoU in the paper for NMS, might not be this
                        start_time = time_frame

                tmp_len += 1

            # back track start_time, until brightness at that spot falls below a threshold
            for time_frame in range(start_time, 1, -5):
                tmp_im = video_reader.get_frame(time_frame)
                if utils.compute_brightness(tmp_im[region[1]:region[3], region[0]:region[2]]) <= light_thresh:
                    break

                start_time = time_frame

            # times are stored in seconds
            anomaly_now['start_time'] = max(0, start_time / framerate)
            anomaly_now['end_time'] = max(0, end_time_matrix[delay_max_idx] / framerate)
            start = True

        elif not tmp_start and time_delay.max() > suspicious_time_thresh * framerate:
            time_frame = start_time_matrix[delay_max_idx]

            # same region extraction as for the abnormal case
            G = np.where(detect_count_matrix < detect_count_matrix[delay_max_idx] - 2, 0, 1)
            region = utils.search_region(G, delay_max_idx)

            # vehicle reid
            if 'start_time' in anomaly_now and (time_frame / framerate - anomaly_now['end_time']) < 30:  # why 30?
                f1_frame_num = max(1, anomaly_now['start_time'] * framerate)
                f2_frame_num = max(1, time_frame)

                similarity = reid_model.similarity(video_reader.get_frame(f1_frame_num),
                                                   video_reader.get_frame(f2_frame_num),
                                                   anomaly_now["region"], region)

                if similarity > similarity_thresh:
                    time_frame = int(anomaly_now['start_time'] * framerate / 5) * 5 + 1
                    region = anomaly_now['region']

            anomaly_now['region'] = region
            anomaly_now['start_time'] = max(0, time_frame / framerate)
            anomaly_now['end_time'] = max(0, end_time_matrix[delay_max_idx] / framerate)

            tmp_start = True

        if start and time_delay.max() / framerate > abnormal_duration_thresh:

            # the object has gone undetected long enough: close the anomaly
            if undetect_count_matrix[delay_max_idx] > undetect_thresh:
                # mean detection score at the max-delay pixel
                anomaly_score = score_matrix[delay_max_idx] / detect_count_matrix[delay_max_idx]

                if verbose:
                    print("\t", anomaly_now, anomaly_score)
                if anomaly_score > anomaly_score_thresh:
                    anomaly_now['end_time'] = end_time_matrix[delay_max_idx] / framerate
                    anomaly_now['score'] = anomaly_score

                    all_results.append(anomaly_now)
                    anomaly_now = {}

                start = False

        elif tmp_start and time_delay.max() > suspicious_time_thresh * framerate:
            if undetect_count_matrix[delay_max_idx] > undetect_thresh:

                anomaly_score = score_matrix[delay_max_idx] / detect_count_matrix[delay_max_idx]
                if anomaly_score > anomaly_score_thresh:
                    # suspicious candidates are not reported, only remembered for later re-ID
                    anomaly_now['end_time'] = end_time_matrix[delay_max_idx] / framerate
                    anomaly_now['score'] = anomaly_score

                tmp_start = False

        # undetect matrix change state_matrix
        state_matrix[undetect_count_matrix > undetect_thresh] = False
        undetect_count_matrix[undetect_count_matrix > undetect_thresh] = 0

        # update matrix: keep counts/scores only where detected this frame or still a candidate
        tmp_detect |= state_matrix
        detect_count_matrix = utils.mask(detect_count_matrix, tmp_detect)
        score_matrix = utils.mask(score_matrix, tmp_detect)

    # Add all anomalies to the results list (an anomaly still open when the video ends)
    if verbose:
        print("---", start, time_delay.max(), score_matrix[delay_max_idx], detect_count_matrix[delay_max_idx])
    if start and time_delay.max() > abnormal_duration_thresh * framerate:
        anomaly_score = score_matrix[delay_max_idx] / detect_count_matrix[delay_max_idx]
        if anomaly_score > anomaly_score_thresh:
            anomaly_now['end_time'] = end_time_matrix[delay_max_idx] / framerate
            anomaly_now['score'] = anomaly_score

            all_results.append(anomaly_now)
            anomaly_now = {}
            start = False

    # Apply Non-Maximal Supression to the results
    if all_results:
        nms_out = utils.anomaly_nms(all_results, anomaly_nms_thresh)

        final_results = pd.DataFrame(nms_out, columns=["x1", "y1", "x2", "y2", "score", "start_time", "end_time"])

        return final_results

    return None