wmkai
/
Integrate_compression

 
			
import argparse
import copy
import os.path as osp
import time
import sys
import os
current_directory = os.path.dirname(os.path.abspath(__file__))
root_path = os.path.abspath(os.path.dirname(current_directory) + os.path.sep + ".")
sys.path.append(root_path)
# os.environ['CUDA_VISIBLE_DEVICES'] = '1'
import torch
from yolodet.utils.config import Config
from tools import file_utils
from yolodet.utils.Logger import Logging
from yolodet.utils.collect_env import collect_env
from yolodet.utils.newInstance_utils import build_from_dict
from yolodet.utils.registry import DETECTORS,DATASET
from yolodet.apis.train import set_random_seed,train_detector
from yolodet.models.utils.torch_utils import select_device


def parse_args():
    parser = argparse.ArgumentParser(description='YOLODet train detectors')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work_dir', help='the dir to save logs and models')
    parser.add_argument(
        '--resume_from', help='the checkpoint file to resume from')
    parser.add_argument(
        '--validate',
        action='store_true',
        help='whether to evaluate the checkpoint during training')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument('--deterministic',action='store_true',help='whether to set deterministic options for CUDNN backend.')
    parser.add_argument('--autoscale-lr',action='store_true',help='automatically scale lr with the number of gpus')
    parser.add_argument('--multi-scale',action='store_true',help='vary img-size +/- 50%%')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.device is not None:
        cfg.device = args.device
    else:
        cfg.device = None
    device = select_device(cfg.device)
    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8


    # create work_dir
    file_utils.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # init the logger before other steps
    # timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    # log_file = osp.join(cfg.work_dir, '{}.log'.format(timestamp))
    logger = Logging.getLogger()

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([('{}: {}'.format(k, v))
                          for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +dash_line)
    meta['env_info'] = env_info
    meta['batch_size'] = cfg.data.batch_size
    meta['subdivisions'] = cfg.data.subdivisions
    meta['multi_scale'] = args.multi_scale
    # log some basic info
    logger.info('Config:\n{}'.format(cfg.text))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}, deterministic: {}'.format(args.seed, args.deterministic))
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed
    model = build_from_dict(cfg.model, DETECTORS)

    model = model.cuda(device)
    # model.device = device
    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    model.device = device

    datasets = [build_from_dict(cfg.data.train, DATASET)]
    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_from_dict(val_dataset, DATASET))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            config=cfg.text,
            CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    train_detector(model,datasets,cfg,validate=args.validate,timestamp=timestamp,meta=meta)


if __name__ == '__main__':
    main()


# This file was automatically generated by SWIG (http://www.swig.org).
# Version 3.0.12
#
# Do not make changes to this file unless you know what you are doing--modify
# the SWIG interface file instead.

from sys import version_info as _swig_python_version_info

if _swig_python_version_info >= (2, 7, 0):

    def swig_import_helper():
        import importlib

        pkg = __name__.rpartition(".")[0]
        mname = ".".join((pkg, "_snowboydetect")).lstrip(".")
        try:
            return importlib.import_module(mname)
        except ImportError:
            return importlib.import_module("_snowboydetect")

    _snowboydetect = swig_import_helper()
    del swig_import_helper
elif _swig_python_version_info >= (2, 6, 0):

    def swig_import_helper():
        from os.path import dirname
        import imp

        fp = None
        try:
            fp, pathname, description = imp.find_module(
                "_snowboydetect", [dirname(__file__)]
            )
        except ImportError:
            import _snowboydetect

            return _snowboydetect
        try:
            _mod = imp.load_module("_snowboydetect", fp, pathname, description)
        finally:
            if fp is not None:
                fp.close()
        return _mod

    _snowboydetect = swig_import_helper()
    del swig_import_helper
else:
    import _snowboydetect
del _swig_python_version_info

try:
    _swig_property = property
except NameError:
    pass  # Python < 2.2 doesn't have 'property'.

try:
    import builtins as __builtin__
except ImportError:
    import __builtin__


def _swig_setattr_nondynamic(self, class_type, name, value, static=1):
    if name == "thisown":
        return self.this.own(value)
    if name == "this":
        if type(value).__name__ == "SwigPyObject":
            self.__dict__[name] = value
            return
    method = class_type.__swig_setmethods__.get(name, None)
    if method:
        return method(self, value)
    if not static:
        if _newclass:
            object.__setattr__(self, name, value)
        else:
            self.__dict__[name] = value
    else:
        raise AttributeError("You cannot add attributes to %s" % self)


def _swig_setattr(self, class_type, name, value):
    return _swig_setattr_nondynamic(self, class_type, name, value, 0)


def _swig_getattr(self, class_type, name):
    if name == "thisown":
        return self.this.own()
    method = class_type.__swig_getmethods__.get(name, None)
    if method:
        return method(self)
    raise AttributeError(
        "'%s' object has no attribute '%s'" % (class_type.__name__, name)
    )


def _swig_repr(self):
    try:
        strthis = "proxy of " + self.this.__repr__()
    except __builtin__.Exception:
        strthis = ""
    return "<%s.%s; %s >" % (
        self.__class__.__module__,
        self.__class__.__name__,
        strthis,
    )


try:
    _object = object
    _newclass = 1
except __builtin__.Exception:

    class _object:
        pass

    _newclass = 0


class SnowboyDetect(_object):
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(
        self, SnowboyDetect, name, value
    )
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, SnowboyDetect, name)
    __repr__ = _swig_repr

    def __init__(self, resource_filename, model_str):
        this = _snowboydetect.new_SnowboyDetect(resource_filename, model_str)
        try:
            self.this.append(this)
        except __builtin__.Exception:
            self.this = this

    def Reset(self):
        return _snowboydetect.SnowboyDetect_Reset(self)

    def RunDetection(self, *args):
        return _snowboydetect.SnowboyDetect_RunDetection(self, *args)

    def SetSensitivity(self, sensitivity_str):
        return _snowboydetect.SnowboyDetect_SetSensitivity(self, sensitivity_str)

    def SetHighSensitivity(self, high_sensitivity_str):
        return _snowboydetect.SnowboyDetect_SetHighSensitivity(
            self, high_sensitivity_str
        )

    def GetSensitivity(self):
        return _snowboydetect.SnowboyDetect_GetSensitivity(self)

    def SetAudioGain(self, audio_gain):
        return _snowboydetect.SnowboyDetect_SetAudioGain(self, audio_gain)

    def UpdateModel(self):
        return _snowboydetect.SnowboyDetect_UpdateModel(self)

    def NumHotwords(self):
        return _snowboydetect.SnowboyDetect_NumHotwords(self)

    def ApplyFrontend(self, apply_frontend):
        return _snowboydetect.SnowboyDetect_ApplyFrontend(self, apply_frontend)

    def SampleRate(self):
        return _snowboydetect.SnowboyDetect_SampleRate(self)

    def NumChannels(self):
        return _snowboydetect.SnowboyDetect_NumChannels(self)

    def BitsPerSample(self):
        return _snowboydetect.SnowboyDetect_BitsPerSample(self)

    __swig_destroy__ = _snowboydetect.delete_SnowboyDetect
    __del__ = lambda self: None


SnowboyDetect_swigregister = _snowboydetect.SnowboyDetect_swigregister
SnowboyDetect_swigregister(SnowboyDetect)


class SnowboyVad(_object):
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, SnowboyVad, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, SnowboyVad, name)
    __repr__ = _swig_repr

    def __init__(self, resource_filename):
        this = _snowboydetect.new_SnowboyVad(resource_filename)
        try:
            self.this.append(this)
        except __builtin__.Exception:
            self.this = this

    def Reset(self):
        return _snowboydetect.SnowboyVad_Reset(self)

    def RunVad(self, *args):
        return _snowboydetect.SnowboyVad_RunVad(self, *args)

    def SetAudioGain(self, audio_gain):
        return _snowboydetect.SnowboyVad_SetAudioGain(self, audio_gain)

    def ApplyFrontend(self, apply_frontend):
        return _snowboydetect.SnowboyVad_ApplyFrontend(self, apply_frontend)

    def SampleRate(self):
        return _snowboydetect.SnowboyVad_SampleRate(self)

    def NumChannels(self):
        return _snowboydetect.SnowboyVad_NumChannels(self)

    def BitsPerSample(self):
        return _snowboydetect.SnowboyVad_BitsPerSample(self)

    __swig_destroy__ = _snowboydetect.delete_SnowboyVad
    __del__ = lambda self: None


SnowboyVad_swigregister = _snowboydetect.SnowboyVad_swigregister
SnowboyVad_swigregister(SnowboyVad)

# This file is compatible with both classic and new-style classes.


#!/usr/bin/env python

import collections
import pyaudio
from . import snowboydetect
from robot import utils, logging
import time
import wave
import os
from ctypes import CFUNCTYPE, c_char_p, c_int, cdll
from contextlib import contextmanager
from robot import constants


logger = logging.getLogger("snowboy")
TOP_DIR = os.path.dirname(os.path.abspath(__file__))

RESOURCE_FILE = os.path.join(TOP_DIR, "resources/common.res")
DETECT_DING = os.path.join(TOP_DIR, "resources/ding.wav")
DETECT_DONG = os.path.join(TOP_DIR, "resources/dong.wav")


def py_error_handler(filename, line, function, err, fmt):
    pass


ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)

c_error_handler = ERROR_HANDLER_FUNC(py_error_handler)


@contextmanager
def no_alsa_error():
    try:
        asound = cdll.LoadLibrary("libasound.so")
        asound.snd_lib_error_set_handler(c_error_handler)
        yield
        asound.snd_lib_error_set_handler(None)
    except:
        yield
        pass


class RingBuffer(object):
    """Ring buffer to hold audio from PortAudio"""

    def __init__(self, size=4096):
        self._buf = collections.deque(maxlen=size)

    def extend(self, data):
        """Adds data to the end of buffer"""
        self._buf.extend(data)

    def get(self):
        """Retrieves data from the beginning of buffer and clears it"""
        tmp = bytes(bytearray(self._buf))
        self._buf.clear()
        return tmp


def play_audio_file(fname=DETECT_DING):
    """Simple callback function to play a wave file. By default it plays
    a Ding sound.

    :param str fname: wave file name
    :return: None
    """
    ding_wav = wave.open(fname, "rb")
    ding_data = ding_wav.readframes(ding_wav.getnframes())
    with no_alsa_error():
        audio = pyaudio.PyAudio()
    stream_out = audio.open(
        format=audio.get_format_from_width(ding_wav.getsampwidth()),
        channels=ding_wav.getnchannels(),
        rate=ding_wav.getframerate(),
        input=False,
        output=True,
    )
    stream_out.start_stream()
    stream_out.write(ding_data)
    time.sleep(0.2)
    stream_out.stop_stream()
    stream_out.close()
    audio.terminate()


class ActiveListener(object):
    """ Active Listening with VAD """

    def __init__(self, decoder_model, resource=RESOURCE_FILE):
        logger.debug("activeListen __init__()")
        self.recordedData = []
        model_str = ",".join(decoder_model)
        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(), model_str=model_str.encode()
        )
        self.ring_buffer = RingBuffer(
            self.detector.NumChannels() * self.detector.SampleRate() * 5
        )

    def listen(
        self,
        interrupt_check=lambda: False,
        sleep_time=0.03,
        silent_count_threshold=15,
        recording_timeout=100,
    ):
        """
        :param interrupt_check: a function that returns True if the main loop
                                needs to stop.
        :param silent_count_threshold: indicates how long silence must be heard
                                       to mark the end of a phrase that is
                                       being recorded.
        :param float sleep_time: how much time in second every loop waits.
        :param recording_timeout: limits the maximum length of a recording.
        :return: recorded file path
        """
        logger.debug("activeListen listen()")

        self._running = True

        def audio_callback(in_data, frame_count, time_info, status):
            self.ring_buffer.extend(in_data)
            play_data = chr(0) * len(in_data)
            return play_data, pyaudio.paContinue

        with no_alsa_error():
            self.audio = pyaudio.PyAudio()

        logger.debug("opening audio stream")

        try:
            self.stream_in = self.audio.open(
                input=True,
                output=False,
                format=self.audio.get_format_from_width(
                    self.detector.BitsPerSample() / 8
                ),
                channels=self.detector.NumChannels(),
                rate=self.detector.SampleRate(),
                frames_per_buffer=2048,
                stream_callback=audio_callback,
            )
        except Exception as e:
            logger.critical(e)
            return

        logger.debug("audio stream opened")

        if interrupt_check():
            logger.debug("detect voice return")
            return

        silentCount = 0
        recordingCount = 0

        logger.debug("begin activeListen loop")

        while self._running is True:

            if interrupt_check():
                logger.debug("detect voice break")
                break
            data = self.ring_buffer.get()
            if len(data) == 0:
                time.sleep(sleep_time)
                continue

            status = self.detector.RunDetection(data)
            if status == -1:
                logger.warning("Error initializing streams or reading audio data")

            stopRecording = False
            if recordingCount > recording_timeout:
                stopRecording = True
            elif status == -2:  # silence found
                if silentCount > silent_count_threshold:
                    stopRecording = True
                else:
                    silentCount = silentCount + 1
            elif status == 0:  # voice found
                silentCount = 0

            if stopRecording == True:
                return self.saveMessage()

            recordingCount = recordingCount + 1
            self.recordedData.append(data)

        logger.debug("finished.")

    def saveMessage(self):
        """
        Save the message stored in self.recordedData to a timestamped file.
        """
        filename = os.path.join(
            constants.TEMP_PATH, "output" + str(int(time.time())) + ".wav"
        )
        data = b"".join(self.recordedData)

        # use wave to save data
        wf = wave.open(filename, "wb")
        wf.setnchannels(self.detector.NumChannels())
        wf.setsampwidth(
            self.audio.get_sample_size(
                self.audio.get_format_from_width(self.detector.BitsPerSample() / 8)
            )
        )
        wf.setframerate(self.detector.SampleRate())
        wf.writeframes(data)
        wf.close()
        logger.debug("finished saving: " + filename)

        self.stream_in.stop_stream()
        self.stream_in.close()
        self.audio.terminate()

        return filename


class HotwordDetector(object):
    """
    Snowboy decoder to detect whether a keyword specified by `decoder_model`
    exists in a microphone input stream.

    :param decoder_model: decoder model file path, a string or a list of strings
    :param resource: resource file path.
    :param sensitivity: decoder sensitivity, a float of a list of floats.
                              The bigger the value, the more senstive the
                              decoder. If an empty list is provided, then the
                              default sensitivity in the model will be used.
    :param audio_gain: multiply input volume by this factor.
    :param apply_frontend: applies the frontend processing algorithm if True.
    """

    def __init__(
        self,
        decoder_model,
        resource=RESOURCE_FILE,
        sensitivity=[],
        audio_gain=1,
        apply_frontend=False,
    ):

        self._running = False

        tm = type(decoder_model)
        ts = type(sensitivity)
        if tm is not list:
            decoder_model = [decoder_model]
        if ts is not list:
            sensitivity = [sensitivity]
        model_str = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(), model_str=model_str.encode()
        )
        self.detector.SetAudioGain(audio_gain)
        self.detector.ApplyFrontend(apply_frontend)
        self.num_hotwords = self.detector.NumHotwords()

        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if len(sensitivity) != 0:
            assert self.num_hotwords == len(sensitivity), (
                "number of hotwords in decoder_model (%d) and sensitivity "
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
            )
        sensitivity_str = ",".join([str(t) for t in sensitivity])
        if len(sensitivity) != 0:
            self.detector.SetSensitivity(sensitivity_str.encode())

        self.ring_buffer = RingBuffer(
            self.detector.NumChannels() * self.detector.SampleRate() * 5
        )

    def start(
        self,
        detected_callback=play_audio_file,
        interrupt_check=lambda: False,
        sleep_time=0.03,
        audio_recorder_callback=None,
        silent_count_threshold=15,
        recording_timeout=100,
    ):
        """
        Start the voice detector. For every `sleep_time` second it checks the
        audio buffer for triggering keywords. If detected, then call
        corresponding function in `detected_callback`, which can be a single
        function (single model) or a list of callback functions (multiple
        models). Every loop it also calls `interrupt_check` -- if it returns
        True, then breaks from the loop and return.

        :param detected_callback: a function or list of functions. The number of
                                  items must match the number of models in
                                  `decoder_model`.
        :param interrupt_check: a function that returns True if the main loop
                                needs to stop.
        :param float sleep_time: how much time in second every loop waits.
        :param audio_recorder_callback: if specified, this will be called after
                                        a keyword has been spoken and after the
                                        phrase immediately after the keyword has
                                        been recorded. The function will be
                                        passed the name of the file where the
                                        phrase was recorded.
        :param silent_count_threshold: indicates how long silence must be heard
                                       to mark the end of a phrase that is
                                       being recorded.
        :param recording_timeout: limits the maximum length of a recording.
        :return: None
        """
        self._running = True

        def audio_callback(in_data, frame_count, time_info, status):
            if utils.isRecordable():
                self.ring_buffer.extend(in_data)
                play_data = chr(0) * len(in_data)
            else:
                play_data = chr(0)
            return play_data, pyaudio.paContinue

        with no_alsa_error():
            self.audio = pyaudio.PyAudio()
        self.stream_in = self.audio.open(
            input=True,
            output=False,
            format=self.audio.get_format_from_width(self.detector.BitsPerSample() / 8),
            channels=self.detector.NumChannels(),
            rate=self.detector.SampleRate(),
            frames_per_buffer=2048,
            stream_callback=audio_callback,
        )

        if interrupt_check():
            logger.debug("detect voice return")
            return

        tc = type(detected_callback)
        if tc is not list:
            detected_callback = [detected_callback]
        if len(detected_callback) == 1 and self.num_hotwords > 1:
            detected_callback *= self.num_hotwords

        assert self.num_hotwords == len(detected_callback), (
            "Error: hotwords in your models (%d) do not match the number of "
            "callbacks (%d)" % (self.num_hotwords, len(detected_callback))
        )

        logger.debug("detecting...")

        state = "PASSIVE"
        while self._running is True:
            if interrupt_check():
                logger.debug("detect voice break")
                break
            data = self.ring_buffer.get()
            if len(data) == 0:
                time.sleep(sleep_time)
                continue

            status = self.detector.RunDetection(data)
            if status == -1:
                logger.warning("Error initializing streams or reading audio data")

            # small state machine to handle recording of phrase after keyword
            if state == "PASSIVE":
                if status > 0:  # key word found

                    self.recordedData = []
                    self.recordedData.append(data)
                    silentCount = 0
                    recordingCount = 0
                    message = "Keyword " + str(status) + " detected at time: "
                    message += time.strftime(
                        "%Y-%m-%d %H:%M:%S", time.localtime(time.time())
                    )
                    logger.info(message)
                    callback = detected_callback[status - 1]
                    if callback is not None:
                        callback()

                    if (
                        audio_recorder_callback is not None
                        and status == 1
                        and utils.is_proper_time()
                    ):
                        state = "ACTIVE"
                    continue

            elif state == "ACTIVE":
                stopRecording = False
                if recordingCount > recording_timeout:
                    stopRecording = True
                elif status == -2:  # silence found
                    if silentCount > silent_count_threshold:
                        stopRecording = True
                    else:
                        silentCount = silentCount + 1
                elif status == 0:  # voice found
                    silentCount = 0

                if stopRecording == True:
                    fname = self.saveMessage()
                    audio_recorder_callback(fname)
                    state = "PASSIVE"
                    continue

                recordingCount = recordingCount + 1
                self.recordedData.append(data)

        logger.debug("finished.")

    def saveMessage(self):
        """
        Save the message stored in self.recordedData to a timestamped file.
        """
        filename = os.path.join(
            constants.TEMP_PATH, "output" + str(int(time.time())) + ".wav"
        )
        data = b"".join(self.recordedData)

        # use wave to save data
        wf = wave.open(filename, "wb")
        wf.setnchannels(self.detector.NumChannels())
        wf.setsampwidth(
            self.audio.get_sample_size(
                self.audio.get_format_from_width(self.detector.BitsPerSample() / 8)
            )
        )
        wf.setframerate(self.detector.SampleRate())
        wf.writeframes(data)
        wf.close()
        logger.debug("finished saving: " + filename)
        return filename

    def terminate(self):
        """
        Terminate audio stream. Users can call start() again to detect.
        :return: None
        """
        if self._running:
            self.stream_in.stop_stream()
            self.stream_in.close()
            self.audio.terminate()
            self._running = False