|
-
-
- import argparse
- import copy
- import os.path as osp
- import time
- import sys
- import os
- current_directory = os.path.dirname(os.path.abspath(__file__))
- root_path = os.path.abspath(os.path.dirname(current_directory) + os.path.sep + ".")
- sys.path.append(root_path)
- # os.environ['CUDA_VISIBLE_DEVICES'] = '1'
- import torch
- from yolodet.utils.config import Config
- from tools import file_utils
- from yolodet.utils.Logger import Logging
- from yolodet.utils.collect_env import collect_env
- from yolodet.utils.newInstance_utils import build_from_dict
- from yolodet.utils.registry import DETECTORS,DATASET
- from yolodet.apis.train import set_random_seed,train_detector
- from yolodet.models.utils.torch_utils import select_device
-
-
-
def parse_args():
    """Define the training CLI and parse the process arguments.

    Returns:
        argparse.Namespace: the parsed options (``config``, ``work_dir``,
        ``resume_from``, ``validate``, ``device``, ``seed``,
        ``deterministic``, ``autoscale_lr`` and ``multi_scale``).
    """
    cli = argparse.ArgumentParser(description='YOLODet train detectors')

    # Positional config file followed by the optional overrides.
    cli.add_argument('config', help='train config file path')
    cli.add_argument('--work_dir', help='the dir to save logs and models')
    cli.add_argument('--resume_from', help='the checkpoint file to resume from')
    cli.add_argument(
        '--validate',
        action='store_true',
        help='whether to evaluate the checkpoint during training')
    cli.add_argument(
        '--device',
        default='',
        help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    cli.add_argument('--seed', type=int, default=None, help='random seed')

    # The remaining options are plain boolean switches.
    switches = (
        ('--deterministic', 'whether to set deterministic options for CUDNN backend.'),
        ('--autoscale-lr', 'automatically scale lr with the number of gpus'),
        ('--multi-scale', 'vary img-size +/- 50%%'),
    )
    for flag, description in switches:
        cli.add_argument(flag, action='store_true', help=description)

    return cli.parse_args()
-
-
def main():
    """Run one training session driven by a config file and CLI overrides.

    Steps, in order: parse the CLI, load the config, apply the CLI
    overrides, select the device, create the work directory, log the
    environment and config, optionally seed the RNGs, build the detector
    and the dataset(s), then hand everything to ``train_detector``.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    # ``--device`` defaults to '' so the CLI value is always forwarded.
    cfg.device = args.device
    device = select_device(cfg.device)
    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        # NOTE(review): relies on the config defining ``gpu_ids`` -- confirm.
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8

    # create work_dir
    file_utils.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # init the logger before other steps
    logger = Logging.getLogger()

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join(
        '{}: {}'.format(k, v) for k, v in env_info_dict.items())
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info
    meta['batch_size'] = cfg.data.batch_size
    meta['subdivisions'] = cfg.data.subdivisions
    meta['multi_scale'] = args.multi_scale
    # log some basic info
    logger.info('Config:\n{}'.format(cfg.text))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}, deterministic: {}'.format(
            args.seed, args.deterministic))
        set_random_seed(args.seed, deterministic=args.deterministic)
    cfg.seed = args.seed
    meta['seed'] = args.seed

    model = build_from_dict(cfg.model, DETECTORS)
    # ``.to`` works for both CPU and CUDA devices; ``.cuda(device)`` raises
    # when the selected device is the CPU (``--device cpu``).
    model = model.to(device)
    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Set after the optional DataParallel wrap so the attribute lives on the
    # object that is actually passed to ``train_detector``.
    model.device = device

    datasets = [build_from_dict(cfg.data.train, DATASET)]
    if len(cfg.workflow) == 2:
        # The validation split reuses the training pipeline in this workflow.
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_from_dict(val_dataset, DATASET))
    if cfg.checkpoint_config is not None:
        # save config file content and class names in checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            config=cfg.text,
            CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    train_detector(
        model,
        datasets,
        cfg,
        validate=args.validate,
        timestamp=timestamp,
        meta=meta)


if __name__ == '__main__':
    main()
-
-
-
- # This file was automatically generated by SWIG (http://www.swig.org).
- # Version 3.0.12
- #
- # Do not make changes to this file unless you know what you are doing--modify
- # the SWIG interface file instead.
-
from sys import version_info as _swig_python_version_info

# Locate and load the compiled ``_snowboydetect`` extension. The strategy
# depends on the running interpreter version; the helper is deleted after
# use so it does not leak into the module namespace.
if _swig_python_version_info >= (2, 7, 0):

    def swig_import_helper():
        """Import ``_snowboydetect``, preferring the copy inside this package."""
        import importlib

        package = __name__.rpartition(".")[0]
        qualified = ".".join((package, "_snowboydetect")).lstrip(".")
        try:
            return importlib.import_module(qualified)
        except ImportError:
            # Fall back to a top-level extension module.
            return importlib.import_module("_snowboydetect")

    _snowboydetect = swig_import_helper()
    del swig_import_helper
elif _swig_python_version_info >= (2, 6, 0):

    def swig_import_helper():
        """Import ``_snowboydetect`` from this file's directory via ``imp``."""
        from os.path import dirname
        import imp

        fp = None
        try:
            fp, pathname, description = imp.find_module(
                "_snowboydetect", [dirname(__file__)]
            )
        except ImportError:
            # Not next to this file: use the regular import machinery.
            import _snowboydetect

            return _snowboydetect
        try:
            _mod = imp.load_module("_snowboydetect", fp, pathname, description)
        finally:
            if fp is not None:
                fp.close()
        return _mod

    _snowboydetect = swig_import_helper()
    del swig_import_helper
else:
    import _snowboydetect
del _swig_python_version_info
-
# Alias ``property`` under a private name for the generated wrappers.
try:
    _swig_property = property
except NameError:
    pass  # Python < 2.2 doesn't have 'property'.

# Expose the builtins module under the same name on Python 2 and Python 3.
try:
    import builtins as __builtin__
except ImportError:
    import __builtin__
-
-
def _swig_setattr_nondynamic(self, class_type, name, value, static=1):
    """Set ``name`` on a SWIG proxy object.

    Resolution order: the ``thisown`` ownership flag, the raw ``this``
    handle (only when ``value`` is a ``SwigPyObject``), a setter registered
    in ``class_type.__swig_setmethods__``, and finally a plain instance
    attribute. The last step is refused with ``AttributeError`` when
    ``static`` is truthy.
    """
    if name == "thisown":
        return self.this.own(value)
    if name == "this" and type(value).__name__ == "SwigPyObject":
        self.__dict__[name] = value
        return
    setter = class_type.__swig_setmethods__.get(name, None)
    if setter:
        return setter(self, value)
    if static:
        raise AttributeError("You cannot add attributes to %s" % self)
    # Dynamic attributes are allowed: store according to the class flavour.
    if _newclass:
        object.__setattr__(self, name, value)
    else:
        self.__dict__[name] = value
-
-
def _swig_setattr(self, class_type, name, value):
    """Set ``name`` on a SWIG proxy, allowing new (dynamic) attributes."""
    return _swig_setattr_nondynamic(self, class_type, name, value, static=0)
-
-
def _swig_getattr(self, class_type, name):
    """Look up ``name`` on a SWIG proxy object.

    ``thisown`` is answered from the underlying handle; any other name must
    have a getter registered in ``class_type.__swig_getmethods__``,
    otherwise ``AttributeError`` is raised.
    """
    if name == "thisown":
        return self.this.own()
    getter = class_type.__swig_getmethods__.get(name, None)
    if not getter:
        raise AttributeError(
            "'%s' object has no attribute '%s'" % (class_type.__name__, name)
        )
    return getter(self)
-
-
def _swig_repr(self):
    """Return ``<module.Class; proxy of ... >`` for a SWIG proxy object.

    The ``proxy of`` part is left empty when the object's ``this`` handle
    cannot be represented.
    """
    try:
        described = "proxy of " + self.this.__repr__()
    except __builtin__.Exception:
        described = ""
    cls = self.__class__
    return "<%s.%s; %s >" % (cls.__module__, cls.__name__, described)
-
-
# Pick the base class for the proxy classes: ``object`` when it is available
# (new-style classes), otherwise an empty classic class.
try:
    _object = object
    _newclass = 1
except __builtin__.Exception:

    class _object:
        pass

    _newclass = 0
-
-
class SnowboyDetect(_object):
    """Python proxy for the native ``SnowboyDetect`` object.

    Every public method forwards to the function of the same name in the
    compiled ``_snowboydetect`` extension.
    """

    __swig_setmethods__ = {}
    __swig_getmethods__ = {}

    def __setattr__(self, name, value):
        return _swig_setattr(self, SnowboyDetect, name, value)

    def __getattr__(self, name):
        return _swig_getattr(self, SnowboyDetect, name)

    __repr__ = _swig_repr

    def __init__(self, resource_filename, model_str):
        """Create the native detector from a resource file and model string."""
        this = _snowboydetect.new_SnowboyDetect(resource_filename, model_str)
        try:
            self.this.append(this)
        except __builtin__.Exception:
            self.this = this

    def Reset(self):
        """Forward to ``SnowboyDetect_Reset``."""
        return _snowboydetect.SnowboyDetect_Reset(self)

    def RunDetection(self, *args):
        """Forward to ``SnowboyDetect_RunDetection``."""
        return _snowboydetect.SnowboyDetect_RunDetection(self, *args)

    def SetSensitivity(self, sensitivity_str):
        """Forward to ``SnowboyDetect_SetSensitivity``."""
        return _snowboydetect.SnowboyDetect_SetSensitivity(self, sensitivity_str)

    def SetHighSensitivity(self, high_sensitivity_str):
        """Forward to ``SnowboyDetect_SetHighSensitivity``."""
        return _snowboydetect.SnowboyDetect_SetHighSensitivity(
            self, high_sensitivity_str
        )

    def GetSensitivity(self):
        """Forward to ``SnowboyDetect_GetSensitivity``."""
        return _snowboydetect.SnowboyDetect_GetSensitivity(self)

    def SetAudioGain(self, audio_gain):
        """Forward to ``SnowboyDetect_SetAudioGain``."""
        return _snowboydetect.SnowboyDetect_SetAudioGain(self, audio_gain)

    def UpdateModel(self):
        """Forward to ``SnowboyDetect_UpdateModel``."""
        return _snowboydetect.SnowboyDetect_UpdateModel(self)

    def NumHotwords(self):
        """Forward to ``SnowboyDetect_NumHotwords``."""
        return _snowboydetect.SnowboyDetect_NumHotwords(self)

    def ApplyFrontend(self, apply_frontend):
        """Forward to ``SnowboyDetect_ApplyFrontend``."""
        return _snowboydetect.SnowboyDetect_ApplyFrontend(self, apply_frontend)

    def SampleRate(self):
        """Forward to ``SnowboyDetect_SampleRate``."""
        return _snowboydetect.SnowboyDetect_SampleRate(self)

    def NumChannels(self):
        """Forward to ``SnowboyDetect_NumChannels``."""
        return _snowboydetect.SnowboyDetect_NumChannels(self)

    def BitsPerSample(self):
        """Forward to ``SnowboyDetect_BitsPerSample``."""
        return _snowboydetect.SnowboyDetect_BitsPerSample(self)

    __swig_destroy__ = _snowboydetect.delete_SnowboyDetect

    def __del__(self):
        return None


# Register the proxy class with the extension module.
SnowboyDetect_swigregister = _snowboydetect.SnowboyDetect_swigregister
SnowboyDetect_swigregister(SnowboyDetect)
-
-
class SnowboyVad(_object):
    """Python proxy for the native ``SnowboyVad`` object.

    Every public method forwards to the function of the same name in the
    compiled ``_snowboydetect`` extension.
    """

    __swig_setmethods__ = {}
    __swig_getmethods__ = {}

    def __setattr__(self, name, value):
        return _swig_setattr(self, SnowboyVad, name, value)

    def __getattr__(self, name):
        return _swig_getattr(self, SnowboyVad, name)

    __repr__ = _swig_repr

    def __init__(self, resource_filename):
        """Create the native VAD object from a resource file."""
        this = _snowboydetect.new_SnowboyVad(resource_filename)
        try:
            self.this.append(this)
        except __builtin__.Exception:
            self.this = this

    def Reset(self):
        """Forward to ``SnowboyVad_Reset``."""
        return _snowboydetect.SnowboyVad_Reset(self)

    def RunVad(self, *args):
        """Forward to ``SnowboyVad_RunVad``."""
        return _snowboydetect.SnowboyVad_RunVad(self, *args)

    def SetAudioGain(self, audio_gain):
        """Forward to ``SnowboyVad_SetAudioGain``."""
        return _snowboydetect.SnowboyVad_SetAudioGain(self, audio_gain)

    def ApplyFrontend(self, apply_frontend):
        """Forward to ``SnowboyVad_ApplyFrontend``."""
        return _snowboydetect.SnowboyVad_ApplyFrontend(self, apply_frontend)

    def SampleRate(self):
        """Forward to ``SnowboyVad_SampleRate``."""
        return _snowboydetect.SnowboyVad_SampleRate(self)

    def NumChannels(self):
        """Forward to ``SnowboyVad_NumChannels``."""
        return _snowboydetect.SnowboyVad_NumChannels(self)

    def BitsPerSample(self):
        """Forward to ``SnowboyVad_BitsPerSample``."""
        return _snowboydetect.SnowboyVad_BitsPerSample(self)

    __swig_destroy__ = _snowboydetect.delete_SnowboyVad

    def __del__(self):
        return None


# Register the proxy class with the extension module.
SnowboyVad_swigregister = _snowboydetect.SnowboyVad_swigregister
SnowboyVad_swigregister(SnowboyVad)
-
- # This file is compatible with both classic and new-style classes.
-
-
- #!/usr/bin/env python
-
- import collections
- import pyaudio
- from . import snowboydetect
- from robot import utils, logging
- import time
- import wave
- import os
- from ctypes import CFUNCTYPE, c_char_p, c_int, cdll
- from contextlib import contextmanager
- from robot import constants
-
-
# Module-level logger for everything in this file.
logger = logging.getLogger("snowboy")

# Directory containing this module; bundled resources live beneath it.
TOP_DIR = os.path.dirname(os.path.abspath(__file__))

# Resource file handed to the detector, plus the two notification sounds.
RESOURCE_FILE = os.path.join(TOP_DIR, "resources/common.res")
DETECT_DING = os.path.join(TOP_DIR, "resources/ding.wav")
DETECT_DONG = os.path.join(TOP_DIR, "resources/dong.wav")
-
-
def py_error_handler(filename, line, function, err, fmt):
    """Ignore an error report; installed as a do-nothing error callback."""
    return None


# ctypes prototype for the callback: returns nothing and takes
# (char*, int, char*, int, char*).
ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)

# Kept at module level so the wrapped callback stays referenced.
c_error_handler = ERROR_HANDLER_FUNC(py_error_handler)
-
-
@contextmanager
def no_alsa_error():
    """Run the ``with`` body with ALSA error reporting routed to a no-op.

    Installing the handler is best effort: if ``libasound.so`` cannot be
    loaded or the handler cannot be set, the body still runs, just without
    the suppression. Exceptions raised by the body propagate unchanged, and
    the handler is reset afterwards whether or not the body raised.
    """
    try:
        asound = cdll.LoadLibrary("libasound.so")
        asound.snd_lib_error_set_handler(c_error_handler)
    except Exception:
        # Suppression is optional; never let it block audio usage.
        asound = None

    try:
        # Exactly one yield: a second yield after an exception was thrown
        # into the generator makes contextlib raise RuntimeError.
        yield
    finally:
        if asound is not None:
            asound.snd_lib_error_set_handler(None)
-
-
class RingBuffer(object):
    """Bounded FIFO for audio bytes; the oldest items drop once it is full."""

    def __init__(self, size=4096):
        # A deque with ``maxlen`` discards from the left when it overflows.
        self._buf = collections.deque(maxlen=size)

    def extend(self, data):
        """Append every item of ``data`` to the end of the buffer."""
        self._buf.extend(data)

    def get(self):
        """Return the buffered content as ``bytes`` and empty the buffer."""
        snapshot = bytearray(self._buf)
        self._buf.clear()
        return bytes(snapshot)
-
-
def play_audio_file(fname=DETECT_DING):
    """Simple callback function to play a wave file. By default it plays
    a Ding sound.

    The wave file is closed as soon as it has been read, and the output
    stream and the PyAudio instance are released even if playback fails.

    :param str fname: wave file name
    :return: None
    """
    # Read everything needed while the file is open, then let it close.
    with wave.open(fname, "rb") as ding_wav:
        sample_width = ding_wav.getsampwidth()
        channels = ding_wav.getnchannels()
        frame_rate = ding_wav.getframerate()
        ding_data = ding_wav.readframes(ding_wav.getnframes())

    with no_alsa_error():
        audio = pyaudio.PyAudio()
    try:
        stream_out = audio.open(
            format=audio.get_format_from_width(sample_width),
            channels=channels,
            rate=frame_rate,
            input=False,
            output=True,
        )
        try:
            stream_out.start_stream()
            stream_out.write(ding_data)
            # Short pause before stopping the stream.
            time.sleep(0.2)
            stream_out.stop_stream()
        finally:
            stream_out.close()
    finally:
        audio.terminate()
-
-
class ActiveListener(object):
    """Active listening with VAD: record one phrase and save it to a file."""

    def __init__(self, decoder_model, resource=RESOURCE_FILE):
        """
        :param decoder_model: decoder model file path, a string or a list of
                              strings.
        :param resource: resource file path.
        """
        logger.debug("activeListen __init__()")
        self.recordedData = []
        # Created by listen(); defined here so cleanup is always safe.
        self.audio = None
        self.stream_in = None
        # A bare string would otherwise be joined character by character.
        if isinstance(decoder_model, str):
            decoder_model = [decoder_model]
        model_str = ",".join(decoder_model)
        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(), model_str=model_str.encode()
        )
        self.ring_buffer = RingBuffer(
            self.detector.NumChannels() * self.detector.SampleRate() * 5
        )

    def _release_audio(self):
        """Stop/close the input stream and terminate PyAudio; safe to repeat."""
        stream, self.stream_in = self.stream_in, None
        audio, self.audio = self.audio, None
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            if audio is not None:
                audio.terminate()

    def listen(
        self,
        interrupt_check=lambda: False,
        sleep_time=0.03,
        silent_count_threshold=15,
        recording_timeout=100,
    ):
        """
        Record from the microphone until enough silence has been heard or the
        recording timeout is exceeded, then save the recording.

        The audio stream and the PyAudio instance are released on every exit
        path (open failure, interrupt, or after the file has been saved).

        :param interrupt_check: a function that returns True if the main loop
                                needs to stop.
        :param silent_count_threshold: indicates how long silence must be heard
                                       to mark the end of a phrase that is
                                       being recorded.
        :param float sleep_time: how much time in second every loop waits.
        :param recording_timeout: limits the maximum length of a recording.
        :return: recorded file path, or None when nothing was saved
        """
        logger.debug("activeListen listen()")

        self._running = True
        # Do not carry audio over from an earlier, interrupted call.
        self.recordedData = []

        def audio_callback(in_data, frame_count, time_info, status):
            self.ring_buffer.extend(in_data)
            play_data = chr(0) * len(in_data)
            return play_data, pyaudio.paContinue

        with no_alsa_error():
            self.audio = pyaudio.PyAudio()

        logger.debug("opening audio stream")

        try:
            self.stream_in = self.audio.open(
                input=True,
                output=False,
                format=self.audio.get_format_from_width(
                    self.detector.BitsPerSample() // 8
                ),
                channels=self.detector.NumChannels(),
                rate=self.detector.SampleRate(),
                frames_per_buffer=2048,
                stream_callback=audio_callback,
            )
        except Exception as e:
            logger.critical(e)
            self._release_audio()
            return

        logger.debug("audio stream opened")

        try:
            if interrupt_check():
                logger.debug("detect voice return")
                return

            silentCount = 0
            recordingCount = 0

            logger.debug("begin activeListen loop")

            while self._running is True:

                if interrupt_check():
                    logger.debug("detect voice break")
                    break
                data = self.ring_buffer.get()
                if len(data) == 0:
                    time.sleep(sleep_time)
                    continue

                status = self.detector.RunDetection(data)
                if status == -1:
                    logger.warning(
                        "Error initializing streams or reading audio data"
                    )

                stopRecording = False
                if recordingCount > recording_timeout:
                    stopRecording = True
                elif status == -2:  # silence found
                    if silentCount > silent_count_threshold:
                        stopRecording = True
                    else:
                        silentCount = silentCount + 1
                elif status == 0:  # voice found
                    silentCount = 0

                if stopRecording:
                    return self.saveMessage()

                recordingCount = recordingCount + 1
                self.recordedData.append(data)

            logger.debug("finished.")
        finally:
            # No-op when saveMessage() has already released the handles.
            self._release_audio()

    def saveMessage(self):
        """
        Save the message stored in self.recordedData to a timestamped file,
        then release the audio stream and the PyAudio instance.

        :return: path of the written wave file
        """
        filename = os.path.join(
            constants.TEMP_PATH, "output" + str(int(time.time())) + ".wav"
        )
        data = b"".join(self.recordedData)

        try:
            # use wave to save data; the context manager closes the file
            # even if writing fails
            with wave.open(filename, "wb") as wf:
                wf.setnchannels(self.detector.NumChannels())
                wf.setsampwidth(
                    self.audio.get_sample_size(
                        self.audio.get_format_from_width(
                            self.detector.BitsPerSample() // 8
                        )
                    )
                )
                wf.setframerate(self.detector.SampleRate())
                wf.writeframes(data)
            logger.debug("finished saving: " + filename)
        finally:
            self._release_audio()

        return filename
-
-
class HotwordDetector(object):
    """
    Snowboy decoder to detect whether a keyword specified by `decoder_model`
    exists in a microphone input stream.

    :param decoder_model: decoder model file path, a string or a list of strings
    :param resource: resource file path.
    :param sensitivity: decoder sensitivity, a float or a list of floats.
                        The bigger the value, the more sensitive the
                        decoder. If omitted or an empty list is provided,
                        then the default sensitivity in the model will be
                        used.
    :param audio_gain: multiply input volume by this factor.
    :param apply_frontend: applies the frontend processing algorithm if True.
    """

    def __init__(
        self,
        decoder_model,
        resource=RESOURCE_FILE,
        sensitivity=None,
        audio_gain=1,
        apply_frontend=False,
    ):

        self._running = False
        # Filled while a phrase is being recorded by start().
        self.recordedData = []

        # Normalize both arguments to fresh lists (scalars become one item).
        if sensitivity is None:
            sensitivity = []
        if not isinstance(decoder_model, (list, tuple)):
            decoder_model = [decoder_model]
        if not isinstance(sensitivity, (list, tuple)):
            sensitivity = [sensitivity]
        decoder_model = list(decoder_model)
        sensitivity = list(sensitivity)
        model_str = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(), model_str=model_str.encode()
        )
        self.detector.SetAudioGain(audio_gain)
        self.detector.ApplyFrontend(apply_frontend)
        self.num_hotwords = self.detector.NumHotwords()

        # One sensitivity for several models applies to every hotword.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if sensitivity:
            assert self.num_hotwords == len(sensitivity), (
                "number of hotwords in decoder_model (%d) and sensitivity "
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
            )
            sensitivity_str = ",".join(str(t) for t in sensitivity)
            self.detector.SetSensitivity(sensitivity_str.encode())

        self.ring_buffer = RingBuffer(
            self.detector.NumChannels() * self.detector.SampleRate() * 5
        )

    def start(
        self,
        detected_callback=play_audio_file,
        interrupt_check=lambda: False,
        sleep_time=0.03,
        audio_recorder_callback=None,
        silent_count_threshold=15,
        recording_timeout=100,
    ):
        """
        Start the voice detector. For every `sleep_time` second it checks the
        audio buffer for triggering keywords. If detected, then call
        corresponding function in `detected_callback`, which can be a single
        function (single model) or a list of callback functions (multiple
        models). Every loop it also calls `interrupt_check` -- if it returns
        True, then breaks from the loop and return.

        :param detected_callback: a function or list of functions. The number of
                                  items must match the number of models in
                                  `decoder_model`. The caller's list is never
                                  modified.
        :param interrupt_check: a function that returns True if the main loop
                                needs to stop.
        :param float sleep_time: how much time in second every loop waits.
        :param audio_recorder_callback: if specified, this will be called after
                                        a keyword has been spoken and after the
                                        phrase immediately after the keyword has
                                        been recorded. The function will be
                                        passed the name of the file where the
                                        phrase was recorded.
        :param silent_count_threshold: indicates how long silence must be heard
                                       to mark the end of a phrase that is
                                       being recorded.
        :param recording_timeout: limits the maximum length of a recording.
        :return: None
        """
        self._running = True

        def audio_callback(in_data, frame_count, time_info, status):
            # Audio is only buffered while recording is allowed.
            if utils.isRecordable():
                self.ring_buffer.extend(in_data)
                play_data = chr(0) * len(in_data)
            else:
                play_data = chr(0)
            return play_data, pyaudio.paContinue

        with no_alsa_error():
            self.audio = pyaudio.PyAudio()
        self.stream_in = self.audio.open(
            input=True,
            output=False,
            format=self.audio.get_format_from_width(
                self.detector.BitsPerSample() // 8
            ),
            channels=self.detector.NumChannels(),
            rate=self.detector.SampleRate(),
            frames_per_buffer=2048,
            stream_callback=audio_callback,
        )

        if interrupt_check():
            logger.debug("detect voice return")
            return

        # Work on a private list: an in-place ``*=`` would grow the list
        # object owned by the caller.
        if isinstance(detected_callback, (list, tuple)):
            callbacks = list(detected_callback)
        else:
            callbacks = [detected_callback]
        if len(callbacks) == 1 and self.num_hotwords > 1:
            callbacks = callbacks * self.num_hotwords

        assert self.num_hotwords == len(callbacks), (
            "Error: hotwords in your models (%d) do not match the number of "
            "callbacks (%d)" % (self.num_hotwords, len(callbacks))
        )

        logger.debug("detecting...")

        state = "PASSIVE"
        silentCount = 0
        recordingCount = 0
        while self._running is True:
            if interrupt_check():
                logger.debug("detect voice break")
                break
            data = self.ring_buffer.get()
            if len(data) == 0:
                time.sleep(sleep_time)
                continue

            status = self.detector.RunDetection(data)
            if status == -1:
                logger.warning("Error initializing streams or reading audio data")

            # small state machine to handle recording of phrase after keyword
            if state == "PASSIVE":
                if status > 0:  # key word found

                    self.recordedData = []
                    self.recordedData.append(data)
                    silentCount = 0
                    recordingCount = 0
                    message = "Keyword " + str(status) + " detected at time: "
                    message += time.strftime(
                        "%Y-%m-%d %H:%M:%S", time.localtime(time.time())
                    )
                    logger.info(message)
                    callback = callbacks[status - 1]
                    if callback is not None:
                        callback()

                    # Only the first hotword starts a phrase recording.
                    if (
                        audio_recorder_callback is not None
                        and status == 1
                        and utils.is_proper_time()
                    ):
                        state = "ACTIVE"
                    continue

            elif state == "ACTIVE":
                stopRecording = False
                if recordingCount > recording_timeout:
                    stopRecording = True
                elif status == -2:  # silence found
                    if silentCount > silent_count_threshold:
                        stopRecording = True
                    else:
                        silentCount = silentCount + 1
                elif status == 0:  # voice found
                    silentCount = 0

                if stopRecording:
                    fname = self.saveMessage()
                    audio_recorder_callback(fname)
                    state = "PASSIVE"
                    continue

                recordingCount = recordingCount + 1
                self.recordedData.append(data)

        logger.debug("finished.")

    def saveMessage(self):
        """
        Save the message stored in self.recordedData to a timestamped file.

        :return: path of the written wave file
        """
        filename = os.path.join(
            constants.TEMP_PATH, "output" + str(int(time.time())) + ".wav"
        )
        data = b"".join(self.recordedData)

        # use wave to save data; the context manager closes the file even if
        # writing fails
        with wave.open(filename, "wb") as wf:
            wf.setnchannels(self.detector.NumChannels())
            wf.setsampwidth(
                self.audio.get_sample_size(
                    self.audio.get_format_from_width(
                        self.detector.BitsPerSample() // 8
                    )
                )
            )
            wf.setframerate(self.detector.SampleRate())
            wf.writeframes(data)
        logger.debug("finished saving: " + filename)
        return filename

    def terminate(self):
        """
        Terminate audio stream. Users can call start() again to detect.
        :return: None
        """
        if self._running:
            self.stream_in.stop_stream()
            self.stream_in.close()
            self.audio.terminate()
            self._running = False
|