|
- import multiprocessing
- import os
- import shutil
- import time
- from concurrent.futures import ThreadPoolExecutor
- from typing import Callable, List, Optional, Tuple
-
- import numpy as np
- from torchvision.datasets import utils
-
- from .. import configure
- from .. import datasets as sjds
- from ..datasets import np_savez
-
# Mapping from action-category directory name to its integer class index.
# The ten categories (and their 0-9 labels) follow the original Bullying10K
# release; directory names under the extracted root must match these keys.
CATEGORY_LABEL = dict(
    handshake=0,
    slapping=1,
    punching=2,
    walking=3,
    fingerguess=4,
    strangling=5,
    greeting=6,
    pushing=7,
    hairgrabs=8,
    kicking=9,
)
-
class Bullying10kClassification(sjds.NeuromorphicDatasetFolder):
    """The Bullying10K dataset wrapped for action recognition (classification).

    Proposed by `Bullying10K: A Neuromorphic Dataset towards
    Privacy-Preserving Bullying Recognition <https://arxiv.org/abs/2306.11546>`_.
    """

    def __init__(
        self,
        root: str,
        train: Optional[bool] = None,
        data_type: str = 'event',
        frames_number: Optional[int] = None,
        split_by: Optional[str] = None,
        duration: Optional[int] = None,
        custom_integrate_function: Optional[Callable] = None,
        custom_integrated_frames_dir_name: Optional[str] = None,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
    ) -> None:
        """
        The Bullying10K dataset for action recognition (classification), which
        is proposed by `Bullying10K: A Neuromorphic Dataset towards
        Privacy-Preserving Bullying Recognition <https://arxiv.org/abs/2306.11546>`_.

        Refer to :class:`spikingjelly.datasets.NeuromorphicDatasetFolder` for more details about params information.
        """
        # This dataset ships a fixed train/val split (every 5th sample is
        # validation), so the caller must explicitly choose a side.
        if train is None:
            raise ValueError(
                "The argument `train` must be specified as a boolean value."
            )
        super().__init__(
            root, train, data_type, frames_number, split_by, duration,
            custom_integrate_function, custom_integrated_frames_dir_name,
            transform, target_transform
        )

    @staticmethod
    def resource_url_md5() -> List[Tuple[str, str, str]]:
        '''
        :return: A list ``url`` that ``url[i]`` is a tuple, which contains the i-th file's name, download link, and MD5
        :rtype: list
        '''
        return [
            ("handshake.zip", "https://figshare.com/ndownloader/files/41268834", "681d70f499e736a1e805305284ddc425"),
            ("slapping.zip", "https://figshare.com/ndownloader/files/41247021", "84b41d6805958f9f62f425223916ffc2"),
            ("punching.zip", "https://figshare.com/ndownloader/files/41263314", "40954f480ab210099d448b7b88fc4719"),
            ("walking.zip", "https://figshare.com/ndownloader/files/41247024", "56e4cac9c0814ce701c3b2292c15b6a9"),
            ("fingerguess.zip", "https://figshare.com/ndownloader/files/41253057", "f83114e5b4f0ea57cac86fb080c7e4d7"),
            ("strangling.zip", "https://figshare.com/ndownloader/files/41261904", "8185ecd6f3147e9b609d22f06270aa86"),
            ("greeting.zip", "https://figshare.com/ndownloader/files/41268792", "4a763fad728b04c8356db8544f1121fe"),
            ("pushing.zip", "https://figshare.com/ndownloader/files/41268951", "7986c74ade7149a98672120a89b13ba8"),
            ("hairgrabs.zip", "https://figshare.com/ndownloader/files/41277855", "a9cf690ed0a3305da4a4b8e110f64db1"),
            ("kicking.zip", "https://figshare.com/ndownloader/files/41278008", "6c3218f977de4ac29c84a10b17779c33"),
        ]

    @staticmethod
    def downloadable() -> bool:
        '''
        :return: Whether the dataset can be directly downloaded by python codes. If not, the user have to download it manually
        :rtype: bool
        '''
        return True

    @staticmethod
    def extract_downloaded_files(download_root: str, extract_root: str) -> None:
        '''
        :param download_root: Root directory path which saves downloaded dataset files
        :type download_root: str
        :param extract_root: Root directory path which saves extracted files from downloaded files
        :type extract_root: str
        :return: None

        This function defines how to extract downloaded files.
        '''
        with ThreadPoolExecutor(
            max_workers=min(multiprocessing.cpu_count(), 10)
        ) as tpe:
            sub_threads = []
            for file_name in os.listdir(download_root):
                if not file_name.endswith(".zip"):
                    # Non-archive files (e.g. the json annotation files) are
                    # copied to extract_root directly.
                    src_file = os.path.join(download_root, file_name)
                    dst_file = os.path.join(extract_root, file_name)
                    shutil.copy(src_file, dst_file)
                else:
                    zip_file = os.path.join(download_root, file_name)
                    print(f'Extract [{zip_file}] to [{extract_root}].')
                    sub_threads.append(tpe.submit(
                        utils.extract_archive, zip_file, extract_root
                    ))

            # Surface the first extraction failure as an exception instead of
            # calling exit(): library code must not kill the interpreter, and
            # re-raising preserves the traceback for the caller.
            for sub_thread in sub_threads:
                e = sub_thread.exception()
                if e is not None:
                    raise e

    @staticmethod
    def convert_npy_to_npz(src_path: str, dst_dir: str, label: int):
        """
        :param src_path: the path of a npy file
        :type src_path: str
        :param dst_dir: the path of the directory to contain the converted npz files
        :type dst_dir: str
        :param label: the label of the sample, ranging from 0 to 9
        :type label: int
        :return: None

        This function defines how to convert a single npy file to npz format and save converted file in ``dst_dir``.
        """
        # Coerce defensively so the stored label is always an int, even if a
        # caller passes it through a string-typed container.
        label = int(label)
        original_data = np.load(src_path, allow_pickle=True)
        # The npy file holds a nested sequence of event packets; flatten it
        # into a single list of per-event tuples.
        original_data = [y for x in original_data for y in x]
        # original_data: [(t, x, y, p, ...), ...]
        # npz data: {"t": t, "x": x, "y": y, "p": p, "label": label}
        t = np.array([d[0] for d in original_data])
        x = np.array([d[1] for d in original_data])
        y = np.array([d[2] for d in original_data])
        p = np.array([d[3] for d in original_data])
        # splitext (instead of splitting on the first ".") keeps any extra
        # dots that may occur inside the stem of the file name.
        fname = os.path.splitext(os.path.basename(src_path))[0]
        target_file_path = os.path.join(
            dst_dir, str(label), f'{fname}.npz'
        )
        np_savez(
            target_file_path, t=t, x=x, y=y, p=p, label=label
        )
        print(f"[{target_file_path}] saved.")

    @staticmethod
    def create_events_np_files(extract_root: str, events_np_root: str) -> None:
        '''
        :param extract_root: Root directory path which saves extracted files from downloaded files
        :type extract_root: str
        :param events_np_root: Root directory path which saves events files in the ``npz`` format
        :type events_np_root: str
        :return: None

        This function defines how to convert the extracted npy data in ``extract_root`` to ``npz`` format and save converted files in ``events_np_root``.
        '''
        train_dir = os.path.join(events_np_root, "train")
        val_dir = os.path.join(events_np_root, "val")
        os.mkdir(train_dir)
        os.mkdir(val_dir)
        print(f"Mkdir [{train_dir}] and [{val_dir}].")
        for label in range(10):
            os.mkdir(os.path.join(train_dir, str(label)))
            os.mkdir(os.path.join(val_dir, str(label)))
        print(
            f"Mkdir {os.listdir(train_dir)} in [{train_dir}] "
            f"and {os.listdir(val_dir)} in [{val_dir}]."
        )

        # Collect (file_path, int_label) pairs per category directory. Hidden
        # entries and the json annotation files are not categories.
        all_files_labels: List[Tuple[str, int]] = []
        categories = list(filter(
            lambda x: (not x.endswith(".json")) and (not x.startswith(".")),
            os.listdir(extract_root)
        ))
        for c in categories:
            cp = os.path.join(extract_root, c)
            for dir_path, _, dir_file_names in os.walk(cp):
                for dfn in dir_file_names:
                    all_files_labels.append(
                        (os.path.join(dir_path, dfn), CATEGORY_LABEL[c])
                    )
        num_files = len(all_files_labels)
        print(f"Found {num_files} files in total.")

        # The same way to split training / validation sets as the original work:
        # https://github.com/Brain-Cog-Lab/Bullying10K/blob/main/Bullying10k.py
        # NOTE: the split is done with a boolean mask over the plain Python
        # list; converting the pairs to a numpy array would coerce the int
        # labels to strings and store string labels in the npz files.
        val_mask = np.zeros(num_files, dtype=bool)
        val_mask[::5] = True
        train_files_labels = [
            fl for fl, is_val in zip(all_files_labels, val_mask) if not is_val
        ]
        val_files_labels = [
            fl for fl, is_val in zip(all_files_labels, val_mask) if is_val
        ]
        print(
            f"Training set: {len(train_files_labels)} files. "
            f"Validation set: {len(val_files_labels)} files."
        )

        t_ckp = time.time()
        with ThreadPoolExecutor(max_workers=min(
            multiprocessing.cpu_count(),
            configure.max_threads_number_for_datasets_preprocess
        )) as tpe:
            sub_threads = []
            print(
                f"Start the ThreadPoolExecutor with max workers"
                f" = [{tpe._max_workers}]."
            )
            for fp, label in train_files_labels:
                sub_threads.append(tpe.submit(
                    Bullying10kClassification.convert_npy_to_npz,
                    fp, train_dir, label
                ))
            for fp, label in val_files_labels:
                sub_threads.append(tpe.submit(
                    Bullying10kClassification.convert_npy_to_npz,
                    fp, val_dir, label
                ))
        print(f'Used time = [{round(time.time() - t_ckp, 2)}s].')
        print(
            f"All npy files have been converted into npz files "
            f"and into [{train_dir, val_dir}]."
        )

        # Remove the extracted files, since they're too large.
        print(f"Remove the directory [{extract_root}].")
        shutil.rmtree(extract_root)

    @staticmethod
    def get_H_W() -> Tuple:
        '''
        :return: A tuple ``(H, W)``, where ``H`` is the height of the data and ``W`` is the width of the data.
            For example, this function returns ``(128, 128)`` for the DVS128 Gesture dataset.
        :rtype: tuple
        '''
        return 260, 346
|