#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Author: Yue Wang
@Contact: yuewangx@mit.edu
@File: data.py
@Time: 2018/10/13 6:21 PM

Modified by
@Author: An Tao
@Contact: ta19@mails.tsinghua.edu.cn
@Time: 2020/2/27 9:32 PM

Modified by
@Author: Dinghao Yang
@Contact: dinghaoyang@gmail.com
@Time: 2020/9/28 7:29 PM
"""


import os
import sys
import glob
import h5py
import numpy as np
import torch
from torch.utils.data import Dataset


def download_modelnet40():
    # Download and extract the ModelNet40 HDF5 point clouds into ./data if missing.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    DATA_DIR = os.path.join(BASE_DIR, 'data')
    if not os.path.exists(DATA_DIR):
        os.mkdir(DATA_DIR)
    if not os.path.exists(os.path.join(DATA_DIR, 'modelnet40_ply_hdf5_2048')):
        www = 'https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip'
        zipfile = os.path.basename(www)
        os.system('wget %s; unzip %s' % (www, zipfile))
        os.system('mv %s %s' % (zipfile[:-4], DATA_DIR))
        os.system('rm %s' % (zipfile))


def download_shapenetpart():
    # Download and extract the ShapeNet part-segmentation HDF5 data into ./data if missing.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    DATA_DIR = os.path.join(BASE_DIR, 'data')
    if not os.path.exists(DATA_DIR):
        os.mkdir(DATA_DIR)
    if not os.path.exists(os.path.join(DATA_DIR, 'shapenet_part_seg_hdf5_data')):
        www = 'https://shapenet.cs.stanford.edu/media/shapenet_part_seg_hdf5_data.zip'
        zipfile = os.path.basename(www)
        os.system('wget %s; unzip %s' % (www, zipfile))
        os.system('mv %s %s' % (zipfile[:-4], os.path.join(DATA_DIR, 'shapenet_part_seg_hdf5_data')))
        os.system('rm %s' % (zipfile))


def download_S3DIS():
    # Download the preprocessed S3DIS HDF5 data; the aligned raw dataset must be
    # downloaded manually and placed under data/.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    DATA_DIR = os.path.join(BASE_DIR, 'data')
    if not os.path.exists(DATA_DIR):
        os.mkdir(DATA_DIR)
    if not os.path.exists(os.path.join(DATA_DIR, 'indoor3d_sem_seg_hdf5_data')):
        www = 'https://shapenet.cs.stanford.edu/media/indoor3d_sem_seg_hdf5_data.zip'
        zipfile = os.path.basename(www)
        os.system('wget %s; unzip %s' % (www, zipfile))
        os.system('mv %s %s' % (zipfile[:-4], DATA_DIR))
        os.system('rm %s' % (zipfile))
    if not os.path.exists(os.path.join(DATA_DIR, 'Stanford3dDataset_v1.2_Aligned_Version')):
        if not os.path.exists(os.path.join(DATA_DIR, 'Stanford3dDataset_v1.2_Aligned_Version.zip')):
            print('Please download Stanford3dDataset_v1.2_Aligned_Version.zip '
                  'from https://goo.gl/forms/4SoGp4KtH1jfRqEj2 and place it under data/')
            sys.exit(0)
        else:
            zippath = os.path.join(DATA_DIR, 'Stanford3dDataset_v1.2_Aligned_Version.zip')
            os.system('unzip %s' % (zippath))
            os.system('rm %s' % (zippath))


def load_data_cls(partition):
    # Load all ModelNet40 HDF5 files for the given partition ('train' or 'test')
    # and return the stacked point clouds and labels.
    download_modelnet40()
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    DATA_DIR = os.path.join(BASE_DIR, 'data')
    all_data = []
    all_label = []
    for h5_name in glob.glob(os.path.join(DATA_DIR, 'modelnet40*hdf5_2048', '*%s*.h5' % partition)):
        f = h5py.File(h5_name, 'r')
        data = f['data'][:].astype('float32')
        label = f['label'][:].astype('int64')
        f.close()
        all_data.append(data)
        all_label.append(label)
    all_data = np.concatenate(all_data, axis=0)
    all_label = np.concatenate(all_label, axis=0)
    return all_data, all_label


def load_data_cls_lle(partition):
    # Load ModelNet40 point clouds augmented with LLE manifold coordinates.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    DATA_DIR = os.path.join(BASE_DIR, 'data')
    f = h5py.File(os.path.join(DATA_DIR, 'modelnet40_%s_add_lle.h5' % partition), 'r')
    data = f['data'][:].astype('float32')
    label = f['label'][:].astype('int64')
    f.close()
    return data, label


def load_data_cls_ltsa(partition):
    # Load ModelNet40 point clouds augmented with LTSA manifold coordinates.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    DATA_DIR = os.path.join(BASE_DIR, 'data')
    file_path = os.path.join(DATA_DIR, 'modelnet40_%s_add_ltsa.h5' % partition)
    f = h5py.File(file_path, 'r')
    data = f['data'][:].astype('float32')
    label = f['label'][:].astype('int64')
    f.close()
    return data, label


def load_data_cls_isomap(partition):
    # Load ModelNet40 point clouds augmented with Isomap manifold coordinates.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    DATA_DIR = os.path.join(BASE_DIR, 'data')
    f = h5py.File(os.path.join(DATA_DIR, 'modelnet40_%s_add_isomap.h5' % partition), 'r')
    data = f['data'][:].astype('float32')
    label = f['label'][:].astype('int64')
    f.close()
    return data, label


def load_data_cls_se(partition):
    # Load ModelNet40 point clouds augmented with spectral-embedding (SE) coordinates.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    DATA_DIR = os.path.join(BASE_DIR, 'data')
    f = h5py.File(os.path.join(DATA_DIR, 'modelnet40_%s_add_se.h5' % partition), 'r')
    data = f['data'][:].astype('float32')
    label = f['label'][:].astype('int64')
    f.close()
    return data, label


def load_data_partseg(partition):
    # Load ShapeNet part-segmentation data: points, object labels and per-point part ids.
    download_shapenetpart()
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    DATA_DIR = os.path.join(BASE_DIR, 'data')
    all_data = []
    all_label = []
    all_seg = []
    if partition == 'trainval':
        file = glob.glob(os.path.join(DATA_DIR, 'shapenet*hdf5*', '*train*.h5')) \
               + glob.glob(os.path.join(DATA_DIR, 'shapenet*hdf5*', '*val*.h5'))
    else:
        file = glob.glob(os.path.join(DATA_DIR, 'shapenet*hdf5*', '*%s*.h5' % partition))
    for h5_name in file:
        f = h5py.File(h5_name, 'r')
        data = f['data'][:].astype('float32')
        label = f['label'][:].astype('int64')
        seg = f['pid'][:].astype('int64')
        f.close()
        all_data.append(data)
        all_label.append(label)
        all_seg.append(seg)
    all_data = np.concatenate(all_data, axis=0)
    all_label = np.concatenate(all_label, axis=0)
    all_seg = np.concatenate(all_seg, axis=0)
    return all_data, all_label, all_seg


def prepare_test_data_semseg():
    # Run the preprocessing scripts that build the stanford_indoor3d dump and the
    # test HDF5 files if they are not already present.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    DATA_DIR = os.path.join(BASE_DIR, 'data')
    if not os.path.exists(os.path.join(DATA_DIR, 'stanford_indoor3d')):
        os.system('python prepare_data/collect_indoor3d_data.py')
    if not os.path.exists(os.path.join(DATA_DIR, 'indoor3d_sem_seg_hdf5_data_test')):
        os.system('python prepare_data/gen_indoor3d_h5.py')


def load_data_semseg(partition, test_area):
    # Load S3DIS blocks and per-point labels, splitting train/test by the held-out area.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    DATA_DIR = os.path.join(BASE_DIR, 'data')
    download_S3DIS()
    prepare_test_data_semseg()
    if partition == 'train':
        data_dir = os.path.join(DATA_DIR, 'indoor3d_sem_seg_hdf5_data')
    else:
        data_dir = os.path.join(DATA_DIR, 'indoor3d_sem_seg_hdf5_data_test')
    with open(os.path.join(data_dir, "all_files.txt")) as f:
        all_files = [line.rstrip() for line in f]
    with open(os.path.join(data_dir, "room_filelist.txt")) as f:
        room_filelist = [line.rstrip() for line in f]
    data_batchlist, label_batchlist = [], []
    for f in all_files:
        file = h5py.File(os.path.join(DATA_DIR, f), 'r')
        data = file["data"][:]
        label = file["label"][:]
        data_batchlist.append(data)
        label_batchlist.append(label)
    data_batches = np.concatenate(data_batchlist, 0)
    seg_batches = np.concatenate(label_batchlist, 0)
    test_area_name = "Area_" + test_area
    train_idxs, test_idxs = [], []
    for i, room_name in enumerate(room_filelist):
        if test_area_name in room_name:
            test_idxs.append(i)
        else:
            train_idxs.append(i)
    if partition == 'train':
        all_data = data_batches[train_idxs, ...]
        all_seg = seg_batches[train_idxs, ...]
    else:
        all_data = data_batches[test_idxs, ...]
        all_seg = seg_batches[test_idxs, ...]
    return all_data, all_seg


def load_data_semseg_lle(partition, test_area):
    # Same as load_data_semseg, but reads the LLE-augmented S3DIS HDF5 data.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    DATA_DIR = os.path.join(BASE_DIR, 'data')
    download_S3DIS()
    prepare_test_data_semseg()
    if partition == 'train':
        data_dir = os.path.join(DATA_DIR, 'indoor3d_sem_seg_lle_hdf5_data')
    else:
        data_dir = os.path.join(DATA_DIR, 'indoor3d_sem_seg_lle_hdf5_data_test')
    with open(os.path.join(data_dir, "all_files.txt")) as f:
        all_files = [line.rstrip() for line in f]
    with open(os.path.join(data_dir, "room_filelist.txt")) as f:
        room_filelist = [line.rstrip() for line in f]
    data_batchlist, label_batchlist = [], []
    for f in all_files:
        file = h5py.File(os.path.join(DATA_DIR, f), 'r')
        data = file["data"][:]
        label = file["label"][:]
        data_batchlist.append(data)
        label_batchlist.append(label)
    data_batches = np.concatenate(data_batchlist, 0)
    seg_batches = np.concatenate(label_batchlist, 0)
    test_area_name = "Area_" + test_area
    train_idxs, test_idxs = [], []
    for i, room_name in enumerate(room_filelist):
        if test_area_name in room_name:
            test_idxs.append(i)
        else:
            train_idxs.append(i)
    if partition == 'train':
        all_data = data_batches[train_idxs, ...]
        all_seg = seg_batches[train_idxs, ...]
    else:
        all_data = data_batches[test_idxs, ...]
        all_seg = seg_batches[test_idxs, ...]
    return all_data, all_seg


def translate_pointcloud(pointcloud):
    # Random anisotropic scaling and translation of an (N, 3) xyz point cloud.
    xyz1 = np.random.uniform(low=2. / 3., high=3. / 2., size=[3])
    xyz2 = np.random.uniform(low=-0.2, high=0.2, size=[3])

    translated_pointcloud = np.add(np.multiply(pointcloud, xyz1), xyz2).astype('float32')
    return translated_pointcloud


def translate_pointcloud_manifold(pointcloud):
    # Same augmentation for points with five channels (e.g. xyz plus two
    # manifold-embedding coordinates).
    xyz1 = np.random.uniform(low=2. / 3., high=3. / 2., size=[5])
    xyz2 = np.random.uniform(low=-0.2, high=0.2, size=[5])

    translated_pointcloud = np.add(np.multiply(pointcloud, xyz1), xyz2).astype('float32')
    return translated_pointcloud


def jitter_pointcloud(pointcloud, sigma=0.01, clip=0.02):
    # Add clipped Gaussian noise to every point coordinate (in place).
    N, C = pointcloud.shape
    pointcloud += np.clip(sigma * np.random.randn(N, C), -1 * clip, clip)
    return pointcloud


def rotate_pointcloud(pointcloud):
    # Rotate the cloud by a random angle around the y axis (rotation in the x-z plane).
    theta = np.pi * 2 * np.random.uniform()
    rotation_matrix = np.array([[np.cos(theta), -np.sin(theta)],
                                [np.sin(theta), np.cos(theta)]])
    pointcloud[:, [0, 2]] = pointcloud[:, [0, 2]].dot(rotation_matrix)  # random rotation (x, z)
    return pointcloud
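

# Illustrative sketch only (not part of the original pipeline): a quick shape check for
# the augmentation helpers above, assuming an (N, 3) float32 xyz cloud for these variants.
def _augmentation_shape_check(num_points=1024):
    cloud = np.random.rand(num_points, 3).astype('float32')
    assert translate_pointcloud(cloud).shape == (num_points, 3)
    assert jitter_pointcloud(cloud.copy()).shape == (num_points, 3)
    assert rotate_pointcloud(cloud.copy()).shape == (num_points, 3)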


class ModelNet40(Dataset):
    def __init__(self, num_points, partition='train'):
        self.data, self.label = load_data_cls(partition)
        self.num_points = num_points
        self.partition = partition

    def __getitem__(self, item):
        pointcloud = self.data[item][:self.num_points]
        label = self.label[item]
        if self.partition == 'train':
            pointcloud = translate_pointcloud(pointcloud)
            np.random.shuffle(pointcloud)
        return pointcloud, label

    def __len__(self):
        return self.data.shape[0]


class ModelNet40_LLE(Dataset):
    def __init__(self, num_points, partition='train'):
        self.data, self.label = load_data_cls_lle(partition)
        self.num_points = num_points
        self.partition = partition

    def __getitem__(self, item):
        pointcloud = self.data[item][:self.num_points]
        label = self.label[item]
        if self.partition == 'train':
            pointcloud = translate_pointcloud_manifold(pointcloud)
            np.random.shuffle(pointcloud)
        return pointcloud, label

    def __len__(self):
        return self.data.shape[0]


class ModelNet40_LTSA(Dataset):
    def __init__(self, num_points, partition='train'):
        self.data, self.label = load_data_cls_ltsa(partition)
        self.num_points = num_points
        self.partition = partition

    def __getitem__(self, item):
        pointcloud = self.data[item][:self.num_points]
        label = self.label[item]
        if self.partition == 'train':
            pointcloud = translate_pointcloud_manifold(pointcloud)
            np.random.shuffle(pointcloud)
        return pointcloud, label

    def __len__(self):
        return self.data.shape[0]


class ModelNet40_ISOMAP(Dataset):
    def __init__(self, num_points, partition='train'):
        self.data, self.label = load_data_cls_isomap(partition)
        self.num_points = num_points
        self.partition = partition

    def __getitem__(self, item):
        pointcloud = self.data[item][:self.num_points]
        label = self.label[item]
        if self.partition == 'train':
            pointcloud = translate_pointcloud_manifold(pointcloud)
            np.random.shuffle(pointcloud)
        return pointcloud, label

    def __len__(self):
        return self.data.shape[0]


class ModelNet40_SE(Dataset):
    def __init__(self, num_points, partition='train'):
        self.data, self.label = load_data_cls_se(partition)
        self.num_points = num_points
        self.partition = partition

    def __getitem__(self, item):
        pointcloud = self.data[item][:self.num_points]
        label = self.label[item]
        if self.partition == 'train':
            pointcloud = translate_pointcloud_manifold(pointcloud)
            np.random.shuffle(pointcloud)
        return pointcloud, label

    def __len__(self):
        return self.data.shape[0]


class ShapeNetPart(Dataset):
    def __init__(self, num_points, partition='train', class_choice=None):
        self.data, self.label, self.seg = load_data_partseg(partition)
        self.cat2id = {'airplane': 0, 'bag': 1, 'cap': 2, 'car': 3, 'chair': 4,
                       'earphone': 5, 'guitar': 6, 'knife': 7, 'lamp': 8, 'laptop': 9,
                       'motor': 10, 'mug': 11, 'pistol': 12, 'rocket': 13, 'skateboard': 14, 'table': 15}
        # Parts per category and each category's offset into the 50 global part labels.
        self.seg_num = [4, 2, 2, 4, 4, 3, 3, 2, 4, 2, 6, 2, 3, 3, 3, 3]
        self.index_start = [0, 4, 6, 8, 12, 16, 19, 22, 24, 28, 30, 36, 38, 41, 44, 47]
        self.num_points = num_points
        self.partition = partition
        self.class_choice = class_choice

        if self.class_choice is not None:
            id_choice = self.cat2id[self.class_choice]
            indices = (self.label == id_choice).squeeze()
            self.data = self.data[indices]
            self.label = self.label[indices]
            self.seg = self.seg[indices]
            self.seg_num_all = self.seg_num[id_choice]
            self.seg_start_index = self.index_start[id_choice]
        else:
            self.seg_num_all = 50
            self.seg_start_index = 0

    def __getitem__(self, item):
        pointcloud = self.data[item][:self.num_points]
        label = self.label[item]
        seg = self.seg[item][:self.num_points]
        if self.partition == 'train':
            # pointcloud = translate_pointcloud(pointcloud)
            indices = list(range(pointcloud.shape[0]))
            np.random.shuffle(indices)
            pointcloud = pointcloud[indices]
            seg = seg[indices]
        return pointcloud, label, seg

    def __len__(self):
        return self.data.shape[0]


class S3DIS(Dataset):
    def __init__(self, num_points=4096, partition='train', test_area='1'):
        self.data, self.seg = load_data_semseg(partition, test_area)
        self.num_points = num_points
        self.partition = partition

    def __getitem__(self, item):
        pointcloud = self.data[item][:self.num_points]
        seg = self.seg[item][:self.num_points]
        if self.partition == 'train':
            indices = list(range(pointcloud.shape[0]))
            np.random.shuffle(indices)
            pointcloud = pointcloud[indices]
            seg = seg[indices]
        seg = torch.LongTensor(seg)
        return pointcloud, seg

    def __len__(self):
        return self.data.shape[0]


class S3DIS_LLE(Dataset):
    def __init__(self, num_points=4096, partition='train', test_area='1'):
        self.data, self.seg = load_data_semseg_lle(partition, test_area)
        self.num_points = num_points
        self.partition = partition

    def __getitem__(self, item):
        pointcloud = self.data[item][:self.num_points]
        seg = self.seg[item][:self.num_points]
        if self.partition == 'train':
            indices = list(range(pointcloud.shape[0]))
            np.random.shuffle(indices)
            pointcloud = pointcloud[indices]
            seg = seg[indices]
        seg = torch.LongTensor(seg)
        return pointcloud, seg

    def __len__(self):
        return self.data.shape[0]


if __name__ == '__main__':
    train = ModelNet40(1024)
    test = ModelNet40(1024, 'test')
    data, label = train[0]
    print(data.shape)
    print(label.shape)

    trainval = ShapeNetPart(2048, 'trainval')
    test = ShapeNetPart(2048, 'test')
    data, label, seg = trainval[0]
    print(data.shape)
    print(label.shape)
    print(seg.shape)

    train = S3DIS(4096)
    test = S3DIS(4096, 'test')
    data, seg = train[0]
    print(data.shape)
    print(seg.shape)
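
    # Minimal usage sketch (an assumption, not part of the original file): wrap a dataset
    # in a standard torch DataLoader the way a training script typically would.
    from torch.utils.data import DataLoader
    loader = DataLoader(ModelNet40(1024), batch_size=8, shuffle=True, drop_last=True)
    batch_points, batch_labels = next(iter(loader))
    print(batch_points.shape, batch_labels.shape)  # expected: (8, 1024, 3) and (8, 1)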