|
- # Copyright 2022 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ============================================================================
- """310 data_processing"""
- import os
- import json
- from src.dataset import create_dataset
- from src.config import eval_config
- from src.greedydecoder import MSGreedyDecoder
- import numpy as np
def get_seq_lens(seq_len):
    """
    Map input sequence lengths to the lengths output by the network.

    Given a scalar or 1D numpy array of integer input lengths, apply the
    conv-layer length formula  L -> floor((L + pad_term) / stride) + 1
    once per layer.  Floor (integer) division is used deliberately: true
    division would produce fractional lengths (e.g. 100 -> 50.5) and
    promote integer numpy arrays to float64, corrupting the length .bin
    files written by the caller.
    """
    strides = [2, 1]
    # combined (2*padding - kernel_size) term for each conv layer
    pad_terms = [-1, -1]

    for stride, pad in zip(strides, pad_terms):
        seq_len = (seq_len + pad) // stride + 1
    return seq_len
-
def preprocess_data():
    """
    Export the evaluation dataset as raw .bin files for Ascend 310 inference.

    For each batch this writes three binary files (one per batch index):
      * ``00_data/``            spectrogram features     (``inputs``)
      * ``01_data/``            input sequence lengths   (``input_length``)
      * ``outputlength_data/``  network output lengths from ``get_seq_lens``
    and appends the decoded reference transcripts, one utterance per line,
    to ``target_<batch_size>.txt`` for later accuracy scoring.
    """
    config = eval_config
    with open(config.DataConfig.labels_path) as label_file:
        labels = json.load(label_file)
    ds_eval = create_dataset(audio_conf=config.DataConfig.SpectConfig,
                             manifest_filepath=config.DataConfig.test_manifest,
                             labels=labels, normalize=True, train_mode=False,
                             batch_size=config.DataConfig.batch_size, rank=0, group_size=1)
    result_path = "./preprocess_Result_" + str(config.DataConfig.batch_size)
    target_decoder = MSGreedyDecoder(labels, blank_index=labels.index('_'))

    feature_path = os.path.join(result_path, "00_data")
    length_path = os.path.join(result_path, "01_data")
    output_length = os.path.join(result_path, "outputlength_data")
    # Create every directory independently; the old code checked only
    # feature_path and then created all three, which crashed (or silently
    # skipped creation) when the directories existed only partially.
    for path in (feature_path, length_path, output_length):
        os.makedirs(path, exist_ok=True)

    file_target = 'target_' + str(config.DataConfig.batch_size) + '.txt'
    with open(file_target, 'w', encoding='utf-8') as f:
        for i, data in enumerate(ds_eval.create_dict_iterator(output_numpy=True)):
            file_name = "deepspeech2_" + str(
                config.DataConfig.batch_size) + "_" + str(i) + ".bin"
            data['inputs'].tofile(os.path.join(feature_path, file_name))
            data['input_length'].tofile(os.path.join(length_path, file_name))
            outputs_length = get_seq_lens(data['input_length'])
            outputs_length.tofile(os.path.join(output_length, file_name))

            # Label values arrive flattened with (utterance, position)
            # indices; regroup them into one target list per utterance.
            target_indices, targets = data['target_indices'], data['label_values']
            split_targets = []
            start, count, last_id = 0, 0, 0
            for j in range(np.shape(targets)[0]):
                if target_indices[j, 0] == last_id:
                    count += 1
                else:
                    split_targets.append(list(targets[start:count]))
                    last_id += 1
                    start = count
                    count += 1
            split_targets.append(list(targets[start:]))

            target_strings = target_decoder.convert_to_strings(split_targets)
            # Progress indicator: number of transcripts in this batch.
            print(len(target_strings))
            for target in target_strings:
                f.write(' '.join(target) + '\n')
    print("=" * 20, "export bin files finished", "=" * 20)
-
-
# Script entry point: run the 310-inference preprocessing export.
if __name__ == '__main__':
    preprocess_data()
|