|
- # Copyright 2022 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ============================================================================
- """310 data_processing"""
- import os
- import json
- from src.dataset import create_dataset
- from src.config import eval_config
- from src.greedydecoder import MSGreedyDecoder
- import numpy as np
def get_seq_lens(seq_len):
    """
    Map input sequence lengths to the lengths output by the network.

    Given a scalar or 1D numpy array of integer input lengths, apply the
    conv-layer length formula  L -> floor((L + pad_term) / stride) + 1
    once per layer.  Floor (integer) division is used deliberately: true
    division would produce fractional lengths (e.g. 100 -> 50.5) and
    promote integer numpy arrays to float64, corrupting the length .bin
    files written by the caller.
    """
    strides = [2, 1]
    # combined (2*padding - kernel_size) term for each conv layer
    pad_terms = [-1, -1]

    for stride, pad in zip(strides, pad_terms):
        seq_len = (seq_len + pad) // stride + 1
    return seq_len
-
def preprocess_data():
    """
    Export the evaluation dataset as raw .bin files for Ascend 310 inference.

    For each batch this writes three binary files (one per batch index):
      * ``00_data/``            spectrogram features     (``inputs``)
      * ``01_data/``            input sequence lengths   (``input_length``)
      * ``outputlength_data/``  network output lengths from ``get_seq_lens``
    and appends the decoded reference transcripts, one utterance per line,
    to ``target_<batch_size>.txt`` for later accuracy scoring.
    """
    config = eval_config
    with open(config.DataConfig.labels_path) as label_file:
        labels = json.load(label_file)
    ds_eval = create_dataset(audio_conf=config.DataConfig.SpectConfig,
                             manifest_filepath=config.DataConfig.test_manifest,
                             labels=labels, normalize=True, train_mode=False,
                             batch_size=config.DataConfig.batch_size, rank=0, group_size=1)
    result_path = "./preprocess_Result_" + str(config.DataConfig.batch_size)
    target_decoder = MSGreedyDecoder(labels, blank_index=labels.index('_'))

    feature_path = os.path.join(result_path, "00_data")
    length_path = os.path.join(result_path, "01_data")
    output_length = os.path.join(result_path, "outputlength_data")
    # Create every directory independently; the old code checked only
    # feature_path and then created all three, which crashed (or silently
    # skipped creation) when the directories existed only partially.
    for path in (feature_path, length_path, output_length):
        os.makedirs(path, exist_ok=True)

    file_target = 'target_' + str(config.DataConfig.batch_size) + '.txt'
    with open(file_target, 'w', encoding='utf-8') as f:
        for i, data in enumerate(ds_eval.create_dict_iterator(output_numpy=True)):
            file_name = "deepspeech2_" + str(
                config.DataConfig.batch_size) + "_" + str(i) + ".bin"
            data['inputs'].tofile(os.path.join(feature_path, file_name))
            data['input_length'].tofile(os.path.join(length_path, file_name))
            outputs_length = get_seq_lens(data['input_length'])
            outputs_length.tofile(os.path.join(output_length, file_name))

            # Label values arrive flattened with (utterance, position)
            # indices; regroup them into one target list per utterance.
            target_indices, targets = data['target_indices'], data['label_values']
            split_targets = []
            start, count, last_id = 0, 0, 0
            for j in range(np.shape(targets)[0]):
                if target_indices[j, 0] == last_id:
                    count += 1
                else:
                    split_targets.append(list(targets[start:count]))
                    last_id += 1
                    start = count
                    count += 1
            split_targets.append(list(targets[start:]))

            target_strings = target_decoder.convert_to_strings(split_targets)
            # Progress indicator: number of transcripts in this batch.
            print(len(target_strings))
            for target in target_strings:
                f.write(' '.join(target) + '\n')
    print("=" * 20, "export bin files finished", "=" * 20)
-
-
# Script entry point: run the 310-inference preprocessing export.
if __name__ == '__main__':
    preprocess_data()
|