Browse Source

add 310 inference code

master
lvyufeng 2 weeks ago
parent
commit
45043b6ffb
24 changed files with 842 additions and 6443 deletions
  1. +28
    -25
      README.md
  2. +14
    -0
      ascend310_infer/CMakeLists.txt
  3. +29
    -0
      ascend310_infer/build.sh
  4. +32
    -0
      ascend310_infer/inc/utils.h
  5. +135
    -0
      ascend310_infer/src/main.cc
  6. +129
    -0
      ascend310_infer/src/utils.cc
  7. +47
    -0
      export.py
  8. +50
    -0
      postprocess.py
  9. +46
    -0
      preprocess.py
  10. +0
    -177
      scripts/multi-bleu.perl
  11. +52
    -0
      scripts/run_distribute_train_gpu.sh
  12. +2
    -2
      scripts/run_eval_ascend.sh
  13. +3
    -3
      scripts/run_eval_gpu.sh
  14. +122
    -0
      scripts/run_infer_310.sh
  15. +2
    -2
      scripts/run_standalone_train_ascend.sh
  16. +2
    -2
      scripts/run_standalone_train_gpu.sh
  17. +0
    -6113
      scripts/vocab.en
  18. +1
    -1
      src/dataset.py
  19. +3
    -5
      src/gru_for_train.py
  20. +42
    -34
      src/rnn_cells.py
  21. +59
    -41
      src/rnns.py
  22. +4
    -2
      src/seq2seq.py
  23. +20
    -24
      src/utils.py
  24. +20
    -12
      train.py

+ 28
- 25
README.md View File

@@ -199,9 +199,12 @@ Parameters for both training and evaluation can be set in config.py. All the dat
- Running scripts for distributed training of GRU. To train the model on multiple devices, run the following commands in bash from `scripts/`:

``` bash
# if you use Ascend platform
cd ./scripts
sh run_distributed_train_{platform}.sh [RANK_TABLE_PATH] [DATASET_PATH]
# platform: gpu or ascend
sh run_distributed_train_ascend.sh [RANK_TABLE_PATH] [DATASET_PATH]
# if you use GPU platform
cd ./scripts
sh run_distributed_train_gpu.sh [DATASET_PATH]
```

## [Inference Process](#content)
@@ -271,35 +274,35 @@ perl multi-bleu.perl target.txt.forbleu < output.txt.forbleu

### Training Performance

| Parameters | Ascend | GPU|
| -------------------------- | -------------------------------------------------------------- |--------------------------------------------------------------|
| Resource | Ascend 910; OS Euler2.8 | GTX1080Ti, Ubuntu 18.04|
| uploaded Date | 06/05/2021 (month/day/year) | 06/05/2021 (month/day/year) |
| MindSpore Version | 1.2.0 |1.2.0
| Dataset | Multi30k Dataset | Multi30k Dataset |
| Training Parameters | epoch=30, batch_size=16 | epoch=30, batch_size=16 |
| Optimizer | Adam | Adam |
| Loss Function | NLLLoss | NLLLoss |
| outputs | probability | probability |
| Speed | 35ms/step (1pcs) | 200ms/step (1pcs) |
| Epoch Time | 64.4s (1pcs) | 361.5s (1pcs) |
| Loss | 3.86888 |2.533958|
| Params (M) | 21 | 21 |
| Checkpoint for inference | 272M (.ckpt file) | 272M (.ckpt file)
| Parameters | Ascend | GPU |
| -------------------------- | ----------------------------- |---------------------------|
| Resource | Ascend 910; OS Euler2.8 | GTX1080Ti, Ubuntu 18.04 |
| uploaded Date | 06/05/2021 (month/day/year) | 06/05/2021 (month/day/year) |
| MindSpore Version | 1.2.0 |1.2.0 |
| Dataset | Multi30k Dataset | Multi30k Dataset |
| Training Parameters | epoch=30, batch_size=16 | epoch=30, batch_size=16 |
| Optimizer | Adam | Adam |
| Loss Function | NLLLoss | NLLLoss |
| outputs | probability | probability |
| Speed | 35ms/step (1pcs) | 200ms/step (1pcs) |
| Epoch Time | 64.4s (1pcs) | 361.5s (1pcs) |
| Loss | 3.86888 |2.533958 |
| Params (M) | 21 | 21 |
| Checkpoint for inference | 272M (.ckpt file) | 272M (.ckpt file) |
| Scripts | [gru](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/gru) |[gru](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/gru) |

### Inference Performance

| Parameters | Ascend | GPU |
| ------------------- | --------------------------- |---------------------------|
| Resource | Ascend 910; OS Euler2.8 | GTX1080Ti, Ubuntu 18.04|
| Uploaded Date | 06/05/2021 (month/day/year) | 06/05/2021 (month/day/year) |
| MindSpore Version | 1.2.0 | 1.2.0|
| Dataset | Multi30K | Multi30K|
| batch_size | 1 | 1|
| outputs | label index | label index
| Accuracy | BLEU: 31.26 | BLEU: 29.30
| Model for inference | 272M (.ckpt file) | 272M (.ckpt file) |
| Resource | Ascend 910; OS Euler2.8 | GTX1080Ti, Ubuntu 18.04 |
| Uploaded Date | 06/05/2021 (month/day/year) | 06/05/2021 (month/day/year)|
| MindSpore Version | 1.2.0 | 1.2.0 |
| Dataset | Multi30K | Multi30K |
| batch_size | 1 | 1 |
| outputs | label index | label index |
| Accuracy | BLEU: 31.26 | BLEU: 29.30 |
| Model for inference | 272M (.ckpt file) | 272M (.ckpt file) |

# [Random Situation Description](#content)



+ 14
- 0
ascend310_infer/CMakeLists.txt View File

@@ -0,0 +1,14 @@
cmake_minimum_required(VERSION 3.14.1)
project(Ascend310Infer)

# The prebuilt MindSpore C++ library uses the pre-C++11 std::string ABI.
add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0)

# MINDSPORE_PATH: install prefix of the mindspore python package.
# BUG FIX: option() only declares BOOL cache entries — a path must be a
# PATH/STRING cache variable, otherwise the value silently degrades to OFF.
set(MINDSPORE_PATH "" CACHE PATH "mindspore install path")

set(PROJECT_SRC_ROOT ${CMAKE_CURRENT_LIST_DIR}/)

# The mindspore package ships its dataset engine as a versioned shared
# object (_c_dataengine*.so), so it is located by glob rather than name.
find_library(MS_LIB libmindspore.so ${MINDSPORE_PATH}/lib)
file(GLOB_RECURSE MD_LIB ${MINDSPORE_PATH}/_c_dataengine*)

add_executable(main src/main.cc src/utils.cc)

# Target-scoped usage requirements instead of directory-scoped
# include_directories()/CMAKE_CXX_FLAGS mutation.
target_include_directories(main PRIVATE
    ${MINDSPORE_PATH}
    ${MINDSPORE_PATH}/include
    ${PROJECT_SRC_ROOT}
)
target_compile_features(main PRIVATE cxx_std_17)
set_target_properties(main PROPERTIES CXX_EXTENSIONS OFF)  # -std=c++17, not gnu++17
# Debug build with warnings-as-errors, matching the model_zoo 310-infer convention.
target_compile_options(main PRIVATE -O0 -g -Werror -Wall -fPIE)
target_link_options(main PRIVATE -Wl,--allow-shlib-undefined)

target_link_libraries(main PRIVATE ${MS_LIB} ${MD_LIB} gflags)

+ 29
- 0
ascend310_infer/build.sh View File

@@ -0,0 +1,29 @@
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# Recreate a clean out/ build directory.
[ -d out ] && rm -rf out
mkdir out
cd out || exit

# The directory is freshly created, but keep the original defensive clean step.
[ -f Makefile ] && make clean

# Locate the installed mindspore-ascend package, configure against it, build.
ms_path="$(pip3.7 show mindspore-ascend | grep Location | awk '{print $2"/mindspore"}' | xargs realpath)"
cmake .. -DMINDSPORE_PATH="$ms_path"
make

+ 32
- 0
ascend310_infer/inc/utils.h View File

@@ -0,0 +1,32 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// File-system and tensor I/O helpers shared by the Ascend 310 inference
// driver (src/main.cc). Implementations live in src/utils.cc.

#ifndef MINDSPORE_INFERENCE_UTILS_H_
#define MINDSPORE_INFERENCE_UTILS_H_

#include <sys/stat.h>
#include <dirent.h>
#include <vector>
#include <string>
#include <memory>
#include "include/api/types.h"

// List the regular files directly under dirName, sorted by name; empty on error.
std::vector<std::string> GetAllFiles(std::string_view dirName);
// Open dirName (after resolving it with RealPath); nullptr if it is not a directory.
DIR *OpenDir(std::string_view dirName);
// Canonical absolute path of `path`, or "" when it does not exist.
std::string RealPath(std::string_view path);
// Read a whole binary file into a UInt8 MSTensor of shape {file size}.
mindspore::MSTensor ReadFileToTensor(const std::string &file);
// Write each output tensor to ./result_Files/<imageFile stem>_<i>.bin; 0 on success.
int WriteResult(const std::string& imageFile, const std::vector<mindspore::MSTensor> &outputs);
#endif  // MINDSPORE_INFERENCE_UTILS_H_

+ 135
- 0
ascend310_infer/src/main.cc View File

@@ -0,0 +1,135 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <sys/time.h>
#include <gflags/gflags.h>
#include <dirent.h>
#include <iostream>
#include <string>
#include <algorithm>
#include <iosfwd>
#include <vector>
#include <fstream>
#include <sstream>

#include "include/api/model.h"
#include "include/api/context.h"
#include "include/api/types.h"
#include "include/api/serialization.h"
#include "include/dataset/execute.h"
#include "include/dataset/vision.h"
#include "inc/utils.h"

using mindspore::Context;
using mindspore::Serialization;
using mindspore::Model;
using mindspore::Status;
using mindspore::MSTensor;
using mindspore::dataset::Execute;
using mindspore::ModelType;
using mindspore::GraphCell;
using mindspore::kSuccess;

DEFINE_string(mindir_path, "", "mindir path");
DEFINE_string(input0_path, ".", "input0 path");
DEFINE_string(input1_path, ".", "input1 path");
DEFINE_int32(device_id, 0, "device id");

int main(int argc, char **argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
if (RealPath(FLAGS_mindir_path).empty()) {
std::cout << "Invalid mindir" << std::endl;
return 1;
}

auto context = std::make_shared<Context>();
auto ascend310 = std::make_shared<mindspore::Ascend310DeviceInfo>();
ascend310->SetDeviceID(FLAGS_device_id);
context->MutableDeviceInfo().push_back(ascend310);
mindspore::Graph graph;
Serialization::Load(FLAGS_mindir_path, ModelType::kMindIR, &graph);

Model model;
Status ret = model.Build(GraphCell(graph), context);
if (ret != kSuccess) {
std::cout << "ERROR: Build failed." << std::endl;
return 1;
}

std::vector<MSTensor> model_inputs = model.GetInputs();
if (model_inputs.empty()) {
std::cout << "Invalid model, inputs is empty." << std::endl;
return 1;
}

auto input0_files = GetAllFiles(FLAGS_input0_path);
auto input1_files = GetAllFiles(FLAGS_input1_path);

if (input0_files.empty() || input1_files.empty()) {
std::cout << "ERROR: input data empty." << std::endl;
return 1;
}

std::map<double, double> costTime_map;
size_t size = input0_files.size();

for (size_t i = 0; i < size; ++i) {
struct timeval start = {0};
struct timeval end = {0};
double startTimeMs;
double endTimeMs;
std::vector<MSTensor> inputs;
std::vector<MSTensor> outputs;
std::cout << "Start predict input files:" << input0_files[i] << std::endl;

auto input0 = ReadFileToTensor(input0_files[i]);
auto input1 = ReadFileToTensor(input1_files[i]);
inputs.emplace_back(model_inputs[0].Name(), model_inputs[0].DataType(), model_inputs[0].Shape(),
input0.Data().get(), input0.DataSize());
inputs.emplace_back(model_inputs[1].Name(), model_inputs[1].DataType(), model_inputs[1].Shape(),
input1.Data().get(), input1.DataSize());

gettimeofday(&start, nullptr);
ret = model.Predict(inputs, &outputs);
gettimeofday(&end, nullptr);
if (ret != kSuccess) {
std::cout << "Predict " << input0_files[i] << " failed." << std::endl;
return 1;
}
startTimeMs = (1.0 * start.tv_sec * 1000000 + start.tv_usec) / 1000;
endTimeMs = (1.0 * end.tv_sec * 1000000 + end.tv_usec) / 1000;
costTime_map.insert(std::pair<double, double>(startTimeMs, endTimeMs));
WriteResult(input0_files[i], outputs);
}
double average = 0.0;
int inferCount = 0;

for (auto iter = costTime_map.begin(); iter != costTime_map.end(); iter++) {
double diff = 0.0;
diff = iter->second - iter->first;
average += diff;
inferCount++;
}
average = average / inferCount;
std::stringstream timeCost;
timeCost << "NN inference cost average time: "<< average << " ms of infer_count " << inferCount << std::endl;
std::cout << "NN inference cost average time: "<< average << "ms of infer_count " << inferCount << std::endl;
std::string fileName = "./time_Result" + std::string("/test_perform_static.txt");
std::ofstream fileStream(fileName.c_str(), std::ios::trunc);
fileStream << timeCost.str();
fileStream.close();
costTime_map.clear();
return 0;
}

+ 129
- 0
ascend310_infer/src/utils.cc View File

@@ -0,0 +1,129 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <fstream>
#include <algorithm>
#include <iostream>
#include "inc/utils.h"

using mindspore::MSTensor;
using mindspore::DataType;

// List the regular files directly under `dirName`, sorted by name.
// Returns an empty vector when the directory cannot be opened.
std::vector<std::string> GetAllFiles(std::string_view dirName) {
  struct dirent *filename;
  DIR *dir = OpenDir(dirName);
  if (dir == nullptr) {
    return {};
  }
  std::vector<std::string> res;
  while ((filename = readdir(dir)) != nullptr) {
    std::string dName = std::string(filename->d_name);
    // Skip "." / ".." and anything that is not a regular file.
    if (dName == "." || dName == ".." || filename->d_type != DT_REG) {
      continue;
    }
    res.emplace_back(std::string(dirName) + "/" + filename->d_name);
  }
  // BUG FIX: the DIR handle was never released, leaking one file
  // descriptor per call.
  closedir(dir);
  std::sort(res.begin(), res.end());
  for (auto &f : res) {
    std::cout << "image file: " << f << std::endl;
  }
  return res;
}

// Write each output tensor's raw bytes to ./result_Files/<stem>_<i>.bin,
// where <stem> is imageFile's basename up to its first '.'.
// Returns 0 on success, -1 if an output file cannot be opened.
int WriteResult(const std::string& imageFile, const std::vector<MSTensor> &outputs) {
  std::string homePath = "./result_Files";
  for (size_t i = 0; i < outputs.size(); ++i) {
    std::shared_ptr<const void> netOutput = outputs[i].Data();
    size_t outputSize = outputs[i].DataSize();
    // Derive "<input-stem>_<i>.bin" from the input file name.
    // NOTE(review): assumes imageFile's basename contains a '.'; a name
    // without one would make find('.') return npos — confirm with callers.
    int pos = imageFile.rfind('/');
    std::string fileName(imageFile, pos + 1);
    fileName.replace(fileName.find('.'), fileName.size() - fileName.find('.'), '_' + std::to_string(i) + ".bin");
    std::string outFileName = homePath + "/" + fileName;
    // BUG FIX: fopen was unchecked — when ./result_Files does not exist
    // the original code passed a null FILE* to fwrite (undefined behavior).
    FILE *outputFile = fopen(outFileName.c_str(), "wb");
    if (outputFile == nullptr) {
      std::cout << "Failed to open output file: " << outFileName << std::endl;
      return -1;
    }
    fwrite(netOutput.get(), sizeof(char), outputSize, outputFile);
    fclose(outputFile);
  }
  return 0;
}

// Read an entire binary file into a 1-D UInt8 MSTensor whose single
// dimension is the file size in bytes. Returns a default-constructed
// (empty) tensor when the file name is empty or the file cannot be read.
mindspore::MSTensor ReadFileToTensor(const std::string &file) {
  if (file.empty()) {
    std::cout << "Pointer file is nullptr" << std::endl;
    return mindspore::MSTensor();
  }

  std::ifstream ifs(file);
  if (!ifs.good()) {
    std::cout << "File: " << file << " is not exist" << std::endl;
    return mindspore::MSTensor();
  }
  if (!ifs.is_open()) {
    std::cout << "File: " << file << "open failed" << std::endl;
    return mindspore::MSTensor();
  }

  // Seek to the end to learn the size, allocate a tensor of that many
  // bytes, then rewind and copy the raw contents in.
  ifs.seekg(0, std::ios::end);
  const size_t size = ifs.tellg();
  mindspore::MSTensor buffer(file, mindspore::DataType::kNumberTypeUInt8, {static_cast<int64_t>(size)}, nullptr, size);

  ifs.seekg(0, std::ios::beg);
  ifs.read(reinterpret_cast<char *>(buffer.MutableData()), size);
  ifs.close();
  return buffer;
}


// Resolve `dirName` to its real path, verify it is a directory, and open
// it. Returns nullptr (after logging) on any failure.
DIR *OpenDir(std::string_view dirName) {
  if (dirName.empty()) {
    std::cout << " dirName is null ! " << std::endl;
    return nullptr;
  }
  std::string realPath = RealPath(dirName);
  struct stat s;
  // BUG FIX: the lstat return value was ignored; on failure `s` stayed
  // uninitialized, so reading s.st_mode was undefined behavior.
  if (lstat(realPath.c_str(), &s) != 0 || !S_ISDIR(s.st_mode)) {
    std::cout << "dirName is not a valid directory !" << std::endl;
    return nullptr;
  }
  DIR *dir = opendir(realPath.c_str());
  if (dir == nullptr) {
    std::cout << "Can not open dir " << dirName << std::endl;
    return nullptr;
  }
  std::cout << "Successfully opened the dir " << dirName << std::endl;
  return dir;
}

// Resolve `path` to its canonical absolute form via realpath(3).
// Returns "" (after logging) when the path does not exist.
// NOTE(review): path.data() on a string_view is not guaranteed to be
// NUL-terminated; in this file every caller passes a std::string, whose
// data() is — confirm before adding new call sites.
std::string RealPath(std::string_view path) {
  char resolved[PATH_MAX] = {0};
  if (realpath(path.data(), resolved) == nullptr) {
    std::cout << "File: " << path << " is not exist.";
    return "";
  }
  std::string result(resolved);
  std::cout << path << " realpath is: " << result << std::endl;
  return result;
}

+ 47
- 0
export.py View File

@@ -0,0 +1,47 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""export script."""
import argparse
import numpy as np
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.train.serialization import load_checkpoint, load_param_into_net, export
from src.seq2seq import Seq2Seq
from src.gru_for_infer import GRUInferCell
from src.config import config

# Command-line options for exporting the trained GRU seq2seq model.
parser = argparse.ArgumentParser(description='export')
parser.add_argument("--device_target", type=str, default="Ascend",
                    help="device where the code will be implemented, default is Ascend")
parser.add_argument('--device_id', type=int, default=0, help='device id of GPU or Ascend, default is 0')
parser.add_argument('--file_name', type=str, default="gru", help='output file name.')
parser.add_argument("--file_format", type=str, choices=["AIR", "MINDIR"], default="MINDIR", help="file format.")
parser.add_argument('--ckpt_file', type=str, required=True, help='ckpt file path')
args = parser.parse_args()

# Configure graph-mode execution on the selected device.
# NOTE(review): this runs at import time, before the __main__ guard —
# importing this module parses argv and configures the device.
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, reserve_class_name_in_scope=False, \
                    device_id=args.device_id, save_graphs=False)

if __name__ == "__main__":
    # Build the inference network and load the trained checkpoint weights.
    network = Seq2Seq(config, is_training=False)
    network = GRUInferCell(network)
    network.set_train(False)
    if args.ckpt_file != "":
        parameter_dict = load_checkpoint(args.ckpt_file)
        load_param_into_net(network, parameter_dict)

    # Dummy int32 inputs of shape (eval_batch_size, max_length); their
    # values are irrelevant — they only trace the graph for export.
    source_ids = Tensor(np.random.uniform(0.0, 1e5, size=[config.eval_batch_size, config.max_length]).astype(np.int32))
    target_ids = Tensor(np.random.uniform(0.0, 1e5, size=[config.eval_batch_size, config.max_length]).astype(np.int32))
    export(network, source_ids, target_ids, file_name=args.file_name, file_format=args.file_format)

+ 50
- 0
postprocess.py View File

@@ -0,0 +1,50 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

'''
postprocess script.
'''

import os
import argparse
import numpy as np
from src.config import config

# Command-line options: where the label .bin files and the 310 inference
# results live.
parser = argparse.ArgumentParser(description="postprocess")
parser.add_argument("--label_dir", type=str, default="", help="label data dir")
parser.add_argument("--result_dir", type=str, default="./result_Files", help="infer result Files")

args, _ = parser.parse_known_args()

if __name__ == "__main__":
    # Pair each label file "<stem>.bin" with its prediction "<stem>_0.bin"
    # produced by the 310 inference binary. Sorting gives a deterministic
    # output order (os.listdir order is arbitrary).
    file_name = sorted(os.listdir(args.label_dir))
    predictions = []
    target_sents = []
    for f in file_name:
        target_ids = np.fromfile(os.path.join(args.label_dir, f), np.int32)
        target_sents.append(target_ids.reshape(config.eval_batch_size, config.max_length))
        predicted_ids = np.fromfile(os.path.join(args.result_dir, f.split('.')[0] + '_0.bin'), np.int32)
        # The model emits max_length - 1 tokens per sentence.
        predictions.append(predicted_ids.reshape(config.eval_batch_size, config.max_length - 1))

    # Write one sentence of space-separated token ids per line.
    # BUG FIX: the files were opened without context managers, so they
    # leaked (and could lose buffered data) if an exception was raised
    # mid-write.
    with open(config.output_file, 'w') as f_output, open(config.target_file, 'w') as f_target:
        for batch_out, true_sentence in zip(predictions, target_sents):
            for i in range(config.eval_batch_size):
                target_ids = [str(x) for x in true_sentence[i].tolist()]
                f_target.write(" ".join(target_ids) + "\n")
                token_ids = [str(x) for x in batch_out[i].tolist()]
                f_output.write(" ".join(token_ids) + "\n")

+ 46
- 0
preprocess.py View File

@@ -0,0 +1,46 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""GRU preprocess script."""
import os
import argparse
from src.dataset import create_gru_dataset
from src.config import config

# Command-line options for dumping the evaluation dataset to .bin files.
parser = argparse.ArgumentParser(description='GRU preprocess')
# BUG FIX: the original help text was garbled ("default: f`sns.").
parser.add_argument("--dataset_path", type=str, default="",
                    help="Dataset path, default is empty.")
parser.add_argument('--device_num', type=int, default=1, help='Use device nums, default is 1')
parser.add_argument('--result_path', type=str, default='./preprocess_Result/', help='result path')
args = parser.parse_args()

if __name__ == "__main__":
    mindrecord_file = args.dataset_path
    if not os.path.exists(mindrecord_file):
        print("dataset file {} not exists, please check!".format(mindrecord_file))
        raise ValueError(mindrecord_file)
    # Evaluation pipeline: no shuffling so file index i maps to batch i.
    dataset = create_gru_dataset(epoch_count=config.num_epochs, batch_size=config.eval_batch_size, \
                                 dataset_path=mindrecord_file, rank_size=args.device_num, rank_id=0,
                                 do_shuffle=False, is_training=False)

    # One .bin file per batch for each of the two model inputs.
    source_ids_path = os.path.join(args.result_path, "00_data")
    target_ids_path = os.path.join(args.result_path, "01_data")
    # BUG FIX: os.makedirs without exist_ok raised FileExistsError when the
    # script was re-run over an existing result directory.
    os.makedirs(source_ids_path, exist_ok=True)
    os.makedirs(target_ids_path, exist_ok=True)

    for i, data in enumerate(dataset.create_dict_iterator(output_numpy=True, num_epochs=1)):
        file_name = "gru_bs" + str(config.eval_batch_size) + "_" + str(i) + ".bin"
        data["source_ids"].tofile(os.path.join(source_ids_path, file_name))
        data["target_ids"].tofile(os.path.join(target_ids_path, file_name))

    print("="*20, "export bin files finished", "="*20)

+ 0
- 177
scripts/multi-bleu.perl View File

@@ -1,177 +0,0 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

# $Id$
use warnings;
use strict;

my $lowercase = 0;
if ($ARGV[0] eq "-lc") {
$lowercase = 1;
shift;
}

my $stem = $ARGV[0];
if (!defined $stem) {
print STDERR "usage: multi-bleu.pl [-lc] reference < hypothesis\n";
print STDERR "Reads the references from reference or reference0, reference1, ...\n";
exit(1);
}

$stem .= ".ref" if !-e $stem && !-e $stem."0" && -e $stem.".ref0";

my @REF;
my $ref=0;
while(-e "$stem$ref") {
&add_to_ref("$stem$ref",\@REF);
$ref++;
}
&add_to_ref($stem,\@REF) if -e $stem;
die("ERROR: could not find reference file $stem") unless scalar @REF;

# add additional references explicitly specified on the command line
shift;
foreach my $stem (@ARGV) {
&add_to_ref($stem,\@REF) if -e $stem;
}



sub add_to_ref {
my ($file,$REF) = @_;
my $s=0;
if ($file =~ /.gz$/) {
open(REF,"gzip -dc $file|") or die "Can't read $file";
} else {
open(REF,$file) or die "Can't read $file";
}
while(<REF>) {
chomp;
push @{$$REF[$s++]}, $_;
}
close(REF);
}

my(@CORRECT,@TOTAL,$length_translation,$length_reference);
my $s=0;
while(<STDIN>) {
chomp;
$_ = lc if $lowercase;
my @WORD = split;
my %REF_NGRAM = ();
my $length_translation_this_sentence = scalar(@WORD);
my ($closest_diff,$closest_length) = (9999,9999);
foreach my $reference (@{$REF[$s]}) {
# print "$s $_ <=> $reference\n";
$reference = lc($reference) if $lowercase;
my @WORD = split(' ',$reference);
my $length = scalar(@WORD);
my $diff = abs($length_translation_this_sentence-$length);
if ($diff < $closest_diff) {
$closest_diff = $diff;
$closest_length = $length;
# print STDERR "$s: closest diff ".abs($length_translation_this_sentence-$length)." = abs($length_translation_this_sentence-$length), setting len: $closest_length\n";
} elsif ($diff == $closest_diff) {
$closest_length = $length if $length < $closest_length;
# from two references with the same closeness to me
# take the *shorter* into account, not the "first" one.
}
for(my $n=1;$n<=4;$n++) {
my %REF_NGRAM_N = ();
for(my $start=0;$start<=$#WORD-($n-1);$start++) {
my $ngram = "$n";
for(my $w=0;$w<$n;$w++) {
$ngram .= " ".$WORD[$start+$w];
}
$REF_NGRAM_N{$ngram}++;
}
foreach my $ngram (keys %REF_NGRAM_N) {
if (!defined($REF_NGRAM{$ngram}) ||
$REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) {
$REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram};
# print "$i: REF_NGRAM{$ngram} = $REF_NGRAM{$ngram}<BR>\n";
}
}
}
}
$length_translation += $length_translation_this_sentence;
$length_reference += $closest_length;
for(my $n=1;$n<=4;$n++) {
my %T_NGRAM = ();
for(my $start=0;$start<=$#WORD-($n-1);$start++) {
my $ngram = "$n";
for(my $w=0;$w<$n;$w++) {
$ngram .= " ".$WORD[$start+$w];
}
$T_NGRAM{$ngram}++;
}
foreach my $ngram (keys %T_NGRAM) {
$ngram =~ /^(\d+) /;
my $n = $1;
# my $corr = 0;
# print "$i e $ngram $T_NGRAM{$ngram}<BR>\n";
$TOTAL[$n] += $T_NGRAM{$ngram};
if (defined($REF_NGRAM{$ngram})) {
if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) {
$CORRECT[$n] += $T_NGRAM{$ngram};
# $corr = $T_NGRAM{$ngram};
# print "$i e correct1 $T_NGRAM{$ngram}<BR>\n";
}
else {
$CORRECT[$n] += $REF_NGRAM{$ngram};
# $corr = $REF_NGRAM{$ngram};
# print "$i e correct2 $REF_NGRAM{$ngram}<BR>\n";
}
}
# $REF_NGRAM{$ngram} = 0 if !defined $REF_NGRAM{$ngram};
# print STDERR "$ngram: {$s, $REF_NGRAM{$ngram}, $T_NGRAM{$ngram}, $corr}\n"
}
}
$s++;
}
my $brevity_penalty = 1;
my $bleu = 0;

my @bleu=();

for(my $n=1;$n<=4;$n++) {
if (defined ($TOTAL[$n])){
$bleu[$n]=($TOTAL[$n])?$CORRECT[$n]/$TOTAL[$n]:0;
# print STDERR "CORRECT[$n]:$CORRECT[$n] TOTAL[$n]:$TOTAL[$n]\n";
}else{
$bleu[$n]=0;
}
}

if ($length_reference==0){
printf "BLEU = 0, 0/0/0/0 (BP=0, ratio=0, hyp_len=0, ref_len=0)\n";
exit(1);
}

if ($length_translation<$length_reference) {
$brevity_penalty = exp(1-$length_reference/$length_translation);
}
$bleu = $brevity_penalty * exp((my_log( $bleu[1] ) +
my_log( $bleu[2] ) +
my_log( $bleu[3] ) +
my_log( $bleu[4] ) ) / 4) ;
printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ratio=%.3f, hyp_len=%d, ref_len=%d)\n",
100*$bleu,
100*$bleu[1],
100*$bleu[2],
100*$bleu[3],
100*$bleu[4],
$brevity_penalty,
$length_translation / $length_reference,
$length_translation,
$length_reference;


print STDERR "It is not advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer.\n";

sub my_log {
return -9999999999 unless $_[0];
return log($_[0]);
}

+ 52
- 0
scripts/run_distribute_train_gpu.sh View File

@@ -0,0 +1,52 @@
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# Launch 8-GPU distributed training with mpirun from a fresh ./train dir.
if [ $# -ne 1 ]
then
    echo "Usage: sh run_distribute_train_gpu.sh [DATASET_PATH]"
    exit 1
fi

# Resolve a possibly-relative path to an absolute one.
get_real_path(){
    if [ "${1:0:1}" == "/" ]; then
        echo "$1"
    else
        # BUG FIX: quote "$PWD/$1" so paths containing spaces survive
        # word splitting.
        echo "$(realpath -m "$PWD/$1")"
    fi
}

DATASET_PATH=$(get_real_path "$1")
echo "$DATASET_PATH"
if [ ! -f "$DATASET_PATH" ]
then
    echo "error: DATASET_PATH=$DATASET_PATH is not a file"
    exit 1
fi

ulimit -u unlimited
export DEVICE_TARGET="GPU"
export DEVICE_NUM=8

# Stage the training sources into a fresh ./train working directory so
# logs and checkpoints do not pollute the source tree.
rm -rf ./train
mkdir ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train
cd ./train || exit
echo "start training for $DEVICE_NUM GPUs"
env > env.log
mpirun -n $DEVICE_NUM python train.py --run_distribute=True --device_target=$DEVICE_TARGET --device_num=$DEVICE_NUM --dataset_path="$DATASET_PATH" &> log &
cd ..

+ 2
- 2
scripts/run_eval_ascend.sh View File

@@ -15,12 +15,12 @@
# ============================================================================
if [ $# -ne 2 ]
then
echo "Usage: sh run_eval.sh [CKPT_FILE] [DATASET_PATH]"
echo "Usage: sh run_eval_ascend.sh [CKPT_FILE] [DATASET_PATH]"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=1
export DEVICE_ID=4
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1
export DEVICE_TARGET="Ascend"


+ 3
- 3
scripts/run_eval_gpu.sh View File

@@ -15,15 +15,15 @@
# ============================================================================
if [ $# -ne 2 ]
then
echo "Usage: sh run_eval.sh [CKPT_FILE] [DATASET_PATH]"
echo "Usage: sh run_eval_gpu.sh [CKPT_FILE] [DATASET_PATH]"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=1
export DEVICE_ID=4
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1
export DEVICE_TARGET="Ascend"
export DEVICE_TARGET="GPU"

get_real_path(){
if [ "${1:0:1}" == "/" ]; then


+ 122
- 0
scripts/run_infer_310.sh View File

@@ -0,0 +1,122 @@
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

if [[ $# -lt 3 || $# -gt 4 ]]; then
echo "Usage: bash run_infer_310.sh [MINDIR_PATH] [DATASET_PATH] [NEED_PREPROCESS] [DEVICE_ID]
NEED_PREPROCESS means weather need preprocess or not, it's value is 'y' or 'n'.
DEVICE_ID is optional, it can be set by environment variable device_id, otherwise the value is zero"
exit 1
fi

get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
else
echo "$(realpath -m $PWD/$1)"
fi
}
model=$(get_real_path $1)
dataset_path=$(get_real_path $2)

if [ "$3" == "y" ] || [ "$3" == "n" ];then
need_preprocess=$3
else
echo "weather need preprocess or not, it's value must be in [y, n]"
exit 1
fi

device_id=0
if [ $# == 4 ]; then
device_id=$4
fi

echo "mindir name: "$model
echo "dataset path: "$dataset_path
echo "need preprocess: "$need_preprocess
echo "device id: "$device_id

# Configure the Ascend toolchain environment. Two install layouts are
# supported, distinguished by the presence of an ascend-toolkit directory.
export ASCEND_HOME=/usr/local/Ascend/
if [ -d ${ASCEND_HOME}/ascend-toolkit ]; then
    # ascend-toolkit layout: compilers/libs live under ascend-toolkit/latest
    export PATH=$ASCEND_HOME/fwkacllib/bin:$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/atc/bin:$PATH
    export LD_LIBRARY_PATH=$ASCEND_HOME/fwkacllib/lib64:/usr/local/lib:$ASCEND_HOME/ascend-toolkit/latest/atc/lib64:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
    export TBE_IMPL_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe
    export PYTHONPATH=$ASCEND_HOME/fwkacllib/python/site-packages:${TBE_IMPL_PATH}:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/python/site-packages:$PYTHONPATH
    export ASCEND_OPP_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp
else
    # legacy layout: atc/acllib sit directly under ASCEND_HOME
    export PATH=$ASCEND_HOME/fwkacllib/bin:$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/atc/ccec_compiler/bin:$ASCEND_HOME/atc/bin:$PATH
    export LD_LIBRARY_PATH=$ASCEND_HOME/fwkacllib/lib64:/usr/local/lib:$ASCEND_HOME/atc/lib64:$ASCEND_HOME/acllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
    export PYTHONPATH=$ASCEND_HOME/fwkacllib/python/site-packages:$ASCEND_HOME/atc/python/site-packages:$PYTHONPATH
    export ASCEND_OPP_PATH=$ASCEND_HOME/opp
fi

# Run the Python preprocessing step, writing its output into a freshly
# created ./preprocess_Result/ directory.
function preprocess_data()
{
    # start from a clean output directory
    [ -d preprocess_Result ] && rm -rf ./preprocess_Result
    mkdir preprocess_Result
    python3.7 ../preprocess.py --dataset_path=$dataset_path --result_path=./preprocess_Result/
}

# Compile the 310 inference application in ../ascend310_infer; all build
# output (stdout and stderr) is captured in build.log.
function compile_app()
{
    # abort immediately if the source directory is missing
    cd ../ascend310_infer || exit
    bash build.sh &> build.log
}

# Execute the compiled inference binary on the preprocessed inputs.
# Stale output directories are recreated first; all output goes to infer.log.
function infer()
{
    # return to the directory we were in before compile_app cd'd away
    cd - || exit
    # recreate both output directories from scratch
    for out_dir in result_Files time_Result; do
        if [ -d "$out_dir" ]; then
            rm -rf "./$out_dir"
        fi
        mkdir "$out_dir"
    done

    ../ascend310_infer/out/main --mindir_path=$model --input0_path=./preprocess_Result/00_data --input1_path=./preprocess_Result/01_data --device_id=$device_id &> infer.log

}

# Compute accuracy by comparing inference results against the preprocessed
# labels; the postprocess script's output is captured in acc.log.
function cal_acc()
{
    python3.7 ../postprocess.py --result_dir=./result_Files --label_dir=./preprocess_Result/01_data &> acc.log
}

# --- main flow -------------------------------------------------------------
# Optionally preprocess the dataset, then compile the app, run inference and
# compute accuracy. Each stage aborts the whole script on failure.
# $need_preprocess is quoted so the test does not break if it is ever empty.
if [ "$need_preprocess" == "y" ]; then
    preprocess_data
    if [ $? -ne 0 ]; then
        echo "preprocess dataset failed"
        exit 1
    fi
fi
compile_app
if [ $? -ne 0 ]; then
    echo "compile app code failed"
    exit 1
fi
infer
if [ $? -ne 0 ]; then
    echo "execute inference failed"
    exit 1
fi
cal_acc
if [ $? -ne 0 ]; then
    echo "calculate accuracy failed"
    exit 1
fi

+ 2
- 2
scripts/run_standalone_train_ascend.sh View File

@@ -15,12 +15,12 @@
# ============================================================================
if [ $# -ne 1 ]
then
echo "Usage: sh run_distribute_train_ascend.sh [DATASET_PATH]"
echo "Usage: sh run_standalone_train_ascend.sh [DATASET_PATH]"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=1
export DEVICE_ID=4
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1
export DEVICE_TARGET="Ascend"


+ 2
- 2
scripts/run_standalone_train_gpu.sh View File

@@ -15,12 +15,12 @@
# ============================================================================
if [ $# -ne 1 ]
then
echo "Usage: sh run_distribute_train_ascend.sh [DATASET_PATH]"
echo "Usage: sh run_standalone_train_gpu.sh [DATASET_PATH]"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=1
export DEVICE_ID=4
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1
export DEVICE_TARGET="GPU"


+ 0
- 6113
scripts/vocab.en
File diff suppressed because it is too large
View File


+ 1
- 1
src/dataset.py View File

@@ -31,7 +31,7 @@ def random_teacher_force(source_ids, target_ids, target_mask):
def create_gru_dataset(epoch_count=1, batch_size=1, rank_size=1, rank_id=0, do_shuffle=True, dataset_path=None,
is_training=True):
"""create dataset"""
ds = de.MindDataset(dataset_path, num_parallel_workers=4,
ds = de.MindDataset(dataset_path, num_parallel_workers=4,
columns_list=["source_ids", "target_ids",
"target_mask"],
shuffle=do_shuffle, num_shards=rank_size, shard_id=rank_id)


+ 3
- 5
src/gru_for_train.py View File

@@ -241,7 +241,6 @@ class GRUTrainOneStepWithLossScaleCell(nn.Cell):
ret = (loss, cond, scaling_sens)
return F.depend(ret, succ)

class GRUTrainOneStepCell(nn.TrainOneStepCell):
"""
Encapsulation class of bert network training.
@@ -254,7 +253,7 @@ class GRUTrainOneStepCell(nn.TrainOneStepCell):
optimizer (Optimizer): Optimizer for updating the weights.
sens (Number): The adjust parameter. Default: 1.0.
"""
def __init__(self, network, optimizer, sens=1.0):
super(GRUTrainOneStepCell, self).__init__(network, optimizer, sens)
self.cast = P.Cast()
@@ -262,8 +261,7 @@ class GRUTrainOneStepCell(nn.TrainOneStepCell):

def set_sens(self, value):
self.sens = value

def construct(self,
encoder_inputs,
decoder_inputs,
@@ -275,7 +273,7 @@ class GRUTrainOneStepCell(nn.TrainOneStepCell):
loss = self.network(encoder_inputs,
decoder_inputs,
teacher_force)
grads = self.grad(self.network, weights)(encoder_inputs,
decoder_inputs,
teacher_force,


+ 42
- 34
src/rnn_cells.py View File

@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
'''RNN Cells module, include RNNCell, GRUCell, LSTMCell'''
import math
import numpy as np
import mindspore.nn as nn
@@ -19,61 +20,65 @@ import mindspore.ops as P
from mindspore import Tensor, Parameter
from mindspore.common.initializer import initializer, Uniform

def rnn_tanh_cell(input, hidden, w_ih, w_hh, b_ih, b_hh):
def rnn_tanh_cell(inputs, hidden, w_ih, w_hh, b_ih, b_hh):
'''RNN cell function with tanh activation'''
if b_ih is None:
igates = P.MatMul(False, True)(input, w_ih)
igates = P.MatMul(False, True)(inputs, w_ih)
hgates = P.MatMul(False, True)(hidden, w_hh)
else:
igates = P.MatMul(False, True)(input, w_ih) + b_ih
igates = P.MatMul(False, True)(inputs, w_ih) + b_ih
hgates = P.MatMul(False, True)(hidden, w_hh) + b_hh
return P.Tanh()(igates + hgates)

def rnn_relu_cell(input, hidden, w_ih, w_hh, b_ih, b_hh):
def rnn_relu_cell(inputs, hidden, w_ih, w_hh, b_ih, b_hh):
'''RNN cell function with relu activation'''
if b_ih is None:
igates = P.MatMul(False, True)(input, w_ih)
igates = P.MatMul(False, True)(inputs, w_ih)
hgates = P.MatMul(False, True)(hidden, w_hh)
else:
igates = P.MatMul(False, True)(input, w_ih) + b_ih
igates = P.MatMul(False, True)(inputs, w_ih) + b_ih
hgates = P.MatMul(False, True)(hidden, w_hh) + b_hh
return P.ReLU()(igates + hgates)

def lstm_cell(input, hidden, w_ih, w_hh, b_ih, b_hh):
def lstm_cell(inputs, hidden, w_ih, w_hh, b_ih, b_hh):
'''LSTM cell function'''
hx, cx = hidden
if b_ih is None:
gates = P.MatMul(False, True)(input, w_ih) + P.MatMul(False, True)(hx, w_hh)
gates = P.MatMul(False, True)(inputs, w_ih) + P.MatMul(False, True)(hx, w_hh)
else:
gates = P.MatMul(False, True)(input, w_ih) + P.MatMul(False, True)(hx, w_hh) + b_ih + b_hh
gates = P.MatMul(False, True)(inputs, w_ih) + P.MatMul(False, True)(hx, w_hh) + b_ih + b_hh
ingate, forgetgate, cellgate, outgate = P.Split(1, 4)(gates)
ingate = P.Sigmoid()(ingate)
forgetgate = P.Sigmoid()(forgetgate)
cellgate = P.Tanh()(cellgate)
outgate = P.Sigmoid()(outgate)
cy = (forgetgate * cx) + (ingate * cellgate)
hy = outgate * P.Tanh()(cy)
return hy, cy

def gru_cell(input, hidden, w_ih, w_hh, b_ih, b_hh):
def gru_cell(inputs, hidden, w_ih, w_hh, b_ih, b_hh):
'''GRU cell function'''
if b_ih is None:
gi = P.MatMul(False, True)(input, w_ih)
gi = P.MatMul(False, True)(inputs, w_ih)
gh = P.MatMul(False, True)(hidden, w_hh)
else:
gi = P.MatMul(False, True)(input, w_ih) + b_ih
gi = P.MatMul(False, True)(inputs, w_ih) + b_ih
gh = P.MatMul(False, True)(hidden, w_hh) + b_hh
i_r, i_i, i_n = P.Split(1, 3)(gi)
h_r, h_i, h_n = P.Split(1, 3)(gh)
resetgate = P.Sigmoid()(i_r + h_r)
inputgate = P.Sigmoid()(i_i + h_i)
newgate = P.Tanh()(i_n + resetgate * h_n)
hy = newgate + inputgate * (hidden - newgate)
return hy

return hy

class RNNCellBase(nn.Cell):
'''Basic class for RNN Cells'''
def __init__(self, input_size: int, hidden_size: int, bias: bool, num_chunks: int):
super().__init__()
self.input_size = input_size
@@ -85,38 +90,41 @@ class RNNCellBase(nn.Cell):
self.bias_ih = Parameter(Tensor(np.random.randn(num_chunks * hidden_size).astype(np.float32)))
self.bias_hh = Parameter(Tensor(np.random.randn(num_chunks * hidden_size).astype(np.float32)))
self.reset_parameters()
def reset_parameters(self):
stdv = 1 / math.sqrt(self.hidden_size)
for weight in self.get_parameters():
weight.set_data(initializer(Uniform(stdv), weight.shape))
class RNNCell(RNNCellBase):
'''RNNCell operator class'''
_non_linearity = ['tanh', 'relu']
def __init__(self, input_size: int, hidden_size: int, bias: bool=True, nonlinearity: str = "tanh"):
def __init__(self, input_size: int, hidden_size: int, bias: bool = True, nonlinearity: str = "tanh"):
super().__init__(input_size, hidden_size, bias, num_chunks=1)
if nonlinearity not in self._non_linearity:
raise ValueError("Unknown nonlinearity: {}".format(self.nonlinearity))
raise ValueError("Unknown nonlinearity: {}".format(nonlinearity))
self.nonlinearity = nonlinearity
def construct(self, input, hx):
def construct(self, inputs, hx):
if self.nonlinearity == "tanh":
ret = rnn_tanh_cell(input, hx, self.weight_ih, self.weight_hh, self.bias_ih, self.bias_hh)
ret = rnn_tanh_cell(inputs, hx, self.weight_ih, self.weight_hh, self.bias_ih, self.bias_hh)
else:
ret = rnn_relu_cell(input, hx, self.weight_ih, self.weight_hh, self.bias_ih, self.bias_hh)
ret = rnn_relu_cell(inputs, hx, self.weight_ih, self.weight_hh, self.bias_ih, self.bias_hh)
return ret
class LSTMCell(RNNCellBase):
'''LSTMCell operator class'''
def __init__(self, input_size: int, hidden_size: int, bias: bool = True):
super().__init__(input_size, hidden_size, bias, num_chunks=4)
self.support_non_tensor_inputs = True
def construct(self, input, hx):
return lstm_cell(input, hx, self.weight_ih, self.weight_hh, self.bias_ih, self.bias_hh)
def construct(self, inputs, hx):
return lstm_cell(inputs, hx, self.weight_ih, self.weight_hh, self.bias_ih, self.bias_hh)
class GRUCell(RNNCellBase):
'''GRUCell operator class'''
def __init__(self, input_size: int, hidden_size: int, bias: bool = True):
super().__init__(input_size, hidden_size, bias, num_chunks=3)
def construct(self, input, hx):
return gru_cell(input, hx, self.weight_ih, self.weight_hh, self.bias_ih, self.bias_hh)
def construct(self, inputs, hx):
return gru_cell(inputs, hx, self.weight_ih, self.weight_hh, self.bias_ih, self.bias_hh)

+ 59
- 41
src/rnns.py View File

@@ -12,16 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
'''RNN operators module, include RNN, GRU, LSTM'''
import math
import mindspore
import numpy as np
import mindspore
import mindspore.nn as nn
import mindspore.ops as P
from mindspore.ops.primitive import constexpr
from mindspore import Tensor, Parameter, ParameterTuple
from mindspore import log as logger
from mindspore import context
from src.rnn_cells import rnn_relu_cell, rnn_tanh_cell, lstm_cell, gru_cell
from mindspore.ops.primitive import constexpr
from src.utils import Reverse, ReverseSequence

@constexpr
@@ -30,10 +31,10 @@ def _init_state(shape, dtype, is_lstm):
cx = Tensor(np.zeros(shape), dtype)
if is_lstm:
return (hx, cx)
else:
return hx
return hx

class DynamicRNN(nn.Cell):
'''Dynamic RNN module to compute RNN cell by timesteps'''
def __init__(self, mode):
super().__init__()
if mode == "RNN_RELU":
@@ -48,8 +49,9 @@ class DynamicRNN(nn.Cell):
raise ValueError("Unrecognized RNN mode: " + mode)
self.cell = cell
self.is_lstm = mode == "LSTM"
def recurrent(self, x, h_0, w_ih, w_hh, b_ih, b_hh):
'''recurrent steps without sequence length'''
time_step = x.shape[0]
outputs = []
t = 0
@@ -64,9 +66,10 @@ class DynamicRNN(nn.Cell):
outputs.append(h)
t += 1
outputs = P.Stack()(outputs)
return outputs, h
return outputs, h
def variable_recurrent(self, x, h, seq_length, w_ih, w_hh, b_ih, b_hh):
'''recurrent steps with sequence length'''
time_step = x.shape[0]
h_t = h
if self.is_lstm:
@@ -79,7 +82,7 @@ class DynamicRNN(nn.Cell):
seq_length = P.BroadcastTo((hidden_size, -1))(seq_length)
seq_length = P.Cast()(seq_length, mindspore.int32)
seq_length = P.Transpose()(seq_length, (1, 0))
outputs = []
state_t = h_t
t = 0
@@ -100,26 +103,27 @@ class DynamicRNN(nn.Cell):
t += 1
outputs = P.Stack()(outputs)
return outputs, state_t
def construct(self, x, h, seq_length, w_ih, w_hh, b_ih, b_hh):
if seq_length is None:
return self.recurrent(x, h, w_ih, w_hh, b_ih, b_hh)
else:
return self.variable_recurrent(x, h, seq_length, w_ih, w_hh, b_ih, b_hh)
return self.variable_recurrent(x, h, seq_length, w_ih, w_hh, b_ih, b_hh)

class RNNBase(nn.Cell):
def __init__(self, mode, input_size, hidden_size, num_layers=1, has_bias=True, batch_first=False, dropout=0, bidirectional=False):
'''Basic class for RNN operators'''
def __init__(self, mode, input_size, hidden_size, num_layers=1, has_bias=True,
batch_first=False, dropout=0, bidirectional=False):
super().__init__()
if not 0 <= dropout <= 1:
raise ValueError("dropout should be a number in range [0, 1] "
"representing the probability of an element being "
"zeroed")
if dropout > 0 and num_layers == 1:
logger.warning("dropout option adds dropout after all but last "
"recurrent layer, so non-zero dropout expects "
"num_layers greater than 1, but got dropout={} and "
"num_layers={}".format(dropout, num_layers))
"recurrent layer, so non-zero dropout expects "
"num_layers greater than 1, but got dropout={} and "
"num_layers={}".format(dropout, num_layers))
if mode == "LSTM":
gate_size = 4 * hidden_size
elif mode == "GRU":
@@ -130,7 +134,7 @@ class RNNBase(nn.Cell):
gate_size = hidden_size
else:
raise ValueError("Unrecognized RNN mode: " + mode)
self.is_ascend = context.get_context("device_target") == "Ascend"
if self.is_ascend:
self.reverse = P.ReverseV2([0])
@@ -138,7 +142,7 @@ class RNNBase(nn.Cell):
else:
self.reverse = Reverse(0)
self.reverse_sequence = ReverseSequence(0, 1)
self.hidden_size = hidden_size
self.hidden_size = hidden_size
self.batch_first = batch_first
self.num_layers = num_layers
self.dropout = dropout
@@ -148,7 +152,7 @@ class RNNBase(nn.Cell):
self.rnn = DynamicRNN(mode)
num_directions = 2 if bidirectional else 1
self.is_lstm = mode == "LSTM"
self.w_ih_list = []
self.w_hh_list = []
self.b_ih_list = []
@@ -158,17 +162,25 @@ class RNNBase(nn.Cell):
for direction in range(num_directions):
layer_input_size = input_size if layer == 0 else hidden_size * num_directions
suffix = '_reverse' if direction == 1 else ''
self.w_ih_list.append(Parameter(Tensor(np.random.uniform(-stdv, stdv, (gate_size, layer_input_size)).astype(np.float32)), name='weight_ih_l{}{}'.format(layer, suffix)))
self.w_hh_list.append(Parameter(Tensor(np.random.uniform(-stdv, stdv, (gate_size, hidden_size)).astype(np.float32)), name='weight_hh_l{}{}'.format(layer, suffix)))

self.w_ih_list.append(Parameter(
Tensor(np.random.uniform(-stdv, stdv, (gate_size, layer_input_size)).astype(np.float32)),
name='weight_ih_l{}{}'.format(layer, suffix)))
self.w_hh_list.append(Parameter(
Tensor(np.random.uniform(-stdv, stdv, (gate_size, hidden_size)).astype(np.float32)),
name='weight_hh_l{}{}'.format(layer, suffix)))
if has_bias:
self.b_ih_list.append(Parameter(Tensor(np.random.uniform(-stdv, stdv, (gate_size)).astype(np.float32)), name='bias_ih_l{}{}'.format(layer, suffix)))
self.b_hh_list.append(Parameter(Tensor(np.random.uniform(-stdv, stdv, (gate_size)).astype(np.float32)), name='bias_hh_l{}{}'.format(layer, suffix)))
self.b_ih_list.append(Parameter(
Tensor(np.random.uniform(-stdv, stdv, (gate_size)).astype(np.float32)),
name='bias_ih_l{}{}'.format(layer, suffix)))
self.b_hh_list.append(Parameter(
Tensor(np.random.uniform(-stdv, stdv, (gate_size)).astype(np.float32)),
name='bias_hh_l{}{}'.format(layer, suffix)))
self.w_ih_list = ParameterTuple(self.w_ih_list)
self.w_hh_list = ParameterTuple(self.w_hh_list)
self.b_ih_list = ParameterTuple(self.b_ih_list)
self.b_hh_list = ParameterTuple(self.b_hh_list)
self.b_hh_list = ParameterTuple(self.b_hh_list)
def _stacked_bi_dynamic_rnn(self, x, h, seq_length):
"""stacked bidirectional dynamic_rnn"""
pre_layer = x
@@ -178,8 +190,12 @@ class RNNBase(nn.Cell):
for i in range(self.num_layers):
offset = i * 2
if self.has_bias:
w_f_ih, w_f_hh, b_f_ih, b_f_hh = self.w_ih_list[offset], self.w_hh_list[offset], self.b_ih_list[offset], self.b_hh_list[offset]
w_b_ih, w_b_hh, b_b_ih, b_b_hh = self.w_ih_list[offset + 1], self.w_hh_list[offset + 1], self.b_ih_list[offset + 1], self.b_hh_list[offset + 1]
w_f_ih, w_f_hh, b_f_ih, b_f_hh = \
self.w_ih_list[offset], self.w_hh_list[offset], \
self.b_ih_list[offset], self.b_hh_list[offset]
w_b_ih, w_b_hh, b_b_ih, b_b_hh = \
self.w_ih_list[offset + 1], self.w_hh_list[offset + 1], \
self.b_ih_list[offset + 1], self.b_hh_list[offset + 1]
else:
w_f_ih, w_f_hh = self.w_ih_list[offset], self.w_hh_list[offset]
w_b_ih, w_b_hh = self.w_ih_list[offset + 1], self.w_hh_list[offset + 1]
@@ -213,10 +229,9 @@ class RNNBase(nn.Cell):
h_n = h_n.view(h[0].shape)
c_n = c_n.view(h[1].shape)
return output, (h_n.view(h[0].shape), c_n.view(h[1].shape))
else:
h_n = P.Concat(0)(h_n)
return output, h_n.view(h.shape)
h_n = P.Concat(0)(h_n)
return output, h_n.view(h.shape)

def _stacked_dynamic_rnn(self, x, h, seq_length):
"""stacked mutil_layer dynamic_rnn"""
pre_layer = x
@@ -246,11 +261,11 @@ class RNNBase(nn.Cell):
h_n = h_n.view(h[0].shape)
c_n = c_n.view(h[1].shape)
return output, (h_n.view(h[0].shape), c_n.view(h[1].shape))
else:
h_n = P.Concat(0)(h_n)
return output, h_n.view(h.shape)
h_n = P.Concat(0)(h_n)
return output, h_n.view(h.shape)

def construct(self, x, h=None, seq_length=None):
'''Defines the RNN like operators performed'''
max_batch_size = x.shape[0] if self.batch_first else x.shape[1]
num_directions = 2 if self.bidirectional else 1
if h is None:
@@ -264,8 +279,9 @@ class RNNBase(nn.Cell):
if self.batch_first:
x = P.Transpose()(x, (1, 0, 2))
return x, h
class RNN(RNNBase):
'''RNN operator class'''
def __init__(self, *args, **kwargs):
if 'nonlinearity' in kwargs:
if kwargs['nonlinearity'] == 'tanh':
@@ -280,14 +296,16 @@ class RNN(RNNBase):
mode = 'RNN_TANH'

super(RNN, self).__init__(mode, *args, **kwargs)
class GRU(RNNBase):
'''GRU operator class'''
def __init__(self, *args, **kwargs):
mode = 'GRU'
super(GRU, self).__init__(mode, *args, **kwargs)
class LSTM(RNNBase):
'''LSTM operator class'''
def __init__(self, *args, **kwargs):
mode = 'LSTM'
super(LSTM, self).__init__(mode, *args, **kwargs)
self.support_non_tensor_inputs = True
self.support_non_tensor_inputs = True

+ 4
- 2
src/seq2seq.py View File

@@ -79,7 +79,8 @@ class Encoder(nn.Cell):
self.vocab_size = config.src_vocab_size
self.embedding_size = config.encoder_embedding_size
self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)
self.rnn = GRU(input_size=self.embedding_size, hidden_size=self.hidden_size, bidirectional=True).to_float(config.compute_type)
self.rnn = GRU(input_size=self.embedding_size, \
hidden_size=self.hidden_size, bidirectional=True).to_float(config.compute_type)
self.fc = nn.Dense(2*self.hidden_size, self.hidden_size).to_float(config.compute_type)
self.shape = P.Shape()
self.transpose = P.Transpose()
@@ -125,7 +126,8 @@ class Decoder(nn.Cell):
self.vocab_size = config.trg_vocab_size
self.embedding_size = config.decoder_embedding_size
self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)
self.rnn = GRU(input_size=self.embedding_size + self.hidden_size*2, hidden_size=self.hidden_size).to_float(config.compute_type)
self.rnn = GRU(input_size=self.embedding_size + self.hidden_size*2, \
hidden_size=self.hidden_size).to_float(config.compute_type)
self.text_len = config.max_length
self.shape = P.Shape()
self.transpose = P.Transpose()


+ 20
- 24
src/utils.py View File

@@ -12,69 +12,65 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
'''Utils for GPU version GRU, like Reverse operators'''
import mindspore
import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import Tensor
from mindspore.ops import constexpr

@constexpr
def Range(length_input, reverse=False):
if reverse:
return Tensor(list(reversed(range(length_input))), mindspore.int32)
return Tensor(list(range(length_input)), mindspore.int32)
import mindspore.numpy as np

class Reverse(nn.Cell):
"""Reverse operator, like Reverse in mindspore"""
def __init__(self, dim):
super().__init__()
self.dim = dim

def construct(self, input_x):
shape = input_x.shape
dim_size = shape[self.dim]
reversed_indexes = Range(dim_size, True)
reversed_indexes = np.arange(dim_size-1, -1, -1)
output = ops.Gather()(input_x, reversed_indexes, self.dim)
return output
class ReverseSequence(nn.Cell):
"""Reverse sequence operator, like ReverseSequenceV2 in mindspore"""
def __init__(self, seq_dim, batch_dim=0):
super().__init__()
self.seq_dim = seq_dim
self.batch_dim = batch_dim
def construct(self, x, seq_lengths):
"""Defines the ReverseSequence operator computation performed."""
batch_size = x.shape[self.batch_dim]
max_seq_len = x.shape[self.seq_dim]
seq_lens_type = seq_lengths.dtype
# Create [batch, sequence, 2] tensor that contains the indices where the
# real data belongs

back = ops.Sub()(seq_lengths, ops.OnesLike()(seq_lengths))

batch_idx = self.make_shape((batch_size, max_seq_len), seq_lens_type, 0)
forward_idx= self.make_shape((batch_size, max_seq_len), seq_lens_type, 1)
forward_idx = self.make_shape((batch_size, max_seq_len), seq_lens_type, 1)
back = back.view(-1, 1)
reverse_idx = ops.Sub()(back, forward_idx)
condition = ops.Less()(reverse_idx, ops.ZerosLike()(reverse_idx))
reverse_idx = ops.Select()(condition, forward_idx, reverse_idx)
reverse_idx = ops.ExpandDims()(reverse_idx, 2)
batch_idx = ops.ExpandDims()(batch_idx, 2)
if self.batch_dim > self.seq_dim:
batch_idx = ops.Transpose()(batch_idx, (1, 0, 2))
reverse_idx = ops.Transpose()(reverse_idx, (1, 0, 2))
x = ops.Transpose()(x, (1, 0, 2))
start_indices = ops.Concat(2)((batch_idx, reverse_idx))
output = ops.GatherNd()(x, start_indices)
return output
def make_shape(self, shape, dtype, range_dim):
output = ops.Ones()(shape, mindspore.float32)
output = ops.CumSum()(output, range_dim)
output = ops.Cast()(output, dtype)
output = output - 1
return output
return output

+ 20
- 12
train.py View File

@@ -29,7 +29,7 @@ from mindspore.train.loss_scale_manager import DynamicLossScaleManager
from mindspore.nn.optim import Adam
from src.config import config
from src.seq2seq import Seq2Seq
from src.gru_for_train import GRUWithLossCell, GRUTrainOneStepWithLossScaleCell, GRUTrainOneStepCell
from src.gru_for_train import GRUWithLossCell, GRUTrainOneStepWithLossScaleCell
from src.dataset import create_gru_dataset
from src.lr_schedule import dynamic_lr
set_seed(1)
@@ -47,12 +47,6 @@ parser.add_argument('--ckpt_path', type=str, default='outputs/', help='Checkpoin
parser.add_argument('--outputs_dir', type=str, default='./', help='Checkpoint save location. Default: outputs/')
args = parser.parse_args()

context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, device_id=args.device_id, save_graphs=False)
if args.device_target == "GPU":
if config.compute_type != mstype.float32:
logger.warning('GPU only support fp32 temporarily, run with fp32.')
config.compute_type = mstype.float32

def get_ms_timestamp():
t = time.time()
return int(round(t * 1000))
@@ -98,12 +92,26 @@ class LossCallBack(Callback):
f.write('\n')

if __name__ == '__main__':
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, \
device_id=args.device_id, save_graphs=False)
if args.device_target == "GPU":
if config.compute_type != mstype.float32:
logger.warning('GPU only support fp32 temporarily, run with fp32.')
config.compute_type = mstype.float32
if args.run_distribute:
rank = args.rank_id
device_num = args.device_num
context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
gradients_mean=True)
init()
if args.device_target == "Ascend":
rank = args.rank_id
device_num = args.device_num
context.set_auto_parallel_context(device_num=device_num,
parallel_mode=ParallelMode.DATA_PARALLEL,
gradients_mean=True)
init()
elif args.device_target == "GPU":
init("nccl")
context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL,
gradients_mean=True)
else:
raise ValueError(args.device_target)
else:
rank = 0
device_num = 1


Loading…
Cancel
Save