Browse Source

init

master
poohRui 11 months ago
commit
cfffabf948
100 changed files with 18480 additions and 0 deletions
  1. +138
    -0
      CMakeLists.txt
  2. +2526
    -0
      Doxyfile.in
  3. +37
    -0
      README.md
  4. +16
    -0
      auto_test/auto_test.h
  5. +74
    -0
      auto_test/basic.cpp
  6. +77
    -0
      auto_test/basic.h
  7. +152
    -0
      auto_test/sample.cpp
  8. +255
    -0
      auto_test/sample.h
  9. +169
    -0
      auto_test/test_code/activation_test.h
  10. +226
    -0
      auto_test/test_code/binary_op_test.h
  11. +230
    -0
      auto_test/test_code/conv_test.h
  12. +224
    -0
      auto_test/test_code/matmul_test.h
  13. +191
    -0
      cmake/local/DownloadProject.cmake
  14. +21
    -0
      cmake/local/DownloadProject.cmake.in
  15. +36
    -0
      cmake/local/GTest.cmake
  16. +35
    -0
      cmake/public/Doxygen.cmake
  17. +15
    -0
      cmake/public/GTest.cmake
  18. +197
    -0
      cmake/public/Utils.cmake
  19. +1
    -0
      docs/_config.yml
  20. +78
    -0
      docs/html/README_8md.html
  21. +107
    -0
      docs/html/allocator_8c.html
  22. +241
    -0
      docs/html/allocator_8h.html
  23. +122
    -0
      docs/html/allocator_8h_source.html
  24. +89
    -0
      docs/html/annotated.html
  25. +565
    -0
      docs/html/batch__norm_8c.html
  26. +176
    -0
      docs/html/batch__norm_8h.html
  27. +98
    -0
      docs/html/batch__norm_8h_source.html
  28. BIN
      docs/html/bc_s.png
  29. BIN
      docs/html/bdwn.png
  30. +795
    -0
      docs/html/binary__op_8c.html
  31. +402
    -0
      docs/html/binary__op_8h.html
  32. +127
    -0
      docs/html/binary__op_8h_source.html
  33. +170
    -0
      docs/html/broadcast_8c.html
  34. +170
    -0
      docs/html/broadcast_8h.html
  35. +96
    -0
      docs/html/broadcast_8h_source.html
  36. +240
    -0
      docs/html/cast_8c.html
  37. +140
    -0
      docs/html/cast_8h.html
  38. +96
    -0
      docs/html/cast_8h_source.html
  39. +106
    -0
      docs/html/classes.html
  40. BIN
      docs/html/closed.png
  41. +306
    -0
      docs/html/conv_8c.html
  42. +168
    -0
      docs/html/conv_8h.html
  43. +97
    -0
      docs/html/conv_8h_source.html
  44. +82
    -0
      docs/html/device_8c.html
  45. +127
    -0
      docs/html/device_8h.html
  46. +105
    -0
      docs/html/device_8h_source.html
  47. +90
    -0
      docs/html/dir_19cc486433ea5376139db663715c98a5.html
  48. +118
    -0
      docs/html/dir_38cd9eca440b760dc966f21b1f8d8f41.html
  49. +82
    -0
      docs/html/dir_49e56c817e5e54854c35e136979f97ca.html
  50. +94
    -0
      docs/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html
  51. +114
    -0
      docs/html/dir_8ddd85c06ebeec0a6efc6a7da38f01d9.html
  52. +102
    -0
      docs/html/dir_ae9a93452e2a84339148a16bcf2eb561.html
  53. +120
    -0
      docs/html/dir_aebb8dcc11953d78e620bbef0b9e2183.html
  54. +489
    -0
      docs/html/dispatch_8h.html
  55. +201
    -0
      docs/html/dispatch_8h_source.html
  56. BIN
      docs/html/doc.png
  57. +526
    -0
      docs/html/dot_8c.html
  58. +141
    -0
      docs/html/dot_8h.html
  59. +95
    -0
      docs/html/dot_8h_source.html
  60. +1766
    -0
      docs/html/doxygen.css
  61. BIN
      docs/html/doxygen.png
  62. +212
    -0
      docs/html/dropout_8c.html
  63. +140
    -0
      docs/html/dropout_8h.html
  64. +96
    -0
      docs/html/dropout_8h_source.html
  65. +133
    -0
      docs/html/duplicate_8c.html
  66. +133
    -0
      docs/html/duplicate_8h.html
  67. +95
    -0
      docs/html/duplicate_8h_source.html
  68. +120
    -0
      docs/html/dynsections.js
  69. +159
    -0
      docs/html/factories_8c.html
  70. +161
    -0
      docs/html/factories_8h.html
  71. +98
    -0
      docs/html/factories_8h_source.html
  72. +143
    -0
      docs/html/files.html
  73. BIN
      docs/html/folderclosed.png
  74. BIN
      docs/html/folderopen.png
  75. +127
    -0
      docs/html/functions.html
  76. +127
    -0
      docs/html/functions_vars.html
  77. +82
    -0
      docs/html/globals.html
  78. +333
    -0
      docs/html/globals_a.html
  79. +108
    -0
      docs/html/globals_b.html
  80. +156
    -0
      docs/html/globals_c.html
  81. +105
    -0
      docs/html/globals_d.html
  82. +209
    -0
      docs/html/globals_defs.html
  83. +81
    -0
      docs/html/globals_e.html
  84. +94
    -0
      docs/html/globals_enum.html
  85. +202
    -0
      docs/html/globals_eval.html
  86. +81
    -0
      docs/html/globals_f.html
  87. +606
    -0
      docs/html/globals_func.html
  88. +99
    -0
      docs/html/globals_func_b.html
  89. +144
    -0
      docs/html/globals_func_c.html
  90. +87
    -0
      docs/html/globals_func_d.html
  91. +141
    -0
      docs/html/globals_func_g.html
  92. +99
    -0
      docs/html/globals_func_m.html
  93. +90
    -0
      docs/html/globals_func_n.html
  94. +87
    -0
      docs/html/globals_func_p.html
  95. +177
    -0
      docs/html/globals_func_s.html
  96. +89
    -0
      docs/html/globals_func_t.html
  97. +84
    -0
      docs/html/globals_func_v.html
  98. +153
    -0
      docs/html/globals_g.html
  99. +81
    -0
      docs/html/globals_i.html
  100. +87
    -0
      docs/html/globals_l.html

+ 138
- 0
CMakeLists.txt View File

@@ -0,0 +1,138 @@
cmake_minimum_required(VERSION 3.11 FATAL_ERROR)
project(aitisa_api VERSION 0.4.0 LANGUAGES C CXX)

##############################################
# General options
##############################################
option(AITISA_API_BUILD_SHARED_LIBS "Compiled as a shared library" ON)
option(AITISA_API_BUILD_TESTING "Build with testing enabled" ON)
option(AITISA_API_BUILD_AUTO_TESTING "Compile auto testing framework as a shared library" OFF)
option(AITISA_API_BUILD_DOCS "Generate documents with Doxygen" ON)
option(AITISA_API_USE_GTEST_LOCAL "Download and build with GTest" ON)

if(AITISA_API_BUILD_SHARED_LIBS)
  set(BUILD_SHARED_LIBS ON CACHE INTERNAL "")
endif()

# Show the detail of compile messages
set(CMAKE_VERBOSE_MAKEFILE OFF)
set(CMAKE_COLOR_MAKEFILE ON)

##############################################
# General setup
##############################################
# https://blog.kitware.com/cmake-and-the-default-build-type
# Default to a Debug build, but only when the user has not already chosen a
# build type and a single-config generator is in use.
set(default_build_type "Debug")
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
  set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE
      STRING "Choose the type of build." FORCE)
  # Set the possible values of build type for cmake-gui
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
               "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

# Request C++11 through the portable standard knobs rather than appending
# -std=c++11 to CMAKE_CXX_FLAGS (compiler-specific and clobbers user flags).
# aitisa_api itself is pure C; this applies to the C++ test targets created
# later in this file and in add_subdirectory(test).
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(AITISA_API_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(AITISA_API_CMAKE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
set(AITISA_API_EXTERNAL_DIR ${AITISA_API_ROOT_DIR}/third_party)

include(GNUInstallDirs)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR})
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_BINDIR})

##############################################
# Create target and set properties
##############################################

include(cmake/public/Utils.cmake)

# Find all the files under `src/`.
# NOTE: globbing is resolved at configure time; re-run CMake after adding or
# removing source files.
file(GLOB_RECURSE aitisa_api_srcs "src/*.c" "src/*.h")

# Create library target with all the source codes.
if(BUILD_SHARED_LIBS)
  add_library(aitisa_api SHARED ${aitisa_api_srcs})
  set_target_properties(aitisa_api
    PROPERTIES
      INTERFACE_POSITION_INDEPENDENT_CODE ON
      OUTPUT_NAME "aitisa_api"
      SOVERSION ${PROJECT_VERSION_MAJOR}
      C_VISIBILITY_PRESET "hidden"
      VISIBILITY_INLINES_HIDDEN ON
  )
  target_compile_definitions(aitisa_api
    PRIVATE
      AITISA_API_SHARED_LIBS
      AITISA_API_SHARED_LIBS_EXPORTS
  )
else()
  add_library(aitisa_api STATIC ${aitisa_api_srcs})
endif()

target_compile_features(aitisa_api PUBLIC c_std_99)
target_include_directories(aitisa_api
  PUBLIC
    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
)

aitisa_api_make_interface_library(aitisa_api aitisa_api_interface)
add_library(aitisa_api::aitisa_api ALIAS aitisa_api_interface)

##############################################
# Testing
##############################################
if(AITISA_API_BUILD_TESTING)
  enable_testing()
  if(AITISA_API_USE_GTEST_LOCAL)
    include(cmake/local/GTest.cmake)
  else()
    include(cmake/public/GTest.cmake)
  endif()
  add_subdirectory(test)
endif()

##############################################
# Auto Testing
##############################################
if(AITISA_API_BUILD_AUTO_TESTING)
  # GTest may already have been pulled in by the Testing section above.
  if(NOT AITISA_API_BUILD_TESTING)
    enable_testing()
    if(AITISA_API_USE_GTEST_LOCAL)
      include(cmake/local/GTest.cmake)
    else()
      include(cmake/public/GTest.cmake)
    endif()
  endif()
  file(GLOB_RECURSE auto_test_srcs "auto_test/*.cpp" "auto_test/*.h")
  add_library(auto_test SHARED ${auto_test_srcs})
  set_target_properties(auto_test
    PROPERTIES
      INTERFACE_POSITION_INDEPENDENT_CODE ON
      OUTPUT_NAME "auto_test"
      SOVERSION ${PROJECT_VERSION_MAJOR}
      C_VISIBILITY_PRESET "hidden"
      VISIBILITY_INLINES_HIDDEN ON
  )
  target_compile_definitions(auto_test
    PRIVATE
      AUTO_TEST_SHARED_LIBS
      AUTO_TEST_SHARED_LIBS_EXPORTS
  )
  # PUBLIC: auto_test's own headers include aitisa and gmock headers, so its
  # consumers need those usage requirements too (matches the legacy
  # keyword-less propagation behavior).
  target_link_libraries(auto_test PUBLIC aitisa_api::aitisa_api aitisa_api::gmock)
  add_library(aitisa_api::auto_test ALIAS auto_test)
endif()

##############################################
# Generate Documents
##############################################
if(AITISA_API_BUILD_DOCS)
  # Cache variable so users can point at their own Doxygen installation; the
  # previous hard-coded developer path is kept as the default.
  set(Doxygen_dir "/home/amax107/hice/ayl/doxygen-1.8.16" CACHE PATH
      "Root directory of the Doxygen installation used to build the docs")
  include(cmake/public/Doxygen.cmake)
endif()

+ 2526
- 0
Doxyfile.in
File diff suppressed because it is too large
View File


+ 37
- 0
README.md View File

@@ -0,0 +1,37 @@
Standard APIs for AI operations

# Environment Requirement

| Dependency | Version required |
| ---------- | ------------------- |
| gcc | `5.0` or higher |
| CMake | `3.11` or higher |

# Setup
You can set up *Standard APIs for AI operations* by following these instructions:
1. Use **git clone** instruction to download source code

```bash
git clone http://bggit.ihub.org.cn/p63541902/aitisa_api.git
```

2. Make a new directory **build** under the project directory, then use **cmake** instruction

```bash
mkdir build
cd build
cmake ..
```

3. Use **make** instruction to compile the code

```bash
make
```
4. Run a testing file. Take the convolution operator testing file as an example:

```bash
cd bin
./conv_test
```

+ 16
- 0
auto_test/auto_test.h View File

@@ -0,0 +1,16 @@
#pragma once

#include "auto_test/test_code/binary_op_test.h"
#include "auto_test/test_code/matmul_test.h"
#include "auto_test/test_code/conv_test.h"
#include "auto_test/test_code/activation_test.h"

// Registers the user's kernels for each supported operator in one shot by
// forwarding to the per-operator registration macros declared in the
// headers above.
#define REGISTER_OP(ADD, SUB, MUL, DIV, MATMUL, CONV) \
REGISTER_BINARY_OP(ADD, SUB, MUL, DIV); \
REGISTER_MATMUL(MATMUL); \
REGISTER_CONV(CONV);

// Boilerplate for a test main(): initializes GoogleTest with argc/argv and
// returns the result of running all registered tests.
#define PERFORM_TEST \
::testing::InitGoogleTest(&argc, argv); \
return RUN_ALL_TESTS();


+ 74
- 0
auto_test/basic.cpp View File

@@ -0,0 +1,74 @@
#include "auto_test/basic.h"
#include <random>

namespace aitisa_api {

// Lookup table mapping the plain-int dtype tags used by the auto tests onto
// the aitisa DataType values.  NOTE: the index order (0 = int8 ... 9 =
// double) must stay in sync with the switch statements in natural_assign()
// and random_assign() below and with elem_size() in basic.h.
const DataType aitisa_dtypes[10] = {kInt8, kUint8,
kInt16, kUint16,
kInt32, kUint32,
kInt64, kUint64,
kFloat, kDouble};
// Device tag table: index 0 = CPU, index 1 = CUDA (device id fixed to 0).
const Device aitisa_devices[2] = { {DEVICE_CPU, 0},
{DEVICE_CUDA, 0} };

// Fills `data` with the "natural" sequence 1, 2, 3, ... for integer element
// types (values wrap according to the target type's width).
template <typename DATATYPE>
inline void natural_assign_int(DATATYPE *data, unsigned int nelem){
  DATATYPE value = 1;
  for(unsigned int idx = 0; idx != nelem; ++idx){
    data[idx] = value;
    value = value + 1;
  }
}
// Fills `data` with the sequence 0.1, 0.2, 0.3, ... for floating-point
// element types.
template <typename DATATYPE>
inline void natural_assign_float(DATATYPE *data, unsigned int nelem){
  unsigned int idx = 0;
  while(idx < nelem){
    data[idx] = static_cast<DATATYPE>(idx) * 0.1 + 0.1;
    ++idx;
  }
}
// Fills a raw buffer of `len` bytes with the deterministic "natural"
// sequence, interpreting the buffer according to `dtype`, which indexes
// aitisa_dtypes (0..7 = integer types, 8 = float, 9 = double).
void natural_assign(void *data, unsigned int len, int dtype){
  switch (dtype) {
    case 0: natural_assign_int(static_cast<int8_t*>(data),   len/sizeof(int8_t));   break;
    case 1: natural_assign_int(static_cast<uint8_t*>(data),  len/sizeof(uint8_t));  break;
    case 2: natural_assign_int(static_cast<int16_t*>(data),  len/sizeof(int16_t));  break;
    case 3: natural_assign_int(static_cast<uint16_t*>(data), len/sizeof(uint16_t)); break;
    case 4: natural_assign_int(static_cast<int32_t*>(data),  len/sizeof(int32_t));  break;
    case 5: natural_assign_int(static_cast<uint32_t*>(data), len/sizeof(uint32_t)); break;
    case 6: natural_assign_int(static_cast<int64_t*>(data),  len/sizeof(int64_t));  break;
    case 7: natural_assign_int(static_cast<uint64_t*>(data), len/sizeof(uint64_t)); break;
    case 8: natural_assign_float(static_cast<float*>(data),  len/sizeof(float));    break;
    case 9: natural_assign_float(static_cast<double*>(data), len/sizeof(double));   break;
    default: break; // unknown tag: leave the buffer untouched
  }
}

// Fills `data` with pseudo-random integers drawn uniformly from [0, 10].
// A fixed seed keeps the sequence reproducible across runs.
//
// Fix: std::uniform_int_distribution is undefined behavior for the 8-bit
// character types (int8_t/uint8_t are not among the allowed IntTypes), so
// the distribution is generated over plain int and narrowed to DATATYPE;
// every produced value (0..10) fits in any integer element type.
template <typename DATATYPE>
inline void random_assign_int(DATATYPE *data, unsigned int nelem){
  std::default_random_engine gen(/*seed*/0);
  std::uniform_int_distribution<int> dis(0, 10);
  for(unsigned int i=0; i<nelem; i++){
    data[i] = static_cast<DATATYPE>(dis(gen));
  }
}
// Fills `data` with pseudo-random samples from a standard normal N(0, 1).
// The engine is re-seeded with 0 on every call, so output is deterministic.
template <typename DATATYPE>
inline void random_assign_float(DATATYPE *data, unsigned int nelem){
  std::default_random_engine engine(/*seed*/0);
  std::normal_distribution<DATATYPE> normal(0, 1);
  for(unsigned int pos = 0; pos < nelem; ++pos){
    data[pos] = normal(engine);
  }
}
// Fills a raw buffer of `len` bytes with fixed-seed pseudo-random values,
// interpreting the buffer according to `dtype`, which indexes aitisa_dtypes
// (0..7 = integer types, 8 = float, 9 = double).
void random_assign(void *data, unsigned int len, int dtype){
  switch (dtype) {
    case 0: random_assign_int(static_cast<int8_t*>(data),   len/sizeof(int8_t));   break;
    case 1: random_assign_int(static_cast<uint8_t*>(data),  len/sizeof(uint8_t));  break;
    case 2: random_assign_int(static_cast<int16_t*>(data),  len/sizeof(int16_t));  break;
    case 3: random_assign_int(static_cast<uint16_t*>(data), len/sizeof(uint16_t)); break;
    case 4: random_assign_int(static_cast<int32_t*>(data),  len/sizeof(int32_t));  break;
    case 5: random_assign_int(static_cast<uint32_t*>(data), len/sizeof(uint32_t)); break;
    case 6: random_assign_int(static_cast<int64_t*>(data),  len/sizeof(int64_t));  break;
    case 7: random_assign_int(static_cast<uint64_t*>(data), len/sizeof(uint64_t)); break;
    case 8: random_assign_float(static_cast<float*>(data),  len/sizeof(float));    break;
    case 9: random_assign_float(static_cast<double*>(data), len/sizeof(double));   break;
    default: break; // unknown tag: leave the buffer untouched
  }
}

} // namespace aitisa_api

+ 77
- 0
auto_test/basic.h View File

@@ -0,0 +1,77 @@
#pragma once

#include <iostream>
#include "gtest/gtest.h"
extern "C" {
#include "src/core/tensor.h"
}
namespace aitisa_api {

// Tag tables defined in basic.cpp (0 = int8 ... 9 = double; 0 = CPU, 1 = CUDA).
extern const DataType aitisa_dtypes[];
extern const Device aitisa_devices[];
// Conversions between the plain-int dtype/device tags used by the test
// inputs and the aitisa structs.
inline DataType aitisa_int_to_dtype(int n){ return aitisa_dtypes[n]; }
inline Device aitisa_int_to_device(int n){ return aitisa_devices[n]; }
inline int aitisa_dtype_to_int(DataType dtype){ return static_cast<int>(dtype.code); }
inline int aitisa_device_to_int(Device device){ return static_cast<int>(device.type); }
// Size in bytes of one element of the given dtype tag.
inline unsigned int elem_size(int dtype) { return static_cast<unsigned int>(aitisa_dtypes[dtype].size); }

// Deterministic buffer fillers implemented in basic.cpp: natural_assign
// writes an increasing sequence, random_assign a fixed-seed pseudo-random one.
void natural_assign(void *data, unsigned int len, int dtype);
void random_assign(void *data, unsigned int len, int dtype);

// Aliases marking where a test refers to the aitisa reference implementation's
// types (as opposed to the user library under test).
using AITISA_Tensor = Tensor;
using AITISA_Device = Device;
using AITISA_DataType = DataType;

// ANSI color escape codes used when printing test results.
#define GREEN "\033[32m"
#define RESET "\033[0m"

// Generates a `Basic` adapter class bridging the user's tensor library to
// the auto-test framework.  The caller supplies its tensor/dtype/device
// types plus int<->dtype/device conversion, tensor creation and tensor
// resolution functions; the generated static wrappers are what the test
// templates invoke as `UserFuncs`.
#define REGISTER_BASIC(TENSOR, DATA_TYPE, INT_TO_DTYPE, DTYPE_TO_INT, DEVICE, INT_TO_DEVICE, DEVICE_TO_INT, CREATE, RESOLVE) \
class Basic { \
public: \
using UserTensor = TENSOR; \
using UserDataType = DATA_TYPE; \
using UserDevice = DEVICE; \
static UserDataType user_int_to_dtype(int data_type_num){ \
return INT_TO_DTYPE(data_type_num); \
} \
static UserDevice user_int_to_device(int device_type_num){ \
return INT_TO_DEVICE(device_type_num); \
} \
static int user_dtype_to_int(UserDataType dtype){ \
return DTYPE_TO_INT(dtype); \
} \
static int user_device_to_int(UserDevice device){ \
return DEVICE_TO_INT(device); \
} \
static void user_create(UserDataType dtype, UserDevice device, int64_t *dims, int64_t ndim, \
void *data, unsigned int len, UserTensor *tensor){ \
CREATE(dtype, device, dims, ndim, data, len, tensor); \
} \
static void user_resolve(UserTensor tensor, UserDataType *dtype, UserDevice *device, \
int64_t **dims, int64_t *ndim, void **data, unsigned int *len){ \
RESOLVE(tensor, dtype, device, dims, ndim, data, len); \
} \
};


// functions for debug
// Debug helper: prints `n` elements of `data` on one line, each followed by
// a space, terminated with a newline.
template <typename T>
void print_data(T* data, unsigned int n){
  for(unsigned int idx = 0; idx != n; ++idx){
    std::cout << data[idx] << " ";
  }
  std::cout << std::endl;
}

// Debug helper: prints `data` as an m-by-n row-major matrix, one row per
// line, followed by a separator rule.
template <typename T>
void print_data2d(T* data, unsigned int m, unsigned int n){
  for(unsigned int row = 0; row != m; ++row){
    const T* row_ptr = data + row * n;
    for(unsigned int col = 0; col != n; ++col){
      std::cout << row_ptr[col] << " ";
    }
    std::cout << std::endl;
  }
  std::cout << "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" << std::endl;
}

} // namespace aitisa_api

+ 152
- 0
auto_test/sample.cpp View File

@@ -0,0 +1,152 @@
#include "auto_test/sample.h"
#include <cstring>

namespace aitisa_api {

// Constructs a unary input that takes ownership of the caller-allocated
// `dims` and `data` buffers and starts the shared reference count at 1.
Unary_Input::Unary_Input(int64_t ndim, int64_t *dims, int dtype, int device,
void *data, unsigned int len):
ndim_(ndim), dims_(dims), dtype_(dtype), device_(device), data_(data), len_(len) {
count_ = new int;
*count_ = 1;
}

// Constructs a unary input by copying the first `ndim` dimension values out
// of `dims` into a freshly allocated array; takes ownership of `data` and
// starts the shared reference count at 1.
Unary_Input::Unary_Input(int64_t ndim, std::vector<int64_t> dims, int dtype,
                         int device, void *data, unsigned int len):
    ndim_(ndim), dims_(nullptr), dtype_(dtype), device_(device), data_(data), len_(len) {
  dims_ = new int64_t[ndim];
  for(int64_t idx = 0; idx < ndim; ++idx){
    dims_[idx] = dims[idx];
  }
  count_ = new int;
  *count_ = 1;
}

// Copy constructor: shares the payload of `input` and bumps the shared
// reference count.
Unary_Input::Unary_Input(Unary_Input& input):
ndim_(input.ndim()), dims_(input.dims()), dtype_(input.dtype()),
device_(input.device()), data_(input.data()), len_(input.len()),
count_(input.count()) {
(*count_)++;
}

// Move constructor: steals the payload of `input`; the source is detached
// (to_nullptr) so its destructor frees nothing.
Unary_Input::Unary_Input(Unary_Input && input):
ndim_(input.ndim()), dims_(input.dims()), dtype_(input.dtype()),
device_(input.device()), data_(input.data()), len_(input.len()),
count_(input.count()) {
input.to_nullptr();
}

// Copy assignment with shared-reference semantics: detaches from the
// current payload (freeing it when this object was the last owner) and
// attaches to right's payload, bumping its reference count.
Unary_Input & Unary_Input::operator=(Unary_Input& right){
  // Fix: guard against self-assignment, which would otherwise free the very
  // payload that is about to be copied when the count drops to zero.
  if(this == &right){
    return *this;
  }
  (*(this->count_))--;
  if(*(this->count_) == 0){
    delete [] this->dims_;
    delete [] (char*)this->data_;
    delete this->count_;  // fix: the counter itself used to be leaked here
  }
  this->ndim_ = right.ndim();
  this->dims_ = right.dims();
  this->dtype_ = right.dtype();
  this->device_ = right.device();
  this->len_ = right.len();
  this->data_ = right.data();
  this->count_ = right.count();
  (*(this->count_))++;
  return *this;
}

// Constructs a binary input that takes ownership of both caller-allocated
// dims/data buffer pairs and starts the shared reference count at 1.
Binary_Input::Binary_Input(int64_t ndim1, int64_t *dims1, int dtype1,
int device1, void *data1, unsigned int len1,
int64_t ndim2, int64_t *dims2, int dtype2,
int device2, void *data2, unsigned int len2):
ndim1_(ndim1), dims1_(dims1), dtype1_(dtype1), device1_(device1), data1_(data1), len1_(len1),
ndim2_(ndim2), dims2_(dims2), dtype2_(dtype2), device2_(device2), data2_(data2), len2_(len2) {
count_ = new int;
*count_ = 1;
}

// Constructs a binary input by copying both dimension vectors into freshly
// allocated arrays; takes ownership of the two data buffers and starts the
// shared reference count at 1.
Binary_Input::Binary_Input(int64_t ndim1, std::vector<int64_t> dims1, int dtype1,
                           int device1, void *data1, unsigned int len1,
                           int64_t ndim2, std::vector<int64_t> dims2, int dtype2,
                           int device2, void *data2, unsigned int len2):
    ndim1_(ndim1), dims1_(nullptr), dtype1_(dtype1), device1_(device1), data1_(data1), len1_(len1),
    ndim2_(ndim2), dims2_(nullptr), dtype2_(dtype2), device2_(device2), data2_(data2), len2_(len2) {
  dims1_ = new int64_t[ndim1];
  dims2_ = new int64_t[ndim2];
  for(int64_t idx = 0; idx < ndim1; ++idx){
    dims1_[idx] = dims1[idx];
  }
  for(int64_t idx = 0; idx < ndim2; ++idx){
    dims2_[idx] = dims2[idx];
  }
  count_ = new int;
  *count_ = 1;
}

// Copy constructor: shares both payloads of `input` and bumps the shared
// reference count.
Binary_Input::Binary_Input(Binary_Input & input):
ndim1_(input.ndim1()), dims1_(input.dims1()), dtype1_(input.dtype1()),
device1_(input.device1()), data1_(input.data1()), len1_(input.len1()),
ndim2_(input.ndim2()), dims2_(input.dims2()), dtype2_(input.dtype2()),
device2_(input.device2()), data2_(input.data2()), len2_(input.len2()),
count_(input.count()) {
(*count_)++;
}

// Move constructor: steals both payloads of `input`; the source is detached
// (to_nullptr) so its destructor frees nothing.
Binary_Input::Binary_Input(Binary_Input && input):
ndim1_(input.ndim1()), dims1_(input.dims1()), dtype1_(input.dtype1()),
device1_(input.device1()), data1_(input.data1()), len1_(input.len1()),
ndim2_(input.ndim2()), dims2_(input.dims2()), dtype2_(input.dtype2()),
device2_(input.device2()), data2_(input.data2()), len2_(input.len2()),
count_(input.count()) {
input.to_nullptr();
}

// Copy assignment with shared-reference semantics: detaches from the
// current payloads (freeing them when this object was the last owner) and
// attaches to right's payloads, bumping its reference count.
Binary_Input & Binary_Input::operator=(Binary_Input& right){
  // Fix: guard against self-assignment, which would otherwise free the very
  // payloads that are about to be copied when the count drops to zero.
  if(this == &right){
    return *this;
  }
  (*(this->count_))--;
  if(*(this->count_) == 0){
    delete [] this->dims1_;
    delete [] this->dims2_;
    delete [] (char*)this->data1_;
    delete [] (char*)this->data2_;
    delete this->count_;  // fix: the counter itself used to be leaked here
  }
  this->ndim1_ = right.ndim1();
  this->ndim2_ = right.ndim2();
  this->dims1_ = right.dims1();
  this->dims2_ = right.dims2();
  this->dtype1_ = right.dtype1();
  this->dtype2_ = right.dtype2();
  this->device1_ = right.device1();
  this->device2_ = right.device2();
  this->len1_ = right.len1();
  this->len2_ = right.len2();
  this->data1_ = right.data1();
  this->data2_ = right.data2();
  this->count_ = right.count();
  (*(this->count_))++;
  return *this;
}

// Copy constructor: shares the payload of `result` and bumps the shared
// reference count.
Result::Result(Result& result):
ndim_(result.ndim()), dims_(result.dims()), dtype_(result.dtype()),
device_(result.device()), data_(result.data()), len_(result.len()),
count_(result.count()){
(*count_)++;
}

// Move constructor: steals the payload of `result`; the source is detached
// (to_nullptr) so its destructor frees nothing.
Result::Result(Result && result):
ndim_(result.ndim()), dims_(result.dims()), dtype_(result.dtype()),
device_(result.device()), data_(result.data()), len_(result.len()),
count_(result.count()){
result.to_nullptr();
}

// Deep-copies the given dims/data buffers into this Result.
// Fix: the previously held buffers are released first so that calling
// set_result more than once does not leak; both members start as nullptr
// and deleting nullptr is a no-op, so the first call is unaffected.
void Result::set_result(int64_t ndim, int64_t *dims, int dtype,
                        int device, void *data, unsigned int len) {
  delete [] dims_;
  delete [] (char*)data_;
  ndim_ = ndim;
  dtype_ = dtype;
  device_ = device;
  len_ = len;
  dims_ = new int64_t[ndim];
  data_ = (void*) new char[len];
  memcpy(dims_, dims, ndim*sizeof(int64_t));
  memcpy(data_, data, len);
}

} // namespace aitisa_api

+ 255
- 0
auto_test/sample.h View File

@@ -0,0 +1,255 @@
#pragma once

extern "C"{
#include <stdint.h>
}
#include <vector>
#include <cstring>
#include "basic.h"

namespace aitisa_api {

// A single-operand test input.  The dims/data buffers are owned by the
// object and shared between copies through an intrusive reference count
// (count_); the last owner releases everything in the destructor.
class Unary_Input {
 public:
  Unary_Input() { count_ = new int; *count_ = 1; }
  // Takes ownership of caller-allocated `dims` and `data` (see sample.cpp).
  Unary_Input(int64_t ndim, int64_t *dims, int dtype, int device,
              void *data, unsigned int len);
  // Copies the dimension values out of `dims`; takes ownership of `data`.
  Unary_Input(int64_t ndim, std::vector<int64_t> dims, int dtype,
              int device, void *data, unsigned int len);
  // Copy: shares the payload and bumps the reference count.
  Unary_Input(Unary_Input & input);
  // Move: steals the payload, leaving the source detached (to_nullptr).
  Unary_Input(Unary_Input && input);
  virtual ~Unary_Input() {
    if(count_){
      (*count_)--;
      if(*count_ == 0){
        // Fix: dims_ points at an int64_t[] allocation (see the vector
        // constructor), so it must be deleted through its real type; the
        // previous delete through a (char*) cast was undefined behavior.
        // Assumes pointer-constructor callers also allocate new int64_t[].
        delete [] dims_;
        delete [] (char*)data_; // data_ is allocated as char[] by callers
        delete count_;
      }
    }
  }
  Unary_Input & operator=(Unary_Input& right);
  void to_CUDA() { device_ = 1; }
  void to_CPU() { device_ = 0; }
  int64_t ndim() { return ndim_; }
  int64_t* dims() { return dims_; }
  int dtype() { return dtype_; }
  int device() { return device_; }
  void* data() { return data_; }
  unsigned int len() { return len_; }
  void set_data(void *data, unsigned int len) { data_ = data; len_ = len; }
  int * count() { return count_; }
  // Detaches this object from its payload without freeing it, so the
  // destructor becomes a no-op (used by the move constructor).
  void to_nullptr() { count_ = nullptr; dims_ = nullptr; data_ = nullptr; }
 private:
  int64_t ndim_ = 0;
  int64_t *dims_ = nullptr;
  int dtype_ = 0;
  int device_ = 0;
  void *data_ = nullptr;
  unsigned int len_ = 0;
  int *count_ = nullptr;
};

// A two-operand test input.  Both dims/data buffer pairs are owned by the
// object and shared between copies through an intrusive reference count
// (count_); the last owner releases everything in the destructor.
class Binary_Input {
 public:
  Binary_Input() { count_ = new int; *count_ = 1; }
  // Takes ownership of all caller-allocated buffers (see sample.cpp).
  Binary_Input(int64_t ndim1, int64_t *dims1, int dtype1,
               int device1, void *data1, unsigned int len1,
               int64_t ndim2, int64_t *dims2, int dtype2,
               int device2, void *data2, unsigned int len2);
  // Copies the dimension vectors; takes ownership of the data buffers.
  Binary_Input(int64_t ndim1, std::vector<int64_t> dims1, int dtype1,
               int device1, void *data1, unsigned int len1,
               int64_t ndim2, std::vector<int64_t> dims2, int dtype2,
               int device2, void *data2, unsigned int len2);
  // Copy: shares the payloads and bumps the reference count.
  Binary_Input(Binary_Input & input);
  // Move: steals the payloads, leaving the source detached (to_nullptr).
  Binary_Input(Binary_Input && input);
  virtual ~Binary_Input() {
    if(count_){
      (*count_)--;
      if(*count_ == 0){
        // Fix: dims1_/dims2_ point at int64_t[] allocations (see the vector
        // constructor), so they must be deleted through their real type;
        // the previous delete through a (char*) cast was undefined
        // behavior.  Assumes pointer-constructor callers also allocate
        // new int64_t[].
        delete [] dims1_;
        delete [] dims2_;
        delete [] (char*)data1_; // data buffers are allocated as char[]
        delete [] (char*)data2_;
        delete count_;
      }
    }
  }
  Binary_Input & operator=(Binary_Input& right);
  void to_CUDA() { device1_ = 1; device2_ = 1; }
  void to_CPU() { device1_ = 0; device2_ = 0; }
  int64_t ndim1() { return ndim1_; }
  int64_t ndim2() { return ndim2_; }
  int64_t* dims1() { return dims1_; }
  int64_t* dims2() { return dims2_; }
  int dtype1() { return dtype1_; }
  int dtype2() { return dtype2_; }
  int device1() { return device1_; }
  int device2() { return device2_; }
  void* data1() { return data1_; }
  void* data2() { return data2_; }
  unsigned int len1() { return len1_; }
  unsigned int len2() { return len2_; }
  void set_data1(void *data, unsigned int len) { data1_ = data; len1_ = len; }
  void set_data2(void *data, unsigned int len) { data2_ = data; len2_ = len; }
  int * count() { return count_; }
  // Detaches this object from its payloads without freeing them, so the
  // destructor becomes a no-op (used by the move constructor).
  void to_nullptr() {
    count_ = nullptr;
    dims1_ = nullptr;
    dims2_ = nullptr;
    data1_ = nullptr;
    data2_ = nullptr;
  }
 private:
  int64_t ndim1_ = 0;
  int64_t *dims1_ = nullptr;
  int dtype1_ = 0;
  int device1_ = 0;
  void *data1_ = nullptr;
  unsigned int len1_ = 0;
  int64_t ndim2_ = 0;
  int64_t *dims2_ = nullptr;
  int dtype2_ = 0;
  int device2_ = 0;
  void *data2_ = nullptr;
  unsigned int len2_ = 0;
  int *count_ = nullptr;
};

class Result {
public:
Result() { count_ = new int; *count_ = 1;}
Result(Result & result);
Result(Result && result);
virtual ~Result() {
if(count_){
(*count_)--;
if(*count_ == 0){
delete [] dims_;
delete [] (char*)data_;
}
}
}
int dtype() const { return dtype_; }
int device() const { return device_; }
int64_t ndim() const { return ndim_; }
int64_t* dims() const { return dims_; }
void* data() const { return data_; }
unsigned int len() const { return len_; }
int * count() { return count_; }
virtual void set_result(int64_t ndim, int64_t *dims, int dtype,
int device, void *data, unsigned int len);
virtual void to_nullptr() {
dims_ = nullptr;
data_ = nullptr;
count_ = nullptr;
}
private:
int64_t ndim_ = 0;
int64_t *dims_ = nullptr;
int dtype_ = 0;
int device_ = 0;
void *data_ = nullptr;
unsigned int len_ = 0;
int *count_ = nullptr;
};

// Couples one test input with the reference Result computed for it by the
// aitisa implementation (see get_binary_sample below).
template <typename InputType>
class Sample {
public:
Sample() {}
Sample(InputType& in):input_(in), result_() {}
Sample(Sample<InputType>& sample): input_(sample.input()), result_(sample.result()) {}
Sample(Sample<InputType> && sample): input_(sample.input()), result_(sample.result()) {}
virtual ~Sample() {}
// NOTE(review): these assignments rely on InputType's and Result's
// operator= implementing the shared-reference-count semantics; confirm
// Result declares an operator= (a compiler-generated shallow copy would
// share buffers without bumping the count).
Sample & operator=(Sample& right){
input_ = right.input();
result_ = right.result();
return *this;
}
void set_input(InputType& in) { input_ = in; }
InputType & input() { return input_; }
Result & result() { return result_; }
// Deep-copies the given buffers into the held Result.
void set_result(int64_t ndim, int64_t *dims, int dtype,
int device ,void *data, unsigned int len){
result_.set_result(ndim, dims, dtype, device, data, len);
}
private:
InputType input_;
Result result_;
};

// To make child class of ::testing::Test a concrete class to be instantiated
// To make child class of ::testing::Test a concrete class to be
// instantiated outside the test runner: gives TestBody a no-op definition
// so fixtures like ActivationTest can be constructed directly (see
// get_binary_sample).
template <typename T>
class Concrete : public T {
public:
virtual void TestBody() {}
};

// Builds a Sample for a binary operator: copies input number `sample_num`
// out of a default-constructed TestType fixture, runs the aitisa reference
// kernel on it, and stores the resolved output as the expected result.
// TestType must provide an `input[]` array of Binary_Input pointers and a
// static `aitisa_kernel(in1, in2, &out)`.
template <typename TestType>
Sample<Binary_Input> get_binary_sample(int sample_num){
Sample<Binary_Input> sample;
// Concrete<> makes the (otherwise abstract) gtest fixture instantiable.
Concrete<TestType> test;
int64_t ndim_out, *dims_out;
void *data_out;
unsigned int len_out;
int dtype_out_num, device_out_num;
AITISA_DataType dtype1, dtype2, dtype_out;
AITISA_Device device1, device2, device_out;
AITISA_Tensor tensor_in1, tensor_in2, tensor_out;
// Copy the fixture's input into the sample (shared-reference copy).
sample.set_input(*(test.input[sample_num]));
Binary_Input& input = sample.input();
// Wrap both operands as aitisa tensors over the input's raw buffers.
dtype1 = aitisa_int_to_dtype(input.dtype1());
dtype2 = aitisa_int_to_dtype(input.dtype2());
device1 = aitisa_int_to_device(input.device1());
device2 = aitisa_int_to_device(input.device2());
aitisa_create(dtype1, device1, input.dims1(), input.ndim1(),
input.data1(), input.len1(), &tensor_in1);
aitisa_create(dtype2, device2, input.dims2(), input.ndim2(),
input.data2(), input.len2(), &tensor_in2);
// Run the reference kernel and unpack the output tensor.
TestType::aitisa_kernel(tensor_in1, tensor_in2, &tensor_out);
aitisa_resolve(tensor_out, &dtype_out, &device_out,
&dims_out, &ndim_out, &data_out, &len_out);
dtype_out_num = aitisa_dtype_to_int(dtype_out);
device_out_num = aitisa_device_to_int(device_out);
// set_result deep-copies dims/data, so the tensors can be destroyed.
sample.set_result(ndim_out, dims_out, dtype_out_num,
device_out_num, data_out, len_out);
aitisa_destroy(&tensor_in1);
aitisa_destroy(&tensor_in2);
aitisa_destroy(&tensor_out);
return sample;
}

// template <typename TestType>
// void get_binary_sample(Sample<Binary_Input>& sample, int sample_num){
// Concrete<TestType> test;
// int64_t ndim_out, *dims_out;
// void *data_out;
// unsigned int len_out;
// int dtype_out_num, device_out_num;
// AITISA_DataType dtype1, dtype2, dtype_out;
// AITISA_Device device1, device2, device_out;
// AITISA_Tensor tensor_in1, tensor_in2, tensor_out;
// sample.set_input(*(test.input[sample_num]));
// Binary_Input& input = sample.input();
// dtype1 = aitisa_int_to_dtype(input.dtype1());
// dtype2 = aitisa_int_to_dtype(input.dtype2());
// device1 = aitisa_int_to_device(input.device1());
// device2 = aitisa_int_to_device(input.device2());
// aitisa_create(dtype1, device1, input.dims1(), input.ndim1(),
// input.data1(), input.len1(), &tensor_in1);
// aitisa_create(dtype2, device2, input.dims2(), input.ndim2(),
// input.data2(), input.len2(), &tensor_in2);
// TestType::aitisa_kernel(tensor_in1, tensor_in2, &tensor_out);
// aitisa_resolve(tensor_out, &dtype_out, &device_out,
// &dims_out, &ndim_out, &data_out, &len_out);
// dtype_out_num = aitisa_dtype_to_int(dtype_out);
// device_out_num = aitisa_device_to_int(device_out);
// sample.set_result(ndim_out, dims_out, dtype_out_num,
// device_out_num, data_out, len_out);
// aitisa_destroy(&tensor_in1);
// aitisa_destroy(&tensor_in2);
// aitisa_destroy(&tensor_out);
// }

} //namespace aitisa_api

+ 169
- 0
auto_test/test_code/activation_test.h View File

@@ -0,0 +1,169 @@
#pragma once

// #include <ctime>
#include <string>
#include "auto_test/basic.h"
#include "auto_test/sample.h"
extern "C" {
#include "src/nn/relu.h"
#include "src/nn/sigmoid.h"
#include "src/nn/tanh.h"
#include <math.h>
#include <sys/time.h>
}

namespace aitisa_api {

// Typed fixture preparing three randomly-filled unary inputs, one per
// activation under test (ReLU / Sigmoid / Tanh).  InterfaceType is the
// user-library adapter generated by REGISTER_BASIC.
template <typename InterfaceType>
class ActivationTest : public ::testing::Test{
public:
ActivationTest():
// dtype tags index aitisa_dtypes in basic.cpp: 8 = float, 9 = double.
// (The tag comments below were corrected to match that table.)
input0(/*ndim*/5, /*dims*/{3,6,10,120,600}, /*dtype=double*/9,
/*device1=cuda*/1, /*data*/nullptr, /*len*/0),
input1(/*ndim*/4, /*dims*/{3,40,100,600}, /*dtype=float*/8,
/*device1=cuda*/1, /*data*/nullptr, /*len*/0),
input2(/*ndim*/4, /*dims*/{3,4,120,60}, /*dtype=float*/8,
/*device1=cuda*/1, /*data*/nullptr, /*len*/0){
input[0] = &input0;
input[1] = &input1;
input[2] = &input2;
ninput = 3;
// Allocate and fill each input's data buffer with fixed-seed random
// values sized from its dims and element size.
for(int i=0; i<ninput; i++){
unsigned int input_nelem = 1;
for(unsigned int j=0; j<input[i]->ndim(); j++){
input_nelem *= input[i]->dims()[j];
}
unsigned int input_len = input_nelem * elem_size(input[i]->dtype());
void *input_data = (void*) new char[input_len];
random_assign(input_data, input_len, input[i]->dtype());
input[i]->set_data(input_data, input_len);
}
}
virtual ~ActivationTest(){}
using InputType = Unary_Input;
using UserInterface = InterfaceType;
// inputs
Unary_Input input0;
Unary_Input input1;
Unary_Input input2;
Unary_Input *input[3] = {&input0, &input1, &input2};
// NOTE(review): these display names do not all match the actual inputs
// (e.g. input1_name says Dims{30,40,100,600} but input1 uses
// {3,40,100,600}, and input2 uses random rather than natural data) —
// confirm and correct the strings.
std::string input0_name = "Random Double CUDA with Dims{3,6,10,120,600} for ReLU";
std::string input1_name = "Random Float CUDA with Dims{30,40,100,600} for Sigmoid";
std::string input2_name = "Natural FLoat CUDA with Dims{3,4,120,60} for Tanh";
std::string *input_name[3] = {&input0_name, &input1_name, &input2_name};
int ninput = 3;
};
TYPED_TEST_CASE_P(ActivationTest);

// Runs each activation sample through both implementations and compares results.
// Sample 0 -> ReLU, sample 1 -> Sigmoid, sample 2 -> Tanh (see fixture inputs).
// For every sample: build an aitisa tensor (always CPU) and a user tensor (on the
// input's own device), time each kernel with gettimeofday (milliseconds), then
// assert that ndim / device / dtype / dims / len and all elements agree.
TYPED_TEST_P(ActivationTest, ThreeTests){
using UserDataType = typename TestFixture::UserInterface::UserDataType;
using UserDevice = typename TestFixture::UserInterface::UserDevice;
using UserTensor = typename TestFixture::UserInterface::UserTensor;
using UserFuncs = typename TestFixture::UserInterface;
for(int i=0; i<this->ninput; i++){
struct timeval aitisa_start, aitisa_end, user_start, user_end;
double aitisa_time, user_time;
int64_t aitisa_result_ndim, user_result_ndim;
int64_t *aitisa_result_dims=nullptr, *user_result_dims=nullptr;
float *aitisa_result_data=nullptr, *user_result_data=nullptr;
unsigned int aitisa_result_len, user_result_len;
AITISA_Tensor aitisa_tensor, aitisa_result;
AITISA_DataType aitisa_result_dtype;
AITISA_Device aitisa_result_device;
UserTensor user_tensor, user_result;
UserDataType user_result_dtype;
UserDevice user_result_device;
// aitisa reference run: always executed on CPU regardless of the input's device
AITISA_DataType aitisa_dtype = aitisa_int_to_dtype(this->input[i]->dtype());
AITISA_Device aitisa_device = aitisa_int_to_device(0); // cpu supported only
aitisa_create(aitisa_dtype, aitisa_device, this->input[i]->dims(), this->input[i]->ndim(),
(void*)(this->input[i]->data()), this->input[i]->len(), &aitisa_tensor);
gettimeofday(&aitisa_start,NULL);
switch(i){
case 0: aitisa_relu(aitisa_tensor, &aitisa_result); break;
case 1: aitisa_sigmoid(aitisa_tensor, &aitisa_result); break;
case 2: aitisa_tanh(aitisa_tensor, &aitisa_result); break;
default: break;
}
gettimeofday(&aitisa_end,NULL);
// wall-clock elapsed time in milliseconds
aitisa_time = (aitisa_end.tv_sec - aitisa_start.tv_sec) * 1000.0
+ (aitisa_end.tv_usec - aitisa_start.tv_usec) / 1000.0 ;
aitisa_resolve(aitisa_result, &aitisa_result_dtype, &aitisa_result_device, &aitisa_result_dims,
&aitisa_result_ndim, (void**)&aitisa_result_data, &aitisa_result_len);
// user implementation run, on the device declared by the input
UserDataType user_dtype = UserFuncs::user_int_to_dtype(this->input[i]->dtype());
UserDevice user_device = UserFuncs::user_int_to_device(this->input[i]->device());
UserFuncs::user_create(user_dtype, user_device, this->input[i]->dims(),
this->input[i]->ndim(), this->input[i]->data(),
this->input[i]->len(), &user_tensor);
gettimeofday(&user_start,NULL);
switch(i){
case 0: UserFuncs::user_relu(user_tensor, &user_result); break;
case 1: UserFuncs::user_sigmoid(user_tensor, &user_result); break;
case 2: UserFuncs::user_tanh(user_tensor, &user_result); break;
default: break;
}
gettimeofday(&user_end,NULL);
user_time = (user_end.tv_sec - user_start.tv_sec) * 1000.0
+ (user_end.tv_usec - user_start.tv_usec) / 1000.0;
UserFuncs::user_resolve(user_result, &user_result_dtype, &user_result_device,
&user_result_dims, &user_result_ndim,
(void**)&user_result_data, &user_result_len);
// compare: the user result is expected to live on CUDA for all three samples
int64_t tensor_size = 1;
ASSERT_EQ(aitisa_result_ndim, user_result_ndim);
ASSERT_EQ(
/*CUDA*/1, UserFuncs::user_device_to_int(user_result_device));
ASSERT_EQ(aitisa_dtype_to_int(aitisa_result_dtype),
UserFuncs::user_dtype_to_int(user_result_dtype));
for(int64_t j=0; j<aitisa_result_ndim; j++){
tensor_size *= aitisa_result_dims[j];
ASSERT_EQ(aitisa_result_dims[j], user_result_dims[j]);
}
ASSERT_EQ(aitisa_result_len, user_result_len);
switch(i){
case 0: {
// NOTE(review): sample 0 reinterprets the result buffers as double even though
// the pointers are declared float*, and still uses ASSERT_FLOAT_EQ; confirm
// input0's dtype (declared above this chunk) really is double.
double *aitisa_data = (double*)aitisa_result_data;
double *user_data = (double*)user_result_data;
for(int64_t j=0; j<tensor_size; j++){
ASSERT_FLOAT_EQ(aitisa_data[j], user_data[j]);
}
break;
}
default: {
// samples 1 and 2 compare element-wise as float
float *aitisa_data = (float*)aitisa_result_data;
float *user_data = (float*)user_result_data;
for(int64_t j=0; j<tensor_size; j++){
ASSERT_FLOAT_EQ(aitisa_data[j], user_data[j]);
}
break;
}
}
// print per-implementation timings for this sample
std::cout<< /*GREEN <<*/ "[ Activation sample"<< i << " / "
<< *(this->input_name[i]) << " ] " << /*RESET <<*/ std::endl;
std::cout<< /*GREEN <<*/ "\t[ AITISA ] " << /*RESET <<*/ aitisa_time << " ms" << std::endl;
std::cout<< /*GREEN <<*/ "\t[ USER ] " << /*RESET <<*/ user_time << " ms" << std::endl;
}
}
// Register the single test of the ActivationTest suite for later instantiation.
REGISTER_TYPED_TEST_CASE_P(ActivationTest, ThreeTests);

// Binds user-supplied RELU/SIGMOID/TANH kernels to the typed test suite:
// defines an `Activation` adapter class (deriving from Basic) whose static
// user_* wrappers forward to the given functions, then instantiates
// ActivationTest with it. Invoke once from user test code, at namespace scope.
#define REGISTER_ACTIVATION(RELU, SIGMOID, TANH) \
class Activation : public Basic { \
public: \
static void user_relu(UserTensor tensor, UserTensor* result){ \
RELU(tensor, result); \
} \
static void user_sigmoid(UserTensor tensor, UserTensor* result){ \
SIGMOID(tensor, result); \
} \
static void user_tanh(UserTensor tensor, UserTensor* result){ \
TANH(tensor, result); \
} \
}; \
namespace aitisa_api{ \
INSTANTIATE_TYPED_TEST_CASE_P(aitisa_api, ActivationTest, Activation); \
}

} // namespace aitisa_api

+ 226
- 0
auto_test/test_code/binary_op_test.h View File

@@ -0,0 +1,226 @@
#pragma once

#include <ctime>
#include <string>
#include "auto_test/basic.h"
#include "auto_test/sample.h"
extern "C" {
#include "src/math/binary_op.h"
#include <math.h>
}

namespace aitisa_api {

/**
 * Typed fixture for element-wise binary operator tests (add / sub / mul / div).
 *
 * Holds four Binary_Input samples (one per operator). The constructor allocates
 * each sample's two operand buffers and fills them: sample 1 (the CUDA double
 * sample) gets random values, all others get "natural" (sequential) values.
 * InterfaceType supplies the user implementation's types and entry points.
 */
template <typename InterfaceType>
class BinaryOPTest : public ::testing::Test{
public:
  BinaryOPTest():
    input0(/*ndim1*/2, /*dims1*/{10,6}, /*dtype1=int32*/4,
           /*device1=cpu*/0, /*data1*/nullptr, /*len1*/0,
           /*ndim2*/2, /*dims2*/{10,6}, /*dtype2=int32*/4,
           /*device2=cpu*/0, /*data2*/nullptr, /*len2*/0),
    input1(/*ndim1*/2, /*dims1*/{2013,2018}, /*dtype1=double*/9,
           /*device1=cuda*/1, /*data1*/nullptr, /*len1*/0,
           /*ndim2*/2, /*dims2*/{2013,2018}, /*dtype2=double*/9,
           /*device2=cuda*/1, /*data2*/nullptr, /*len2*/0),
    input2(/*ndim1*/3, /*dims1*/{10,3,2}, /*dtype1=uint64*/7,
           /*device1=cpu*/0, /*data1*/nullptr, /*len1*/0,
           /*ndim2*/3, /*dims2*/{10,3,2}, /*dtype2=uint64*/7,
           /*device2=cpu*/0, /*data2*/nullptr, /*len2*/0),
    input3(/*ndim1*/1, /*dims1*/{5}, /*dtype1=float*/8,
           /*device1=cpu*/0, /*data1*/nullptr, /*len1*/0,
           /*ndim2*/1, /*dims2*/{5}, /*dtype2=float*/8,
           /*device2=cpu*/0, /*data2*/nullptr, /*len2*/0){
    input[0] = &input0;
    input[1] = &input1;
    input[2] = &input2;
    input[3] = &input3;
    ninput = 4;
    for(int i=0; i<ninput; i++){
      // element counts of the two operands
      unsigned int input_nelem1 = 1;
      unsigned int input_nelem2 = 1;
      for(unsigned int j=0; j<input[i]->ndim1(); j++){
        input_nelem1 *= input[i]->dims1()[j];
      }
      for(unsigned int j=0; j<input[i]->ndim2(); j++){
        input_nelem2 *= input[i]->dims2()[j];
      }
      // byte sizes, then allocate and fill each operand buffer
      unsigned int input_len1 = input_nelem1 * elem_size(input[i]->dtype1());
      unsigned int input_len2 = input_nelem2 * elem_size(input[i]->dtype2());
      void *input_data1 = (void*) new char[input_len1];
      void *input_data2 = (void*) new char[input_len2];
      if(i == 1){
        // the large CUDA sample uses random data
        random_assign(input_data1, input_len1, input[i]->dtype1());
        random_assign(input_data2, input_len2, input[i]->dtype2());
      }else{
        natural_assign(input_data1, input_len1, input[i]->dtype1());
        natural_assign(input_data2, input_len2, input[i]->dtype2());
      }
      input[i]->set_data1(input_data1, input_len1);
      input[i]->set_data2(input_data2, input_len2);
    }
  }
  virtual ~BinaryOPTest(){}
  using InputType = Binary_Input;
  using UserInterface = InterfaceType;
  // inputs
  Binary_Input input0; // Natural assigned int32 type input of CPU with dims1{10,6} and dims2{10,6} for add
  Binary_Input input1; // Random assigned double type input of CUDA with dims1{2013,2018} and dims2{2013,2018} for sub
  Binary_Input input2; // Natural assigned uint64 type input of CPU with dims1{10,3,2} and dims2{10,3,2} for mul
  Binary_Input input3; // Natural assigned float type input of CPU with dims1{5} and dims2{5} for div
  Binary_Input *input[4] = {&input0, &input1, &input2, &input3};
  std::string input0_name = "Natural int32 CPU with Dims{10,6} and Dims{10,6} for add";
  std::string input1_name = "Random Double CUDA with Dims{2013,2018} and Dims{2013,2018} for sub";
  std::string input2_name = "Natural uint64 CPU with Dims{10,3,2} and Dims{10,3,2} for mul";
  std::string input3_name = "Natural Float CPU with Dims{5} and Dims{5} for div";
  std::string *input_name[4] = {&input0_name, &input1_name, &input2_name, &input3_name};
  int ninput = 4;
};
// Declare BinaryOPTest as a parameterized (typed) test suite.
TYPED_TEST_CASE_P(BinaryOPTest);

// Runs the four binary-op samples (0:add, 1:sub, 2:mul, 3:div) through both the
// reference aitisa implementation (always CPU) and the user implementation
// (on the input's declared device), times each with std::clock, then compares
// ndim / device / dtype / dims / len and element data.
//
// Fix: the floating-point tolerance checks previously called abs(), which can
// bind to the integer abs(int) overload and truncate sub-1.0 differences to 0,
// making the assertions vacuously true. They now use fabs() from <math.h>.
TYPED_TEST_P(BinaryOPTest, FourTests){
  using UserDataType = typename TestFixture::UserInterface::UserDataType;
  using UserDevice = typename TestFixture::UserInterface::UserDevice;
  using UserTensor = typename TestFixture::UserInterface::UserTensor;
  using UserFuncs = typename TestFixture::UserInterface;
  for(int i=0; i<this->ninput; i++){
    std::clock_t aitisa_start, aitisa_end, user_start, user_end;
    double aitisa_time, user_time;
    int64_t aitisa_result_ndim, user_result_ndim;
    int64_t *aitisa_result_dims=nullptr, *user_result_dims=nullptr;
    float *aitisa_result_data=nullptr, *user_result_data=nullptr;
    unsigned int aitisa_result_len, user_result_len;
    AITISA_Tensor aitisa_tensor1, aitisa_tensor2, aitisa_result;
    AITISA_DataType aitisa_result_dtype;
    AITISA_Device aitisa_result_device;
    UserTensor user_tensor1, user_tensor2, user_result;
    UserDataType user_result_dtype;
    UserDevice user_result_device;
    // aitisa reference run (CPU only)
    AITISA_DataType aitisa_dtype1 = aitisa_int_to_dtype(this->input[i]->dtype1());
    AITISA_DataType aitisa_dtype2 = aitisa_int_to_dtype(this->input[i]->dtype2());
    AITISA_Device aitisa_device1 = aitisa_int_to_device(0); // cpu supported only
    AITISA_Device aitisa_device2 = aitisa_int_to_device(0); // cpu supported only
    aitisa_create(aitisa_dtype1, aitisa_device1, this->input[i]->dims1(), this->input[i]->ndim1(),
                  (void*)(this->input[i]->data1()), this->input[i]->len1(), &aitisa_tensor1);
    aitisa_create(aitisa_dtype2, aitisa_device2, this->input[i]->dims2(), this->input[i]->ndim2(),
                  (void*)(this->input[i]->data2()), this->input[i]->len2(), &aitisa_tensor2);
    aitisa_start = std::clock();
    switch(i){
      case 0: aitisa_add(aitisa_tensor1, aitisa_tensor2, &aitisa_result); break;
      case 1: aitisa_sub(aitisa_tensor1, aitisa_tensor2, &aitisa_result); break;
      case 2: aitisa_mul(aitisa_tensor1, aitisa_tensor2, &aitisa_result); break;
      case 3: aitisa_div(aitisa_tensor1, aitisa_tensor2, &aitisa_result); break;
      default: break;
    }
    aitisa_end = std::clock();
    // CPU time in milliseconds
    aitisa_time = (double)(aitisa_end - aitisa_start) / CLOCKS_PER_SEC * 1000;
    aitisa_resolve(aitisa_result, &aitisa_result_dtype, &aitisa_result_device, &aitisa_result_dims,
                   &aitisa_result_ndim, (void**)&aitisa_result_data, &aitisa_result_len);
    // user implementation run
    UserDataType user_dtype1 = UserFuncs::user_int_to_dtype(this->input[i]->dtype1());
    UserDataType user_dtype2 = UserFuncs::user_int_to_dtype(this->input[i]->dtype2());
    UserDevice user_device1 = UserFuncs::user_int_to_device(this->input[i]->device1());
    UserDevice user_device2 = UserFuncs::user_int_to_device(this->input[i]->device2());
    UserFuncs::user_create(user_dtype1, user_device1, this->input[i]->dims1(),
                           this->input[i]->ndim1(), this->input[i]->data1(),
                           this->input[i]->len1(), &user_tensor1);
    UserFuncs::user_create(user_dtype2, user_device2, this->input[i]->dims2(),
                           this->input[i]->ndim2(), this->input[i]->data2(),
                           this->input[i]->len2(), &user_tensor2);
    user_start = std::clock();
    switch(i){
      case 0: UserFuncs::user_add(user_tensor1, user_tensor2, &user_result); break;
      case 1: UserFuncs::user_sub(user_tensor1, user_tensor2, &user_result); break;
      case 2: UserFuncs::user_mul(user_tensor1, user_tensor2, &user_result); break;
      case 3: UserFuncs::user_div(user_tensor1, user_tensor2, &user_result); break;
      default: break;
    }
    user_end = std::clock();
    user_time = (double)(user_end - user_start) / CLOCKS_PER_SEC * 1000;
    UserFuncs::user_resolve(user_result, &user_result_dtype, &user_result_device,
                            &user_result_dims, &user_result_ndim,
                            (void**)&user_result_data, &user_result_len);
    // compare metadata; sample 1 is the CUDA sample, so only there the user
    // result's device is pinned to CUDA rather than matched against aitisa's
    int64_t tensor_size = 1;
    ASSERT_EQ(aitisa_result_ndim, user_result_ndim);
    if(i == 1){ // CUDA
      ASSERT_EQ(
        /*CUDA*/1, UserFuncs::user_device_to_int(user_result_device));
    }else{ // CPU
      ASSERT_EQ(aitisa_device_to_int(aitisa_result_device),
                UserFuncs::user_device_to_int(user_result_device));
    }
    ASSERT_EQ(aitisa_dtype_to_int(aitisa_result_dtype),
              UserFuncs::user_dtype_to_int(user_result_dtype));
    for(int64_t j=0; j<aitisa_result_ndim; j++){
      tensor_size *= aitisa_result_dims[j];
      ASSERT_EQ(aitisa_result_dims[j], user_result_dims[j]);
    }
    ASSERT_EQ(aitisa_result_len, user_result_len);
    // compare element data with the element type of each sample
    switch(i){
      case 0: { // add: int32, exact
        int32_t *aitisa_data = (int32_t*)aitisa_result_data;
        int32_t *user_data = (int32_t*)user_result_data;
        for(int64_t j=0; j<tensor_size; j++){
          ASSERT_EQ(aitisa_data[j], user_data[j]);
        }
        break;
      }
      case 1: { // sub: double, tolerance 1e-3
        double *aitisa_data = (double*)aitisa_result_data;
        double *user_data = (double*)user_result_data;
        for(int64_t j=0; j<tensor_size; j++){
          ASSERT_TRUE(fabs(aitisa_data[j] - user_data[j]) < 1e-3);
        }
        break;
      }
      case 2: { // mul: uint64, exact
        uint64_t *aitisa_data = (uint64_t*)aitisa_result_data;
        uint64_t *user_data = (uint64_t*)user_result_data;
        for(int64_t j=0; j<tensor_size; j++){
          ASSERT_EQ(aitisa_data[j], user_data[j]);
        }
        break;
      }
      case 3: { // div: float, tolerance 1e-3
        float *aitisa_data = (float*)aitisa_result_data;
        float *user_data = (float*)user_result_data;
        for(int64_t j=0; j<tensor_size; j++){
          ASSERT_TRUE(fabs(aitisa_data[j] - user_data[j]) < 1e-3);
        }
        break;
      }
      default: break;
    }
    // print per-implementation timings for this sample
    std::cout<< /*GREEN <<*/ "[ BinaryOP sample"<< i << " / "
             << *(this->input_name[i]) << " ] " << /*RESET <<*/ std::endl;
    std::cout<< /*GREEN <<*/ "\t[ AITISA ] " << /*RESET <<*/ aitisa_time << " ms" << std::endl;
    std::cout<< /*GREEN <<*/ "\t[ USER ] " << /*RESET <<*/ user_time << " ms" << std::endl;
  }
}
// Register the single test of the BinaryOPTest suite for later instantiation.
REGISTER_TYPED_TEST_CASE_P(BinaryOPTest, FourTests);

// Binds user-supplied ADD/SUB/MUL/DIV kernels to the typed test suite:
// defines a `BinaryOP` adapter class (deriving from Basic) whose static
// user_* wrappers forward to the given functions, then instantiates
// BinaryOPTest with it. Invoke once from user test code, at namespace scope.
#define REGISTER_BINARY_OP(ADD, SUB, MUL, DIV) \
class BinaryOP : public Basic { \
public: \
static void user_add(UserTensor tensor1, UserTensor tensor2, UserTensor* result){ \
ADD(tensor1, tensor2, result); \
} \
static void user_sub(UserTensor tensor1, UserTensor tensor2, UserTensor* result){ \
SUB(tensor1, tensor2, result); \
} \
static void user_mul(UserTensor tensor1, UserTensor tensor2, UserTensor* result){ \
MUL(tensor1, tensor2, result); \
} \
static void user_div(UserTensor tensor1, UserTensor tensor2, UserTensor* result){ \
DIV(tensor1, tensor2, result); \
} \
}; \
namespace aitisa_api{ \
INSTANTIATE_TYPED_TEST_CASE_P(aitisa_api, BinaryOPTest, BinaryOP); \
}

} // namespace aitisa_api

+ 230
- 0
auto_test/test_code/conv_test.h View File

@@ -0,0 +1,230 @@
#pragma once

#include <string>
#include "auto_test/basic.h"
#include "auto_test/sample.h"
extern "C" {
#include "src/nn/conv.h"
#include <math.h>
#include <sys/time.h>
}

namespace aitisa_api {

namespace {

// Extends Binary_Input (input tensor + filter tensor) with the convolution
// hyper-parameters: per-spatial-dimension stride / padding / dilation arrays
// (each of length ndim1 - 2) and a group count. Owns the three arrays.
//
// Fixes vs. the original: operator= previously fell off the end without
// returning *this (undefined behavior), leaked the previously-allocated
// arrays, never copied groups_, and had no self-assignment guard.
class Conv_Input : public Binary_Input {
public:
  Conv_Input() {}
  // Takes ownership of caller-allocated stride/padding/dilation arrays.
  Conv_Input(int64_t ndim1, int64_t *dims1, int dtype1,
             int device1, void *data1, unsigned int len1,
             int64_t ndim2, int64_t *dims2, int dtype2,
             int device2, void *data2, unsigned int len2,
             int *stride, int *padding, int *dilation, int groups):
    Binary_Input(ndim1, dims1, dtype1, device1, data1, len1,
                 ndim2, dims2, dtype2, device2, data2, len2),
    stride_(stride), padding_(padding), dilation_(dilation), groups_(groups) {}
  // Copies stride/padding/dilation out of vectors into owned arrays of
  // length ndim1 - 2 (the number of spatial dimensions).
  Conv_Input(int64_t ndim1, std::vector<int64_t> dims1, int dtype1,
             int device1, void *data1, unsigned int len1,
             int64_t ndim2, std::vector<int64_t> dims2, int dtype2,
             int device2, void *data2, unsigned int len2,
             std::vector<int> stride, std::vector<int> padding,
             std::vector<int> dilation, int groups):
    Binary_Input(ndim1, dims1, dtype1, device1, data1, len1,
                 ndim2, dims2, dtype2, device2, data2, len2),
    stride_(nullptr), padding_(nullptr), dilation_(nullptr), groups_(groups) {
    int spatial_len = ndim1 - 2;
    this->stride_ = new int[spatial_len];
    this->padding_ = new int[spatial_len];
    this->dilation_ = new int[spatial_len];
    for(int i=0; i<spatial_len; i++){
      this->stride_[i] = stride[i];
      this->padding_[i] = padding[i];
      this->dilation_[i] = dilation[i];
    }
  }
  virtual ~Conv_Input() {
    delete [] stride_;
    delete [] padding_;
    delete [] dilation_;
  }
  Conv_Input & operator=(Conv_Input& right) {
    if(this == &right) return *this;   // self-assignment guard
    int spatial_len = right.ndim1() - 2;
    Binary_Input& left = (Binary_Input&)(*this);
    left = (Binary_Input&)right;
    // release old buffers before allocating new ones (was a leak)
    delete [] stride_;
    delete [] padding_;
    delete [] dilation_;
    this->stride_ = new int[spatial_len];
    this->padding_ = new int[spatial_len];
    this->dilation_ = new int[spatial_len];
    memcpy(this->stride_, right.stride(), spatial_len*sizeof(int));
    memcpy(this->padding_, right.padding(), spatial_len*sizeof(int));
    memcpy(this->dilation_, right.dilation(), spatial_len*sizeof(int));
    this->groups_ = right.groups();    // was not copied before
    return *this;                      // was missing (UB)
  }
  int* stride() { return stride_; }
  int* padding() { return padding_; }
  int* dilation() { return dilation_; }
  int groups() { return groups_; }
private:
  int *stride_ = nullptr;
  int *padding_ = nullptr;
  int *dilation_ = nullptr;
  int groups_ = 1;
};

} // namespace anonymous

/**
 * Typed fixture for 2-D convolution tests. Holds two Conv_Input samples, both
 * random-float CUDA inputs; the constructor allocates and randomly fills the
 * input and filter buffers of each sample.
 * InterfaceType supplies the user implementation's types and entry points.
 */
template <typename InterfaceType>
class ConvTest : public ::testing::Test{
public:
  ConvTest():
    input0(/*ndim1*/4, /*dims1*/{6,32,124,128}, /*dtype1=float*/8,
           /*device1=cuda*/1, /*data1*/nullptr, /*len1*/0,
           /*ndim2*/4, /*dims2*/{64,32,2,2}, /*dtype2=float*/8,
           /*device2=cuda*/1, /*data2*/nullptr, /*len2*/0,
           /*stride*/{2,2}, /*padding*/{0,0}, /*dilation*/{1,1},
           /*groups*/1),
    input1(/*ndim1*/4, /*dims1*/{7,16,100,180}, /*dtype1=float*/8,
           /*device1=cuda*/1, /*data1*/nullptr, /*len1*/0,
           /*ndim2*/4, /*dims2*/{32,16,3,3}, /*dtype2=float*/8,
           /*device2=cuda*/1, /*data2*/nullptr, /*len2*/0,
           /*stride*/{3,3}, /*padding*/{1,0}, /*dilation*/{2,2},
           /*groups*/1){
    input[0] = &input0;
    input[1] = &input1;
    ninput = 2;
    for(int i=0; i<ninput; i++){
      // element counts of input and filter
      unsigned int input_nelem1 = 1;
      unsigned int input_nelem2 = 1;
      for(unsigned int j=0; j<input[i]->ndim1(); j++){
        input_nelem1 *= input[i]->dims1()[j];
      }
      for(unsigned int j=0; j<input[i]->ndim2(); j++){
        input_nelem2 *= input[i]->dims2()[j];
      }
      // byte sizes, then allocate and randomly fill both buffers
      unsigned int input_len1 = input_nelem1 * elem_size(input[i]->dtype1());
      unsigned int input_len2 = input_nelem2 * elem_size(input[i]->dtype2());
      void *input_data1 = (void*) new char[input_len1];
      void *input_data2 = (void*) new char[input_len2];
      random_assign(input_data1, input_len1, input[i]->dtype1());
      random_assign(input_data2, input_len2, input[i]->dtype2());
      input[i]->set_data1(input_data1, input_len1);
      input[i]->set_data2(input_data2, input_len2);
    }
  }
  virtual ~ConvTest(){}
  using InputType = Conv_Input;
  using UserInterface = InterfaceType;
  // Thin forwarding wrapper around the reference implementation.
  static void aitisa_kernel(const AITISA_Tensor input, const AITISA_Tensor filter,
                            int *stride, const int *padding, const int *dilation,
                            const int groups, AITISA_Tensor *output){
    aitisa_conv(input, filter, stride, padding, dilation, groups, output);
  }
  // inputs
  Conv_Input input0; // Random float CUDA input: InputDims{6,32,124,128}, FilterDims{64,32,2,2}, stride{2,2}, padding{0,0}, dilation{1,1}
  Conv_Input input1; // Random float CUDA input: InputDims{7,16,100,180}, FilterDims{32,16,3,3}, stride{3,3}, padding{1,0}, dilation{2,2}
  Conv_Input *input[2] = {&input0, &input1};
  std::string input0_name = "Random float of CUDA with InputDims{6,32,124,128}, FilterDims{64,32,2,2}, stride{2,2}, padding{0,0}, dilation{1,1}";
  std::string input1_name = "Random float of CUDA with InputDims{7,16,100,180}, FilterDims{32,16,3,3}, stride{3,3}, padding{1,0}, dilation{2,2}";
  std::string *input_name[2] = {&input0_name, &input1_name};
  int ninput = 2;
};
// Declare ConvTest as a parameterized (typed) test suite.
TYPED_TEST_CASE_P(ConvTest);

// Runs both convolution samples through the reference aitisa implementation
// (always CPU) and the user implementation, times each with gettimeofday
// (milliseconds), then compares ndim / device / dtype / dims / len and the
// element data with a 1e-3 absolute tolerance.
//
// Fix: the tolerance check previously called abs(), which can bind to the
// integer abs(int) overload and truncate sub-1.0 differences to 0, making the
// assertion vacuously true. It now uses fabs() from <math.h>.
TYPED_TEST_P(ConvTest, TwoTests){
  using UserDataType = typename TestFixture::UserInterface::UserDataType;
  using UserDevice = typename TestFixture::UserInterface::UserDevice;
  using UserTensor = typename TestFixture::UserInterface::UserTensor;
  using UserFuncs = typename TestFixture::UserInterface;
  for(int i=0; i<this->ninput; i++){
    struct timeval aitisa_start, aitisa_end, user_start, user_end;
    double aitisa_time, user_time;
    int64_t aitisa_result_ndim, user_result_ndim;
    int64_t *aitisa_result_dims=nullptr, *user_result_dims=nullptr;
    float *aitisa_result_data=nullptr, *user_result_data=nullptr;
    unsigned int aitisa_result_len, user_result_len;
    AITISA_Tensor aitisa_tensor1, aitisa_tensor2, aitisa_result;
    AITISA_DataType aitisa_result_dtype;
    AITISA_Device aitisa_result_device;
    UserTensor user_tensor1, user_tensor2, user_result;
    UserDataType user_result_dtype;
    UserDevice user_result_device;
    // aitisa reference run (CPU only)
    AITISA_DataType aitisa_dtype1 = aitisa_int_to_dtype(this->input[i]->dtype1());
    AITISA_DataType aitisa_dtype2 = aitisa_int_to_dtype(this->input[i]->dtype2());
    AITISA_Device aitisa_device1 = aitisa_int_to_device(0); // cpu supported only
    AITISA_Device aitisa_device2 = aitisa_int_to_device(0); // cpu supported only
    aitisa_create(aitisa_dtype1, aitisa_device1, this->input[i]->dims1(), this->input[i]->ndim1(),
                  (void*)(this->input[i]->data1()), this->input[i]->len1(), &aitisa_tensor1);
    aitisa_create(aitisa_dtype2, aitisa_device2, this->input[i]->dims2(), this->input[i]->ndim2(),
                  (void*)(this->input[i]->data2()), this->input[i]->len2(), &aitisa_tensor2);
    gettimeofday(&aitisa_start,NULL);
    aitisa_conv(aitisa_tensor1, aitisa_tensor2, this->input[i]->stride(), this->input[i]->padding(),
                this->input[i]->dilation(), this->input[i]->groups(), &aitisa_result);
    gettimeofday(&aitisa_end,NULL);
    // wall-clock elapsed time in milliseconds
    aitisa_time = (aitisa_end.tv_sec - aitisa_start.tv_sec) * 1000.0
                + (aitisa_end.tv_usec - aitisa_start.tv_usec) / 1000.0;
    aitisa_resolve(aitisa_result, &aitisa_result_dtype, &aitisa_result_device, &aitisa_result_dims,
                   &aitisa_result_ndim, (void**)&aitisa_result_data, &aitisa_result_len);
    // user implementation run, on the device declared by the input
    UserDataType user_dtype1 = UserFuncs::user_int_to_dtype(this->input[i]->dtype1());
    UserDataType user_dtype2 = UserFuncs::user_int_to_dtype(this->input[i]->dtype2());
    UserDevice user_device1 = UserFuncs::user_int_to_device(this->input[i]->device1());
    UserDevice user_device2 = UserFuncs::user_int_to_device(this->input[i]->device2());
    UserFuncs::user_create(user_dtype1, user_device1, this->input[i]->dims1(),
                           this->input[i]->ndim1(), this->input[i]->data1(),
                           this->input[i]->len1(), &user_tensor1);
    UserFuncs::user_create(user_dtype2, user_device2, this->input[i]->dims2(),
                           this->input[i]->ndim2(), this->input[i]->data2(),
                           this->input[i]->len2(), &user_tensor2);
    gettimeofday(&user_start,NULL);
    UserFuncs::user_conv(user_tensor1, user_tensor2, this->input[i]->stride(), this->input[i]->padding(),
                         this->input[i]->dilation(), this->input[i]->groups(), &user_result);
    gettimeofday(&user_end,NULL);
    user_time = (user_end.tv_sec - user_start.tv_sec) * 1000.0
              + (user_end.tv_usec - user_start.tv_usec) / 1000.0;
    UserFuncs::user_resolve(user_result, &user_result_dtype, &user_result_device,
                            &user_result_dims, &user_result_ndim,
                            (void**)&user_result_data, &user_result_len);
    // compare: both samples are CUDA inputs, so the user result must be CUDA
    int64_t tensor_size = 1;
    ASSERT_EQ(aitisa_result_ndim, user_result_ndim);
    ASSERT_EQ(/*CUDA*/1, UserFuncs::user_device_to_int(user_result_device));
    ASSERT_EQ(aitisa_dtype_to_int(aitisa_result_dtype),
              UserFuncs::user_dtype_to_int(user_result_dtype));
    for(int64_t j=0; j<aitisa_result_ndim; j++){
      tensor_size *= aitisa_result_dims[j];
      ASSERT_EQ(aitisa_result_dims[j], user_result_dims[j]);
    }
    ASSERT_EQ(aitisa_result_len, user_result_len);
    // element-wise float comparison with absolute tolerance
    float *aitisa_data = (float*)aitisa_result_data;
    float *user_data = (float*)user_result_data;
    for(int64_t j=0; j<tensor_size; j++){
      ASSERT_TRUE(fabs(aitisa_data[j] - user_data[j]) < 1e-3);
    }
    // print per-implementation timings for this sample
    std::cout<< /*GREEN <<*/ "[ Conv sample"<< i << " / "
             << *(this->input_name[i]) << " ] " << /*RESET <<*/ std::endl;
    std::cout<< /*GREEN <<*/ "\t[ AITISA ] " << /*RESET <<*/ aitisa_time << " ms" << std::endl;
    std::cout<< /*GREEN <<*/ "\t[ USER ] " << /*RESET <<*/ user_time << " ms" << std::endl;
  }
}
// Register the single test of the ConvTest suite for later instantiation.
REGISTER_TYPED_TEST_CASE_P(ConvTest, TwoTests);

// Binds a user-supplied convolution kernel to the typed test suite: defines a
// `Conv` adapter class (deriving from Basic) whose static user_conv wrapper
// forwards to the given function, then instantiates ConvTest with it.
// Invoke once from user test code, at namespace scope.
#define REGISTER_CONV(CONV) \
class Conv : public Basic { \
public: \
static void user_conv(UserTensor input, UserTensor filter, const int *stride, \
const int *padding, const int *dilation, const int groups, \
UserTensor *output){ \
CONV(input, filter, stride, padding, dilation, groups, output); \
} \
}; \
namespace aitisa_api{ \
INSTANTIATE_TYPED_TEST_CASE_P(aitisa_api, ConvTest, Conv); \
}

} // namespace aitisa_api

+ 224
- 0
auto_test/test_code/matmul_test.h View File

@@ -0,0 +1,224 @@
#pragma once

#include <ctime>
#include <string>
#include "auto_test/basic.h"
#include "auto_test/sample.h"
extern "C" {
#include "src/math/matmul.h"
#include <math.h>
}

namespace aitisa_api {

/**
 * Typed fixture for matrix-multiplication tests covering vector·vector,
 * matrix·matrix, vector·matrix, matrix·vector, and broadcast/batched cases.
 * The constructor allocates each sample's two operand buffers and fills them:
 * sample 1 (the large CUDA double sample) gets random values, all others get
 * "natural" (sequential) values.
 * InterfaceType supplies the user implementation's types and entry points.
 */
template <typename InterfaceType>
class MatmulTest : public ::testing::Test{
public:
  MatmulTest():
    input0(/*ndim1*/1, /*dims1*/{10}, /*dtype1=float*/8,
           /*device1=cpu*/0, /*data1*/nullptr, /*len1*/0,
           /*ndim2*/1, /*dims2*/{10}, /*dtype2=float*/8,
           /*device2=cpu*/0, /*data2*/nullptr, /*len2*/0),
    input1(/*ndim1*/2, /*dims1*/{1995,2020}, /*dtype1=double*/9,
           /*device1=cuda*/1, /*data1*/nullptr, /*len1*/0,
           /*ndim2*/2, /*dims2*/{2020,2018}, /*dtype2=double*/9,
           /*device2=cuda*/1, /*data2*/nullptr, /*len2*/0),
    input2(/*ndim1*/1, /*dims1*/{10}, /*dtype1=float*/8,
           /*device1=cpu*/0, /*data1*/nullptr, /*len1*/0,
           /*ndim2*/2, /*dims2*/{10,5}, /*dtype2=float*/8,
           /*device2=cpu*/0, /*data2*/nullptr, /*len2*/0),
    input3(/*ndim1*/2, /*dims1*/{10,5}, /*dtype1=float*/8,
           /*device1=cpu*/0, /*data1*/nullptr, /*len1*/0,
           /*ndim2*/1, /*dims2*/{5}, /*dtype2=float*/8,
           /*device2=cpu*/0, /*data2*/nullptr, /*len2*/0),
    input4(/*ndim1*/1, /*dims1*/{3}, /*dtype1=float*/8,
           /*device1=cpu*/0, /*data1*/nullptr, /*len1*/0,
           /*ndim2*/5, /*dims2*/{2,2,4,3,2}, /*dtype2=float*/8,
           /*device2=cpu*/0, /*data2*/nullptr, /*len2*/0),
    input5(/*ndim1*/5, /*dims1*/{2,2,4,2,3}, /*dtype1=float*/8,
           /*device1=cpu*/0, /*data1*/nullptr, /*len1*/0,
           /*ndim2*/1, /*dims2*/{3}, /*dtype2=float*/8,
           /*device2=cpu*/0, /*data2*/nullptr, /*len2*/0),
    input6(/*ndim1*/3, /*dims1*/{2,4,3}, /*dtype1=float*/8,
           /*device1=cpu*/0, /*data1*/nullptr, /*len1*/0,
           /*ndim2*/4, /*dims2*/{3,2,3,2}, /*dtype2=float*/8,
           /*device2=cpu*/0, /*data2*/nullptr, /*len2*/0){
    input[0] = &input0;
    input[1] = &input1;
    input[2] = &input2;
    input[3] = &input3;
    input[4] = &input4;
    input[5] = &input5;
    input[6] = &input6;
    ninput = 7;
    for(int i=0; i<ninput; i++){
      // element counts of the two operands
      unsigned int input_nelem1 = 1;
      unsigned int input_nelem2 = 1;
      for(unsigned int j=0; j<input[i]->ndim1(); j++){
        input_nelem1 *= input[i]->dims1()[j];
      }
      for(unsigned int j=0; j<input[i]->ndim2(); j++){
        input_nelem2 *= input[i]->dims2()[j];
      }
      // byte sizes, then allocate and fill each operand buffer
      unsigned int input_len1 = input_nelem1 * elem_size(input[i]->dtype1());
      unsigned int input_len2 = input_nelem2 * elem_size(input[i]->dtype2());
      void *input_data1 = (void*) new char[input_len1];
      void *input_data2 = (void*) new char[input_len2];
      if(i == 1){
        // the large CUDA sample uses random data
        random_assign(input_data1, input_len1, input[i]->dtype1());
        random_assign(input_data2, input_len2, input[i]->dtype2());
      }else{
        natural_assign(input_data1, input_len1, input[i]->dtype1());
        natural_assign(input_data2, input_len2, input[i]->dtype2());
      }
      input[i]->set_data1(input_data1, input_len1);
      input[i]->set_data2(input_data2, input_len2);
    }
  }
  virtual ~MatmulTest(){}
  using InputType = Binary_Input;
  using UserInterface = InterfaceType;
  // Thin forwarding wrapper around the reference implementation.
  static void aitisa_kernel(AITISA_Tensor in1, AITISA_Tensor in2, AITISA_Tensor *out){
    aitisa_matmul(in1, in2, out);
  }
  // inputs
  Binary_Input input0; // Natural assigned float type input of CPU with dims1{10} and dims2{10}
  Binary_Input input1; // Random assigned double type input of CUDA with dims1{1995,2020} and dims2{2020,2018}
  Binary_Input input2; // Natural assigned float type input of CPU with dims1{10} and dims2{10,5}
  Binary_Input input3; // Natural assigned float type input of CPU with dims1{10,5} and dims2{5}
  Binary_Input input4; // Natural assigned float type input of CPU with dims1{3} and dims2{2,2,4,3,2}
  Binary_Input input5; // Natural assigned float type input of CPU with dims1{2,2,4,2,3} and dims2{3}
  Binary_Input input6; // Natural assigned float type input of CPU with dims1{2,4,3} and dims2{3,2,3,2}
  Binary_Input *input[7] = {&input0, &input1, &input2, &input3, &input4, &input5, &input6};
  std::string input0_name = "Natural Float CPU with Dims{10} and Dims{10}";
  std::string input1_name = "Random Double CUDA with Dims{1995,2020} and Dims{2020,2018}";
  std::string input2_name = "Natural Float CPU with Dims{10} and Dims{10,5}";
  std::string input3_name = "Natural Float CPU with Dims{10,5} and Dims{5}";
  std::string input4_name = "Natural Float CPU with Dims{3} and Dims{2,2,4,3,2}";
  std::string input5_name = "Natural Float CPU with Dims{2,2,4,2,3} and Dims{3}";
  std::string input6_name = "Natural Float CPU with Dims{2,4,3} and Dims{3,2,3,2}";
  std::string *input_name[7] = {&input0_name, &input1_name, &input2_name, &input3_name,
                                &input4_name, &input5_name, &input6_name};
  int ninput = 7;
};
// Declare MatmulTest as a parameterized (typed) test suite.
TYPED_TEST_CASE_P(MatmulTest);

// Runs all seven matmul samples through both the reference aitisa
// implementation (always CPU) and the user implementation, times each with
// std::clock (milliseconds), then compares ndim / device / dtype / dims / len
// and element data (1e-3 absolute tolerance for floating-point samples).
//
// Fix: the tolerance checks previously called abs(), which can bind to the
// integer abs(int) overload and truncate sub-1.0 differences to 0, making the
// assertions vacuously true. They now use fabs() from <math.h>.
TYPED_TEST_P(MatmulTest, SevenTests){
  using UserDataType = typename TestFixture::UserInterface::UserDataType;
  using UserDevice = typename TestFixture::UserInterface::UserDevice;
  using UserTensor = typename TestFixture::UserInterface::UserTensor;
  using UserFuncs = typename TestFixture::UserInterface;
  for(int i=0; i<this->ninput; i++){
    std::clock_t aitisa_start, aitisa_end, user_start, user_end;
    double aitisa_time, user_time;
    int64_t aitisa_result_ndim, user_result_ndim;
    int64_t *aitisa_result_dims=nullptr, *user_result_dims=nullptr;
    float *aitisa_result_data=nullptr, *user_result_data=nullptr;
    unsigned int aitisa_result_len, user_result_len;
    AITISA_Tensor aitisa_tensor1, aitisa_tensor2, aitisa_result;
    AITISA_DataType aitisa_result_dtype;
    AITISA_Device aitisa_result_device;
    UserTensor user_tensor1, user_tensor2, user_result;
    UserDataType user_result_dtype;
    UserDevice user_result_device;
    // aitisa reference run (CPU only)
    AITISA_DataType aitisa_dtype1 = aitisa_int_to_dtype(this->input[i]->dtype1());
    AITISA_DataType aitisa_dtype2 = aitisa_int_to_dtype(this->input[i]->dtype2());
    AITISA_Device aitisa_device1 = aitisa_int_to_device(0); // cpu supported only
    AITISA_Device aitisa_device2 = aitisa_int_to_device(0); // cpu supported only
    aitisa_create(aitisa_dtype1, aitisa_device1, this->input[i]->dims1(), this->input[i]->ndim1(),
                  (void*)(this->input[i]->data1()), this->input[i]->len1(), &aitisa_tensor1);
    aitisa_create(aitisa_dtype2, aitisa_device2, this->input[i]->dims2(), this->input[i]->ndim2(),
                  (void*)(this->input[i]->data2()), this->input[i]->len2(), &aitisa_tensor2);
    aitisa_start = std::clock();
    aitisa_matmul(aitisa_tensor1, aitisa_tensor2, &aitisa_result);
    aitisa_end = std::clock();
    // CPU time in milliseconds
    aitisa_time = 1000.0 * (aitisa_end - aitisa_start) / static_cast<double>(CLOCKS_PER_SEC);
    aitisa_resolve(aitisa_result, &aitisa_result_dtype, &aitisa_result_device, &aitisa_result_dims,
                   &aitisa_result_ndim, (void**)&aitisa_result_data, &aitisa_result_len);
    // user implementation run, on the device declared by the input
    UserDataType user_dtype1 = UserFuncs::user_int_to_dtype(this->input[i]->dtype1());
    UserDataType user_dtype2 = UserFuncs::user_int_to_dtype(this->input[i]->dtype2());
    UserDevice user_device1 = UserFuncs::user_int_to_device(this->input[i]->device1());
    UserDevice user_device2 = UserFuncs::user_int_to_device(this->input[i]->device2());
    UserFuncs::user_create(user_dtype1, user_device1, this->input[i]->dims1(),
                           this->input[i]->ndim1(), this->input[i]->data1(),
                           this->input[i]->len1(), &user_tensor1);
    UserFuncs::user_create(user_dtype2, user_device2, this->input[i]->dims2(),
                           this->input[i]->ndim2(), this->input[i]->data2(),
                           this->input[i]->len2(), &user_tensor2);
    user_start = std::clock();
    UserFuncs::user_matmul(user_tensor1, user_tensor2, &user_result);
    user_end = std::clock();
    user_time = 1000.0 * (user_end - user_start) / static_cast<double>(CLOCKS_PER_SEC);
    UserFuncs::user_resolve(user_result, &user_result_dtype, &user_result_device,
                            &user_result_dims, &user_result_ndim,
                            (void**)&user_result_data, &user_result_len);
    // compare metadata; sample 1 is the CUDA sample, so only there the user
    // result's device is pinned to CUDA rather than matched against aitisa's
    int64_t tensor_size = 1;
    ASSERT_EQ(aitisa_result_ndim, user_result_ndim);
    if(i == 1){ // CUDA
      ASSERT_EQ(
        /*CUDA*/1, UserFuncs::user_device_to_int(user_result_device));
    }else{ // CPU
      ASSERT_EQ(aitisa_device_to_int(aitisa_result_device),
                UserFuncs::user_device_to_int(user_result_device));
    }
    ASSERT_EQ(aitisa_dtype_to_int(aitisa_result_dtype),
              UserFuncs::user_dtype_to_int(user_result_dtype));
    for(int64_t j=0; j<aitisa_result_ndim; j++){
      tensor_size *= aitisa_result_dims[j];
      ASSERT_EQ(aitisa_result_dims[j], user_result_dims[j]);
    }
    ASSERT_EQ(aitisa_result_len, user_result_len);
    if(i == 1){ // sample 1 is double; all others are float
      double *aitisa_data = (double*)aitisa_result_data;
      double *user_data = (double*)user_result_data;
      for(int64_t j=0; j<tensor_size; j++){
        ASSERT_TRUE(fabs(aitisa_data[j] - user_data[j]) < 1e-3);
      }
    }else{ // Float
      for(int64_t j=0; j<tensor_size; j++){
        ASSERT_TRUE(fabs(aitisa_result_data[j] - user_result_data[j]) < 1e-3);
      }
    }
    // print per-implementation timings for this sample
    std::cout<< /*GREEN <<*/ "[ Matmul sample"<< i << " / "
             << *(this->input_name[i]) << " ] " << /*RESET <<*/ std::endl;
    std::cout<< /*GREEN <<*/ "\t[ AITISA ] " << /*RESET <<*/ aitisa_time << " ms" << std::endl;
    std::cout<< /*GREEN <<*/ "\t[ USER ] " << /*RESET <<*/ user_time << " ms" << std::endl;
  }
}
// Register the single test of the MatmulTest suite for later instantiation.
REGISTER_TYPED_TEST_CASE_P(MatmulTest, SevenTests);

// Builds a Sample describing matmul test input number `sample_num`.
// NOTE(review): this uses a one-argument get_binary_sample overload that
// returns by value, while GET_SAMPLE_MATMUL below uses a two-argument
// output-parameter form — confirm both overloads exist in auto_test/sample.h.
Sample<Binary_Input> get_sample_matmul(int sample_num){
return get_binary_sample<MatmulTest<void>>(sample_num);
}

// Declares a Sample variable named SAMPLE and fills it with matmul test
// input number NUM (output-parameter form of get_binary_sample).
#define GET_SAMPLE_MATMUL(SAMPLE, NUM) \
aitisa_api::Sample<aitisa_api::Binary_Input> SAMPLE; \
aitisa_api::get_binary_sample<aitisa_api::MatmulTest<void>>(SAMPLE, NUM);

// Binds a user-supplied matmul kernel to the typed test suite: defines a
// `Matmul` adapter class (deriving from Basic) whose static user_matmul
// wrapper forwards to the given function, then instantiates MatmulTest with
// it. Invoke once from user test code, at namespace scope.
#define REGISTER_MATMUL(MATMUL_FUNC) \
class Matmul : public Basic { \
public: \
static void user_matmul(UserTensor tensor1, UserTensor tensor2, UserTensor* result){ \
MATMUL_FUNC(tensor1, tensor2, result); \
} \
}; \
namespace aitisa_api{ \
INSTANTIATE_TYPED_TEST_CASE_P(aitisa_api, MatmulTest, Matmul); \
}

} // namespace aitisa_api

+ 191
- 0
cmake/local/DownloadProject.cmake View File

@@ -0,0 +1,191 @@
# Distributed under the OSI-approved MIT License. See accompanying
# file LICENSE or https://github.com/Crascit/DownloadProject for details.
#
# MODULE: DownloadProject
#
# PROVIDES:
# download_project( PROJ projectName
# [PREFIX prefixDir]
# [DOWNLOAD_DIR downloadDir]
# [SOURCE_DIR srcDir]
# [BINARY_DIR binDir]
# [QUIET]
# ...
# )
#
# Provides the ability to download and unpack a tarball, zip file, git repository,
# etc. at configure time (i.e. when the cmake command is run). How the downloaded
# and unpacked contents are used is up to the caller, but the motivating case is
# to download source code which can then be included directly in the build with
# add_subdirectory() after the call to download_project(). Source and build
# directories are set up with this in mind.
#
# The PROJ argument is required. The projectName value will be used to construct
# the following variables upon exit (obviously replace projectName with its actual
# value):
#
# projectName_SOURCE_DIR
# projectName_BINARY_DIR
#
# The SOURCE_DIR and BINARY_DIR arguments are optional and would not typically
# need to be provided. They can be specified if you want the downloaded source
# and build directories to be located in a specific place. The contents of
# projectName_SOURCE_DIR and projectName_BINARY_DIR will be populated with the
# locations used whether you provide SOURCE_DIR/BINARY_DIR or not.
#
# The DOWNLOAD_DIR argument does not normally need to be set. It controls the
# location of the temporary CMake build used to perform the download.
#
# The PREFIX argument can be provided to change the base location of the default
# values of DOWNLOAD_DIR, SOURCE_DIR and BINARY_DIR. If all of those three arguments
# are provided, then PREFIX will have no effect. The default value for PREFIX is
# CMAKE_BINARY_DIR.
#
# The QUIET option can be given if you do not want to show the output associated
# with downloading the specified project.
#
# In addition to the above, any other options are passed through unmodified to
# ExternalProject_Add() to perform the actual download, patch and update steps.
# The following ExternalProject_Add() options are explicitly prohibited (they
# are reserved for use by the download_project() command):
#
# CONFIGURE_COMMAND
# BUILD_COMMAND
# INSTALL_COMMAND
# TEST_COMMAND
#
# Only those ExternalProject_Add() arguments which relate to downloading, patching
# and updating of the project sources are intended to be used. Also note that at
# least one set of download-related arguments are required.
#
# If using CMake 3.2 or later, the UPDATE_DISCONNECTED option can be used to
# prevent a check at the remote end for changes every time CMake is run
# after the first successful download. See the documentation of the ExternalProject
# module for more information. It is likely you will want to use this option if it
# is available to you. Note, however, that the ExternalProject implementation contains
# bugs which result in incorrect handling of the UPDATE_DISCONNECTED option when
# using the URL download method or when specifying a SOURCE_DIR with no download
# method. Fixes for these have been created, the last of which is scheduled for
# inclusion in CMake 3.8.0. Details can be found here:
#
# https://gitlab.kitware.com/cmake/cmake/commit/bdca68388bd57f8302d3c1d83d691034b7ffa70c
# https://gitlab.kitware.com/cmake/cmake/issues/16428
#
# If you experience build errors related to the update step, consider avoiding
# the use of UPDATE_DISCONNECTED.
#
# EXAMPLE USAGE:
#
# include(DownloadProject)
# download_project(PROJ googletest
# GIT_REPOSITORY https://github.com/google/googletest.git
# GIT_TAG master
# UPDATE_DISCONNECTED 1
# QUIET
# )
#
# add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR})
#
#========================================================================================


# Remember the directory containing this module so the companion template
# (DownloadProject.cmake.in) can be located when download_project() runs.
set(_DownloadProjectDir "${CMAKE_CURRENT_LIST_DIR}")

include(CMakeParseArguments)

# Download and unpack an external project at configure time.
# Parses the arguments documented in the module header, generates a small
# helper project from DownloadProject.cmake.in, then configures and builds
# that helper (its build performs the actual download). On return,
# <PROJ>_SOURCE_DIR and <PROJ>_BINARY_DIR are set in the caller's scope.
function(download_project)

set(options QUIET)
set(oneValueArgs
PROJ
PREFIX
DOWNLOAD_DIR
SOURCE_DIR
BINARY_DIR
TMP_DIR
STAMP_DIR
# Prevent the following from being passed through
CONFIGURE_COMMAND
BUILD_COMMAND
INSTALL_COMMAND
TEST_COMMAND
)
set(multiValueArgs "")

# Unrecognized arguments land in DL_ARGS_UNPARSED_ARGUMENTS and are later
# forwarded verbatim to ExternalProject_Add() by the generated template.
cmake_parse_arguments(DL_ARGS "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

# Hide output if requested
if (DL_ARGS_QUIET)
set(OUTPUT_QUIET "OUTPUT_QUIET")
else()
unset(OUTPUT_QUIET)
message(STATUS "Downloading/updating ${DL_ARGS_PROJ}")
endif()

# Set up where we will put our temporary CMakeLists.txt file and also
# the base point below which the default source and binary dirs will be.
# The prefix must always be an absolute path.
if (NOT DL_ARGS_PREFIX)
set(DL_ARGS_PREFIX "${CMAKE_BINARY_DIR}")
else()
get_filename_component(DL_ARGS_PREFIX "${DL_ARGS_PREFIX}" ABSOLUTE
BASE_DIR "${CMAKE_CURRENT_BINARY_DIR}")
endif()
if (NOT DL_ARGS_DOWNLOAD_DIR)
set(DL_ARGS_DOWNLOAD_DIR "${DL_ARGS_PREFIX}/${DL_ARGS_PROJ}-download")
endif()

# Ensure the caller can know where to find the source and build directories
if (NOT DL_ARGS_SOURCE_DIR)
set(DL_ARGS_SOURCE_DIR "${DL_ARGS_PREFIX}/${DL_ARGS_PROJ}-src")
endif()
if (NOT DL_ARGS_BINARY_DIR)
set(DL_ARGS_BINARY_DIR "${DL_ARGS_PREFIX}/${DL_ARGS_PROJ}-build")
endif()
# Ensure the caller can know where to find the tmp and stamp directories
if (NOT DL_ARGS_TMP_DIR)
set(DL_ARGS_TMP_DIR "${DL_ARGS_PREFIX}/${DL_ARGS_PROJ}-tmp")
endif()
if (NOT DL_ARGS_STAMP_DIR)
set(DL_ARGS_STAMP_DIR "${DL_ARGS_PREFIX}/${DL_ARGS_PROJ}-stamp")
endif()
# Export the chosen locations so the caller can add_subdirectory() them.
set(${DL_ARGS_PROJ}_SOURCE_DIR "${DL_ARGS_SOURCE_DIR}" PARENT_SCOPE)
set(${DL_ARGS_PROJ}_BINARY_DIR "${DL_ARGS_BINARY_DIR}" PARENT_SCOPE)

# The way that CLion manages multiple configurations, it causes a copy of
# the CMakeCache.txt to be copied across due to it not expecting there to
# be a project within a project. This causes the hard-coded paths in the
# cache to be copied and builds to fail. To mitigate this, we simply
# remove the cache if it exists before we configure the new project. It
# is safe to do so because it will be re-generated. Since this is only
# executed at the configure step, it should not cause additional builds or
# downloads.
file(REMOVE "${DL_ARGS_DOWNLOAD_DIR}/CMakeCache.txt")

# Create and build a separate CMake project to carry out the download.
# If we've already previously done these steps, they will not cause
# anything to be updated, so extra rebuilds of the project won't occur.
# Make sure to pass through CMAKE_MAKE_PROGRAM in case the main project
# has this set to something not findable on the PATH.
configure_file("${_DownloadProjectDir}/DownloadProject.cmake.in"
"${DL_ARGS_DOWNLOAD_DIR}/CMakeLists.txt")
# Configure the helper project with the same generator as the main build.
execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}"
-D "CMAKE_MAKE_PROGRAM:FILE=${CMAKE_MAKE_PROGRAM}"
.
RESULT_VARIABLE result
${OUTPUT_QUIET}
WORKING_DIRECTORY "${DL_ARGS_DOWNLOAD_DIR}"
)
if(result)
message(FATAL_ERROR "CMake step for ${DL_ARGS_PROJ} failed: ${result}")
endif()
# Building the helper project runs the ExternalProject download/update step.
execute_process(COMMAND ${CMAKE_COMMAND} --build .
RESULT_VARIABLE result
${OUTPUT_QUIET}
WORKING_DIRECTORY "${DL_ARGS_DOWNLOAD_DIR}"
)
if(result)
message(FATAL_ERROR "Build step for ${DL_ARGS_PROJ} failed: ${result}")
endif()

endfunction()

+ 21
- 0
cmake/local/DownloadProject.cmake.in View File

@@ -0,0 +1,21 @@
# Distributed under the OSI-approved MIT License. See accompanying
# file LICENSE or https://github.com/Crascit/DownloadProject for details.

# Template for the helper project generated by download_project(): the
# DL_ARGS_* placeholders below are substituted by configure_file(), and
# building the resulting project performs the download/update step.
cmake_minimum_required(VERSION 2.8.2)

project(${DL_ARGS_PROJ}-download NONE)

include(ExternalProject)
ExternalProject_Add(${DL_ARGS_PROJ}-download
${DL_ARGS_UNPARSED_ARGUMENTS}
PREFIX "${DL_ARGS_PREFIX}"
# NOTE(review): DOWNLOAD_DIR is pointed at the source dir rather than the
# DL_ARGS_DOWNLOAD_DIR computed by download_project() -- presumably so
# downloaded archives land next to the unpacked sources; confirm against
# the upstream DownloadProject template, which omits DOWNLOAD_DIR entirely.
DOWNLOAD_DIR "${DL_ARGS_SOURCE_DIR}"
SOURCE_DIR "${DL_ARGS_SOURCE_DIR}"
BINARY_DIR "${DL_ARGS_BINARY_DIR}"
TMP_DIR "${DL_ARGS_TMP_DIR}"
STAMP_DIR "${DL_ARGS_STAMP_DIR}"
# Disable every step except download/update; the caller integrates the
# sources into its own build with add_subdirectory().
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)

+ 36
- 0
cmake/local/GTest.cmake View File

@@ -0,0 +1,36 @@
# Fetches googletest (branch v1.8.x) at configure time and adds it to the
# build, exposing the targets aitisa_api::gtest and aitisa_api::gmock.

# Include guard: ALIAS targets cannot be created twice, so a second include
# of this module would fail. Mirrors the guards used by the cmake/public
# modules.
if(aitisa_api_local_gtest_cmake_included)
  return()
endif()
set(aitisa_api_local_gtest_cmake_included true)

set(_external_target_name gtest)

# UPDATE_DISCONNECTED (skip re-checking the remote on every configure run)
# is only available on CMake >= 3.2.
if (CMAKE_VERSION VERSION_LESS 3.2)
  set(UPDATE_DISCONNECTED_IF_AVAILABLE "")
else()
  set(UPDATE_DISCONNECTED_IF_AVAILABLE "UPDATE_DISCONNECTED 1")
endif()

# Resolve the module relative to this file so the include works no matter
# which directory includes us.
include(${CMAKE_CURRENT_LIST_DIR}/DownloadProject.cmake)
download_project(PROJ ${_external_target_name}
  GIT_REPOSITORY https://github.com/google/googletest.git
  GIT_TAG v1.8.x
  GIT_PROGRESS TRUE
  ${UPDATE_DISCONNECTED_IF_AVAILABLE}
  PREFIX "${AITISA_API_EXTERNAL_DIR}/${_external_target_name}"
)

# Prevent overriding the parent project's compiler/linker
# settings on Windows
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)

# Add googletest directly to our build. This defines
# the gtest and gtest_main targets.
if(NOT TARGET gtest_main)
  add_subdirectory(
    ${${_external_target_name}_SOURCE_DIR}
    ${${_external_target_name}_BINARY_DIR}
    EXCLUDE_FROM_ALL)
endif()

add_library(aitisa_api::gtest ALIAS gtest_main)
add_library(aitisa_api::gmock ALIAS gmock_main)

unset(_external_target_name)

+ 35
- 0
cmake/public/Doxygen.cmake View File

@@ -0,0 +1,35 @@
##############################################################################
# Locates Doxygen and configures documentation generation
##############################################################################

# Include guard (the guard variable was previously set twice; once suffices).
if(aitisa_api_public_doxygen_cmake_included)
  return()
endif()
set(aitisa_api_public_doxygen_cmake_included true)

find_package(Doxygen)
if(DOXYGEN_FOUND)
  # NOTE(review): docs are generated into the *source* tree (docs/); kept
  # as-is because that directory is committed, but a binary-dir output
  # would normally be preferred.
  set(DOXYGEN_OUTPUT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/docs)
  # Stamp file lets the `docs` target rerun Doxygen only when inputs change.
  set(DOXYGEN_STAMP_FILE ${CMAKE_CURRENT_BINARY_DIR}/docs.stamp)
  configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in
    ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
    @ONLY)
  # Regenerate whenever any header under src/ changes.
  file(GLOB_RECURSE HEADERS ${PROJECT_SOURCE_DIR}/src/*.h)
  add_custom_command(
    OUTPUT ${DOXYGEN_STAMP_FILE}
    DEPENDS ${HEADERS}
    COMMAND ${DOXYGEN_EXECUTABLE} Doxyfile
    COMMAND ${CMAKE_COMMAND} -E touch ${DOXYGEN_STAMP_FILE}
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    COMMENT "Generating API documentation with Doxygen" VERBATIM)
  add_custom_target(docs DEPENDS ${DOXYGEN_STAMP_FILE})

  # Install the generated docs unless building a self-contained bundle.
  if(NOT AITISA_API_INSTALL_MODE STREQUAL "BUNDLE")
    install(
      DIRECTORY ${DOXYGEN_OUTPUT_DIR}
      DESTINATION share/doc/${LIB_NAME} OPTIONAL)
  endif()
endif()

+ 15
- 0
cmake/public/GTest.cmake View File

@@ -0,0 +1,15 @@
# Locates an externally installed GoogleTest and wraps it in the imported
# interface target aitisa_api::gtest.

if(aitisa_api_public_gtest_cmake_included)
  return()
endif()
set(aitisa_api_public_gtest_cmake_included true)

# Let a user-supplied GTEST_ROOT_DIR steer FindGTest's search. Quoted so an
# unset GTEST_ROOT_DIR still supplies the macro's required value argument.
aitisa_api_update_option(GTEST_ROOT "${GTEST_ROOT_DIR}")

# BUGFIX: the find module is FindGTest.cmake and package names are
# case-sensitive, so `find_package(GTEST ...)` fails on case-sensitive
# filesystems. The result variables it defines are uppercase, as used below.
find_package(GTest REQUIRED)
add_library(aitisa_api::gtest INTERFACE IMPORTED)
set_property(
  TARGET aitisa_api::gtest PROPERTY INTERFACE_INCLUDE_DIRECTORIES
  ${GTEST_INCLUDE_DIRS})
set_property(
  TARGET aitisa_api::gtest PROPERTY INTERFACE_LINK_LIBRARIES
  ${GTEST_BOTH_LIBRARIES})

+ 197
- 0
cmake/public/Utils.cmake View File

@@ -0,0 +1,197 @@
# Include guard: process this helper module at most once per configure run.
if(aitisa_api_public_utils_cmake_included)
return()
endif()
set(aitisa_api_public_utils_cmake_included true)

##############################################################################
# Macro to update cached options.
# Forces the cached BOOL `variable` to `value` while preserving whatever
# help string the cache entry already carries.
##############################################################################
macro (aitisa_api_update_option variable value)
  get_property(_help_string CACHE ${variable} PROPERTY HELPSTRING)
  # BUGFIX: both expansions are quoted. Unquoted, an empty help string (the
  # variable not yet cached) made FORCE be parsed as the docstring, and a
  # help string containing spaces split into extra set() arguments.
  set(${variable} "${value}" CACHE BOOL "${_help_string}" FORCE)
endmacro()

##############################################################################
# Function to exclude the given items from a list.
#   output -- name of the variable receiving the filtered list
#   input  -- the input list (pass quoted so it stays one argument)
#   ARGN   -- items to remove (every occurrence is removed)
##############################################################################
function(exclude output input)
  # list(REMOVE_ITEM) takes multiple values, so one call replaces the loop;
  # guard against empty ARGN, for which REMOVE_ITEM would be a syntax error.
  if(ARGN)
    list(REMOVE_ITEM input ${ARGN})
  endif()
  set(${output} ${input} PARENT_SCOPE)
endfunction()

##############################################################################
# This function works around a CMake issue with setting include directories of
# imported libraries built with `ExternalProject_Add`.
# https://gitlab.kitware.com/cmake/cmake/issues/15052
##############################################################################
function(aitisa_api_make_include_dir target)
  get_target_property(_dirs ${target} INTERFACE_INCLUDE_DIRECTORIES)
  # Guard: get_target_property() yields "<var>-NOTFOUND" when the property
  # is unset; without the check a directory of that literal name was created.
  if(_dirs)
    file(MAKE_DIRECTORY ${_dirs})
  endif()
endfunction()

##############################################################################
# This function encodes a relative rpath for executables built
# or installed with HICE
#   target             -- target whose rpath properties are set
#   rel_install_libdir -- library dir relative to the install prefix
##############################################################################
function(aitisa_api_encode_rpath target rel_install_libdir)
  # $ORIGIN (@loader_path on macOS) makes the rpath relative to the binary's
  # own location, keeping the install relocatable.
  if(APPLE)
    set(_rpath_portable_origin "@loader_path")
  else()
    set(_rpath_portable_origin "\$ORIGIN")
  endif()
  file(TO_NATIVE_PATH "${_rpath_portable_origin}/../${rel_install_libdir}"
       portable_rpath)
  # BUGFIX: the parameter was spelled `rel_intall_libdir` while the body read
  # `rel_install_libdir` (always empty), and `set_target_properties(target`
  # lacked `${}`, applying the properties to a literal target named "target".
  set_target_properties(${target}
    PROPERTIES
    MACOSX_RPATH ON
    SKIP_BUILD_RPATH OFF
    BUILD_WITH_INSTALL_RPATH OFF
    INSTALL_RPATH_USE_LINK_PATH ON
    INSTALL_RPATH "${portable_rpath}"
  )
endfunction()

##############################################################################
# Common configuration for tests / test cases on Windows
#   name -- test or executable name
#   kind -- property scope keyword (e.g. TEST) for set_property()
##############################################################################
function(aitisa_api_maybe_configure_windows_test name kind)
  # No-op everywhere except Windows/MinGW.
  if(NOT (WIN32 OR MINGW))
    return()
  endif()
  # Escape list separators so PATH survives as one ENVIRONMENT entry.
  string(REPLACE ";" "\;" PATH "${CTESTCONFIG_PATH};$ENV{PATH}")
  set_property(${kind} ${name} PROPERTY ENVIRONMENT "PATH=${PATH}")
  configure_file(${PROJECT_SOURCE_DIR}/cmake/template.vcxproj.user
                 ${name}.vcxproj.user @ONLY)
endfunction()

##############################################################################
# Register new executable/test
# name -- name of the executable
# srcs -- list of source, if many must be enclosed with ""
# test -- "test" to mark executable as a test, "" otherwise
# arg4 -- (optional) list of extra library dependencies
##############################################################################
function(aitisa_api_register_exe name srcs test)
  add_executable(${name} ${srcs})
  target_link_libraries(${name} ${ARGN})
  # Only executables explicitly marked "test" are registered with CTest.
  if(NOT "${test}" STREQUAL "test")
    return()
  endif()
  add_test(NAME ${name} COMMAND $<TARGET_FILE:${name}>)
  aitisa_api_maybe_configure_windows_test(${name} TEST)
endfunction()