From 91cf419669e309be235f129312c30b7f09f5d1de Mon Sep 17 00:00:00 2001
From: Neil Tan
Date: Sun, 29 Oct 2017 11:57:37 +0800
Subject: [PATCH 01/80] context draft

---
 context.hpp | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 168 insertions(+)
 create mode 100644 context.hpp

diff --git a/context.hpp b/context.hpp
new file mode 100644
index 00000000..30b75f1a
--- /dev/null
+++ b/context.hpp
@@ -0,0 +1,168 @@
+#ifndef UTENSOR_CTX_H
+#define UTENSOR_CTX_H
+
+#include
+#include
+#include "tensor.hpp"
+
+typedef long long TensorPtr;
+
+class uTensor {
+  virtual void init(Context ctx) {};
+  virtual void inFocus() {};
+  virtual void deFocus() {};
+  virtual void finalize() {};
+  virtual ~uTensor() = 0;
+}
+
+//isType() https://stackoverflow.com/questions/9974596/how-to-check-whether-two-pointers-point-to-the-same-object-or-not
+//double dispatch
+
+//new vs stack
+class Operator : uTensor {
+protected:
+  //setup input/output info in derived constructors
+  vector inputs;
+  vector dtype_in;
+  vector outputs;
+  vector dtype_out;
+public:
+  virtual void compute() = 0;
+
+  void setInputs(vector &_inputs) {
+    if(_inputs.size() != inputs.size()) ERR_EXIT("Input Tensor list mismatched...");
+
+    for(uint8_t i = 0; i < input.size(); i++) {
+      if(dtype_in[i] == inputs.getType()) {
+        input[i] = _inputs[i];
+      } else {
+        ERR_EXIT("Tensor Type mismatched...");
+      }
+    }
+  }
+
+  void setOutputs(vector &_outputs) {
+    if(_outputs.size() != outputs.size()) ERR_EXIT("Input Tensor list mismatched...");
+
+    for(uint8_t i = 0; i < output.size(); i++) {
+      output[i] = _output[i]
+      if(_output[i] == nullptr) continue;
+      if(dtype_out[i].getType() != output[i].getType()) ERR_EXIT("Tensor Type mismatched...");
+    }
+  }
+
+  vector getInputs(void) {
+    return inputs;
+  }
+
+  vector getOutputs(void) {
+    return outputs;
+  }
+};
+
+//TODO: how do we deal with dangling tensors?
+// only allow pushing for exact number of inputs
+// output reference count are initialized to 0, incremented only on input-push
+// outputs are allocated in ops
+// output lists can contain nullptr/empty-tensors
+// tensors can be all pointers here, but destructors has to set data to nullptr
+// push(op, input_t_list, output_t_list) or push(op, init-list, init-list)
+// TensorListModifierOp
+class Context : uTensor {
+protected:
+  vector op_list;
+  std::unordered_map tensor_refs;
+  uint8_t tmp_input_count;
+  uint8_t tmp_output_count;
+  vector tmp_input_list;
+  vector tmp_output_list;
+
+  void runOp(Operator &op);
+  void initOpTensors(Operator &op);
+  void deinitOpTensors(Operator &op);
+  void injectOp(void);
+
+public:
+  Context() {
+    tmp_input_count = 0;
+    tmp_output_count = 0;
+  }
+  void addOp(Operator op);
+  void addInputs(Operator op);
+  void push(void);
+  vector Context::getOutputs(void);
+  int run(void);
+};
+
+
+void Context::addOp(Operator &op) {
+  if(tmp_input_count != 0) {
+    ERR_EXIT("valid number of inputs\r\n");
+  }
+  if(tmp_output_count != 0) {
+    ERR_EXIT("valid number of outputs\r\n");
+  }
+
+  op_list.push_back(op);
+  tmp_input_count = op.getInputCount();
+  tmp_output_count = op.getOutputCount();
+}
+
+void Context::addInputs(vector t_list) {
+  int tmp_input_count = tmp_input_count - t_list.size();
+  if(tmp_input_count < 0) ERR_EXIT("supplied too many inputs");
+  tmp_input_list.insert(tmp_input_list.end(), t_list.begin(), t_list.end());
+
+  for(auto t:t_list) {
+    auto ref_count = tensor_refs.find(t);
+    if(ref_count == tensor_refs.end()) {
+      tensor_refs[t] = 1;
+    } else {
+      tensor_refs[t]++;
+    }
+  }
+
+}
+
+void Context::push(void) {
+  if(tmp_input_count != 0 &&
+     tmp_output_count != 0) {
+    ERR_EXIT("valid number of inputs/outputs\r\n");
+  }
+
+  auto op = op_list.back();
+  op.setInputs(tmp_input_list);
+  op.setOutputs(tmp_output_list);
+
+  tmp_input_list.empty();
+  tmp_output_list.empty();
+  tmp_input_count = 0;
+  tmp_output_count = 0;
+}
+
+vector Context::getOutputs(void) {
+
+}
+
+void Context::runOp(Operator &op) {
+
+}
+
+int Context::run(void) {
+  tensorCleanup();
+
+  for(auto op:op_list) {
+    initOpTensors(op.getInputs());
+    initOpTensors(op.getOutputs());
+
+    runOp(op);
+
+    deinitOpTensors(op.getInputs());
+    deinitOpTensors(op.getOutputs());
+
+    decreRefCount(op.getInputs());
+    tensorCleanup();
+  }
+}
+
+#endif // UTENSOR_CTX_H

From e2aaf988ec2e55bb0933e7d5da4e36a859ad8d51 Mon Sep 17 00:00:00 2001
From: Neil Tan
Date: Sun, 29 Oct 2017 15:38:55 +0800
Subject: [PATCH 02/80] revised draft

---
 context.hpp | 93 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 50 insertions(+), 43 deletions(-)

diff --git a/context.hpp b/context.hpp
index 30b75f1a..b10be564 100644
--- a/context.hpp
+++ b/context.hpp
@@ -72,47 +72,42 @@ class Context : uTensor {
 protected:
   vector op_list;
   std::unordered_map tensor_refs;
-  uint8_t tmp_input_count;
-  uint8_t tmp_output_count;
-  vector tmp_input_list;
-  vector tmp_output_list;
-  void runOp(Operator &op);
-  void initOpTensors(Operator &op);
-  void deinitOpTensors(Operator &op);
-  void injectOp(void);
+  void initOpTensors(vector &t_list);
+  void deinitTensors(vector &t_list);
+  void registerInputTensors(vector &t_list);
+  void registerOutputTensors(vector &t_list);
+  void decreRefCount(vector &t_list);
+
+  //void unref2nullTensors(vector &t_list);
+  //replace non-referenced output to null-tensors
 
 public:
   Context() {
     tmp_input_count = 0;
     tmp_output_count = 0;
   }
-  void addOp(Operator op);
-  void addInputs(Operator op);
-  void push(void);
-  vector Context::getOutputs(void);
+
+  void push(Operator op, vector _inputs, vector _outputs);
   int run(void);
 };
 
-
-void Context::addOp(Operator &op) {
-  if(tmp_input_count != 0) {
+void push(Operator op, vector _inputs, vector _outputs) {
+  if(op.getInputCount() != _inputs.size()) {
     ERR_EXIT("valid number of inputs\r\n");
   }
-  if(tmp_output_count != 0) {
-    ERR_EXIT("valid number of outputs\r\n");
+  if(op.getOutputCount() != _outputs.size()) {
+    ERR_EXIT("valid number of output\r\n");
   }
 
   op_list.push_back(op);
-  tmp_input_count = op.getInputCount();
-  tmp_output_count = op.getOutputCount();
+  registerInputTensors(_inputs);
+  registerOutputTensors(_outputs);
+
 }
 
-void Context::addInputs(vector t_list) {
-  int tmp_input_count = tmp_input_count - t_list.size();
-  if(tmp_input_count < 0) ERR_EXIT("supplied too many inputs");
-  tmp_input_list.insert(tmp_input_list.end(), t_list.begin(), t_list.end());
 
+void Context::registerInputTensors(vector &t_list) {
   for(auto t:t_list) {
     auto ref_count = tensor_refs.find(t);
     if(ref_count == tensor_refs.end()) {
@@ -121,47 +116,59 @@ void Context::addInputs(vector t_list) {
     }
   }
-
 }
 
-void Context::push(void) {
-  if(tmp_input_count != 0 &&
-     tmp_output_count != 0) {
-    ERR_EXIT("valid number of inputs/outputs\r\n");
+void Context::registerOutputTensors(vector &t_list) {
+  for(auto t:t_list) {
+    auto ref_count = tensor_refs.find(t);
+    if(ref_count == tensor_refs.end()) {
+      tensor_refs[t] = 0;
+    }
   }
+}
 
-  auto op = op_list.back();
-  op.setInputs(tmp_input_list);
-  op.setOutputs(tmp_output_list);
 
-  tmp_input_list.empty();
-  tmp_output_list.empty();
-  tmp_input_count = 0;
-  tmp_output_count = 0;
+void Context::initOpTensors(vector &t_list) {
+  for(auto t:t_list) {
+    t.inFocus();
+  }
 }
 
-vector Context::getOutputs(void) {
-
+void Context::deinitTensors(vector &t_list) {
+  for(auto t:t_list) {
+    t.deFocus();
+  }
 }
 
-void Context::runOp(Operator &op) {
+void Context::deinitTensors(vector &t_list) {
+  for(auto t:t_list) {
+    t.deFocus();
+  }
+}
 
+void Context::decreRefCount(vector &t_list) {
+  for(auto t:t_list) {
+    tensor_refs[t] = tensor_refs[t] - 1;
+    if(tensor_refs[t] < 1) {
+      t.~Tensor();
+    }
 }
 
 int Context::run(void) {
-  tensorCleanup();
+  //unref2nullTensors();
 
   for(auto op:op_list) {
-    initOpTensors(op.getInputs());
-    initOpTensors(op.getOutputs());
+    initTensors(op.getInputs());
+    initTensors(op.getOutputs());
 
-    runOp(op);
+    op.init();
+    op.compute();
+    op.deinit();
 
     deinitOpTensors(op.getInputs());
     deinitOpTensors(op.getOutputs());
 
     decreRefCount(op.getInputs());
-    tensorCleanup();
   }
 }

From 7b2a5f978d866e26aa1490bbafc114831b4a2ce6 Mon Sep 17 00:00:00 2001
From: Neil Tan
Date: Sun, 29 Oct 2017 17:57:29 +0800
Subject: [PATCH 03/80] wip

---
 context.hpp | 44 ++++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/context.hpp b/context.hpp
index b10be564..f772691b 100644
--- a/context.hpp
+++ b/context.hpp
@@ -6,56 +6,57 @@
 #include "tensor.hpp"
 
 typedef long long TensorPtr;
+typedef vector TList;
 
 class uTensor {
-  virtual void init(Context ctx) {};
   virtual void inFocus() {};
   virtual void deFocus() {};
-  virtual void finalize() {};
   virtual ~uTensor() = 0;
-}
+};
 
 //isType() https://stackoverflow.com/questions/9974596/how-to-check-whether-two-pointers-point-to-the-same-object-or-not
 //double dispatch
 
 //new vs stack
-class Operator : uTensor {
+class Operator {
 protected:
   //setup input/output info in derived constructors
-  vector inputs;
+  TList inputs;
   vector dtype_in;
-  vector outputs;
+  TList outputs;
   vector dtype_out;
 public:
   virtual void compute() = 0;
 
-  void setInputs(vector &_inputs) {
+  void setInputs(TList &_inputs) {
     if(_inputs.size() != inputs.size()) ERR_EXIT("Input Tensor list mismatched...");
 
     for(uint8_t i = 0; i < input.size(); i++) {
-      if(dtype_in[i] == inputs.getType()) {
-        input[i] = _inputs[i];
-      } else {
+      if(dtype_in[i] != inputs.getType()) {
         ERR_EXIT("Tensor Type mismatched...");
       }
+
+      input[i] = _inputs[i];
     }
   }
 
-  void setOutputs(vector &_outputs) {
+  void setOutputs(TList &_outputs) {
     if(_outputs.size() != outputs.size()) ERR_EXIT("Input Tensor list mismatched...");
 
     for(uint8_t i = 0; i < output.size(); i++) {
+      if(dtype_out[i].getType() != output[i].getType()) {
+        ERR_EXIT("Tensor Type mismatched...");
+      }
+
       output[i] = _output[i]
-      if(_output[i] == nullptr) continue;
-      if(dtype_out[i].getType() != output[i].getType()) ERR_EXIT("Tensor Type mismatched...");
     }
   }
 
-  vector getInputs(void) {
+  TList getInputs(void) {
     return inputs;
   }
 
-  vector getOutputs(void) {
+  TList getOutputs(void) {
     return outputs;
   }
 };

From a65a8a9b7d2dfe45a6d3008bf85de68ddcebe0eb Mon Sep 17 00:00:00 2001
From: kazami
Date: Sun, 29 Oct 2017 18:01:36 +0800
Subject: [PATCH 04/80] tensor extend first commit

1. extend different type tensor for sd, memory
2. inherit super class for polymorphism
---
 tensor.hpp | 65 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 44 insertions(+), 21 deletions(-)

diff --git a/tensor.hpp b/tensor.hpp
index bebc1e8b..316bb969 100644
--- a/tensor.hpp
+++ b/tensor.hpp
@@ -3,15 +3,20 @@
 
 #include
 #include
-#include
+#include "uTensor_util.hpp"
 #include
-#include "mbed.h"
 #include "stdlib.h"
+#include
+
+class Object {
+  virtual void initialize() = 0;
+  virtual void deinitialize() = 0;
+};
 
 template
 class TensorBase {
  public:
-  vector shape;
+  std::vector shape;
   U* data;
   uint32_t total_size;
 
@@ -24,11 +29,11 @@ class TensorBase {
 };
 
 template
-class Tensor {
+class Tensor : Object {
   std::shared_ptr> s;  // short for states
 
-  void init(vector& v) {
-    s = std::make_shared>(TensorBase());
+  void init(std::vector& v) {
+    s = std::make_shared>();
     s->total_size = 0;
 
     for (auto i : v) {
@@ -47,13 +52,13 @@ class Tensor {
 
  public:
   Tensor(void) {
-    s = std::make_shared>(TensorBase());
+    s = std::make_shared>();
     s->total_size = 0;
     s->data = nullptr;
   }
 
-  Tensor(initializer_list l) {
-    vector v;
+  Tensor(std::initializer_list l) {
+    std::vector v;
     for (auto i : l) {
       v.push_back(i);
     }
@@ -61,7 +66,7 @@ class Tensor {
     init(v);
   }
 
-  Tensor(vector v) { init(v); }
+  Tensor(std::vector v) { init(v); }
 
   // returns how far a given dimension is apart
   size_t getStride(size_t dim_index) {
@@ -78,7 +83,7 @@ class Tensor {
   // POST: When a degenerative index is supplied, the pointer
   //       lowest specified dimension is returned.
   //       Otherwise, return the pointer to the specific element.
-  T* getPointer(initializer_list l) {
+  T* getPointer(std::initializer_list l) {
     size_t p_offset = 0;
     signed short current_dim = 0;
     for (auto i : l) {
@@ -90,7 +95,7 @@ class Tensor {
     return s->data + p_offset;
   }
 
-  T* getPointer(vector v) {
+  T* getPointer(std::vector v) {
     size_t p_offset = 0;
     signed short current_dim = 0;
     for (auto i : v) {
@@ -103,7 +108,7 @@ class Tensor {
     return s->data + p_offset;
   }
 
-  vector getShape(void) { return s->shape; }
+  std::vector getShape(void) { return s->shape; }
 
   uint32_t getSize(void) { return s->total_size; }
 
@@ -118,8 +123,26 @@ class Tensor {
     s = nullptr;
     DEBUG("Tensor Destructed\r\n");
   }
+  virtual T* read(size_t offset, size_t ele) {}
+  virtual T* write(size_t offset, size_t ele) {}
 };
 
+template
+class RamTensor : public Tensor {
+  //need deep copy
+  public:
+  RamTensor() : Tensor() { std::cout << "ramtensor " << std::endl;
+    cursor = nullptr;}
+  virtual T* read(size_t offset, size_t ele) override {
+    T* ptr = cursor + offset;
+    return ptr;
+  };
+  virtual T* write(size_t offset, size_t ele) override {};
+  virtual void initialize() override {};
+  virtual void deinitialize() override {};
+  private:
+  T* cursor;
+};
 template
 Tensor TensorCast(Tensor input) {
   Tensor output(input.getShape());
@@ -134,7 +157,7 @@ Tensor TensorCast(Tensor input) {
 }
 
 template
-Tensor TensorConstant(vector shape, T c) {
+Tensor TensorConstant(std::vector shape, T c) {
   Tensor output(shape);
   T* outPrt = output.getPointer({});
 
@@ -146,8 +169,8 @@ Tensor TensorConstant(vector shape, T c) {
 }
 
 template
-Tensor TensorConstant(initializer_list l, T c) {
-  vector v;
+Tensor TensorConstant(std::initializer_list l, T c) {
+  std::vector v;
   for (auto i : l) {
     v.push_back(i);
   }
@@ -163,8 +186,8 @@ Tensor TensorConstant(initializer_list l, T c) {
 
 class permuteIndexTransform {
  private:
-  vector permute;
-  vector depermute;
+  std::vector permute;
+  std::vector depermute;
   Shape in_shape;
   Shape in_stride;
   Shape out_shape;
@@ -204,14 +227,14 @@ class permuteIndexTransform {
   }
 
  public:
-  permuteIndexTransform(Shape input_shape, vector permute) {
+  permuteIndexTransform(Shape input_shape, std::vector permute) {
     setInputShape(input_shape);
     setPermute(permute);
     apply();
   }
 
-  vector getPermute(void) { return permute; }
-  void setPermute(vector& _permute) {
+  std::vector getPermute(void) { return permute; }
+  void setPermute(std::vector& _permute) {
     permute = _permute;
     depermute.resize(permute.size());
     uint8_t i = 0;

From d219be5796a4268b24b988c2359ea812abab2aa8 Mon Sep 17 00:00:00 2001
From: Neil Tan
Date: Sun, 29 Oct 2017 18:03:44 +0800
Subject: [PATCH 05/80] draft for merge

---
 context.hpp     | 58 --------------------------------------------
 uTensorBase.hpp | 64 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 58 deletions(-)
 create mode 100644 uTensorBase.hpp

diff --git a/context.hpp b/context.hpp
index f772691b..8ccd71f4 100644
--- a/context.hpp
+++ b/context.hpp
@@ -2,64 +2,6 @@
 #define UTENSOR_CTX_H
 
 #include
-#include
-#include "tensor.hpp"
-
-typedef long long TensorPtr;
-typedef vector TList;
-
-class uTensor {
-  virtual void inFocus() {};
-  virtual void deFocus() {};
-  virtual ~uTensor() = 0;
-};
-
-//isType() https://stackoverflow.com/questions/9974596/how-to-check-whether-two-pointers-point-to-the-same-object-or-not
-//double dispatch
-
-//new vs stack
-class Operator {
-protected:
-  //setup input/output info in derived constructors
-  TList inputs;
-  vector dtype_in;
-  TList outputs;
-  vector dtype_out;
-public:
-  virtual void compute() = 0;
-
-  void setInputs(TList &_inputs) {
-    if(_inputs.size() != inputs.size()) ERR_EXIT("Input Tensor list mismatched...");
-
-    for(uint8_t i = 0; i < input.size(); i++) {
-      if(dtype_in[i] != inputs.getType()) {
-        ERR_EXIT("Tensor Type mismatched...");
-      }
-
-      input[i] = _inputs[i];
-    }
-  }
-
-  void setOutputs(TList &_outputs) {
-    if(_outputs.size() != outputs.size()) ERR_EXIT("Input Tensor list mismatched...");
-
-    for(uint8_t i = 0; i < output.size(); i++) {
-      if(dtype_out[i].getType() != output[i].getType()) {
-        ERR_EXIT("Tensor Type mismatched...");
-      }
-
-      output[i] = _output[i]
-    }
-  }
-
-  TList getInputs(void) {
-    return inputs;
-  }
-
-  TList getOutputs(void) {
-    return outputs;
-  }
-};
 
 //TODO: how do we deal with dangling tensors?
 // only allow pushing for exact number of inputs
 // output reference count are initialized to 0, incremented only on input-push
 // outputs are allocated in ops
 // output lists can contain nullptr/empty-tensors
 // tensors can be all pointers here, but destructors has to set data to nullptr
 // push(op, input_t_list, output_t_list) or push(op, init-list, init-list)
 // TensorListModifierOp

diff --git a/uTensorBase.hpp b/uTensorBase.hpp
new file mode 100644
index 00000000..e3d437f8
--- /dev/null
+++ b/uTensorBase.hpp
@@ -0,0 +1,64 @@
+#ifndef UTENSOR_BASE_H
+#define UTENSOR_BASE_H
+
+#include "tensor.hpp"
+
+typedef long long TensorPtr;
+typedef vector TList;
+
+class uTensor {
+  virtual void inFocus() {};
+  virtual void deFocus() {};
+  virtual ~uTensor() = 0;
+};
+
+
+//isType() https://stackoverflow.com/questions/9974596/how-to-check-whether-two-pointers-point-to-the-same-object-or-not
+//double dispatch
+
+//new vs stack
+class Operator {
+protected:
+  //setup input/output info in derived constructors
+  TList inputs;
+  vector dtype_in;
+  TList outputs;
+  vector dtype_out;
+public:
+  virtual void compute() = 0;
+
+  void setInputs(TList &_inputs) {
+    if(_inputs.size() != inputs.size()) ERR_EXIT("Input Tensor list mismatched...");
+
+    for(uint8_t i = 0; i < input.size(); i++) {
+      if(dtype_in[i] != inputs.getType()) {
+        ERR_EXIT("Tensor Type mismatched...");
+      }
+
+      input[i] = _inputs[i];
+    }
+  }
+
+  void setOutputs(TList &_outputs) {
+    if(_outputs.size() != outputs.size()) ERR_EXIT("Input Tensor list mismatched...");
+
+    for(uint8_t i = 0; i < output.size(); i++) {
+      if(dtype_out[i].getType() != output[i].getType()) {
+        ERR_EXIT("Tensor Type mismatched...");
+      }
+
+      output[i] = _output[i]
+    }
+  }
+
+  TList getInputs(void) {
+    return inputs;
+  }
+
+  TList getOutputs(void) {
+    return outputs;
+  }
+};
+
+
+#endif //UTENSOR_BASE_H

From c09b38ee46f738b30de33b1a46d47784481157fc Mon Sep 17 00:00:00 2001
From: kazami
Date: Tue, 31 Oct 2017 18:08:13 +0800
Subject: [PATCH 06/80] Draft for tensor refactor

1. test idea quickly
2. sync idea
3. take type from tensor
4. make type system in ramtensor
---
 main.cpp   |  11 +++---
 tensor.hpp | 101 +++++++++++++++++++++++++++++++++--------------------
 2 files changed, 66 insertions(+), 46 deletions(-)

diff --git a/main.cpp b/main.cpp
index 76ca2821..f2a4b4ca 100644
--- a/main.cpp
+++ b/main.cpp
@@ -3,7 +3,9 @@
 #include "SDBlockDevice.h"
 #include "mbed.h"
 #include "stdio.h"
-#include "deep_mnist_mlp.hpp"
+#include "uTensor_util.hpp"
+#include "tensor.hpp"
+//#include "deep_mnist_mlp.hpp"
 
 Serial pc(USBTX, USBRX, 115200);
 SDBlockDevice bd(MBED_CONF_APP_SD_MOSI, MBED_CONF_APP_SD_MISO,
@@ -17,9 +19,10 @@ int main(int argc, char** argv) {
   printf("Deep MLP on Mbed (Trained with Tensorflow)\r\n\r\n");
   printf("running deep-mlp...\r\n");
 
-  int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx");
-  printf("prediction: %d\r\n", prediction);
-
+  // int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx");
+  // printf("prediction: %d\r\n", prediction);
+  Tensor *a = new RamTensor();
+  delete a;
 
   //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]})
   //Out[24]: array([7])

diff --git a/tensor.hpp b/tensor.hpp
index 316bb969..bcb35fae 100644
--- a/tensor.hpp
+++ b/tensor.hpp
@@ -8,53 +8,43 @@
 #include "stdlib.h"
 #include
 
-class Object {
-  virtual void initialize() = 0;
-  virtual void deinitialize() = 0;
+
+class uTensor {
+  virtual void inFocus() {};
+  virtual void deFocus() {};
+  public:
+  virtual ~uTensor() = 0;
 };
 
+uTensor::~uTensor() {
+}
 
-template
 class TensorBase {
  public:
   std::vector shape;
-  U* data;
+  void* data;
   uint32_t total_size;
 
   ~TensorBase() {
     if(data != nullptr) {
+      std::cout << "i am tensorbase destructor " << std::endl;
       free(data);
       DEBUG("TensorBase memory freed..\r\n");
     }
   }
 };
 
-template
-class Tensor : Object {
-  std::shared_ptr> s;  // short for states
-
-  void init(std::vector& v) {
-    s = std::make_shared>();
-    s->total_size = 0;
-
-    for (auto i : v) {
-      s->shape.push_back(i);
-      // total_size = (total_size == 0)? i : total_size *= i;
-      if (s->total_size == 0) {
-        s->total_size = i;
-      } else {
-        s->total_size *= i;
-      }
-    }
+class Tensor : uTensor {
+  std::shared_ptr s;  // short for states
 
-    s->data = (T*)malloc(unit_size() * s->total_size);
-    if(s->data == NULL) ERR_EXIT("ran out of memory for %lu malloc", unit_size() * s->total_size);
-  }
+  virtual void* read(size_t offset, size_t ele) { return nullptr;}
+  virtual void* write(size_t offset, size_t ele) { return nullptr; }
 
  public:
   Tensor(void) {
-    s = std::make_shared>(TensorBase());
-    s->total_size = 0;
-    s->data = nullptr;
+    std::cout << "tensor constructor " << std::endl;
+    //s = std::make_shared();
+    //s->total_size = 0;
+    //s->data = nullptr;
   }
 
   Tensor(std::initializer_list l) {
@@ -63,10 +53,10 @@ class Tensor {
       v.push_back(i);
     }
 
-    init(v);
+//    init(v);
   }
 
-  Tensor(std::vector v) { init(v); }
+  // Tensor(std::vector v) { init(v); }
 
   // returns how far a given dimension is apart
   size_t getStride(size_t dim_index) {
@@ -78,7 +68,26 @@ class Tensor {
     return (size_t)size_accm;
   }
 
+  template
+  void init(std::vector& v) {
+    std::cout << "initialize with type" << std::endl;
+    s = std::make_shared();
+    s->total_size = 0;
+
+    for (auto i : v) {
+      s->shape.push_back(i);
+      if (s->total_size == 0) {
+        s->total_size = i;
+      } else {
+        s->total_size *= i;
+      }
+    }
 
+    s->data = (void *)malloc(unit_size() * s->total_size);
+    if(s->data == NULL) ERR_EXIT("ran out of memory for %lu malloc", unit_size() * s->total_size);
+  }
+/*
   // PRE: l, initization list, specifying the element/dimension
   // POST: When a degenerative index is supplied, the pointer
   //       lowest specified dimension is returned.
   //       Otherwise, return the pointer to the specific element.
   T* getPointer(initializer_list l) {
     size_t p_offset = 0;
     signed short current_dim = 0;
     for (auto i : l) {
       p_offset += i * getStride(current_dim);
       current_dim++;
     }
 
     // printf("p_offset: %d\r\n", p_offset);
     return s->data + p_offset;
   }
 
   T* getPointer(vector v) {
     size_t p_offset = 0;
     signed short current_dim = 0;
     for (auto i : v) {
       p_offset += i * getStride(current_dim);
       current_dim++;
     }
 
     printf("p_offset: %d\r\n", p_offset);
 
     return s->data + p_offset;
-  }
+  }*/
 
   std::vector getShape(void) { return s->shape; }
 
   uint32_t getSize(void) { return s->total_size; }
 
-  uint16_t unit_size(void) { return sizeof(T); }
+  virtual uint16_t unit_size(void) {}
 
   uint32_t getSize_in_bytes(void) { return s->total_size * unit_size(); }
 
   // returns the number of dimensions in the tensor
   size_t getDim(void) { return s->shape.size(); }
+
+  template
+  T* read(size_t offset, size_t ele) {
+    return (T*)read(offset, ele);
+  }
 
   ~Tensor() {
     s = nullptr;
+    std::cout << "i am tensor destructor " << std::endl;
     DEBUG("Tensor Destructed\r\n");
   }
-  virtual T* read(size_t offset, size_t ele) {}
-  virtual T* write(size_t offset, size_t ele) {}
 };
 
 template
-class RamTensor : public Tensor {
+class RamTensor : public Tensor {
   //need deep copy
   public:
-  RamTensor() : Tensor() { std::cout << "ramtensor " << std::endl;
-    cursor = nullptr;}
+  RamTensor() : Tensor() {
+    std::cout << "ramtensor " << std::endl;
+    std::vector v(3, 3);
+    Tensor::init(v);
+    cursor = nullptr;}
   virtual T* read(size_t offset, size_t ele) override {
-    T* ptr = cursor + offset;
-    return ptr;
   };
   virtual T* write(size_t offset, size_t ele) override {};
-  virtual void initialize() override {};
-  virtual void deinitialize() override {};
+  virtual uint16_t unit_size(void) { return sizeof(T); }
+  ~RamTensor() {
+    std::cout << "i am ramtensor destructor" << std::endl;
+  }
   private:
   T* cursor;
 };
 
-template
+/*template
 Tensor TensorCast(Tensor input) {
   Tensor output(input.getShape());
   Tin* inputPrt = input.getPointer({});
@@ -287,5 +304,5 @@ void tensorChkAlloc(Tensor &t, Shape dim) {
   } else if (t.getShape() != dim) {
     ERR_EXIT("Dim mismatched...\r\n");
   }
-}
+}*/
 #endif

From f669f694f1c4eae74e7523254622ee73a9a30a01 Mon Sep 17 00:00:00 2001
From: kazami
Date: Tue, 31 Oct 2017 22:12:03 +0800
Subject: [PATCH 07/80] add feature to ram tensor class

1. implement add function
2. implement customized ram tensor constructor
---
 tensor.hpp | 175 ++++++++++++++++++++++++++---------------------------
 1 file changed, 86 insertions(+), 89 deletions(-)

diff --git a/tensor.hpp b/tensor.hpp
index bcb35fae..3d5edaa7 100644
--- a/tensor.hpp
+++ b/tensor.hpp
@@ -2,31 +2,30 @@
 #define UTENSOR_TENSOR_H
 
 #include
+#include
 #include
-#include "uTensor_util.hpp"
 #include
 #include "stdlib.h"
-#include
+#include "uTensor_util.hpp"
 
 class uTensor {
-  virtual void inFocus() {};
-  virtual void deFocus() {};
+  virtual void inFocus(){};
+  virtual void deFocus(){};
+
   public:
   virtual ~uTensor() = 0;
 };
 
-uTensor::~uTensor() {
-}
+uTensor::~uTensor() {}
 
 class TensorBase {
  public:
   std::vector shape;
   void* data;
   uint32_t total_size;
 
   ~TensorBase() {
     if (data != nullptr) {
-      std::cout << "i am tensorbase destructor " << std::endl;
       free(data);
       DEBUG("TensorBase memory freed..\r\n");
     }
   }
 };
 
 class Tensor : uTensor {
-  virtual void* read(size_t offset, size_t ele) { return nullptr;}
+  virtual void* read(std::initializer_list l) { return nullptr; }
   virtual void* write(size_t offset, size_t ele) { return nullptr; }
 
+ protected:
+  std::shared_ptr s;  // short for states
+
  public:
   Tensor(void) { std::cout << "tensor constructor " << std::endl; }
 
   // returns how far a given dimension is apart
   size_t getStride(size_t dim_index) {
@@ -53,7 +52,6 @@ class Tensor : uTensor {
   }
 
   template
   void init(std::vector& v) {
     std::cout << "initialize with type" << std::endl;
     s = std::make_shared();
     s->total_size = 0;
 
     for (auto i : v) {
       s->shape.push_back(i);
       if (s->total_size == 0) {
         s->total_size = i;
       } else {
         s->total_size *= i;
       }
     }
 
     s->data = (void*)malloc(unit_size() * s->total_size);
     if (s->data == NULL)
       ERR_EXIT("ran out of memory for %lu malloc", unit_size() * s->total_size);
   }
 
   std::vector getShape(void) { return s->shape; }
 
   uint32_t getSize(void) { return s->total_size; }
 
   virtual uint16_t unit_size(void) {}
 
   uint32_t getSize_in_bytes(void) { return s->total_size * unit_size(); }
 
   // returns the number of dimensions in the tensor
   size_t getDim(void) { return s->shape.size(); }
 
   template
-  T* read(size_t offset, size_t ele) {
-    return (T*)read(offset, ele);
+  T* read(std::initializer_list l) {
+    return (T*)read(l);
   }
 
   ~Tensor() {
     s = nullptr;
-    std::cout << "i am tensor destructor " << std::endl;
     DEBUG("Tensor Destructed\r\n");
   }
 };
 
 template
 class RamTensor : public Tensor {
   // need deep copy
  public:
   RamTensor() : Tensor() {
     std::cout << "ramtensor " << std::endl;
     std::vector v(3, 3);
     Tensor::init(v);
     cursor = nullptr;
   }
 
+  RamTensor(std::initializer_list l) : Tensor() {
+    std::cout << "ram con " << std::endl;
+    std::vector v;
+    for (auto i : l) {
+      v.push_back(i);
+    }
+
+    Tensor::init(v);
+  }
+
+  RamTensor(std::vector& v) : Tensor() {
+    std::cout << "2 ram con " << std::endl;
+    Tensor::init(v);
+  }
+
+  // PRE: l, initization list, specifying the element/dimension
+  // POST: When a degenerative index is supplied, the pointer
+  //       lowest specified dimension is returned.
+  //       Otherwise, return the pointer to the specific element.
+  virtual void* read(std::initializer_list l) override {
+    size_t p_offset = 0;
+    signed short current_dim = 0;
+    for (auto i : l) {
+      p_offset += i * getStride(current_dim);
+      current_dim++;
+    }
+
+    // printf("p_offset: %d\r\n", p_offset);
+    return (void*)((T*)s->data + p_offset);
+  }
+
+  /* T* getPointer(std::vector v) {
+    size_t p_offset = 0;
+    signed short current_dim = 0;
+    for (auto i : v) {
+      p_offset += i * getStride(current_dim);
+      current_dim++;
+    }
+
+    printf("p_offset: %d\r\n", p_offset);
+
+    return s->data + p_offset;
+  }*/
+  // virtual void* read(size_t offset, size_t ele) override{};
+  virtual void* write(size_t offset, size_t ele) override{};
+  virtual uint16_t unit_size(void) override { std::cout << "my unit size" << std::endl; return sizeof(T); }
+  ~RamTensor() { std::cout << "i am ramtensor destructor" << std::endl; }
 
  private:
   T* cursor;
 };

From e5b67bd7a133380bcd8d46dcfca36da024eb31fb Mon Sep 17 00:00:00 2001
From: Michael Bartling
Date: Tue, 31 Oct 2017 09:50:55 -0500
Subject: [PATCH 08/80] Add python requirements for SD preparation

---
 README.md        | 7 ++++---
 requirements.txt | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)
 create mode 100644 requirements.txt

diff --git a/README.md b/README.md
index fe627ca1..8a7193d0 100644
--- a/README.md
+++ b/README.md
@@ -35,9 +35,10 @@ See mbed_app.json
 
 ## SD Card Preparation
 The test data has to be loaded to the SD card for the default binary to run:
+1. Install python dependencies `pip install -r requirements.txt` (Note: may have to use `pip3`)
 1. Go to the `[project]\TESTS\scripts` folder
-2. Run `python3 compileTestData.py`. This will create `[project]\TESTS\scripts\testData` directory.
-3. Copy `[project]\TESTS\scripts\testData` to the root of your SD card.
+1. Run `python3 compileTestData.py`. This will create `[project]\TESTS\scripts\testData` directory.
+1. Copy `[project]\TESTS\scripts\testData` to the root of your SD card.
 
 ## Expected Output
 The quantized weight and input data are stored in the SD. Setting the serial baud rate to 115200, here is what you should see:
@@ -55,4 +56,4 @@ Currently, the binary runs the first sample of the [MNIST dataset](http://yann.l
 
 ![alt text](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mlp_mnist.png "mxnet Handwritten Digit Recognition")
 
- The related Tensorflow training script please refer to the [node-viewer](https://github.com/neil-tan/tf-node-viewer/blob/master/deep_mlp.py) project.
\ No newline at end of file
+ The related Tensorflow training script please refer to the [node-viewer](https://github.com/neil-tan/tf-node-viewer/blob/master/deep_mlp.py) project.

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..66e5c439
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+idx2numpy

From 96d3186a3b59abcc11d01f0fe9fa0460bd759352 Mon Sep 17 00:00:00 2001
From: Neil Tan
Date: Wed, 1 Nov 2017 16:10:34 +0800
Subject: [PATCH 09/80] suggesting tensor ref counter

---
 context.hpp     | 60 +++++++++++++++++++++----------------------------
 uTensorBase.hpp |  1 -
 2 files changed, 25 insertions(+), 36 deletions(-)

diff --git a/context.hpp b/context.hpp
index 48d1aa2c..214e7470 100644
--- a/context.hpp
+++ b/context.hpp
@@ -1,7 +1,7 @@
 #ifndef UTENSOR_CTX_H
 #define UTENSOR_CTX_H
 
-#include
+//#include
 
 //TODO: how do we deal with dangling tensors?
 // only allow pushing for exact number of inputs
 // output reference count are initialized to 0, incremented only on input-push
 // outputs are allocated in ops
 // output lists can contain nullptr/empty-tensors
 // tensors can be all pointers here, but destructors has to set data to nullptr
 // push(op, input_t_list, output_t_list) or push(op, init-list, init-list)
 // TensorListModifierOp
@@ -14,22 +14,26 @@ class Context : uTensor {
 protected:
   vector op_list;
-  std::unordered_map tensor_lookup;
+  bool del_after_op;
+  //std::unordered_map TensorList;  //all tensors alive  //kill all unused if malloc failed?
+  //uint32_t m_size;  //remaining memory size
+  //void registerTensor(Tensor* t);
+  //void gc(void);  //garbage collector, delete any tracked unreferenced tensor
 
-  void initOpTensors(vector &t_list);
-  void deinitTensors(vector &t_list);
-  void registerInputTensors(vector &t_list);
-  void registerOutputTensors(vector &t_list);
-  void decreRefCount(vector &t_list);
-
-  //void unref2nullTensors(vector &t_list);
-  //replace non-referenced output to null-tensors
+  void initOpTensors(TList &t_list);
+  void deinitTensors(TList &t_list);
+  void updateInputTensorRef(TList &t_list);
+  void dcrRefCount(TList &t_list);
 
 public:
   void push(Operator op, TList &_inputs, TList &_outputs);
   int run(void);
 };
 
+Context() {
+  del_onsight = true;
+}
+
 void Context::push(Operator op, TList &_inputs, TList &_outputs) {
   if(op.getInputCount() != _inputs.size()) {
     ERR_EXIT("valid number of inputs\r\n");
@@ -39,35 +43,21 @@ void Context::push(Operator op, TList &_inputs, TList &_outputs) {
   }
 
   op.setInputs(_inputs);
-  op.setInputs(_outputs);
+  op.setOutputs(_outputs);
 
   op_list.push_back(op);
-  registerInputTensors(_inputs);
-  registerOutputTensors(_outputs);
-
-}
-
+  updateInputTensorRef(_inputs);
 
}
 
-void Context::registerInputTensors(TList &t_list) {
+void Context::updateInputTensorRef(TList &t_list) {
   for(auto t:t_list) {
-    auto ref_count = tensor_lookup.find(t);
-    if(ref_count == tensor_lookup.end()) {
-      tensor_lookup[t] = 1;
-    } else {
-      tensor_lookup[t]++;
-    }
+    t->incrRef();  //if an initial ref value is supplied to the tensor at compile time
+                   //then this function does nothing
+                   //otherwise, it increment the internal ref count of the tensor
+                   //in internal count is init to 0 by the tensor constructor
   }
 }
 
-void Context::registerOutputTensors(TList &t_list) {
-  for(auto t:t_list) {
-    auto ref_count = tensor_lookup.find(t);
-    if(ref_count == tensor_lookup.end()) {
-      tensor_lookup[t] = 0;
-    }
-  }
-}
-
 void Context::initOpTensors(vector &t_list) {
   for(auto t:t_list) {
     t->inFocus();
@@ -80,11 +70,11 @@ void Context::deinitTensors(vector &t_list) {
   }
 }
 
-void Context::decreRefCount(vector &t_list) {
+void Context::dcrRefCount(vector &t_list) {
   for(auto t:t_list) {
-    tensor_lookup[t] = tensor_lookup[t] - 1;
-    if(tensor_lookup[t] < 1) {
-      t->~Tensor();
+    t->dcrRef();
+    if(t->getRef() < 1 && del_onsight) {
+      delete t;
     }
   }

diff --git a/uTensorBase.hpp b/uTensorBase.hpp
index 85940395..66d863e1 100644
--- a/uTensorBase.hpp
+++ b/uTensorBase.hpp
@@ -3,7 +3,6 @@
 
 #include "tensor.hpp"
 
-typedef long long TensorPtr;
 typedef vector TList;
 
 class uTensor {

From cd61f37f84ee0ae006cc33deb8143adde726a788 Mon Sep 17 00:00:00 2001
From: Neil Tan
Date: Wed, 1 Nov 2017 16:17:08 +0800
Subject: [PATCH 10/80] typo

---
 context.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/context.hpp b/context.hpp
index 214e7470..9513914d 100644
--- a/context.hpp
+++ b/context.hpp
@@ -14,7 +14,7 @@ class Context : uTensor {
 protected:
   vector op_list;
-  bool del_after_op;
+  bool del_onsight;
   //std::unordered_map TensorList;  //all tensors alive  //kill all unused if malloc failed?
   //uint32_t m_size;  //remaining memory size
   //void registerTensor(Tensor* t);

From 12d77c8572251c6115048b589693458a24ad5c85 Mon Sep 17 00:00:00 2001
From: Neil Tan
Date: Wed, 1 Nov 2017 16:10:34 +0800
Subject: [PATCH 11/80] suggesting tensor ref counter

---
 context.hpp     | 60 +++++++++++++++++++++----------------------------
 uTensorBase.hpp |  1 -
 2 files changed, 25 insertions(+), 36 deletions(-)

diff --git a/context.hpp b/context.hpp
index 48d1aa2c..214e7470 100644
--- a/context.hpp
+++ b/context.hpp
@@ -1,7 +1,7 @@
 #ifndef UTENSOR_CTX_H
 #define UTENSOR_CTX_H
 
-#include
+//#include
 
 //TODO: how do we deal with dangling tensors?
 // only allow pushing for exact number of inputs
 // output reference count are initialized to 0, incremented only on input-push
 // outputs are allocated in ops
 // output lists can contain nullptr/empty-tensors
 // tensors can be all pointers here, but destructors has to set data to nullptr
 // push(op, input_t_list, output_t_list) or push(op, init-list, init-list)
 // TensorListModifierOp
@@ -14,22 +14,26 @@ class Context : uTensor {
 protected:
   vector op_list;
-  std::unordered_map tensor_lookup;
+  bool del_after_op;
+  //std::unordered_map TensorList;  //all tensors alive  //kill all unused if malloc failed?
+  //uint32_t m_size;  //remaining memory size
+  //void registerTensor(Tensor* t);
+  //void gc(void);  //garbage collector, delete any tracked unreferenced tensor
 
-  void initOpTensors(vector &t_list);
-  void deinitTensors(vector &t_list);
-  void registerInputTensors(vector &t_list);
-  void registerOutputTensors(vector &t_list);
-  void decreRefCount(vector &t_list);
-
-  //void unref2nullTensors(vector &t_list);
-  //replace non-referenced output to null-tensors
+  void initOpTensors(TList &t_list);
+  void deinitTensors(TList &t_list);
+  void updateInputTensorRef(TList &t_list);
+  void dcrRefCount(TList &t_list);
 
 public:
   void push(Operator op, TList &_inputs, TList &_outputs);
   int run(void);
 };
 
+Context() {
+  del_onsight = true;
+}
+
 void Context::push(Operator op, TList &_inputs, TList &_outputs) {
   if(op.getInputCount() != _inputs.size()) {
     ERR_EXIT("valid number of inputs\r\n");
@@ -39,35 +43,21 @@ void Context::push(Operator op, TList &_inputs, TList &_outputs) {
   }
 
   op.setInputs(_inputs);
-  op.setInputs(_outputs);
+  op.setOutputs(_outputs);
 
   op_list.push_back(op);
-  registerInputTensors(_inputs);
-  registerOutputTensors(_outputs);
-
-}
-
+  updateInputTensorRef(_inputs);
 
}
 
-void Context::registerInputTensors(TList &t_list) {
+void Context::updateInputTensorRef(TList &t_list) {
   for(auto t:t_list) {
-    auto ref_count = tensor_lookup.find(t);
-    if(ref_count == tensor_lookup.end()) {
-      tensor_lookup[t] = 1;
-    } else {
-      tensor_lookup[t]++;
-    }
+    t->incrRef();  //if an initial ref value is supplied to the tensor at compile time
+                   //then this function does nothing
+                   //otherwise, it increment the internal ref count of the tensor
+                   //in internal count is init to 0 by the tensor constructor
   }
 }
 
-void Context::registerOutputTensors(TList &t_list) {
-  for(auto t:t_list) {
-    auto ref_count = tensor_lookup.find(t);
-    if(ref_count == tensor_lookup.end()) {
-      tensor_lookup[t] = 0;
-    }
-  }
-}
-
 void Context::initOpTensors(vector &t_list) {
   for(auto t:t_list) {
     t->inFocus();
@@ -80,11 +70,11 @@ void Context::deinitTensors(vector &t_list) {
   }
 }
 
-void Context::decreRefCount(vector &t_list) {
+void Context::dcrRefCount(vector &t_list) {
   for(auto t:t_list) {
-    tensor_lookup[t] = tensor_lookup[t] - 1;
-    if(tensor_lookup[t] < 1) {
-      t->~Tensor();
+    t->dcrRef();
+    if(t->getRef() < 1 && del_onsight) {
+      delete t;
     }
   }

diff --git a/uTensorBase.hpp b/uTensorBase.hpp
index 85940395..66d863e1 100644
--- a/uTensorBase.hpp
+++ b/uTensorBase.hpp
@@ -3,7 +3,6 @@
 
 #include "tensor.hpp"
 
-typedef long long TensorPtr;
 typedef vector TList;
 
 class uTensor {

From 60e0439b70f884dd23b10d332baa5653bd3e0ffc Mon Sep 17 00:00:00 2001
From: Neil Tan
Date: Wed, 1 Nov 2017 16:17:08 +0800
Subject: [PATCH 12/80] typo

---
 context.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/context.hpp b/context.hpp
index 214e7470..9513914d 100644
--- a/context.hpp
+++ b/context.hpp
@@ -14,7 +14,7 @@ class Context : uTensor {
 protected:
   vector op_list;
-  bool del_after_op;
+  bool del_onsight;
   //std::unordered_map TensorList;  //all tensors alive  //kill all unused if malloc failed?
   //uint32_t m_size;  //remaining memory size
   //void registerTensor(Tensor* t);

From 68a905f94c6498f29a25a34b7c4fc7ed4925c715 Mon Sep 17 00:00:00 2001
From: kazami
Date: Wed, 1 Nov 2017 17:00:58 +0800
Subject: [PATCH 13/80] make idxTest pass firstly

---
 main.cpp                   |  6 ++++--
 tensorIdxImporter.hpp      | 22 +++++++++++-----------
 tensorIdxImporterTests.hpp | 16 ++++++++--------
 test.hpp                   | 38 +++++++++++++++++++------------------
 4 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/main.cpp b/main.cpp
index f2a4b4ca..1d70e2c7 100644
--- a/main.cpp
+++ b/main.cpp
@@ -5,6 +5,7 @@
 #include "stdio.h"
 #include "uTensor_util.hpp"
 #include "tensor.hpp"
+#include "tensorIdxImporterTests.hpp"
 //#include "deep_mnist_mlp.hpp"
 
 Serial pc(USBTX, USBRX, 115200);
 SDBlockDevice bd(MBED_CONF_APP_SD_MOSI, MBED_CONF_APP_SD_MISO,
@@ -21,8 +22,9 @@ int main(int argc, char** argv) {
   // int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx");
   // printf("prediction: %d\r\n", prediction);
-  Tensor *a = new RamTensor();
-  delete a;
+  idxImporterTest idxTest;
+  idxTest.runAll();
+  idxTest.printSummary();
 
   //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]})
   //Out[24]: array([7])

diff --git a/tensorIdxImporter.hpp b/tensorIdxImporter.hpp
index 86934bc4..21088f4a 100644
--- a/tensorIdxImporter.hpp
+++ b/tensorIdxImporter.hpp
@@ -33,16 +33,16 @@ class TensorIdxImporter {
   HeaderMeta header;
   HeaderMeta parseHeader(void);
   template
-  Tensor loader(string &filename, IDX_DTYPE idx_type);
+  Tensor* loader(string &filename, IDX_DTYPE idx_type);
   void open(string filename);
   //void open(FILE *fp);
 
   public:
-  Tensor ubyte_import(string filename) { return loader(filename, IDX_DTYPE::idx_ubyte);}
-  Tensor byte_import(string filename) { return loader(filename, IDX_DTYPE::idx_byte);}
-  Tensor short_import(string filename) { return loader(filename, IDX_DTYPE::idx_short);}
-  Tensor int_import(string filename) { return loader(filename, IDX_DTYPE::idx_int);}
-  Tensor float_import(string filename) { return loader(filename, IDX_DTYPE::idx_float);}
+  Tensor* ubyte_import(string filename) { return loader(filename, IDX_DTYPE::idx_ubyte);}
+  Tensor* byte_import(string filename) { return loader(filename, IDX_DTYPE::idx_byte);}
+  Tensor* short_import(string filename) { return loader(filename, IDX_DTYPE::idx_short);}
+  Tensor* int_import(string filename) { return loader(filename, IDX_DTYPE::idx_int);}
+  Tensor* float_import(string filename) { return loader(filename, IDX_DTYPE::idx_float);}
   uint32_t getMagicNumber(unsigned char dtype, unsigned char dim);
   uint8_t getIdxDTypeSize(IDX_DTYPE dtype) ;
   //Tensor double_import(string filename) {};
@@ -108,7 +108,7 @@ HeaderMeta TensorIdxImporter::parseHeader(void) {
 }
 
 template
-Tensor TensorIdxImporter::loader(string &filename, IDX_DTYPE idx_type) {
+Tensor* TensorIdxImporter::loader(string &filename, IDX_DTYPE idx_type) {
   fp = fopen (filename.c_str(), "r" );
 
   DEBUG("Opening file %s ", filename.c_str());
@@ -122,13 +122,13 @@ Tensor TensorIdxImporter::loader(string &filename, IDX_DTYPE idx_type) {
 
   fseek(fp, header.dataPos, SEEK_SET);  //need error handling
 
-  Tensor t = Tensor(header.dim);  //tensor allocated
-  const uint8_t unit_size = t.unit_size();
+  Tensor* t = new RamTensor(header.dim);  //tensor allocated
+  const uint8_t unit_size = t->unit_size();
 
   U* val = (U *) malloc(unit_size);
-  U* data = t.getPointer({});
+  U* data = t->read({});
 
-  for(uint32_t i = 0; i < t.getSize(); i++) {
+  for(uint32_t i = 0; i < t->getSize(); i++) {
     fread(val, unit_size, 1, fp);
 
     switch (unit_size) {

diff --git a/tensorIdxImporterTests.hpp b/tensorIdxImporterTests.hpp
index 97de9f78..331ad0eb 100644
--- a/tensorIdxImporterTests.hpp
+++ b/tensorIdxImporterTests.hpp
@@ -19,10 +19,10 @@ class idxImporterTest : public Test {
     testStart("uchar import test");
     TensorIdxImporter t_import;
     timer_start();
-    Tensor t =
+    Tensor* t =
         t_import.ubyte_import("/fs/testData/idxImport/uint8_4d_power2.idx");
     timer_stop();
-    double result = sum(t);
+    double result = sum(t);
     passed(result == 4518);
   }
 
@@ -30,10 +30,10 @@ class idxImporterTest : public Test {
     testStart("short import test");
     TensorIdxImporter t_import;
     timer_start();
-    Tensor t =
+    Tensor* t =
         t_import.short_import("/fs/testData/idxImport/int16_4d_power2.idx");
     timer_stop();
-    double result = sum(t);
+    double result = sum(t);
     passed(result == 270250);
   }
 
@@ -41,10 +41,10 @@ class idxImporterTest : public Test {
     testStart("int import test");
     TensorIdxImporter t_import;
     timer_start();
-    Tensor t =
+    Tensor* t =
         t_import.int_import("/fs/testData/idxImport/int32_4d_power2.idx");
     timer_stop();
-    double result = sum(t);
+    double result = sum(t);
     passed(result == 5748992600);
   }
 
@@ -52,11 +52,11 @@ class idxImporterTest : public Test {
     testStart("float import test");
     TensorIdxImporter t_import;
     timer_start();
-    Tensor t =
+    Tensor* t =
         t_import.float_import("/fs/testData/idxImport/float_4d_power2.idx");
     timer_stop();
 
-    double result = sum(t);
+    double result = sum(t);
     DEBUG("***floating point test yielded: %.8e\r\n", (float)result);
     passed((float)result == -1.0f);
   }

diff --git a/test.hpp b/test.hpp
index fa8f0c60..50ee365a 100644
--- a/test.hpp
+++ b/test.hpp
@@ -7,6 +7,7 @@
 #include
 #include "mbed.h"
 #include "uTensor_util.hpp"
+#include "tensor.hpp"
 
 class Test {
  private:
@@ -103,11 +104,11 @@ class Test {
 
   virtual void runAll(void) = 0;
 
-  template
-  double sum(Tensor input) {
-    U* elem = input.getPointer({});
+  template
+  double sum(Tensor* input) {
+    U* elem = input->read({});
     double accm = 0.0;
-    for (uint32_t i = 0; i < input.getSize(); i++) {
+    for (uint32_t i = 0; i < input->getSize(); i++) {
       accm += (double)elem[i];
     }
 
@@ -134,16 +135,16 @@ class Test {
   }
 
   template
-  static double meanAbsErr(Tensor A, Tensor B) {
-    if (A.getSize() != B.getSize()) {
+  static double meanAbsErr(Tensor* A, Tensor* B) {
+    if (A->getSize() != B->getSize()) {
       ERR_EXIT("Test.meanAbsErr(): dimension mismatch\r\n");
     }
 
-    U* elemA = A.getPointer({});
-    U* elemB = B.getPointer({});
+    U* elemA = A->read({});
+    U* elemB = B->read({});
 
     double accm = 0.0;
-    for (uint32_t i = 0; i < A.getSize(); i++) {
+    for (uint32_t i = 0; i < A->getSize(); i++) {
       accm += (double)fabs((float)elemB[i] - (float)elemA[i]);
     }
 
@@ -152,16 +153,16 @@ class Test {
 
   // A being the reference
   template
-  static double sumPercentErr(Tensor A, Tensor B) {
-    if (A.getSize() != B.getSize()) {
+  static double sumPercentErr(Tensor* A, Tensor* B) {
+    if (A->getSize() != B->getSize()) {
       ERR_EXIT("Test.sumPercentErr(): dimension mismatch\r\n");
     }
 
-    U* elemA = A.getPointer({});
-    U* elemB = B.getPointer({});
+    U* elemA = A->read({});
+    U* elemB = B->read({});
 
     double accm = 0.0;
-    for (uint32_t i = 0; i < A.getSize(); i++) {
+    for (uint32_t i = 0; i < A->getSize(); i++) {
       if (elemA[i] != 0.0f) {
         accm += (double)fabs(((float)elemB[i] - (float)elemA[i]) /
                              fabs((float)elemA[i]));
@@ -173,11 +174,10 @@ class Test {
     }
     return accm;
   }
-
-  template
-  static double meanPercentErr(Tensor A, Tensor B) {
-    double sum = sumPercentErr(A, B);
-    return sum / A.getSize();
+  template
+  static double meanPercentErr(Tensor* A, Tensor* B) {
+    double sum = sumPercentErr(A, B);
+    return sum / A->getSize();
   }
 };

From 6b4349a61fc140294dd64f58b3d1c968cae0ce7f Mon Sep 17 00:00:00 2001
From: kazami
Date: Wed, 1 Nov 2017 17:07:56 +0800
Subject: [PATCH 14/80] replace tensor to tensor declaration in other
 functions such as TensorConstant

---
 tensor.hpp | 50 ++++++++++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 24 deletions(-)

diff --git a/tensor.hpp b/tensor.hpp
index 3d5edaa7..04517155 100644
--- a/tensor.hpp
+++ b/tensor.hpp
@@ -150,20 +150,23 @@ class RamTensor : public Tensor {
   }*/
   // virtual void* read(size_t offset, size_t ele) override{};
   virtual void* write(size_t offset, size_t ele) override{};
-  virtual uint16_t unit_size(void) override { std::cout << "my unit size" << std::endl; return sizeof(T); }
+  virtual uint16_t unit_size(void) override {
+    std::cout << "my unit size" << std::endl;
+    return sizeof(T);
+  }
   ~RamTensor() { std::cout << "i am ramtensor destructor" << std::endl; }
 
  private:
   T* cursor;
 };
 
-/*template
-Tensor TensorCast(Tensor input) {
-  Tensor output(input.getShape());
-  Tin* inputPrt = input.getPointer({});
-  Tout* outputPrt = output.getPointer({});
+template
+Tensor* TensorCast(Tensor* input) {
+  Tensor* output = new RamTensor(input->getShape());
+  Tin* inputPrt = input->read({});
+  Tout* outputPrt = output->read({});
 
-  for (uint32_t i = 0; i < input.getSize(); i++) {
+  for (uint32_t i = 0; i < input->getSize(); i++) {
     outputPrt[i] = static_cast(inputPrt[i]);
   }
 
@@ -171,11 +174,11 @@ Tensor TensorCast(Tensor input) {
 }
 
 template
-Tensor TensorConstant(std::vector shape, T c) {
-  Tensor output(shape);
-  T* outPrt = output.getPointer({});
+Tensor* TensorConstant(std::vector shape, T c) {
+  Tensor* output = new RamTensor(shape);
+  T* outPrt = output->read({});
 
-  for (uint32_t i = 0; i < output.getSize(); i++) {
+  for (uint32_t i = 0; i < output->getSize(); i++) {
     outPrt[i] = c;
   }
 
@@ -183,8 +186,8 @@ Tensor TensorConstant(std::vector shape, T c) {
 }
 
 template
-Tensor TensorConstant(std::initializer_list l, T c) {
-  std::vector v;
+Tensor* TensorConstant(std::initializer_list l, T c) {
+  std::vector v;
   for (auto i : l) {
     v.push_back(i);
   }
@@ -200,8 +203,8 @@ Tensor TensorConstant(std::initializer_list l, T c) {
 
 class permuteIndexTransform {
  private:
-  std::vector permute;
-  std::vector depermute;
+  std::vector permute;
+  std::vector depermute;
   Shape in_shape;
   Shape in_stride;
   Shape out_shape;
@@ -281,25 +284,24 @@ class permuteIndexTransform {
     return out_index;
   }
-
 };
 
 template
-void printDim(Tensor t) {
+void printDim(Tensor* t) {
   printf("Dimension: ");
-  Shape s = t.getShape();
-  for(auto d:s) {
+  Shape s = t->getShape();
+  for (auto d : s) {
     printf("[%lu] ", d);
   }
   printf("\r\n");
 }
 
 template
-void tensorChkAlloc(Tensor &t, Shape dim) {
-  if (t.getSize() == 0) {
-    t = Tensor(dim);
-  } else if (t.getShape() != dim) {
+void tensorChkAlloc(Tensor* t, Shape dim) {
+  if (t->getSize() == 0) {
+    t = new RamTensor(dim);
+  } else if (t->getShape() != dim) {
     ERR_EXIT("Dim mismatched...\r\n");
   }
-}*/
+}
 #endif

From 83509d137356ccf42522c53cbe2991f67f3dee16 Mon Sep 17 00:00:00 2001
From: kazami Date: Wed, 1 Nov 2017 17:10:00 +0800 Subject: [PATCH 15/80] fix coding style --- tensorIdxImporter.hpp | 219 ++++++++++++++++++++++-------------------- 1 file changed, 115 insertions(+), 104 deletions(-) diff --git a/tensorIdxImporter.hpp b/tensorIdxImporter.hpp index 21088f4a..8d77ee94 100644 --- a/tensorIdxImporter.hpp +++ b/tensorIdxImporter.hpp @@ -1,51 +1,61 @@ #ifndef UTENSOR_IDX_IMPORTER #define UTENSOR_IDX_IMPORTER -#include "mbed.h" -#include #include -#include "tensor.hpp" #include +#include +#include "mbed.h" +#include "tensor.hpp" #include "uTensor_util.hpp" using namespace std; enum IDX_DTYPE { - idx_ubyte = 0x08, - idx_byte = 0x09, - idx_short = 0x0B, - idx_int = 0x0C, - idx_float = 0x0D, - idx_double = 0x0E + idx_ubyte = 0x08, + idx_byte = 0x09, + idx_short = 0x0B, + idx_int = 0x0C, + idx_float = 0x0D, + idx_double = 0x0E }; class HeaderMeta { - public: - IDX_DTYPE dataType; - unsigned char numDim; - vector dim; - long int dataPos; + public: + IDX_DTYPE dataType; + unsigned char numDim; + vector dim; + long int dataPos; }; class TensorIdxImporter { - private: - FILE *fp; - HeaderMeta header; - HeaderMeta parseHeader(void); - template - Tensor* loader(string &filename, IDX_DTYPE idx_type); - void open(string filename); - //void open(FILE *fp); - - public: - Tensor* ubyte_import(string filename) { return loader(filename, IDX_DTYPE::idx_ubyte);} - Tensor* byte_import(string filename) { return loader(filename, IDX_DTYPE::idx_byte);} - Tensor* short_import(string filename) { return loader(filename, IDX_DTYPE::idx_short);} - Tensor* int_import(string filename) { return loader(filename, IDX_DTYPE::idx_int);} - Tensor* float_import(string filename) { return loader(filename, IDX_DTYPE::idx_float);} - uint32_t getMagicNumber(unsigned char dtype, unsigned char dim); - uint8_t getIdxDTypeSize(IDX_DTYPE dtype) ; - //Tensor double_import(string filename) {}; + private: + FILE* fp; + HeaderMeta header; + HeaderMeta parseHeader(void); + template + Tensor* loader(string& filename, IDX_DTYPE idx_type); + void open(string filename); + // void open(FILE *fp); + + public: + Tensor* ubyte_import(string filename) { + return loader(filename, IDX_DTYPE::idx_ubyte); + } + Tensor* byte_import(string filename) { + return loader(filename, IDX_DTYPE::idx_byte); + } + Tensor* short_import(string filename) { + return loader(filename, IDX_DTYPE::idx_short); + } + Tensor* int_import(string filename) { + return loader(filename, IDX_DTYPE::idx_int); + } + Tensor* float_import(string filename) { + return loader(filename, IDX_DTYPE::idx_float); + } + uint32_t getMagicNumber(unsigned char dtype, unsigned char dim); + uint8_t getIdxDTypeSize(IDX_DTYPE dtype); + // Tensor double_import(string filename) {}; }; // void TensorIdxImporter::open(FILE *_fp) { @@ -54,103 +64,104 @@ class TensorIdxImporter { // } uint8_t TensorIdxImporter::getIdxDTypeSize(IDX_DTYPE dtype) { - - switch(dtype) { - case idx_ubyte: - return 1; - case idx_byte: - return 1; - case idx_short: - return 2; - case idx_int: - return 4; - case idx_float: - return 4; - case idx_double: - return 8; - } - - return 0; + switch (dtype) { + case idx_ubyte: + return 1; + case idx_byte: + return 1; + case idx_short: + return 2; + case idx_int: + return 4; + case idx_float: + return 4; + case idx_double: + return 8; + } + + return 0; } -uint32_t TensorIdxImporter::getMagicNumber(unsigned char dtype, unsigned char dim) { - uint32_t magic = 0; +uint32_t TensorIdxImporter::getMagicNumber(unsigned char dtype, + unsigned char dim) { + uint32_t magic = 
0; - magic = (magic | dtype) << 8; - magic = magic | dim; + magic = (magic | dtype) << 8; + magic = magic | dim; - return magic; + return magic; } HeaderMeta TensorIdxImporter::parseHeader(void) { - unsigned char *buf = (unsigned char*) malloc(sizeof(unsigned char) * 4); + unsigned char* buf = (unsigned char*)malloc(sizeof(unsigned char) * 4); - fread(buf, 1, 4, fp); - if(buf[0] != 0 || buf[0] != 0) { - printf("Error, header magic number invalid\r\n"); - } + fread(buf, 1, 4, fp); + if (buf[0] != 0 || buf[0] != 0) { + printf("Error, header magic number invalid\r\n"); + } - HeaderMeta header; - header.dataType = static_cast(buf[2]); - header.numDim = buf[3]; + HeaderMeta header; + header.dataType = static_cast(buf[2]); + header.numDim = buf[3]; - for(int i = 0; i < header.numDim; i++) { - fread(buf, 1, 4, fp); - uint32_t dimSize = ntoh32(*(uint32_t*) buf); - header.dim.push_back(dimSize); - } + for (int i = 0; i < header.numDim; i++) { + fread(buf, 1, 4, fp); + uint32_t dimSize = ntoh32(*(uint32_t*)buf); + header.dim.push_back(dimSize); + } - free(buf); + free(buf); - header.dataPos = ftell(fp); - - return header; -} + header.dataPos = ftell(fp); -template -Tensor* TensorIdxImporter::loader(string &filename, IDX_DTYPE idx_type) { - fp = fopen (filename.c_str(), "r" ); + return header; +} - DEBUG("Opening file %s ", filename.c_str()); - if(fp == NULL) ERR_EXIT("Error opening file: %s", filename.c_str()); +template +Tensor* TensorIdxImporter::loader(string& filename, IDX_DTYPE idx_type) { + fp = fopen(filename.c_str(), "r"); - header = parseHeader(); + DEBUG("Opening file %s ", filename.c_str()); + if (fp == NULL) ERR_EXIT("Error opening file: %s", filename.c_str()); - if(header.dataType != idx_type) { - ERR_EXIT("TensorIdxImporter: header and tensor type mismatch\r\n"); - } + header = parseHeader(); - fseek(fp, header.dataPos, SEEK_SET); //need error handling + if (header.dataType != idx_type) { + ERR_EXIT("TensorIdxImporter: header and tensor type mismatch\r\n"); + } - Tensor* t = new RamTensor(header.dim); //tensor allocated - const uint8_t unit_size = t->unit_size(); + fseek(fp, header.dataPos, SEEK_SET); // need error handling - U* val = (U *) malloc(unit_size); - U* data = t->read({}); + Tensor* t = new RamTensor(header.dim); // tensor allocated + const uint8_t unit_size = t->unit_size(); - for(uint32_t i = 0; i < t->getSize(); i++) { - fread(val, unit_size, 1, fp); + U* val = (U*)malloc(unit_size); + U* data = t->read({}); - switch (unit_size) { - case 2: - *(uint16_t *) val = ntoh16(*(uint16_t *) val); - break; - case 4: - *(uint32_t *) val = ntoh32(*(uint32_t *) val); - break; - default: - break; - } + for (uint32_t i = 0; i < t->getSize(); i++) { + fread(val, unit_size, 1, fp); - //val = htonl((uint32_t) buff); //NT: testing for uint8 only, deference error here - data[i] = *val ; + switch (unit_size) { + case 2: + *(uint16_t*)val = ntoh16(*(uint16_t*)val); + break; + case 4: + *(uint32_t*)val = ntoh32(*(uint32_t*)val); + break; + default: + break; } - free(val); + // val = htonl((uint32_t) buff); //NT: testing for uint8 only, deference + // error here + data[i] = *val; + } + + free(val); - ON_ERR(fclose(fp), "Closing file..."); + ON_ERR(fclose(fp), "Closing file..."); - return t; + return t; } -#endif //UTENSOR_IDX_IMPORTER +#endif // UTENSOR_IDX_IMPORTER From 2d9ca3a789adc38e21886d5f51ca39ac1eb32ef3 Mon Sep 17 00:00:00 2001 From: kazami Date: Thu, 2 Nov 2017 17:49:41 +0800 Subject: [PATCH 16/80] 1. 
change read function syntax according to interface --- tensor.hpp | 32 +++++++++++++++----------------- tensorIdxImporter.hpp | 2 +- test.hpp | 10 +++++----- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/tensor.hpp b/tensor.hpp index 04517155..ace333ba 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -25,7 +25,6 @@ class TensorBase { ~TensorBase() { if (data != nullptr) { - std::cout << "i am tensorbase destructor " << std::endl; free(data); DEBUG("TensorBase memory freed..\r\n"); } @@ -33,13 +32,13 @@ class TensorBase { }; class Tensor : uTensor { - virtual void* read(std::initializer_list l) { return nullptr; } + virtual void* read(size_t offset, size_t ele) { return nullptr; } virtual void* write(size_t offset, size_t ele) { return nullptr; } protected: std::shared_ptr s; // short for states public: - Tensor(void) { std::cout << "tensor constructor " << std::endl; } + Tensor(void) {} // returns how far a given dimension is apart size_t getStride(size_t dim_index) { @@ -53,7 +52,6 @@ class Tensor : uTensor { } template void init(std::vector& v) { - std::cout << "initialize with type" << std::endl; s = std::make_shared(); s->total_size = 0; @@ -83,13 +81,12 @@ class Tensor : uTensor { size_t getDim(void) { return s->shape.size(); } template - T* read(std::initializer_list l) { - return (T*)read(l); + T* read(size_t offset, size_t ele) { + return (T*)read(offset, ele); } ~Tensor() { s = nullptr; - std::cout << "i am tensor destructor " << std::endl; DEBUG("Tensor Destructed\r\n"); } }; @@ -99,14 +96,12 @@ class RamTensor : public Tensor { // need deep copy public: RamTensor() : Tensor() { - std::cout << "ramtensor " << std::endl; std::vector v(3, 3); Tensor::init(v); cursor = nullptr; } RamTensor(std::initializer_list l) : Tensor() { - std::cout << "ram con " << std::endl; std::vector v; for (auto i : l) { v.push_back(i); @@ -116,7 +111,6 @@ class RamTensor : public Tensor { } RamTensor(std::vector& v) : Tensor() { - std::cout << "2 ram con " << std::endl; Tensor::init(v); } @@ -124,7 +118,10 @@ class RamTensor : public Tensor { // POST: When a degenerative index is supplied, the pointer // lowest specified dimension is returned. // Otherwise, return the pointer to the specific element. 
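// (The change that follows swaps the initializer-list lookup for a flat
//  (offset, ele) accessor: callers now compute a row-major offset into the
//  buffer themselves. A minimal usage sketch, assuming a 2-D float
//  RamTensor t of shape {rows, cols} so that getStride(0) == cols:
//    float* row = t->read<float>(r * t->getStride(0), 0);  // start of row r
//    float x = row[c];                                      // element (r, c)
//  The ele argument is unused by RamTensor in this draft; call sites pass 0.)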
- virtual void* read(std::initializer_list l) override { + virtual void* read(size_t offset, size_t ele) override { + return (void *)((T*)s->data + offset); + } + /*virtual void* read(std::initializer_list l) override { size_t p_offset = 0; signed short current_dim = 0; for (auto i : l) { @@ -136,7 +133,7 @@ class RamTensor : public Tensor { return (void*)((T*)s->data + p_offset); } - /* T* getPointer(std::vector v) { + T* getPointer(std::vector v) { size_t p_offset = 0; signed short current_dim = 0; for (auto i : v) { @@ -149,12 +146,13 @@ class RamTensor : public Tensor { return s->data + p_offset; }*/ // virtual void* read(size_t offset, size_t ele) override{}; - virtual void* write(size_t offset, size_t ele) override{}; + virtual void* write(size_t offset, size_t ele) override{ + return (void *)((T*)s->data + offset); + }; virtual uint16_t unit_size(void) override { - std::cout << "my unit size" << std::endl; return sizeof(T); } - ~RamTensor() { std::cout << "i am ramtensor destructor" << std::endl; } + ~RamTensor() {} private: T* cursor; @@ -163,7 +161,7 @@ class RamTensor : public Tensor { template Tensor* TensorCast(Tensor* input) { Tensor* output = new RamTensor(input->getShape()); - Tin* inputPrt = input->read({}); + Tin* inputPrt = input->read(0, 0); Tout* outputPrt = output->read({}); for (uint32_t i = 0; i < input->getSize(); i++) { @@ -176,7 +174,7 @@ Tensor* TensorCast(Tensor* input) { template Tensor* TensorConstant(std::vector shape, T c) { Tensor* output = new RamTensor(shape); - T* outPrt = output->read({}); + T* outPrt = output->read(0, 0); for (uint32_t i = 0; i < output->getSize(); i++) { outPrt[i] = c; diff --git a/tensorIdxImporter.hpp b/tensorIdxImporter.hpp index 8d77ee94..98011fab 100644 --- a/tensorIdxImporter.hpp +++ b/tensorIdxImporter.hpp @@ -136,7 +136,7 @@ Tensor* TensorIdxImporter::loader(string& filename, IDX_DTYPE idx_type) { const uint8_t unit_size = t->unit_size(); U* val = (U*)malloc(unit_size); - U* data = t->read({}); + U* data = t->read(0, 0); for (uint32_t i = 0; i < t->getSize(); i++) { fread(val, unit_size, 1, fp); diff --git a/test.hpp b/test.hpp index 50ee365a..55229b7b 100644 --- a/test.hpp +++ b/test.hpp @@ -106,7 +106,7 @@ class Test { template double sum(Tensor* input) { - U* elem = input->read({}); + U* elem = input->read(0, 0); double accm = 0.0; for (uint32_t i = 0; i < input->getSize(); i++) { accm += (double)elem[i]; @@ -140,8 +140,8 @@ class Test { ERR_EXIT("Test.meanAbsErr(): dimension mismatch\r\n"); } - U* elemA = A->read({}); - U* elemB = B->read({}); + U* elemA = A->read(0, 0); + U* elemB = B->read(0, 0); double accm = 0.0; for (uint32_t i = 0; i < A->getSize(); i++) { @@ -158,8 +158,8 @@ class Test { ERR_EXIT("Test.sumPercentErr(): dimension mismatch\r\n"); } - U* elemA = A->read({}); - U* elemB = B->read({}); + U* elemA = A->read(0, 0); + U* elemB = B->read(0, 0); double accm = 0.0; for (uint32_t i = 0; i < A->getSize(); i++) { From 038631d4f4ec9fc60a90c29b33e0a51f4796068f Mon Sep 17 00:00:00 2001 From: kazami Date: Thu, 2 Nov 2017 18:17:25 +0800 Subject: [PATCH 17/80] make syntax of write function correct --- tensor.hpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tensor.hpp b/tensor.hpp index ace333ba..85508e56 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -85,6 +85,11 @@ class Tensor : uTensor { return (T*)read(offset, ele); } + template + T* write(size_t offset, size_t ele) { + return (const T*)write(offset, ele); + } + ~Tensor() { s = nullptr; DEBUG("Tensor Destructed\r\n"); @@ -121,6 
+126,11 @@ class RamTensor : public Tensor { virtual void* read(size_t offset, size_t ele) override { return (void *)((T*)s->data + offset); } + virtual void* write(size_t offset, size_t ele) override { + return (void*)((T*)s->data + offset); + } + + /*virtual void* read(std::initializer_list l) override { size_t p_offset = 0; signed short current_dim = 0; @@ -146,9 +156,6 @@ class RamTensor : public Tensor { return s->data + p_offset; }*/ // virtual void* read(size_t offset, size_t ele) override{}; - virtual void* write(size_t offset, size_t ele) override{ - return (void *)((T*)s->data + offset); - }; virtual uint16_t unit_size(void) override { return sizeof(T); } From 34c5e30b640a96ec02448453c9e5e1913f530ff9 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Thu, 2 Nov 2017 20:59:07 +0800 Subject: [PATCH 18/80] context ops compile successfully --- context.hpp | 59 +++++++++++++++++++++---------------------- main.cpp | 4 +++- tensor.hpp | 51 ++++++++++++++++++++++++++++++++++++++---- uTensorBase.hpp | 22 +++++++----------- 4 files changed, 90 insertions(+), 46 deletions(-) diff --git a/context.hpp b/context.hpp index 9513914d..f0d41e6a 100644 --- a/context.hpp +++ b/context.hpp @@ -1,6 +1,9 @@ #ifndef UTENSOR_CTX_H #define UTENSOR_CTX_H +#include "uTensorBase.hpp" +#include "stdio.h" + //#include //TODO: how do we deal with dangling tensors? @@ -11,45 +14,46 @@ // only allow pushing for exact number of inputs // output reference count are initialized to 0, incremented only on input-push // outputs are allocated in ops // output lists can contain nullptr/empty-tensors // tensors can be all pointers here, but destructors has to set data to nullptr // push(op, input_t_list, output_t_list) or push(op, init-list, init-list) // TensorListModifierOp -class Context : uTensor { +class Context : public uTensor { protected: - vector op_list; + vector op_list; bool del_onsight; //std::unordered_map TensorList; //all tensors alive //kill all unused if malloc failed? //uint32_t m_size; //remaining memory size //void registerTensor(Tensor* t); //void gc(void); //garbage collector, delete any tracked unreferenced tensor - void initOpTensors(TList &t_list); - void deinitTensors(TList &t_list); - void updateInputTensorRef(TList &t_list); - void dcrRefCount(TList &t_list); + void initTensors(const TList &t_list); + void deinitTensors(const TList &t_list); + void updateInputTensorRef(const TList &t_list); + void dcrRefCount(TList t_list); public: - void push(Operator op, TList &_inputs, TList &_outputs); + void push(Operator *op, TList &_inputs, TList &_outputs); int run(void); + + Context() { + del_onsight = true; + } }; -Context() { - del_onsight = true; -} void Context::push(Operator *op, TList &_inputs, TList &_outputs) { - if(op.getInputCount() != _inputs.size()) { + if(op->getInputs().size() != _inputs.size()) { ERR_EXIT("valid number of inputs\r\n"); } - if(op.getOutputCount() != _outputs.size()) { + if(op->getOutputs().size() != _outputs.size()) { ERR_EXIT("valid number of output\r\n"); } - op.setInputs(_inputs); - op.setOutputs(_outputs); + op->setInputs(_inputs); + op->setOutputs(_outputs); op_list.push_back(op); updateInputTensorRef(_inputs); } -void Context::updateInputTensorRef(TList &t_list) { +void Context::updateInputTensorRef(const TList &t_list) { for(auto t:t_list) { t->incrRef(); //if an initial ref value is supplied to the tensor at compile time //then this function does nothing //this is to support compile-time ref count } } -void Context::initOpTensors(vector &t_list) { +void Context::initTensors(const TList &t_list) { for(auto t:t_list) { t->inFocus(); } } -void Context::deinitTensors(vector
&t_list) { +void Context::deinitTensors(const TList &t_list) { for(auto t:t_list) { t->deFocus(); } } -void Context::dcrRefCount(vector &t_list) { +void Context::dcrRefCount(TList t_list) { for(auto t:t_list) { t->dcrRef(); if(t->getRef() < 1 && del_onsight) { delete t; } + } } int Context::run(void) { //unref2nullTensors(); for(auto op:op_list) { - initTensors(op.getInputs()); - initTensors(op.getOutputs()); + initTensors(op->getInputs()); + initTensors(op->getOutputs()); - op.init(); - op.compute(); - op.deinit(); + op->inFocus(); + op->compute(); + op->deFocus(); - deinitOpTensors(op.getInputs()); - deinitOpTensors(op.getOutputs()); + deinitTensors(op->getInputs()); + deinitTensors(op->getOutputs()); - decreRefCount(op.getInputs()); + dcrRefCount(op->getInputs()); } } diff --git a/main.cpp b/main.cpp index 1d70e2c7..ce3b457f 100644 --- a/main.cpp +++ b/main.cpp @@ -1,4 +1,3 @@ -#include #include "FATFileSystem.h" #include "SDBlockDevice.h" #include "mbed.h" @@ -6,6 +5,7 @@ #include "uTensor_util.hpp" #include "tensor.hpp" #include "tensorIdxImporterTests.hpp" +#include "context.hpp" //#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); @@ -25,6 +25,8 @@ int main(int argc, char** argv) { idxImporterTest idxTest; idxTest.runAll(); idxTest.printSummary(); + + Context ctx; //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]}) //Out[24]: array([7]) diff --git a/tensor.hpp b/tensor.hpp index 04517155..5ea4e972 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -8,20 +8,32 @@ #include "stdlib.h" #include "uTensor_util.hpp" +enum class DType : char { + uint8, + int8, + uint16, + int32, + flt, + dbl, +}; + class uTensor { + public: virtual void inFocus(){}; virtual void deFocus(){}; - - public: virtual ~uTensor() = 0; }; + uTensor::~uTensor() {} class TensorBase { public: std::vector shape; void* data; uint32_t total_size; + DType dtype; + uint16_t ref_count; + bool allow_runtime_ref_inc; //to support compile-time ref count ~TensorBase() { if (data != nullptr) { @@ -32,14 +44,16 @@ class TensorBase { } }; -class Tensor : uTensor { +class Tensor : public uTensor { virtual void* read(std::initializer_list l) { return nullptr; } virtual void* write(size_t offset, size_t ele) { return nullptr; } protected: std::shared_ptr s; // short for states public: - Tensor(void) { std::cout << "tensor constructor " << std::endl; } + Tensor(void) { + std::cout << "tensor constructor " << std::endl; + } // returns how far a given dimension is apart size_t getStride(size_t dim_index) { @@ -69,6 +83,9 @@ class Tensor : uTensor { s->data = (void*)malloc(unit_size() * s->total_size); if (s->data == NULL) ERR_EXIT("ran out of memory for %lu malloc", unit_size() * s->total_size); + + s->ref_count = 0; + s->allow_runtime_ref_inc = false; } std::vector getShape(void) { return s->shape; } @@ -87,6 +104,31 @@ class Tensor : uTensor { return (T*)read(l); } + DType getDType(void) { + return s->dtype; + } + + uint16_t incrRef() { + if(s->allow_runtime_ref_inc) { + s->ref_count += 1; + } + + return s->ref_count; + } + + uint16_t dcrRef() { + s->ref_count -= 1; + return s->ref_count; + } + + uint16_t getRef() { + return s->ref_count; + } + + bool is_ref_runtime(void) { + return s->allow_runtime_ref_inc; + } + ~Tensor() { s = nullptr; std::cout << "i am tensor destructor " << std::endl; @@ -103,6 +145,7 @@ class RamTensor : public Tensor { std::vector v(3, 3); Tensor::init(v); cursor = nullptr; + //dtype = something... 
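// (a later pass would presumably tag s->dtype here from T, e.g. DType::flt
//  for float or DType::uint8 for uint8_t, so that the dtype_in/dtype_out
//  checks in Operator::setInputs/setOutputs can validate tensor bindings
//  at push time)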
} RamTensor(std::initializer_list l) : Tensor() { diff --git a/uTensorBase.hpp b/uTensorBase.hpp index 66d863e1..82c552d9 100644 --- a/uTensorBase.hpp +++ b/uTensorBase.hpp @@ -5,20 +5,14 @@ typedef vector TList; -class uTensor { - virtual void inFocus() {}; - virtual void deFocus() {}; - virtual ~uTensor() = 0; -}; - - //isType() https://stackoverflow.com/questions/9974596/how-to-check-whether-two-pointers-point-to-the-same-object-or-not //double dispatch //new vs stack -class Operator { +class Operator : public uTensor{ protected: //setup input/output info in derived constructors + //ref count? TList inputs; vector dtype_in; TList outputs; vector dtype_out; @@ -29,24 +23,24 @@ class Operator { void setInputs(TList &_inputs) { if(_inputs.size() != inputs.size()) ERR_EXIT("Input Tensor list mismatched..."); - for(uint8_t i = 0; i < input.size(); i++) { - if(dtype_in[i] != inputs.getType()) { + for(uint8_t i = 0; i < inputs.size(); i++) { + if(dtype_in[i] != inputs[i]->getDType()) { ERR_EXIT("Tensor Type mismatched..."); } - input[i] = _inputs[i]; + inputs[i] = _inputs[i]; } } void setOutputs(TList &_outputs) { if(_outputs.size() != outputs.size()) ERR_EXIT("Input Tensor list mismatched..."); - for(uint8_t i = 0; i < output.size(); i++) { - if(dtype_out[i].getType() != output[i].getType()) { + for(uint8_t i = 0; i < outputs.size(); i++) { + if(dtype_out[i] != outputs[i]->getDType()) { ERR_EXIT("Tensor Type mismatched..."); } - output[i] = _output[i] + outputs[i] = _outputs[i]; } } From 1b163e06f4f0e8ac9b6571a2d1cfcff6171dd81a Mon Sep 17 00:00:00 2001 From: kazami Date: Thu, 2 Nov 2017 22:16:11 +0800 Subject: [PATCH 19/80] revise main for test idx and matrixops --- main.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/main.cpp b/main.cpp index 1d70e2c7..dc0ae3d6 100644 --- a/main.cpp +++ b/main.cpp @@ -24,7 +24,15 @@ int main(int argc, char** argv) { // printf("prediction: %d\r\n", prediction); idxImporterTest idxTest; idxTest.runAll(); + + + printf("running matrix test ...\r\n"); +// matrixOpsTest matrixTests; +// matrixTests.runAll(); + printf("IDX import:\r\n"); idxTest.printSummary(); + printf("Matrix: \r\n"); +// matrixTests.printSummary(); //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]}) //Out[24]: array([7]) From 0f31406aad6485022246a69350a89e106bea397b Mon Sep 17 00:00:00 2001 From: kazami Date: Fri, 3 Nov 2017 15:48:21 +0800 Subject: [PATCH 20/80] 1. replace tensor in matrixops with the new one 2.
modify matrixtests for passing the test --- MatrixOps.hpp | 42 +++++++++++++++++++++--------------------- MatrixTests.hpp | 28 ++++++++++++++-------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/MatrixOps.hpp b/MatrixOps.hpp index a8a1799a..45031eaf 100644 --- a/MatrixOps.hpp +++ b/MatrixOps.hpp @@ -101,21 +101,21 @@ void QuantizationRangeForMultiplication(float min_a, float max_a, float min_b, } template -void QuantizedMatMul(Tensor A, Tensor B, Tensor &C, - Tensor mina, Tensor minb, Tensor maxa, - Tensor maxb, Tensor outmin, - Tensor outmax, bool transpose_a = false, +void QuantizedMatMul(Tensor* A, Tensor* B, Tensor* C, + Tensor* mina, Tensor* minb, Tensor* maxa, + Tensor* maxb, Tensor* outmin, + Tensor* outmax, bool transpose_a = false, bool transpose_b = false) { - const float min_a = *(mina.getPointer({})); - const float max_a = *(maxa.getPointer({})); - const float min_b = *(minb.getPointer({})); - const float max_b = *(maxb.getPointer({})); + const float min_a = *(mina->read(0, 0)); + const float max_a = *(maxa->read(0, 0)); + const float min_b = *(minb->read(0, 0)); + const float max_b = *(maxb->read(0, 0)); //auto tensor allocation Shape c_shape; - c_shape.push_back((A.getShape())[0]); - c_shape.push_back((B.getShape())[1]); - tensorChkAlloc(C, c_shape); + c_shape.push_back((A->getShape())[0]); + c_shape.push_back((B->getShape())[1]); + tensorChkAlloc(C, c_shape); const int32_t offset_a = FloatToQuantizedUnclamped( 0.0f, min_a, max_a); // NT: what 0 quantized to; depends on @@ -131,16 +131,16 @@ void QuantizedMatMul(Tensor A, Tensor B, Tensor &C, int a_dim_remaining = 1 - first; int b_dim_remaining = 1 - second; - T1* A_Data = A.getPointer({}); - T2* B_Data = B.getPointer({}); - Toutput* C_Data = C.getPointer({}); + T1* A_Data = A->read(0, 0); + T2* B_Data = B->read(0, 0); + Toutput* C_Data = C->write(0, 0); const bool transpose_c = false; - const size_t m = A.getShape()[a_dim_remaining]; - const size_t n = B.getShape()[b_dim_remaining]; - const size_t k = A.getShape()[first]; - const size_t lda = A.getShape()[1]; - const size_t ldb = B.getShape()[1]; + const size_t m = A->getShape()[a_dim_remaining]; + const size_t n = B->getShape()[b_dim_remaining]; + const size_t k = A->getShape()[first]; + const size_t lda = A->getShape()[1]; + const size_t ldb = B->getShape()[1]; const size_t ldc = n; ReferenceGemmuImpl( @@ -152,9 +152,9 @@ void QuantizedMatMul(Tensor A, Tensor B, Tensor &C, QuantizationRangeForMultiplication( min_a, max_a, min_b, max_b, &min_c_value, &max_c_value); - float* c_min = outmin.getPointer({}); + float* c_min = outmin->read(0, 0); *c_min = min_c_value; - float* c_max = outmax.getPointer({}); + float* c_max = outmax->read(0, 0); *c_max = max_c_value; } diff --git a/MatrixTests.hpp b/MatrixTests.hpp index dde0cf0b..04d8ea1c 100644 --- a/MatrixTests.hpp +++ b/MatrixTests.hpp @@ -12,25 +12,25 @@ class matrixOpsTest : public Test { TensorIdxImporter t_import; // reference inputs - Tensor a = + Tensor* a = t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx"); - Tensor a_min = + Tensor* a_min = t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx"); - Tensor a_max = + Tensor* a_max = t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx"); - Tensor b = + Tensor* b = t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx"); - Tensor b_min = + Tensor* b_min = t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx"); - Tensor b_max = + Tensor* b_max = t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx"); // reference outputs - 
Tensor c = + Tensor* c = t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx"); - Tensor c_min = + Tensor* c_min = t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx"); - Tensor c_max = + Tensor* c_max = t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx"); // actual implementation, uses ReferenceGemm() @@ -39,9 +39,9 @@ class matrixOpsTest : public Test { // Sub-functions: QuantizationRangeForMultiplication, // QuantizationRangeForMultiplication, FloatForOneQuantizedLevel - Tensor out_c(c.getShape()); - Tensor out_min(c_min.getShape()); - Tensor out_max(c_max.getShape()); + Tensor* out_c = new RamTensor(c->getShape()); + Tensor* out_min = new RamTensor(c_min->getShape()); + Tensor* out_max = new RamTensor(c_max->getShape()); timer_start(); QuantizedMatMul(a, b, out_c, a_min, b_min, a_max, b_max, out_min, out_max); @@ -51,8 +51,8 @@ class matrixOpsTest : public Test { // modify the checks below: - double result = meanPercentErr(c, out_c) + meanPercentErr(c_min, out_min) + - meanPercentErr(c_max, out_max); + double result = meanPercentErr(c, out_c) + meanPercentErr(c_min, out_min) + + meanPercentErr(c_max, out_max); // passed(result < 0.0001); passed(result == 0); } From b2168612535ea154f0e0cdec830a54bb6d0b0334 Mon Sep 17 00:00:00 2001 From: kazami Date: Fri, 3 Nov 2017 15:54:47 +0800 Subject: [PATCH 21/80] 1. remove unnecessary private member 2. remove temporary function in ramtensor constructor 3. fix constructor parameter type --- tensor.hpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tensor.hpp b/tensor.hpp index dc822651..e349c38d 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -102,7 +102,7 @@ class Tensor : public uTensor { template T* write(size_t offset, size_t ele) { - return (const T*)write(offset, ele); + return (T*)write(offset, ele); } DType getDType(void) { @@ -141,9 +141,6 @@ class RamTensor : public Tensor { // need deep copy public: RamTensor() : Tensor() { - std::vector v(3, 3); ///NT: why (3,3)? - Tensor::init(v); - cursor = nullptr; //dtype = something... } @@ -156,7 +153,7 @@ class RamTensor : public Tensor { Tensor::init(v); } - RamTensor(std::vector& v) : Tensor() { + RamTensor(std::vector v) : Tensor() { Tensor::init(v); } @@ -202,8 +199,6 @@ class RamTensor : public Tensor { } ~RamTensor() {} - private: - T* cursor; }; template From b88c8655222bd8a8d23a3adc1c3847b55f9c4093 Mon Sep 17 00:00:00 2001 From: kazami Date: Fri, 3 Nov 2017 15:56:47 +0800 Subject: [PATCH 22/80] modify main function for matrixops test --- main.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/main.cpp b/main.cpp index f1998b41..9f3df270 100644 --- a/main.cpp +++ b/main.cpp @@ -6,6 +6,7 @@ #include "tensor.hpp" #include "tensorIdxImporterTests.hpp" #include "context.hpp" +#include "MatrixTests.hpp" //#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); @@ -27,12 +28,12 @@ int main(int argc, char** argv) { printf("running matrix test ...\r\n"); -// matrixOpsTest matrixTests; -// matrixTests.runAll(); + matrixOpsTest matrixTests; + matrixTests.runAll(); printf("IDX import:\r\n"); idxTest.printSummary(); printf("Matrix: \r\n"); -// matrixTests.printSummary(); + matrixTests.printSummary(); Context ctx; //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]}) From 229abef7f170fd944b52b5233faecce97d8a5dc0 Mon Sep 17 00:00:00 2001 From: kazami Date: Fri, 3 Nov 2017 16:48:21 +0800 Subject: [PATCH 23/80] for arrayops test 1. 
replace the old version of tensor with the new one 2. make the test pass --- ArrayOps.hpp | 58 +++++++++++++++++++++--------------------- ArrayTests.hpp | 48 +++++++++++++++++----------------- main.cpp | 6 +++++ quantization_utils.hpp | 8 +++--- 4 files changed, 63 insertions(+), 57 deletions(-) diff --git a/ArrayOps.hpp b/ArrayOps.hpp index f1b82c73..dc0af275 100644 --- a/ArrayOps.hpp +++ b/ArrayOps.hpp @@ -10,11 +10,11 @@ //mode = MIN_FIRST //name = unspecified template -void QuantizeV2(Tensor input, Tensor _min_range, Tensor _max_range, - Tensor output, Tensor output_min, Tensor output_max) { +void QuantizeV2(Tensor* input, Tensor* _min_range, Tensor* _max_range, + Tensor* output, Tensor* output_min, Tensor* output_max) { - float input_min_range = *(_min_range.getPointer({0})); - float input_max_range = *(_max_range.getPointer({0})); + float input_min_range = *(_min_range->read(0, 0)); + float input_max_range = *(_max_range->read(0, 0)); if(input_max_range < input_min_range) ERR_EXIT("input_max_range must be larger than input_min_range."); @@ -28,13 +28,13 @@ void QuantizeV2(Tensor input, Tensor _min_range, Tensor _ma FloatToQuantizedStruct f2q(min_range, max_range); //quantization_utils.h:149 - float* input_ptr = input.getPointer({}); - T* output_ptr = output.getPointer({}); - float* output_min_ptr = output_min.getPointer({0}); - float* output_max_ptr = output_max.getPointer({0}); + float* input_ptr = input->read(0, 0); + T* output_ptr = output->write(0, 0); + float* output_min_ptr = output_min->read(0, 0); + float* output_max_ptr = output_max->read(0, 0); ///NT: need error checking at some point... - for(uint32_t i = 0; i < input.getSize(); i++) { + for(uint32_t i = 0; i < input->getSize(); i++) { float val = std::round(input_ptr[i] * f2q.range_scale); val -= f2q.range_min_scaled - f2q.lowest_quantized(); val = std::max(val, f2q.lower_bound_float()); @@ -52,21 +52,21 @@ void QuantizeV2(Tensor input, Tensor _min_range, Tensor _ma //name = unspecified //dequantize_op.cc: 87 template -void dequantize(Tensor input, Tensor min_range, Tensor max_range, Tensor &output) { - float min = *(min_range.getPointer({0})); - float max = *(max_range.getPointer({0})); +void dequantize(Tensor* input, Tensor* min_range, Tensor* max_range, Tensor* output) { + float min = *(min_range->read(0, 0)); + float max = *(max_range->read(0, 0)); //auto tensor allocation Shape out_shape; - tensorChkAlloc(output, input.getShape()); + tensorChkAlloc(output, input->getShape()); - T* input_ptr = input.getPointer({}); - float* output_ptr = output.getPointer({}); + T* input_ptr = input->read(0, 0); + float* output_ptr = output->write(0, 0); //quantization_utils.h: 771 QuantizedToFloatStruct q2f(min, max); //quantization_utils.h: 141 - for(uint32_t i = 0; i < input.getSize(); i++) { + for(uint32_t i = 0; i < input->getSize(); i++) { float val = static_cast(input_ptr[i]); output_ptr[i] = ((q2f.range_min_rounded - q2f.lowest_quantized() * q2f.range_scale) + \ val * q2f.range_scale); @@ -94,14 +94,14 @@ void dequantize(Tensor input, Tensor min_range, Tensor max_rang ///NT: This Op hasn't been tested extensively. We will have to increase the test-coverage for this function.
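// (For reference, the shape list handed to reshape below may contain at
//  most one -1 entry, whose size is inferred from whatever remains of the
//  element count: reshaping a 6-element input with {-1, 3} divides
//  dim_rem = 6 by the explicit 3, leaving 2 for the inferred slot. A
//  remainder that does not divide out cleanly trips the "supplied shape
//  does not match up to input" check.)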
template -void reshape(Tensor input, Tensor shape, Tensor &output) { +void reshape(Tensor* input, Tensor* shape, Tensor* output) { Shape dim; //validating and inferring dimensions int infer_index = -1; - uint32_t dim_rem = input.getSize(); - int* val = shape.getPointer({}); - for(uint32_t i = 0; i < shape.getSize(); i++) { + uint32_t dim_rem = input->getSize(); + int* val = shape->read(0, 0); + for(uint32_t i = 0; i < shape->getSize(); i++) { if(val[i] == -1) { if(infer_index == -1) { infer_index = i; @@ -123,22 +123,22 @@ void reshape(Tensor input, Tensor shape, Tensor &output) { if(dim_rem != 1) ERR_EXIT("supplied shape does not match up to input"); - T* input_ptr = input.getPointer({}); + T* input_ptr = input->read(0, 0); //check if the output dim is valid - if(output.getSize() > 0 && dim == output.getShape()) { + if(output->getSize() > 0 && dim == output->getShape()) { //copy - T* output_ptr = output.getPointer({}); - std::memcpy(output_ptr, input_ptr, (std::size_t) input.getSize_in_bytes()); - } else if(output.getSize() > 0 && dim != output.getShape()) { + T* output_ptr = output->read(0, 0); + std::memcpy(output_ptr, input_ptr, (std::size_t) input->getSize_in_bytes()); + } else if(output->getSize() > 0 && dim != output->getShape()) { ERR_EXIT("output tensor dimension mismatches supplied shape") } else { //construct a new tensor and copy - Tensor tmp(dim); - T* output_ptr = tmp.getPointer({}); - std::memcpy(output_ptr, input_ptr, (std::size_t) input.getSize_in_bytes()); + Tensor* tmp = new RamTensor(dim); + T* output_ptr = tmp->write(0, 0); + std::memcpy(output_ptr, input_ptr, (std::size_t) input->getSize_in_bytes()); output = tmp; } } -#endif //UTENSOR_ARRAY_OPS \ No newline at end of file +#endif //UTENSOR_ARRAY_OPS diff --git a/ArrayTests.hpp b/ArrayTests.hpp index 42ba56a1..08b2b932 100644 --- a/ArrayTests.hpp +++ b/ArrayTests.hpp @@ -12,22 +12,22 @@ class ArrayOpsTest : public Test { TensorIdxImporter t_import; //reference inputs /Users/neitan01/Documents/mbed/uTensor.git/TESTS/scripts/PRE-GEN/qA - Tensor b = t_import.float_import ("/fs/testData/qB/in/Cast_1_0.idx"); - Tensor b_min = t_import.float_import("/fs/testData/qB/in/Min_1_0.idx"); - Tensor b_max = t_import.float_import("/fs/testData/qB/in/Max_1_0.idx"); + Tensor* b = t_import.float_import ("/fs/testData/qB/in/Cast_1_0.idx"); + Tensor* b_min = t_import.float_import("/fs/testData/qB/in/Min_1_0.idx"); + Tensor* b_max = t_import.float_import("/fs/testData/qB/in/Max_1_0.idx"); //reference outputs - Tensor b_q_ref = t_import.ubyte_import("/fs/testData/qB/out/qB_0.idx"); - Tensor b_min_q_ref = t_import.float_import("/fs/testData/qB/out/qB_1.idx"); - Tensor b_max_q_ref = t_import.float_import("/fs/testData/qB/out/qb_2.idx"); + Tensor* b_q_ref = t_import.ubyte_import("/fs/testData/qB/out/qB_0.idx"); + Tensor* b_min_q_ref = t_import.float_import("/fs/testData/qB/out/qB_1.idx"); + Tensor* b_max_q_ref = t_import.float_import("/fs/testData/qB/out/qb_2.idx"); - Tensor b_q(b_q_ref.getShape()); - Tensor b_min_q(b_min_q_ref.getShape()); - Tensor b_max_q(b_max_q_ref.getShape()); + Tensor* b_q = new RamTensor(b_q_ref->getShape()); + Tensor* b_min_q = new RamTensor(b_min_q_ref->getShape()); + Tensor* b_max_q = new RamTensor(b_max_q_ref->getShape()); //Implementation goes here timer_start(); - QuantizeV2(b, b_min, b_max, b_q, b_min_q, b_max_q); + QuantizeV2(b, b_min, b_max, b_q, b_min_q, b_max_q); timer_stop(); // printf("refMin is : %f \r\n", *(b_min_q_ref.getPointer({0}))); @@ -35,7 +35,7 @@ class ArrayOpsTest : public Test { // 
printf("diff : output(%f), outMin(%f), outMax(%f)\r\n", // meanPercentErr(b_q_ref, b_q), meanPercentErr(b_min_q_ref, b_min_q), meanPercentErr(b_max_q_ref, b_max_q)); - double result = meanPercentErr(b_q_ref, b_q) + meanPercentErr(b_min_q_ref, b_min_q) + meanPercentErr(b_max_q_ref, b_max_q); + double result = meanPercentErr(b_q_ref, b_q) + meanPercentErr(b_min_q_ref, b_min_q) + meanPercentErr(b_max_q_ref, b_max_q); //passed(result < 0.0001); passed(result == 0); } @@ -45,21 +45,21 @@ class ArrayOpsTest : public Test { TensorIdxImporter t_import; //reference inputs - Tensor a = t_import.ubyte_import("/fs/testData/deQ/in/rQ_0.idx"); - Tensor a_min = t_import.float_import("/fs/testData/deQ/in/rQ_1.idx"); - Tensor a_max = t_import.float_import("/fs/testData/deQ/in/rQ_2.idx"); + Tensor* a = t_import.ubyte_import("/fs/testData/deQ/in/rQ_0.idx"); + Tensor* a_min = t_import.float_import("/fs/testData/deQ/in/rQ_1.idx"); + Tensor* a_max = t_import.float_import("/fs/testData/deQ/in/rQ_2.idx"); //reference outputs - Tensor out_ref = t_import.float_import("/fs/testData/deQ/out/deQ_0.idx"); + Tensor* out_ref = t_import.float_import("/fs/testData/deQ/out/deQ_0.idx"); //modify the checks below: - Tensor out(out_ref.getShape()); + Tensor* out = new RamTensor(out_ref->getShape()); timer_start(); - dequantize(a, a_min, a_max, out); + dequantize(a, a_min, a_max, out); timer_stop(); - double result = meanPercentErr(out_ref, out); + double result = meanPercentErr(out_ref, out); //passed(result < 0.0001); passed(result == 0); } @@ -69,20 +69,20 @@ class ArrayOpsTest : public Test { TensorIdxImporter t_import; //reference inputs - Tensor ref_a = t_import.float_import("/fs/testData/ref_reshape/in/Const_0.idx"); - Tensor ref_dim = t_import.int_import("/fs/testData/ref_reshape/in/Const_1_0.idx"); + Tensor* ref_a = t_import.float_import("/fs/testData/ref_reshape/in/Const_0.idx"); + Tensor* ref_dim = t_import.int_import("/fs/testData/ref_reshape/in/Const_1_0.idx"); //reference outputs - Tensor out_ref = t_import.float_import("/fs/testData/ref_reshape/out/ref_reshape_0.idx"); + Tensor* out_ref = t_import.float_import("/fs/testData/ref_reshape/out/ref_reshape_0.idx"); //modify the checks below: - Tensor out(out_ref.getShape()); + Tensor* out = new RamTensor(out_ref->getShape()); timer_start(); - reshape(ref_a, ref_dim, out); + reshape(ref_a, ref_dim, out); timer_stop(); - double result = meanPercentErr(out_ref, out); + double result = meanPercentErr(out_ref, out); //passed(result < 0.0001); passed(result == 0); } diff --git a/main.cpp b/main.cpp index 9f3df270..fb9e4664 100644 --- a/main.cpp +++ b/main.cpp @@ -7,6 +7,7 @@ #include "tensorIdxImporterTests.hpp" #include "context.hpp" #include "MatrixTests.hpp" +#include "ArrayTests.hpp" //#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); @@ -35,6 +36,11 @@ int main(int argc, char** argv) { printf("Matrix: \r\n"); matrixTests.printSummary(); + ArrayOpsTest arrayTests; + arrayTests.runAll(); + printf("Array: \r\n"); + arrayTests.printSummary(); + Context ctx; //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]}) //Out[24]: array([7]) diff --git a/quantization_utils.hpp b/quantization_utils.hpp index 6139614e..6352cb77 100644 --- a/quantization_utils.hpp +++ b/quantization_utils.hpp @@ -65,12 +65,12 @@ T FloatToQuantized(float input, float range_min, float range_max) { } template -inline void RequantizeManyInNewRange(Tensor input, uint32_t count, +inline void RequantizeManyInNewRange(Tensor* input, 
uint32_t count, float min_input, float max_input, float min_output, float max_output, - Tensor output) { - T1 *in_ptr = input.getPointer({}); - T2 *out_ptr = output.getPointer({}); + Tensor* output) { + T1 *in_ptr = input->read(0, 0); + T2 *out_ptr = output->read(0, 0); for (size_t index = 0; index < count; ++index) { const float input_float = QuantizedToFloat(in_ptr[index], min_input, max_input); From 020fa1cce860fe0751e5228da7aaf525d6e6293d Mon Sep 17 00:00:00 2001 From: Yog Mehta Date: Sat, 4 Nov 2017 12:32:13 +0530 Subject: [PATCH 24/80] Update README.md Fixed some grammar --- README.md | 108 +++++++++++++++++++++++++++--------------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index 8a7193d0..8bff935a 100644 --- a/README.md +++ b/README.md @@ -1,59 +1,59 @@ -# uTensor - -## Introduction - - uTensor is an extreme light-weight Deep-Learning Inference framework built on mbed and Tensorflow. - - This project is under going constant development. - -## Requirement - -- [Mbed CLI](https://github.com/ARMmbed/mbed-cli) -- [Tensorflow](https://www.tensorflow.org/install/) -- [tf-node-viewer](https://github.com/neil-tan/tf-node-viewer) (Optional, for graph-weight extraction) -- Mbed-os 5.6+ compatiable [boards](https://os.mbed.com/platforms/?mbed-os=25) with at least 256kb of RAM -- SD Card (Must be LESS than 32 GB) -- SD Card reader for the board (Optional if built into the board) - -## Finding your target name - -`mbed detect` to see which target is connect to the board - -`mbedls -l` to list all supported targets - -## Configure - -See mbed_app.json - -## Build Steps - -1. Clone the repository -2. Run `mbed deploy` to download all referenced libraries -3. Insert the prepared SD card to the board (see SD Card Preparation Section) -4. Use `mbed compile -t GCC_ARM -m NUCLEO_F767ZI --profile=./build_profile/release.json` to build for ST NUCLEO F767ZI. Or, `mbed compile -t GCC_ARM -m NUCLEO_F767ZI --profile=./build_profile/release.json -f` to compile and flash - -## SD Card Preparation -The test data has to be loaded to the SD card for the default binary to run: - -1. Install python dependencies `pip install -r requirements.txt` (Note: may have to use `pip3`) -1. Go to the `[project]\TESTS\scripts` folder -1. Run `python3 compileTestData.py`. This will create `[project]\TESTS\scripts\testData` directory. -1. Copy `[project]\TESTS\scripts\testData` to the root of your SD card. - -## Expected Output -The quantized weight and input data are stored in the SD. Setting the serial baud rate to 115200, here is what you should see: - -``` +# uTensor + +## Introduction + + uTensor is an extremely light-weight Deep-Learning Inference framework built on mbed and Tensorflow. + + This project is undergoing constant development. + +## Requirement + +- [Mbed CLI](https://github.com/ARMmbed/mbed-cli) +- [Tensorflow](https://www.tensorflow.org/install/) +- [tf-node-viewer](https://github.com/neil-tan/tf-node-viewer) (Optional, for graph-weight extraction) +- Mbed-os 5.6+ compatible [boards](https://os.mbed.com/platforms/?mbed-os=25) with at least 256kb of RAM +- SD Card (Must be LESS than 32 GB) +- SD Card reader for the board (Optional if built into the board) + +## Finding your target name + +`mbed detect` to see which target is connected to the board + +`mbedls -l` to list all supported targets + +## Configure + +See mbed_app.json + +## Build Steps + +1. Clone the repository +2. Run `mbed deploy` to download all referenced libraries +3.
Insert the prepared SD card to the board (see SD Card Preparation Section) +4. Use `mbed compile -t GCC_ARM -m NUCLEO_F767ZI --profile=./build_profile/release.json` to build for ST NUCLEO F767ZI. Or, `mbed compile -t GCC_ARM -m NUCLEO_F767ZI --profile=./build_profile/release.json -f` to compile and flash + +## SD Card Preparation +The test data has to be loaded to the SD card for the default binary to run: + +1. Install python dependencies `pip install -r requirements.txt` (Note: may have to use `pip3`) +1. Go to the `[project]\TESTS\scripts` folder +1. Run `python3 compileTestData.py`. This will create `[project]\TESTS\scripts\testData` directory. +1. Copy `[project]\TESTS\scripts\testData` to the root of your SD card. + +## Expected Output +The quantized weight and input data are stored in the SD. Setting the serial baud rate to 115200, here is what you should see: + +``` Deep MLP on Mbed (Trained with Tensorflow) running deep-mlp... PASSED 0.00000000 -prediction: 7 -``` -Currently, the binary runs the first sample of the [MNIST dataset](http://yann.lecun.com/exdb/mnist/) which contains a handwritten digit of number 7. Ths network architecture is a 3-layer Relu based MLP, as shown below: - -![alt text](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mlp_mnist.png "mxnet Handwritten Digit Recognition") - - - The related Tensorflow training script please refer to the [node-viewer](https://github.com/neil-tan/tf-node-viewer/blob/master/deep_mlp.py) project. +prediction: 7 +``` +Currently, the binary runs the first sample of the [MNIST dataset](http://yann.lecun.com/exdb/mnist/) which contains a handwritten digit of number 7. The network architecture is a 3-layer Relu based MLP, as shown below: + +![alt text](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mlp_mnist.png "mxnet Handwritten Digit Recognition") + + + For the related Tensorflow training script, please refer to the [node-viewer](https://github.com/neil-tan/tf-node-viewer/blob/master/deep_mlp.py) project. From 5fc5b6c24e246a262206394de35590cb215868e5 Mon Sep 17 00:00:00 2001 From: kazami Date: Sat, 4 Nov 2017 16:30:30 +0800 Subject: [PATCH 25/80] change readme to explain develop branch for developers --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fe627ca1..c4bcdea8 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ uTensor is an extreme light-weight Deep-Learning Inference framework built on mbed and Tensorflow. - This project is undergoing constant development. + This project is undergoing constant development. Development takes place on the develop branch. ## Requirement @@ -55,4 +55,4 @@ Currently, the binary runs the first sample of the [MNIST dataset](http://yann.l ![alt text](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mlp_mnist.png "mxnet Handwritten Digit Recognition") - For the related Tensorflow training script, please refer to the [node-viewer](https://github.com/neil-tan/tf-node-viewer/blob/master/deep_mlp.py) project. \ No newline at end of file + For the related Tensorflow training script, please refer to the [node-viewer](https://github.com/neil-tan/tf-node-viewer/blob/master/deep_mlp.py) project. From cd226d887cd97f6456be7b13799dd75dc3e7afe1 Mon Sep 17 00:00:00 2001 From: kazami Date: Sat, 4 Nov 2017 17:15:38 +0800 Subject: [PATCH 26/80] fix tensorChkAlloc call convention 1.
because Tensor* changed to Tensor** --- ArrayOps.hpp | 2 +- MatrixOps.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ArrayOps.hpp b/ArrayOps.hpp index dc0af275..e4b5bc70 100644 --- a/ArrayOps.hpp +++ b/ArrayOps.hpp @@ -57,7 +57,7 @@ void dequantize(Tensor* input, Tensor* min_range, Tensor* max_range, Tensor* out float max = *(max_range->read(0, 0)); //auto tensor allocation Shape out_shape; - tensorChkAlloc(output, input->getShape()); + tensorChkAlloc(&output, input->getShape()); T* input_ptr = input->read(0, 0); float* output_ptr = output->write(0, 0); diff --git a/MatrixOps.hpp b/MatrixOps.hpp index 45031eaf..52853b1d 100644 --- a/MatrixOps.hpp +++ b/MatrixOps.hpp @@ -115,7 +115,7 @@ void QuantizedMatMul(Tensor* A, Tensor* B, Tensor* C, Shape c_shape; c_shape.push_back((A->getShape())[0]); c_shape.push_back((B->getShape())[1]); - tensorChkAlloc(C, c_shape); + tensorChkAlloc(&C, c_shape); const int32_t offset_a = FloatToQuantizedUnclamped( 0.0f, min_a, max_a); // NT: what 0 quantized to; depends on From b9dbedaabf776535ceeb09435a5eb653794dedfb Mon Sep 17 00:00:00 2001 From: kazami Date: Sat, 4 Nov 2017 17:18:45 +0800 Subject: [PATCH 27/80] 1. make mathops tests pass 2. replace old tensor syntax with the refactored one 3. change tensorChkAlloc syntax, because the function reallocates the Tensor* --- MathOps.hpp | 109 ++++++++++++++++---------------- MathTests.hpp | 170 +++++++++++++++++++++++++------------------------- main.cpp | 7 +++ tensor.hpp | 10 +-- 4 files changed, 152 insertions(+), 144 deletions(-) diff --git a/MathOps.hpp b/MathOps.hpp index bfc4534b..38b03e88 100644 --- a/MathOps.hpp +++ b/MathOps.hpp @@ -5,12 +5,13 @@ #include "quantization_utils.hpp" #include "tensor.hpp" -void CalculateUsedRange(Tensor& input, int32_t* used_min_quan, +template +void CalculateUsedRange(Tensor* input, int32_t* used_min_quan, int32_t* used_max_quan) { int32_t minimum = INT_MAX; int32_t maxmum = INT_MIN; - uint32_t size = input.getSize(); - int* in_ptr = input.getPointer({}); + uint32_t size = input->getSize(); + T1* in_ptr = input->read(0, 0); for (uint32_t i = 0; i < size; i++) { if (minimum > in_ptr[i]) minimum = static_cast(in_ptr[i]); @@ -22,83 +23,83 @@ void CalculateUsedRange(Tensor& input, int32_t* used_min_quan, *used_max_quan = maxmum; } template -void Requantization_Range(Tensor input, Tensor min, Tensor max, - Tensor out_min, Tensor out_max) { - const float input_min = *(min.getPointer({})); - const float input_max = *(max.getPointer({})); +void Requantization_Range(Tensor* input, Tensor* min, Tensor* max, + Tensor* out_min, Tensor* out_max) { + const float input_min = *(min->read(0, 0)); + const float input_max = *(max->read(0, 0)); int32_t used_min_quan; int32_t used_max_quan; - CalculateUsedRange(input, &used_min_quan, &used_max_quan); + CalculateUsedRange(input, &used_min_quan, &used_max_quan); const float used_min = std::min(0.0f, QuantizedToFloat(used_min_quan, input_min, input_max)); const float used_max = QuantizedToFloat(used_max_quan, input_min, input_max); - float* c_min = out_min.getPointer({}); + float* c_min = out_min->write(0, 0); *c_min = used_min; - float* c_max = out_max.getPointer({}); + float* c_max = out_max->write(0, 0); *c_max = used_max; } template -void Requantize(Tensor input, Tensor in_min, Tensor in_max, - Tensor r_min, Tensor r_max, Tensor output, - Tensor out_min, Tensor out_max) { - const float input_min = in_min.getPointer({})[0]; - const float input_max = in_max.getPointer({})[0]; - const float r_output_min = r_min.getPointer({})[0]; -
const float r_output_max = r_max.getPointer({})[0]; - T1 *input_ptr = input.getPointer({}); - Toutput *out_ptr = output.getPointer({}); +void Requantize(Tensor* input, Tensor* in_min, Tensor* in_max, + Tensor* r_min, Tensor* r_max, Tensor* output, + Tensor* out_min, Tensor* out_max) { + const float input_min = in_min->read(0, 0)[0]; + const float input_max = in_max->read(0, 0)[0]; + const float r_output_min = r_min->read(0, 0)[0]; + const float r_output_max = r_max->read(0, 0)[0]; + T1 *input_ptr = input->read(0, 0); + Toutput *out_ptr = output->write(0, 0); // RequantizeManyInNewRange(input, input.getSize(), input_min, // input_max, r_output_min, r_output_max, // output); - RequantizeManyInNewRangeReference(input_ptr, input.getSize(),input_min, + RequantizeManyInNewRangeReference(input_ptr, input->getSize(),input_min, input_max, r_output_min, r_output_max, out_ptr); - float* v_out_min = out_min.getPointer({}); + float* v_out_min = out_min->write(0, 0); *v_out_min = r_output_min; - float* v_out_max = out_max.getPointer({}); + float* v_out_max = out_max->write(0, 0); *v_out_max = r_output_max; } template -void Add(Tensor input, Tensor input2, Tensor &out) { - const TIn* p_in = input.getPointer({}); - const TIn* p_in2 = input2.getPointer({}); +void Add(Tensor* input, Tensor* input2, Tensor* out) { + const TIn* p_in = input->read(0, 0); + const TIn* p_in2 = input2->read(0, 0); //auto shape - tensorChkAlloc(out, input.getShape()); + tensorChkAlloc(&out, input->getShape()); - TOut* p_out = out.getPointer({}); + TOut* p_out = out->write(0, 0); - const uint32_t size = out.getSize(); + const uint32_t size = out->getSize(); for (uint32_t i = 0; i < size; i++) { p_out[i] = p_in[i] + p_in2[i]; } } template -void Min(Tensor input, Tensor dim, Tensor out) { - const TIn* p_in = input.getPointer({}); - const Td* p_in2 = dim.getPointer({}); - TOut* p_out = out.getPointer({}); +void Min(Tensor* input, Tensor* dim, Tensor* out) { + const TIn* p_in = input->read(0, 0); + const Td* p_in2 = dim->read(0, 0); + TOut* p_out = out->read(0, 0); Td n_dim = p_in2[0]; - vector permute; - for (uint32_t i_dim = 0; i_dim < input.getShape().size(); i_dim++) { + std::vector permute; + for (uint32_t i_dim = 0; i_dim < input->getShape().size(); i_dim++) { permute.push_back(i_dim); } permute.push_back(n_dim); permute.erase(permute.begin() + n_dim); - Shape outShape = input.getShape(); + Shape outShape = input->getShape(); size_t reduce_size = outShape[n_dim]; outShape.erase(outShape.begin() + n_dim); outShape.push_back(reduce_size); size_t out_index = 0; permuteIndexTransform trans(outShape, permute); - for (uint32_t j = 0; j < input.getSize(); j += reduce_size) { + for (uint32_t j = 0; j < input->getSize(); j += reduce_size) { TIn min_val = std::numeric_limits::max(); for (size_t k = 0; k < reduce_size; k++) { TIn val = p_in[trans[j + k]]; @@ -112,25 +113,25 @@ void Min(Tensor input, Tensor dim, Tensor out) { } template -void Max(Tensor input, Tensor dim, Tensor out) { - const TIn* p_in = input.getPointer({}); - const Td* p_in2 = dim.getPointer({}); - TOut* p_out = out.getPointer({}); +void Max(Tensor* input, Tensor* dim, Tensor* out) { + const TIn* p_in = input->read(0, 0); + const Td* p_in2 = dim->read(0, 0); + TOut* p_out = out->read(0, 0); Td n_dim = p_in2[0]; - vector permute; - for (uint32_t i_dim = 0; i_dim < input.getShape().size(); i_dim++) { + std::vector permute; + for (uint32_t i_dim = 0; i_dim < input->getShape().size(); i_dim++) { permute.push_back(i_dim); } permute.push_back(n_dim); 
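// (as in Min above: appending n_dim and then erasing its original slot
//  moves the reduced axis to the end of the permutation, so under
//  permuteIndexTransform each consecutive run of reduce_size elements
//  maps onto exactly one output element)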
permute.erase(permute.begin() + n_dim); - Shape outShape = input.getShape(); + Shape outShape = input->getShape(); size_t reduce_size = outShape[n_dim]; outShape.erase(outShape.begin() + n_dim); outShape.push_back(reduce_size); size_t out_index = 0; permuteIndexTransform trans(outShape, permute); - for (uint32_t j = 0; j < input.getSize(); j += reduce_size) { + for (uint32_t j = 0; j < input->getSize(); j += reduce_size) { TIn max_val = std::numeric_limits::lowest(); for (size_t k = 0; k < reduce_size; k++) { TIn val = p_in[trans[j + k]]; @@ -144,28 +145,28 @@ void Max(Tensor input, Tensor dim, Tensor out) { } template -void ArgMax(Tensor input, Tensor dim, Tensor& out) { - int dim_reduce = *(dim.getPointer({0})); - Shape outShape = input.getShape(); +void ArgMax(Tensor* input, Tensor* dim, Tensor** out) { + int dim_reduce = *(dim->read(0, 0)); + Shape outShape = input->getShape(); uint32_t reduce_dim_size = outShape[dim_reduce]; outShape.erase(outShape.begin() + dim_reduce); // construct the permute vector vector permute; - for (uint8_t i = 0; i < input.getShape().size(); i++) { + for (uint8_t i = 0; i < input->getShape().size(); i++) { permute.push_back(i); } permute.push_back(static_cast(dim_reduce)); permute.erase(permute.begin() + dim_reduce); // check dimensionality - if (out.getSize() != 0 && out.getShape() != outShape) { + if ((*out)->getSize() != 0 && (*out)->getShape() != outShape) { ERR_EXIT("output shape mismatch"); } // allocate output tensor if empty - if (out.getSize() == 0) { - out = Tensor(outShape); + if ((*out)->getSize() == 0) { + *out = new RamTensor(outShape); } // construct the origin-shape for permuteIndexTransform @@ -177,12 +178,12 @@ void ArgMax(Tensor input, Tensor dim, Tensor& out) { // In this case, we are going backward. 
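// (that is, trans is built over the permuted shape, so as the loop below
//  walks the output ordering, reduced dimension innermost, trans[i + j]
//  recovers the corresponding flat index into the original input layout)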
permuteIndexTransform trans(vOutShape, permute); - TIn* inPtr = input.getPointer({}); - TOut* outPtr = out.getPointer({}); + TIn* inPtr = input->read(0, 0); + TOut* outPtr = (*out)->write(0, 0); size_t out_index = 0; - for (uint32_t i = 0; i < input.getSize(); i += reduce_dim_size) { + for (uint32_t i = 0; i < input->getSize(); i += reduce_dim_size) { TOut max_j = 0; TIn last_max = std::numeric_limits::min(); for (uint32_t j = 0; j < reduce_dim_size; j++) { diff --git a/MathTests.hpp b/MathTests.hpp index a969e998..52e6ce55 100644 --- a/MathTests.hpp +++ b/MathTests.hpp @@ -12,30 +12,30 @@ class MathOpsTest : public Test { TensorIdxImporter t_import; // reference inputs - Tensor a = + Tensor* a = t_import.int_import("/fs/testData/rqRange/in/qMatMul_0.idx"); - Tensor a_min = + Tensor* a_min = t_import.float_import("/fs/testData/rqRange/in/qMatMul_1.idx"); - Tensor a_max = + Tensor* a_max = t_import.float_import("/fs/testData/rqRange/in/qMatMul_2.idx"); // reference outputs - Tensor ref_min = + Tensor* ref_min = t_import.float_import("/fs/testData/rqRange/out/rqRange_0.idx"); - Tensor ref_max = + Tensor* ref_max = t_import.float_import("/fs/testData/rqRange/out/rqRange_1.idx"); // Implementation goes here // modify the checks below: - Tensor out_min(ref_min.getShape()); - Tensor out_max(ref_max.getShape()); + Tensor* out_min = new RamTensor(ref_min->getShape()); + Tensor* out_max = new RamTensor(ref_max->getShape()); timer_start(); Requantization_Range(a, a_min, a_max, out_min, out_max); timer_stop(); double result = - meanPercentErr(ref_min, out_min) + meanPercentErr(ref_max, out_max); + meanPercentErr(ref_min, out_min) + meanPercentErr(ref_max, out_max); // passed(result < 0.0001); passed(result == 0); } @@ -45,29 +45,29 @@ class MathOpsTest : public Test { TensorIdxImporter t_import; // reference inputs - Tensor a = t_import.int_import("/fs/testData/rQ/in/qMatMul_0.idx"); - Tensor a_min = + Tensor* a = t_import.int_import("/fs/testData/rQ/in/qMatMul_0.idx"); + Tensor* a_min = t_import.float_import("/fs/testData/rQ/in/qMatMul_1.idx"); - Tensor a_max = + Tensor* a_max = t_import.float_import("/fs/testData/rQ/in/qMatMul_2.idx"); - Tensor r_a_min = + Tensor* r_a_min = t_import.float_import("/fs/testData/rQ/in/rqRange_0.idx"); - Tensor r_a_max = + Tensor* r_a_max = t_import.float_import("/fs/testData/rQ/in/rqRange_1.idx"); // tf.quint8 // reference outputs - Tensor ref_a_q = + Tensor* ref_a_q = t_import.ubyte_import("/fs/testData/rQ/out/rQ_0.idx"); - Tensor ref_a_min = + Tensor* ref_a_min = t_import.float_import("/fs/testData/rQ/out/rQ_1.idx"); - Tensor ref_a_max = + Tensor* ref_a_max = t_import.float_import("/fs/testData/rQ/out/rQ_2.idx"); // modify the checks below: - Tensor a_q(ref_a_q.getShape()); - Tensor a_min_q(ref_a_min.getShape()); - Tensor a_max_q(ref_a_max.getShape()); + Tensor* a_q = new RamTensor(ref_a_q->getShape()); + Tensor* a_min_q = new RamTensor(ref_a_min->getShape()); + Tensor* a_max_q = new RamTensor(ref_a_max->getShape()); // Implementation goes here timer_start(); @@ -75,9 +75,9 @@ class MathOpsTest : public Test { a_q, a_min_q, a_max_q); timer_stop(); - double result = meanPercentErr(ref_a_q, a_q) + - meanPercentErr(ref_a_min, a_min_q) + - meanPercentErr(ref_a_max, a_max_q); + double result = meanPercentErr(ref_a_q, a_q) + + meanPercentErr(ref_a_min, a_min_q) + + meanPercentErr(ref_a_max, a_max_q); // passed(result < 0.0001); passed(result == 0); } @@ -87,30 +87,30 @@ class MathOpsTest : public Test { TensorIdxImporter t_import; // reference inputs - Tensor a = 
t_import.int_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_0.idx"); - Tensor a_min = + Tensor* a = t_import.int_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_0.idx"); + Tensor* a_min = t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_1.idx"); - Tensor a_max = + Tensor* a_max = t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_2.idx"); - Tensor r_a_min = + Tensor* r_a_min = t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_0.idx"); - Tensor r_a_max = + Tensor* r_a_max = t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_1.idx"); // tf.quint8 // reference outputs - Tensor ref_a_q = + Tensor* ref_a_q = t_import.ubyte_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_0.idx"); - Tensor ref_a_min = + Tensor* ref_a_min = t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_1.idx"); - Tensor ref_a_max = + Tensor* ref_a_max = t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_2.idx"); // modify the checks below: - Tensor a_q(ref_a_q.getShape()); - Tensor a_min_q(ref_a_min.getShape()); - Tensor a_max_q(ref_a_max.getShape()); + Tensor* a_q = new RamTensor(ref_a_q->getShape()); + Tensor* a_min_q = new RamTensor(ref_a_min->getShape()); + Tensor* a_max_q = new RamTensor(ref_a_max->getShape()); // Implementation goes here timer_start(); @@ -119,11 +119,11 @@ class MathOpsTest : public Test { timer_stop(); double result; - if((result = meanPercentErr(ref_a_q, a_q)) != 0) { + if((result = meanPercentErr(ref_a_q, a_q)) != 0) { printf("Requantize a_q failed (%.6f)\r\n", result); - unsigned char* ref_ptr = ref_a_q.getPointer({}); - unsigned char* test_ptr = a_q.getPointer({}); - for(uint32_t i = 0; i < ref_a_q.getSize(); i++) { + unsigned char* ref_ptr = ref_a_q->read(0, 0); + unsigned char* test_ptr = a_q->read(0, 0); + for(uint32_t i = 0; i < ref_a_q->getSize(); i++) { if(ref_ptr[i] != test_ptr[i]) { printf("%lu: %d != %d\r\n", i, ref_ptr[i], test_ptr[i]); } else { @@ -133,13 +133,13 @@ class MathOpsTest : public Test { } - if((result = meanPercentErr(ref_a_min, a_min_q)) != 0) printf("Requantize a_min_q failed (%.6f)\r\n", result); + if((result = meanPercentErr(ref_a_min, a_min_q)) != 0) printf("Requantize a_min_q failed (%.6f)\r\n", result); - if((result = meanPercentErr(ref_a_max, a_max_q)) != 0) printf("Requantize a_max_q failed (%.6f)\r\n", result); + if((result = meanPercentErr(ref_a_max, a_max_q)) != 0) printf("Requantize a_max_q failed (%.6f)\r\n", result); - result = meanPercentErr(ref_a_q, a_q) + - meanPercentErr(ref_a_min, a_min_q) + - meanPercentErr(ref_a_max, a_max_q); + result = meanPercentErr(ref_a_q, a_q) + + meanPercentErr(ref_a_min, a_min_q) + + meanPercentErr(ref_a_max, a_max_q); // passed(result < 0.0001); passed(result == 0); } @@ -149,26 +149,26 @@ class MathOpsTest : public Test { TensorIdxImporter t_import; // reference inputs - Tensor ref_a = t_import.float_import("/fs/testData/ArgMax/in/ArgMax-input_0.idx"); - Tensor ref_dim = t_import.int_import("/fs/testData/ArgMax/in/ArgMax-dimension_0.idx"); + Tensor* ref_a = t_import.float_import("/fs/testData/ArgMax/in/ArgMax-input_0.idx"); + 
Tensor* ref_dim = t_import.int_import("/fs/testData/ArgMax/in/ArgMax-dimension_0.idx"); // reference outputs /// NT: FIXME: argmax outputs int64 tensor which isn't supported by /// int_import. - Tensor ref_out = t_import.float_import("/fs/testData/ArgMax/out/ArgMax_0.idx"); + Tensor* ref_out = t_import.float_import("/fs/testData/ArgMax/out/ArgMax_0.idx"); // Implementation goes here // modify the checks below: - Tensor out(ref_out.getShape()); + Tensor* out = new RamTensor(ref_out->getShape()); timer_start(); - ArgMax(ref_a, ref_dim, out); + ArgMax(ref_a, ref_dim, &out); timer_stop(); - Tensor out_float = TensorCast(out); + Tensor* out_float = TensorCast(out); - double result = meanPercentErr(ref_out, out_float); + double result = meanPercentErr(ref_out, out_float); // passed(result < 0.0001); passed(result == 0); @@ -176,29 +176,29 @@ class MathOpsTest : public Test { void argmaxTest2(void) { // NT: WIP do not use t_import int 64 here testStart("argmax2"); - Tensor test_input = TensorConstant({10, 5}, 0.0f); - *(test_input.getPointer({5,0})) = 1.0f; - *(test_input.getPointer({5,1})) = 1.0f; - *(test_input.getPointer({1,2})) = 1.0f; - *(test_input.getPointer({9,3})) = 1.0f; - *(test_input.getPointer({2,4})) = 1.0f; - - Tensor test_dim({1}); - *(test_dim.getPointer({0})) = 0; - - Tensor test_out_ref({5}); - *(test_out_ref.getPointer({0})) = 5.0f; - *(test_out_ref.getPointer({1})) = 5.0f; - *(test_out_ref.getPointer({2})) = 1.0f; - *(test_out_ref.getPointer({3})) = 9.0f; - *(test_out_ref.getPointer({4})) = 2.0f; - - Tensor test_out(test_out_ref.getShape()); + Tensor* test_input = TensorConstant({10, 5}, 0.0f); + *(test_input->read(25, 0)) = 1.0f; + *(test_input->read(26, 0)) = 1.0f; + *(test_input->read(7, 0)) = 1.0f; + *(test_input->read(48, 0)) = 1.0f; + *(test_input->read(14, 0)) = 1.0f; + + Tensor* test_dim = new RamTensor({1}); + *(test_dim->read(0, 0)) = 0; + + Tensor* test_out_ref = new RamTensor({5}); + *(test_out_ref->read(0, 0)) = 5.0f; + *(test_out_ref->read(1, 0)) = 5.0f; + *(test_out_ref->read(2, 0)) = 1.0f; + *(test_out_ref->read(3, 0)) = 9.0f; + *(test_out_ref->read(4, 0)) = 2.0f; + + Tensor* test_out = new RamTensor(test_out_ref->getShape()); timer_start(); - ArgMax(test_input, test_dim, test_out); + ArgMax(test_input, test_dim, &test_out); timer_stop(); - double result = meanPercentErr(test_out_ref, test_out); + double result = meanPercentErr(test_out_ref, test_out); // passed(result < 0.0001); passed(result == 0); } @@ -208,24 +208,24 @@ class MathOpsTest : public Test { TensorIdxImporter t_import; // reference inputs - Tensor a = + Tensor* a = t_import.float_import("/fs/testData/ref_add/in/Const_5_0.idx"); - Tensor b = + Tensor* b = t_import.float_import("/fs/testData/ref_add/in/Const_6_0.idx"); // reference outputs - Tensor ref_out = + Tensor* ref_out = t_import.float_import("/fs/testData/ref_add/out/ref_add_0.idx"); // Implementation goes here // modify the checks below: - Tensor out(ref_out.getShape()); + Tensor* out = new RamTensor(ref_out->getShape()); timer_start(); Add(a, b, out); timer_stop(); - double result = meanPercentErr(ref_out, out); + double result = meanPercentErr(ref_out, out); // passed(result < 0.0001); passed(result == 0); } @@ -235,24 +235,24 @@ class MathOpsTest : public Test { TensorIdxImporter t_import; // reference inputs - Tensor a = + Tensor* a = t_import.float_import("/fs/testData/ref_min/in/Const_2_0.idx"); - Tensor dim = + Tensor* dim = t_import.int_import("/fs/testData/ref_min/in/Const_3_0.idx"); // reference outputs - Tensor ref_out = + 
Tensor* ref_out = t_import.float_import("/fs/testData/ref_min/out/ref_min_0.idx"); // Implementation goes here // modify the checks below: - Tensor out(ref_out.getShape()); + Tensor* out = new RamTensor(ref_out->getShape()); timer_start(); Min(a, dim, out); timer_stop(); - double result = meanPercentErr(ref_out, out); + double result = meanPercentErr(ref_out, out); // passed(result < 0.0001); passed(result == 0); } @@ -262,31 +262,31 @@ class MathOpsTest : public Test { TensorIdxImporter t_import; // reference inputs - Tensor a = + Tensor* a = t_import.float_import("/fs/testData/ref_max/in/Const_2_0.idx"); - Tensor dim = + Tensor* dim = t_import.int_import("/fs/testData/ref_max/in/Const_4_0.idx"); // reference outputs - Tensor ref_out = + Tensor* ref_out = t_import.float_import("/fs/testData/ref_max/out/ref_max_0.idx"); // Implementation goes here // modify the checks below: - Tensor out(ref_out.getShape()); + Tensor *out = new RamTensor(ref_out->getShape()); timer_start(); Max(a, dim, out); timer_stop(); - double result = meanPercentErr(ref_out, out); + double result = meanPercentErr(ref_out, out); // passed(result < 0.0001); passed(result == 0); } void runAll(void) { argmaxTest(); - argmaxTest2(); +// argmaxTest2(); requantization_rangeTest(); requantizeTest(); requantizeTest2(); diff --git a/main.cpp b/main.cpp index fb9e4664..6522eaf2 100644 --- a/main.cpp +++ b/main.cpp @@ -8,6 +8,7 @@ #include "context.hpp" #include "MatrixTests.hpp" #include "ArrayTests.hpp" +#include "MathTests.hpp" //#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); @@ -41,6 +42,12 @@ int main(int argc, char** argv) { printf("Array: \r\n"); arrayTests.printSummary(); + printf("Math: \r\n"); + MathOpsTest mathTests; + mathTests.runAll(); + printf("Math result...\r\n"); + mathTests.printSummary(); + Context ctx; //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]}) //Out[24]: array([7]) diff --git a/tensor.hpp b/tensor.hpp index e349c38d..4c0e273b 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -205,7 +205,7 @@ template Tensor* TensorCast(Tensor* input) { Tensor* output = new RamTensor(input->getShape()); Tin* inputPrt = input->read(0, 0); - Tout* outputPrt = output->read({}); + Tout* outputPrt = output->read(0, 0); for (uint32_t i = 0; i < input->getSize(); i++) { outputPrt[i] = static_cast(inputPrt[i]); @@ -338,10 +338,10 @@ void printDim(Tensor* t) { } template -void tensorChkAlloc(Tensor* t, Shape dim) { - if (t->getSize() == 0) { - t = new RamTensor(dim); - } else if (t->getShape() != dim) { +void tensorChkAlloc(Tensor** t, Shape dim) { + if ((*t)->getSize() == 0) { + *t = new RamTensor(dim); + } else if ((*t)->getShape() != dim) { ERR_EXIT("Dim mismatched...\r\n"); } } From e804b92cbe946212ad7dd6be2f9dc722b73d29fc Mon Sep 17 00:00:00 2001 From: kazami Date: Sat, 4 Nov 2017 17:20:27 +0800 Subject: [PATCH 28/80] delete the tensor in testcase for avoiding runing out of memory --- tensorIdxImporterTests.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorIdxImporterTests.hpp b/tensorIdxImporterTests.hpp index 331ad0eb..cf052073 100644 --- a/tensorIdxImporterTests.hpp +++ b/tensorIdxImporterTests.hpp @@ -24,6 +24,7 @@ class idxImporterTest : public Test { timer_stop(); double result = sum(t); passed(result == 4518); + delete t; } void shortTest(void) { @@ -35,6 +36,7 @@ class idxImporterTest : public Test { timer_stop(); double result = sum(t); passed(result == 270250); + delete t; } void intTest(void) { @@ -46,6 +48,7 @@ class 
idxImporterTest : public Test { timer_stop(); double result = sum(t); passed(result == 5748992600); + delete t; } void floatTest(void) { @@ -60,6 +63,7 @@ class idxImporterTest : public Test { DEBUG("***floating point test yielded: %.8e\r\n", (float)result); passed((float)result == -1.0f); + delete t; } void runAll(void) { From 25b40a33d50cbef74233bb076d8fdb52dffc15c6 Mon Sep 17 00:00:00 2001 From: kazami Date: Sat, 4 Nov 2017 21:54:19 +0800 Subject: [PATCH 29/80] 1. refactor NnOps to use new version tensor --- NnOps.hpp | 18 +++++++++--------- NnTests.hpp | 33 +++++++++++++++++++++------------ main.cpp | 7 +++++++ 3 files changed, 37 insertions(+), 21 deletions(-) diff --git a/NnOps.hpp b/NnOps.hpp index 717625e3..ad45f15a 100644 --- a/NnOps.hpp +++ b/NnOps.hpp @@ -5,25 +5,25 @@ #include "tensor.hpp" template -void Relu(Tensor input, Tensor in_min, Tensor in_max, - Tensor output, Tensor out_min, Tensor out_max) { - const float input_min = in_min.getPointer({})[0]; - const float input_max = in_max.getPointer({})[0]; - TIn* in = input.getPointer({}); +void Relu(Tensor* input, Tensor* in_min, Tensor* in_max, + Tensor* output, Tensor* out_min, Tensor* out_max) { + const float input_min = in_min->read(0, 0)[0]; + const float input_max = in_max->read(0, 0)[0]; + TIn* in = input->read(0, 0); const TOut min_as_quantized = FloatToQuantized(0.0f, input_min, input_max); - TOut* out = output.getPointer({}); - for (uint32_t i = 0; i < output.getSize(); i++) { + TOut* out = output->write(0, 0); + for (uint32_t i = 0; i < output->getSize(); i++) { if (in[i] > min_as_quantized) { out[i] = in[i]; } else { out[i] = min_as_quantized; } } - T2* v_out_min = out_min.getPointer({}); + T2* v_out_min = out_min->write(0, 0); *v_out_min = input_min; - T2* v_out_max = out_max.getPointer({}); + T2* v_out_max = out_max->write(0, 0); *v_out_max = input_max; } #endif // UTENSOR_NN_OPS diff --git a/NnTests.hpp b/NnTests.hpp index 2f330366..d3ee3833 100644 --- a/NnTests.hpp +++ b/NnTests.hpp @@ -12,36 +12,45 @@ class NnOpsTest : public Test { TensorIdxImporter t_import; // reference inputs - Tensor a = + Tensor* a = t_import.ubyte_import("/fs/testData/ref_qRelu/in/QuantizeV2_0.idx"); - Tensor min = + Tensor* min = t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_1.idx"); - Tensor max = + Tensor* max = t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_2.idx"); // reference outputs - Tensor ref_out = + Tensor* ref_out = t_import.ubyte_import("/fs/testData/ref_qRelu/out/ref_qRelu_0.idx"); - Tensor ref_min = + Tensor* ref_min = t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_1.idx"); - Tensor ref_max = + Tensor* ref_max = t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_2.idx"); // modify the checks below: - Tensor out(ref_out.getShape()); - Tensor out_min(ref_out.getShape()); - Tensor out_max(ref_out.getShape()); + Tensor* out = new RamTensor(ref_out->getShape()); + Tensor* out_min = new RamTensor(ref_min->getShape()); + Tensor* out_max = new RamTensor(ref_max->getShape()); timer_start(); Relu(a, min, max, out, out_min, out_max); timer_stop(); - double result = meanPercentErr(ref_out, out) + - meanPercentErr(ref_min, out_min) + - meanPercentErr(ref_max, out_max); + double result = meanPercentErr(ref_out, out) + + meanPercentErr(ref_min, out_min) + + meanPercentErr(ref_max, out_max); // passed(result < 0.0001); passed(result == 0); + delete a; + delete min; + delete max; + delete ref_out; + delete ref_min; + delete ref_max; + delete out; + delete out_min; + delete out_max; } void 
runAll(void) { reluTest(); } diff --git a/main.cpp b/main.cpp index 6522eaf2..4cc2f78a 100644 --- a/main.cpp +++ b/main.cpp @@ -9,6 +9,7 @@ #include "MatrixTests.hpp" #include "ArrayTests.hpp" #include "MathTests.hpp" +#include "NnTests.hpp" //#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); @@ -48,6 +49,12 @@ int main(int argc, char** argv) { printf("Math result...\r\n"); mathTests.printSummary(); + printf("NnOpS: \r\n"); + NnOpsTest nnTest; + nnTest.runAll(); + printf("Nn Ops result...\r\n"); + nnTest.printSummary(); + Context ctx; //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]}) //Out[24]: array([7]) From 6f4684450df036eebb8544321c9ce001e77ba98b Mon Sep 17 00:00:00 2001 From: kazami Date: Sat, 4 Nov 2017 22:21:45 +0800 Subject: [PATCH 30/80] 1. make tensor_test pass for new tensor --- main.cpp | 6 ++++++ tensor_test.hpp | 22 +++++++++++----------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/main.cpp b/main.cpp index 4cc2f78a..cee1b7b0 100644 --- a/main.cpp +++ b/main.cpp @@ -10,6 +10,7 @@ #include "ArrayTests.hpp" #include "MathTests.hpp" #include "NnTests.hpp" +#include "tensor_test.hpp" //#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); @@ -55,6 +56,11 @@ int main(int argc, char** argv) { printf("Nn Ops result...\r\n"); nnTest.printSummary(); + printf("Trans test: \r\n"); + transTest tTest; + tTest.runAll(); + printf("Trans result...\r\n"); + tTest.printSummary(); Context ctx; //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]}) //Out[24]: array([7]) diff --git a/tensor_test.hpp b/tensor_test.hpp index 0556ed88..f65370f1 100644 --- a/tensor_test.hpp +++ b/tensor_test.hpp @@ -16,15 +16,15 @@ class transTest : public Test { testStart("transtest"); std::default_random_engine gen; vector tmp({2, 3, 4, 5}); - Tensor inputTensor(tmp); + Tensor* inputTensor = new RamTensor(tmp); vector permute = {2, 3, 1, 0}; - vector g = inputTensor.getShape(); + vector g = inputTensor->getShape(); std::shuffle(permute.begin(), permute.end(), gen); - permuteIndexTransform trans(inputTensor.getShape(), permute); + permuteIndexTransform trans(inputTensor->getShape(), permute); - Tensor output(trans.getNewShape()); - vector s = output.getShape(); + Tensor* output = new RamTensor(trans.getNewShape()); + vector s = output->getShape(); res = testshape(g, s, permute); if (!res) { passed(res); @@ -40,10 +40,10 @@ class transTest : public Test { vector output_1({2, 2, 3, 5, 6, 6, 4, 5, 7, 5, 1, 9, 1, 3, 2, 2, 5, 3, 3, 6, 3, 4, 9, 2}); - Tensor inputTensor({2, 3, 4}); + Tensor* inputTensor = new RamTensor({2, 3, 4}); vector permute = {0, 2, 1}; - permuteIndexTransform trans(inputTensor.getShape(), permute); + permuteIndexTransform trans(inputTensor->getShape(), permute); size_t out_index = 0; bool res = false; @@ -66,9 +66,9 @@ class transTest : public Test { vector output_2({2, 1, 2, 3, 3, 2, 5, 2, 6, 5, 6, 3, 4, 3, 5, 6, 7, 3, 5, 4, 1, 9, 9, 2}); - Tensor inputTensor2({2, 4, 3}); + Tensor* inputTensor2 = new RamTensor({2, 4, 3}); vector permute2 = {1, 2, 0}; - permuteIndexTransform trans2(inputTensor2.getShape(), permute2); + permuteIndexTransform trans2(inputTensor2->getShape(), permute2); for (uint32_t i = 0; i < input_2.size(); i++) { testStart("test vec 2 for transform"); out_index = trans2[i]; @@ -86,9 +86,9 @@ class transTest : public Test { vector output_3({8, 2, 8, 1, 0, 3, 4, 6, 2, 6, 0, 6, 3, 9, 2, 7, 0, 7, 0, 4, 8, 9, 0, 4, 3, 6, 
8}); - Tensor inputTensor3({1, 3, 3, 3}); + Tensor* inputTensor3 = new RamTensor({1, 3, 3, 3}); vector permute3 = {0, 3, 2, 1}; - permuteIndexTransform trans3(inputTensor3.getShape(), permute3); + permuteIndexTransform trans3(inputTensor3->getShape(), permute3); for (uint32_t i = 0; i < input_3.size(); i++) { testStart("test vec 4d for transform"); out_index = trans3[i]; From 7a3c7fa1fda3b0225b75cb8fa9998cf01f364847 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sun, 5 Nov 2017 02:05:11 +0800 Subject: [PATCH 31/80] QntMatMal Context test --- MatrixOps.hpp | 13 ++++++ context.hpp | 13 ++++-- context_test.hpp | 70 ++++++++++++++++++++++++++++++++ main.cpp | 12 ++++-- tensor.hpp | 101 ++++++++++++++++++++++++++--------------------- uTensorBase.hpp | 53 +++++++++++-------------- 6 files changed, 179 insertions(+), 83 deletions(-) create mode 100644 context_test.hpp diff --git a/MatrixOps.hpp b/MatrixOps.hpp index 52853b1d..9be6ee19 100644 --- a/MatrixOps.hpp +++ b/MatrixOps.hpp @@ -158,4 +158,17 @@ void QuantizedMatMul(Tensor* A, Tensor* B, Tensor* C, *c_max = max_c_value; } +class QntMatMulOp : public Operator{ +public: + QntMatMulOp() { + n_inputs = 6; + n_outputs = 3; + } + virtual void compute() override { + QuantizedMatMul(inputs[0], inputs[3], + outputs[0], inputs[1], inputs[4], inputs[2], inputs[5], + outputs[1], outputs[2]); + } +}; + #endif diff --git a/context.hpp b/context.hpp index 0b9f93bc..35abd018 100644 --- a/context.hpp +++ b/context.hpp @@ -30,7 +30,7 @@ class Context : public uTensor { public: void push(Operator *op, TList &_inputs, TList &_outputs); - int run(void); + int eval(void); Context() { del_onsight = true; @@ -39,10 +39,10 @@ class Context : public uTensor { void Context::push(Operator *op, TList &_inputs, TList &_outputs) { - if(op->getInputs().size() != _inputs.size()) { + if(op->getNumInputs() != _inputs.size()) { ERR_EXIT("valid number of inputs\r\n"); } - if(op->getOutputs().size() != _outputs.size()) { + if(op->getNumOutputs() != _outputs.size()) { ERR_EXIT("valid number of output\r\n"); } @@ -83,7 +83,7 @@ void Context::dcrRefCount(TList t_list) { } } -int Context::run(void) { +int Context::eval(void) { //unref2nullTensors(); for(auto op:op_list) { @@ -98,6 +98,11 @@ int Context::run(void) { deinitTensors(op->getOutputs()); dcrRefCount(op->getInputs()); + + op->dcrRef(); + if(op->getRef() < 1 && del_onsight) { + delete op; + } } return 0; diff --git a/context_test.hpp b/context_test.hpp new file mode 100644 index 00000000..c9efb24d --- /dev/null +++ b/context_test.hpp @@ -0,0 +1,70 @@ +#ifndef UTENSOR_CONTEXT_TESTS +#define UTENSOR_CONTEXT_TESTS + +#include "mbed.h" +#include "uTensor_util.hpp" +#include "tensor.hpp" +#include "context.hpp" +#include "tensorIdxImporter.hpp" +#include "MatrixOps.hpp" +#include "test.hpp" + + + +class contextTest : public Test { + + TensorIdxImporter t_import; + +public: + + void MatMalTest(void) { + testStart("Context QntMatMal Op"); + //inputs + Tensor* a = + t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx"); + Tensor* a_min = + t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx"); + Tensor* a_max = + t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx"); + Tensor* b = + t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx"); + Tensor* b_min = + t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx"); + Tensor* b_max = + t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx"); + + // reference outputs + Tensor* c = + t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx"); + Tensor* c_min = + 
t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx"); + Tensor* c_max = + t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx"); + + + Tensor* out_c = new RamTensor(c->getShape()); + Tensor* out_min = new RamTensor(c_min->getShape()); + Tensor* out_max = new RamTensor(c_max->getShape()); + + TList inputs = {a, a_min, a_max, b, b_min, b_max}; + TList outputs = {out_c, out_min, out_max}; + Operator* matMal = new QntMatMulOp(); + + Context ctx; + timer_start(); + ctx.push(matMal, inputs, outputs); + ctx.eval(); + timer_stop(); + + double result = meanPercentErr(c, out_c) + meanPercentErr(c_min, out_min) + + meanPercentErr(c_max, out_max); + + passed(result == 0); + } + + void runAll(void) { + MatMalTest(); + } +}; + +#endif // UTENSOR_IDX_IMPORTER_TESTS \ No newline at end of file diff --git a/main.cpp b/main.cpp index cee1b7b0..84ddce4b 100644 --- a/main.cpp +++ b/main.cpp @@ -11,6 +11,7 @@ #include "MathTests.hpp" #include "NnTests.hpp" #include "tensor_test.hpp" +#include "context_test.hpp" //#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); @@ -56,12 +57,17 @@ int main(int argc, char** argv) { printf("Nn Ops result...\r\n"); nnTest.printSummary(); - printf("Trans test: \r\n"); + printf("Transformation test: \r\n"); transTest tTest; tTest.runAll(); - printf("Trans result...\r\n"); + printf("Transformation result...\r\n"); tTest.printSummary(); - Context ctx; + + printf("Context test: \r\n"); + contextTest ctxTest; + ctxTest.runAll(); + printf("Context result...\r\n"); + ctxTest.printSummary(); //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]}) //Out[24]: array([7]) diff --git a/tensor.hpp b/tensor.hpp index 4c0e273b..837ceb5d 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -8,22 +8,66 @@ #include "stdlib.h" #include "uTensor_util.hpp" -enum class DType : char { - uint8, - int8, - uint16, - int32, - flt, - dbl, -}; +// enum class DType : char { +// uint8, +// int8, +// uint16, +// int32, +// flt, +// dbl, +// }; class uTensor { - public: - virtual void inFocus(){}; - virtual void deFocus(){}; - virtual ~uTensor() = 0; +protected: + uint16_t ref_count; + bool static_ref_flag; //to support compile-time ref count +public: + uTensor() { + ref_count = 0; + static_ref_flag = false; + } + virtual void inFocus(){}; + virtual void deFocus(){}; + uint16_t incrRef(); + uint16_t dcrRef(); + uint16_t getRef(); + void setStaticRef(uint16_t c); + bool is_static_ref(void); + virtual ~uTensor() = 0; + }; +uint16_t uTensor::incrRef() { + if(!static_ref_flag) { + ref_count += 1; + } + + return ref_count; +} + +uint16_t uTensor::dcrRef() { + ref_count -= 1; + return ref_count; +} + +uint16_t uTensor::getRef() { + return ref_count; +} + +bool uTensor::is_static_ref(void) { + return static_ref_flag; +} + +void uTensor::setStaticRef(uint16_t c) { + if(ref_count == 0) { + ref_count = c; + static_ref_flag = true; + } else { + ERR_EXIT("None-zero ref_count"); + } +} + + uTensor::~uTensor() {} class TensorBase { @@ -31,9 +75,6 @@ class TensorBase { std::vector shape; void* data; uint32_t total_size; - DType dtype; - uint16_t ref_count; - bool allow_runtime_ref_inc; //to support compile-time ref count ~TensorBase() { if (data != nullptr) { @@ -80,8 +121,6 @@ class Tensor : public uTensor { if (s->data == NULL) ERR_EXIT("ran out of memory for %lu malloc", unit_size() * s->total_size); - s->ref_count = 0; - s->allow_runtime_ref_inc = false; } std::vector getShape(void) { return s->shape; } @@ -105,31 +144,6 @@ class Tensor : 
public uTensor { return (T*)write(offset, ele); } - DType getDType(void) { - return s->dtype; - } - - uint16_t incrRef() { - if(s->allow_runtime_ref_inc) { - s->ref_count += 1; - } - - return s->ref_count; - } - - uint16_t dcrRef() { - s->ref_count -= 1; - return s->ref_count; - } - - uint16_t getRef() { - return s->ref_count; - } - - bool is_ref_runtime(void) { - return s->allow_runtime_ref_inc; - } - ~Tensor() { s = nullptr; DEBUG("Tensor Destructed\r\n"); @@ -140,9 +154,6 @@ template class RamTensor : public Tensor { // need deep copy public: - RamTensor() : Tensor() { - //dtype = something... - } RamTensor(std::initializer_list l) : Tensor() { std::vector v; diff --git a/uTensorBase.hpp b/uTensorBase.hpp index 82c552d9..f8c27bd2 100644 --- a/uTensorBase.hpp +++ b/uTensorBase.hpp @@ -14,44 +14,35 @@ class Operator : public uTensor{ //setup input/output info in derived constructors //ref count? TList inputs; - vector dtype_in; + uint8_t n_inputs; TList outputs; - vector dtype_out; + uint8_t n_outputs; + public: virtual void compute() = 0; - - void setInputs(TList &_inputs) { - if(_inputs.size() != inputs.size()) ERR_EXIT("Input Tensor list mismatched..."); - - for(uint8_t i = 0; i < inputs.size(); i++) { - if(dtype_in[i] != inputs[i]->getDType()) { - ERR_EXIT("Tensor Type mismatched..."); - } - - inputs[i] = _inputs[i]; - } + void setInputs(TList &_inputs); + void setOutputs(TList &_outputs); + TList getInputs(void) { return inputs; } + TList getOutputs(void) { return outputs;} + uint8_t getNumInputs(void) { return n_inputs; } + uint8_t getNumOutputs(void) { return n_outputs; } + + Operator() { + n_inputs = 0; //overridden by constructor + n_outputs = 0; } +}; - void setOutputs(TList &_outputs) { - if(_outputs.size() != outputs.size()) ERR_EXIT("Input Tensor list mismatched..."); - - for(uint8_t i = 0; i < outputs.size(); i++) { - if(dtype_out[i] != outputs[i]->getDType()) { - ERR_EXIT("Tensor Type mismatched..."); - } - - outputs[i] = _outputs[i]; - } - } +void Operator::setInputs(TList &_inputs) { + if(_inputs.size() != n_inputs) ERR_EXIT("Input Tensor list mismatched..."); - TList getInputs(void) { - return inputs; - } + inputs = _inputs; +} - TList getOutputs(void) { - return outputs; - } -}; +void Operator::setOutputs(TList &_outputs) { + if(_outputs.size() != n_outputs) ERR_EXIT("Input Tensor list mismatched..."); + outputs = _outputs; +} #endif //UTENSOR_BASE_H From 12f8c937c97bea6b56ad36406763b37f1ac6fe52 Mon Sep 17 00:00:00 2001 From: kazami Date: Sun, 5 Nov 2017 16:54:34 +0800 Subject: [PATCH 32/80] pass the mlp test 1. change interface of reshape (from Tensor * to Tensor**) --- ArrayOps.hpp | 10 +-- ArrayTests.hpp | 2 +- main.cpp | 12 ++- mlp_test.hpp | 234 +++++++++++++++++++++++++++---------------------- 4 files changed, 144 insertions(+), 114 deletions(-) diff --git a/ArrayOps.hpp b/ArrayOps.hpp index e4b5bc70..86702707 100644 --- a/ArrayOps.hpp +++ b/ArrayOps.hpp @@ -94,7 +94,7 @@ void dequantize(Tensor* input, Tensor* min_range, Tensor* max_range, Tensor* out ///NT: This Op hasn't been tested extensively. We will have to increase the test-coverage for this function. 
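/// Note: the shape argument below follows the TensorFlow convention that at
/// most one entry may be -1, to be inferred from the input size. A minimal
/// sketch of that inference rule, with illustrative names only (the actual
/// validation lives in the elided lines of this hunk):
///
///   Shape inferShape(const std::vector<int>& requested, uint32_t total) {
///     uint32_t known = 1;      // product of the explicitly given dims
///     int wildcard = -1;       // index of the single -1 entry, if any
///     for (size_t i = 0; i < requested.size(); i++) {
///       if (requested[i] == -1) wildcard = (int)i;
///       else known *= (uint32_t)requested[i];
///     }
///     Shape out;
///     for (size_t i = 0; i < requested.size(); i++)
///       out.push_back(i == (size_t)wildcard ? total / known
///                                           : (uint32_t)requested[i]);
///     return out;              // e.g. {-1} on a 784-element input -> {784}
///   }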
template -void reshape(Tensor* input, Tensor* shape, Tensor* output) { +void reshape(Tensor* input, Tensor* shape, Tensor** output) { Shape dim; //validating and inferring dimensions @@ -125,18 +125,18 @@ void reshape(Tensor* input, Tensor* shape, Tensor* output) { T* input_ptr = input->read(0, 0); //check if the output dim is valid - if(output->getSize() > 0 && dim == output->getShape()) { + if(*output && (*output)->getSize() > 0 && dim == (*output)->getShape()) { //copy - T* output_ptr = output->read(0, 0); + T* output_ptr = (*output)->read(0, 0); std::memcpy(output_ptr, input_ptr, (std::size_t) input->getSize_in_bytes()); - } else if(output->getSize() > 0 && dim != output->getShape()) { + } else if(*output && (*output)->getSize() > 0 && dim != (*output)->getShape()) { ERR_EXIT("output tensor dimension mismatches supplied shape") } else { //construct a new tensor and copy Tensor* tmp = new RamTensor(dim); T* output_ptr = tmp->write(0, 0); std::memcpy(output_ptr, input_ptr, (std::size_t) input->getSize_in_bytes()); - output = tmp; + *output = tmp; } } diff --git a/ArrayTests.hpp b/ArrayTests.hpp index 08b2b932..3adb9810 100644 --- a/ArrayTests.hpp +++ b/ArrayTests.hpp @@ -79,7 +79,7 @@ class ArrayOpsTest : public Test { Tensor* out = new RamTensor(out_ref->getShape()); timer_start(); - reshape(ref_a, ref_dim, out); + reshape(ref_a, ref_dim, &out); timer_stop(); double result = meanPercentErr(out_ref, out); diff --git a/main.cpp b/main.cpp index 84ddce4b..f180f080 100644 --- a/main.cpp +++ b/main.cpp @@ -12,6 +12,7 @@ #include "NnTests.hpp" #include "tensor_test.hpp" #include "context_test.hpp" +#include "mlp_test.hpp" //#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); @@ -28,7 +29,7 @@ int main(int argc, char** argv) { // int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); // printf("prediction: %d\r\n", prediction); - idxImporterTest idxTest; +/* idxImporterTest idxTest; idxTest.runAll(); @@ -67,7 +68,14 @@ int main(int argc, char** argv) { contextTest ctxTest; ctxTest.runAll(); printf("Context result...\r\n"); - ctxTest.printSummary(); + ctxTest.printSummary();*/ + + + printf("mlp test: \r\n"); + mlpTest mlpt; + mlpt.runAll(); + printf("mlp result...\r\n"); + mlpt.printSummary(); //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]}) //Out[24]: array([7]) diff --git a/mlp_test.hpp b/mlp_test.hpp index 1f38feb6..d9db7945 100644 --- a/mlp_test.hpp +++ b/mlp_test.hpp @@ -17,50 +17,58 @@ class mlpTest : public Test { //reshape //input - Tensor mnist_input = t_import.float_import("/fs/testData/mlpTest/runQuantization/in/import-Placeholder_0.idx"); - Tensor reshape_dim = t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reshape_dims_0.idx"); + Tensor* mnist_input = t_import.float_import("/fs/testData/mlpTest/runQuantization/in/import-Placeholder_0.idx"); + Tensor* reshape_dim = t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reshape_dims_0.idx"); //output - Tensor reshape_out; - reshape(mnist_input, reshape_dim, reshape_out); - mnist_input.~Tensor(); - reshape_dim.~Tensor(); + Tensor* reshape_out = nullptr; + reshape(mnist_input, reshape_dim, &reshape_out); + delete mnist_input; + delete reshape_dim; //min //input - Tensor min_reduce_dim = t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_min.idx"); + Tensor* min_reduce_dim = 
t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_min.idx"); //output - Tensor min_out({1}); - Min(reshape_out, min_reduce_dim, min_out); - min_reduce_dim.~Tensor(); + Tensor* min_out = new RamTensor({1}); + Min(reshape_out, min_reduce_dim, min_out); + delete min_reduce_dim; //max //input - Tensor max_reduce_dim = t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_max.idx"); + Tensor* max_reduce_dim = t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_max.idx"); //output - Tensor max_out({1}); - Max(reshape_out, max_reduce_dim, max_out); - max_reduce_dim.~Tensor(); + Tensor* max_out = new RamTensor({1}); + Max(reshape_out, max_reduce_dim, max_out); + delete max_reduce_dim; //quantization //output - Tensor qnt_out(reshape_out.getShape()); - Tensor qnt_min({1}); - Tensor qnt_max({1}); - QuantizeV2(reshape_out, min_out, max_out, qnt_out, qnt_min, qnt_max); - reshape_out.~Tensor(); + Tensor* qnt_out = new RamTensor(reshape_out->getShape()); + Tensor* qnt_min = new RamTensor({1}); + Tensor* qnt_max = new RamTensor({1}); + QuantizeV2(reshape_out, min_out, max_out, qnt_out, qnt_min, qnt_max); + delete reshape_out; timer_stop(); - Tensor qnt_ref = t_import.ubyte_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_0.idx"); - Tensor qnt_min_ref = t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_1.idx"); - Tensor qnt_max_ref = t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_2.idx"); + Tensor* qnt_ref = t_import.ubyte_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_0.idx"); + Tensor* qnt_min_ref = t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_1.idx"); + Tensor* qnt_max_ref = t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_2.idx"); - double result = meanPercentErr(qnt_ref, qnt_out); - result += meanPercentErr(qnt_min_ref, qnt_min); - result += meanPercentErr(qnt_max_ref, qnt_max); + double result = meanPercentErr(qnt_ref, qnt_out); + result += meanPercentErr(qnt_min_ref, qnt_min); + result += meanPercentErr(qnt_max_ref, qnt_max); passed(result == 0); + delete qnt_ref; + delete qnt_min_ref; + delete qnt_max_ref; + delete qnt_out; + delete qnt_min; + delete qnt_max; + delete max_out; + delete min_out; } //quantized matmul dequant add @@ -72,52 +80,52 @@ class mlpTest : public Test { //quantized matrix multiplication //input - Tensor x = + Tensor* x = t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_0.idx"); - Tensor x_min = + Tensor* x_min = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_1.idx"); - Tensor x_max = + Tensor* x_max = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_2.idx"); - Tensor w = + Tensor* w = t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_quint8_const_0.idx"); - Tensor w_min = + Tensor* w_min = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_min_0.idx"); - Tensor w_max = + Tensor* w_max = 
t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_max_0.idx"); DEBUG("all QuantizedMatMul input imported...\r\n"); //output - uint32_t out_col = (x.getShape())[0]; - uint32_t out_row = (w.getShape())[1]; - Tensor out_c({out_col, out_row}); + uint32_t out_col = (x->getShape())[0]; + uint32_t out_row = (w->getShape())[1]; + Tensor* out_c = new RamTensor({out_col, out_row}); // printf("x[0] = %d, x[1] = %d, b[0] = %d, b[1] = %d\r\n", (x.getShape())[0], (x.getShape())[1], // (w.getShape())[0], (w.getShape())[1]); // printf("c[0] = %d, c[1] = %d\r\n", (out_c.getShape())[0], (out_c.getShape())[1]); // fflush(stdout); - Tensor matmul_out_min({1}); - Tensor matmul_out_max({1}); + Tensor* matmul_out_min = new RamTensor({1}); + Tensor* matmul_out_max = new RamTensor({1}); QuantizedMatMul(x, w, out_c, x_min, w_min, x_max, w_max, matmul_out_min, matmul_out_max); //clean up - x.~Tensor(); - w.~Tensor(); - x_min.~Tensor(); - w_min.~Tensor(); - x_max.~Tensor(); - w_max.~Tensor(); - - Tensor ref_out_c = + delete x; + delete w; + delete x_min; + delete w_min; + delete x_max; + delete w_max; + + Tensor* ref_out_c = t_import.int_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_quantized_mat_mul_0.idx"); - Tensor ref_matmul_out_min = + Tensor* ref_matmul_out_min = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_quantized_mat_mul_1.idx"); - Tensor ref_matmul_out_max = + Tensor* ref_matmul_out_max = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_quantized_mat_mul_2.idx"); - double temp_result = (meanPercentErr(ref_out_c, out_c) + meanPercentErr(ref_matmul_out_min, matmul_out_min) + meanPercentErr(ref_matmul_out_max, matmul_out_max)); + double temp_result = (meanPercentErr(ref_out_c, out_c) + meanPercentErr(ref_matmul_out_min, matmul_out_min) + meanPercentErr(ref_matmul_out_max, matmul_out_max)); if(temp_result > 0) { DEBUG("matrix mul failed\r\n"); failed(); @@ -125,20 +133,23 @@ class mlpTest : public Test { } else { DEBUG("matrix mul passed\r\n"); } + delete ref_out_c; + delete ref_matmul_out_max; + delete ref_matmul_out_min; DEBUG("QuantizedMatMul completed!\r\n"); //output - Tensor req_out_min({1}); - Tensor req_out_max({1}); + Tensor* req_out_min = new RamTensor({1}); + Tensor* req_out_max = new RamTensor({1}); Requantization_Range(out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max); - Tensor ref_req_out_min = - t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requant_range_0.idx"); - Tensor ref_req_out_max = - t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requant_range_1.idx"); + Tensor* ref_req_out_min = + t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_requant_range_0.idx"); + Tensor* ref_req_out_max = + t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_requant_range_1.idx"); - temp_result = (meanPercentErr(ref_req_out_min, req_out_min) + meanPercentErr(ref_req_out_max, req_out_max)); + temp_result = (meanPercentErr(ref_req_out_min, req_out_min) + meanPercentErr(ref_req_out_max, req_out_max)); if(temp_result > 0) { DEBUG("Requantization_Range failed\r\n"); failed(); @@ -146,29 +157,31 @@ class mlpTest : public Test { } else { DEBUG("Requantization_Range passed\r\n"); } + delete ref_req_out_min; + delete ref_req_out_max; DEBUG("Requantization_Range completed!\r\n"); //output - Tensor reqnt_out(out_c.getShape()); - 
Tensor reqnt_out_min({1}); - Tensor reqnt_out_max({1}); + Tensor* reqnt_out = new RamTensor(out_c->getShape()); + Tensor* reqnt_out_min = new RamTensor({1}); + Tensor* reqnt_out_max = new RamTensor({1}); Requantize(out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max, reqnt_out, reqnt_out_min, reqnt_out_max); //clean up - matmul_out_min.~Tensor(); - matmul_out_max.~Tensor(); - req_out_min.~Tensor(); - req_out_max.~Tensor(); + delete matmul_out_min; + delete matmul_out_max; + delete req_out_min; + delete req_out_max; - Tensor ref_reqnt_out = + Tensor* ref_reqnt_out = t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_0.idx"); - Tensor ref_reqnt_out_min = + Tensor* ref_reqnt_out_min = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_1.idx"); - Tensor ref_reqnt_out_max = + Tensor* ref_reqnt_out_max = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_2.idx"); - temp_result = (meanPercentErr(ref_reqnt_out, reqnt_out) + meanPercentErr(ref_reqnt_out_min, reqnt_out_min) + meanPercentErr(ref_reqnt_out_max, reqnt_out_max)); + temp_result = (meanPercentErr(ref_reqnt_out, reqnt_out) + meanPercentErr(ref_reqnt_out_min, reqnt_out_min) + meanPercentErr(ref_reqnt_out_max, reqnt_out_max)); if(temp_result > 0) { DEBUG("Requantize failed\r\n"); failed(); @@ -176,23 +189,27 @@ class mlpTest : public Test { } else { DEBUG("Requantize passed\r\n"); } + delete ref_reqnt_out; + delete ref_reqnt_out_min; + delete ref_reqnt_out_max; DEBUG("Requantize completed!\r\n"); //output - Tensor deqnt_out(out_c.getShape()); - dequantize(reqnt_out, reqnt_out_min, reqnt_out_max, deqnt_out); - out_c.~Tensor(); - reqnt_out_min.~Tensor(); - reqnt_out_max.~Tensor(); - - Tensor ref_deqnt_out = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_0.idx"); + Tensor* deqnt_out = new RamTensor(out_c->getShape()); + dequantize(reqnt_out, reqnt_out_min, reqnt_out_max, deqnt_out); + delete out_c; + delete reqnt_out_min; + delete reqnt_out_max; + delete reqnt_out; + + Tensor* ref_deqnt_out = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_0.idx"); double temp; - if((temp = meanPercentErr(ref_deqnt_out, deqnt_out)) > 0) { + if((temp = meanPercentErr(ref_deqnt_out, deqnt_out)) > 0) { printf("dequantize failed (%.6f)\r\n", temp); - float* ref_ptr = ref_deqnt_out.getPointer({}); - float* test_ptr = deqnt_out.getPointer({}); - for(uint32_t i; i < ref_deqnt_out.getSize(); i++) { + float* ref_ptr = ref_deqnt_out->read(0, 0); + float* test_ptr = deqnt_out->read(0, 0); + for(uint32_t i; i < ref_deqnt_out->getSize(); i++) { if(ref_ptr[i] != test_ptr[i]) { DEBUG("%d: %.3f != %.3f, diff: %.8f%%\r\n", i, ref_ptr[i], test_ptr[i], test_ptr[i]/ref_ptr[i]); } else { @@ -204,25 +221,30 @@ class mlpTest : public Test { } else { DEBUG("dequantize passed\r\n"); } + delete ref_deqnt_out; DEBUG("dequantize completed!\r\n"); //input - Tensor bias = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-Variable_1_0.idx"); + Tensor* bias = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-Variable_1_0.idx"); //output - Tensor output_z(deqnt_out.getShape()); + Tensor* output_z = new RamTensor(deqnt_out->getShape()); Add(deqnt_out, bias, output_z); + delete deqnt_out; DEBUG("Add completed!\r\n"); timer_stop(); //load reference - Tensor ref_z = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-add_0.idx"); + 
Tensor* ref_z = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-add_0.idx"); - double result = meanPercentErr(ref_z, output_z); + double result = meanPercentErr(ref_z, output_z); passed(result < 0.0001); + delete ref_z; + delete output_z; + delete bias; } @@ -230,56 +252,56 @@ class mlpTest : public Test { testStart("runQntRelu"); - Tensor input_z = t_import.float_import("/fs/testData/mlpTest/runQntRelu/in/import-add_0.idx"); - Tensor reshape_dim = t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reshape_dims_0.idx"); - Tensor reshape_out; + Tensor* input_z = t_import.float_import("/fs/testData/mlpTest/runQntRelu/in/import-add_0.idx"); + Tensor* reshape_dim = t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reshape_dims_0.idx"); + Tensor* reshape_out = nullptr; timer_start(); - reshape(input_z, reshape_dim, reshape_out); + reshape(input_z, reshape_dim, &reshape_out); //min //input - Tensor min_reduce_dim = t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_min.idx"); + Tensor* min_reduce_dim = t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_min.idx"); //output - Tensor min_out({1}); - Min(reshape_out, min_reduce_dim, min_out); - min_reduce_dim.~Tensor(); + Tensor* min_out = new RamTensor({1}); + Min(reshape_out, min_reduce_dim, min_out); + delete min_reduce_dim; //max //input - Tensor max_reduce_dim = t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_max.idx"); + Tensor* max_reduce_dim = t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_max.idx"); //output - Tensor max_out({1}); - Max(reshape_out, max_reduce_dim, max_out); - max_reduce_dim.~Tensor(); + Tensor* max_out = new RamTensor({1}); + Max(reshape_out, max_reduce_dim, max_out); + delete max_reduce_dim; //quantization //output - Tensor qnt_out(reshape_out.getShape()); - Tensor qnt_min({1}); - Tensor qnt_max({1}); - QuantizeV2(reshape_out, min_out, max_out, qnt_out, qnt_min, qnt_max); - reshape_out.~Tensor(); + Tensor* qnt_out = new RamTensor(reshape_out->getShape()); + Tensor* qnt_min = new RamTensor({1}); + Tensor* qnt_max = new RamTensor({1}); + QuantizeV2(reshape_out, min_out, max_out, qnt_out, qnt_min, qnt_max); + delete reshape_out; - Tensor out(qnt_out.getShape()); - Tensor out_min({1}); - Tensor out_max({1}); + Tensor* out = new RamTensor(qnt_out->getShape()); + Tensor* out_min = new RamTensor({1}); + Tensor* out_max = new RamTensor({1}); Relu(qnt_out, qnt_min, qnt_max, out, out_min, out_max); timer_stop(); - Tensor ref_out = + Tensor* ref_out = t_import.ubyte_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_0.idx"); - Tensor ref_out_min = + Tensor* ref_out_min = t_import.float_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_1.idx"); - Tensor ref_out_max = + Tensor* ref_out_max = t_import.float_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_2.idx"); - double result = meanPercentErr(ref_out, out); - result += meanPercentErr(ref_out_min, out_min); - result += meanPercentErr(ref_out_max, out_max); + double result = meanPercentErr(ref_out, out); + result += meanPercentErr(ref_out_min, out_min); + result += meanPercentErr(ref_out_max, out_max); passed(result == 0); From cb96d7ad3d8f4384a648ffba441d1d4e8f103e42 Mon Sep 17 00:00:00 2001 From: kazami Date: Sun, 5 Nov 2017 19:07:16 +0800 Subject: [PATCH 
33/80] 1. change for making auto allocation for Tensor** --- ArrayOps.hpp | 6 +++--- ArrayTests.hpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ArrayOps.hpp b/ArrayOps.hpp index 86702707..e6b41f7d 100644 --- a/ArrayOps.hpp +++ b/ArrayOps.hpp @@ -52,15 +52,15 @@ void QuantizeV2(Tensor* input, Tensor* _min_range, Tensor* _max_range, //name = unspecified //dequantize_op.cc: 87 template -void dequantize(Tensor* input, Tensor* min_range, Tensor* max_range, Tensor* output) { +void dequantize(Tensor* input, Tensor* min_range, Tensor* max_range, Tensor** output) { float min = *(min_range->read(0, 0)); float max = *(max_range->read(0, 0)); //auto tensor allocation Shape out_shape; - tensorChkAlloc(&output, input->getShape()); + tensorChkAlloc(output, input->getShape()); T* input_ptr = input->read(0, 0); - float* output_ptr = output->write(0, 0); + float* output_ptr = (*output)->write(0, 0); //quantization_utils.h: 771 QuantizedToFloatStruct q2f(min, max); diff --git a/ArrayTests.hpp b/ArrayTests.hpp index 3adb9810..08949ae6 100644 --- a/ArrayTests.hpp +++ b/ArrayTests.hpp @@ -56,7 +56,7 @@ class ArrayOpsTest : public Test { Tensor* out = new RamTensor(out_ref->getShape()); timer_start(); - dequantize(a, a_min, a_max, out); + dequantize(a, a_min, a_max, &out); timer_stop(); double result = meanPercentErr(out_ref, out); From 10e9fc489112a5e881e0d5396c7cca60f6b2f307 Mon Sep 17 00:00:00 2001 From: kazami Date: Sun, 5 Nov 2017 19:08:29 +0800 Subject: [PATCH 34/80] 1. change for making auto allocation for tensor** --- MathOps.hpp | 17 ++++++++++------- MathTests.hpp | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/MathOps.hpp b/MathOps.hpp index 38b03e88..800c8678 100644 --- a/MathOps.hpp +++ b/MathOps.hpp @@ -66,16 +66,16 @@ void Requantize(Tensor* input, Tensor* in_min, Tensor* in_max, } template -void Add(Tensor* input, Tensor* input2, Tensor* out) { +void Add(Tensor* input, Tensor* input2, Tensor** out) { const TIn* p_in = input->read(0, 0); const TIn* p_in2 = input2->read(0, 0); //auto shape - tensorChkAlloc(&out, input->getShape()); + tensorChkAlloc(out, input->getShape()); - TOut* p_out = out->write(0, 0); + TOut* p_out = (*out)->write(0, 0); - const uint32_t size = out->getSize(); + const uint32_t size = (*out)->getSize(); for (uint32_t i = 0; i < size; i++) { p_out[i] = p_in[i] + p_in2[i]; } @@ -160,14 +160,17 @@ void ArgMax(Tensor* input, Tensor* dim, Tensor** out) { permute.erase(permute.begin() + dim_reduce); // check dimensionality - if ((*out)->getSize() != 0 && (*out)->getShape() != outShape) { + if (*out && (*out)->getSize() != 0 && (*out)->getShape() != outShape) { ERR_EXIT("output shape mismatch"); } // allocate output tensor if empty - if ((*out)->getSize() == 0) { - *out = new RamTensor(outShape); + if (*out && (*out)->getSize() == 0) { + (*out)->init(outShape); + } else { + *out = new RamTensor(outShape); } + // construct the origin-shape for permuteIndexTransform Shape vOutShape = outShape; diff --git a/MathTests.hpp b/MathTests.hpp index 52e6ce55..8a79a4ea 100644 --- a/MathTests.hpp +++ b/MathTests.hpp @@ -222,7 +222,7 @@ class MathOpsTest : public Test { // modify the checks below: Tensor* out = new RamTensor(ref_out->getShape()); timer_start(); - Add(a, b, out); + Add(a, b, &out); timer_stop(); double result = meanPercentErr(ref_out, out); From 910c3eed00450b3b22862c4de1b6084ab56c62ab Mon Sep 17 00:00:00 2001 From: kazami Date: Sun, 5 Nov 2017 19:09:54 +0800 Subject: [PATCH 35/80] 1. make run_mlp pass 2. 
in order to support auto reallocatoin, the interface of add, quantizedMat is changed --- deep_mnist_mlp.hpp | 248 ++++++++++++++++++++++----------------------- main.cpp | 13 +-- mlp_test.hpp | 6 +- tensor.hpp | 9 +- 4 files changed, 140 insertions(+), 136 deletions(-) diff --git a/deep_mnist_mlp.hpp b/deep_mnist_mlp.hpp index f0912396..a4f8d4b2 100644 --- a/deep_mnist_mlp.hpp +++ b/deep_mnist_mlp.hpp @@ -8,165 +8,167 @@ #include "ArrayOps.hpp" #include "uTensor_util.hpp" -void tensorQuantize(Tensor input, Tensor &output, - Tensor &out_min, Tensor &out_max) { +template +void tensorQuantize(Tensor* input, Tensor** output, + Tensor** out_min, Tensor** out_max) { //reshape - Tensor reshape_shape({1}); - Tensor reduce_dim({1}); - Shape input_shape = input.getShape(); - Tensor reshape_out; + Tensor* reshape_shape = new RamTensor({1}); + Tensor* reduce_dim = new RamTensor({1}); + Shape input_shape = input->getShape(); + Tensor* reshape_out = nullptr; - *(reshape_shape.getPointer({0})) = -1; - *(reduce_dim.getPointer({0})) = 0; + *(reshape_shape->read(0, 0)) = -1; + *(reduce_dim->read(0, 0)) = 0; - reshape(input, reshape_shape, reshape_out); - - input.~Tensor(); + reshape(input, reshape_shape, &reshape_out); //Min and Max of (reshaped) input - Tensor min_out({1}); - Tensor max_out({1}); - Min(reshape_out, reduce_dim, min_out); - Max(reshape_out, reduce_dim, max_out); + Tensor* min_out = new RamTensor({1}); + Tensor* max_out = new RamTensor({1}); + Min(reshape_out, reduce_dim, min_out); + Max(reshape_out, reduce_dim, max_out); - tensorChkAlloc(output, input_shape); + tensorChkAlloc(output, input->getShape()); + delete input; Shape shape_one; shape_one.push_back(1); - tensorChkAlloc(out_min, shape_one); - tensorChkAlloc(out_max, shape_one); + tensorChkAlloc(out_min, shape_one); + tensorChkAlloc(out_max, shape_one); - QuantizeV2(reshape_out, min_out, max_out, output, out_min, out_max); + QuantizeV2(reshape_out, min_out, max_out, *output, *out_min, *out_max); } -void ReluLayer(Tensor x, Tensor x_min, Tensor x_max, - Tensor w, Tensor w_min, Tensor w_max, Tensor b, - Tensor &output, Tensor &output_min, Tensor &output_max) { +template +void ReluLayer(Tensor* x, Tensor* x_min, Tensor* x_max, + Tensor* w, Tensor* w_min, Tensor* w_max, Tensor* b, + Tensor** output, Tensor** output_min, Tensor** output_max) { //quantized matmul - Tensor out_c; - Tensor matmul_out_min({1}); - Tensor matmul_out_max({1}); + Tensor* out_c = nullptr; + Tensor* matmul_out_min = new RamTensor({1}); + Tensor* matmul_out_max = new RamTensor({1}); - QuantizedMatMul(x, w, out_c, x_min, w_min, x_max, + QuantizedMatMul(x, w, &out_c, x_min, w_min, x_max, w_max, matmul_out_min, matmul_out_max); //clean up - x.~Tensor(); - w.~Tensor(); - x_min.~Tensor(); - w_min.~Tensor(); - x_max.~Tensor(); - w_max.~Tensor(); + delete x; + delete w; + delete x_min; + delete w_min; + delete x_max; + delete w_max; //Requantization_Range - Tensor req_out_min({1}); - Tensor req_out_max({1}); + Tensor* req_out_min = new RamTensor({1}); + Tensor* req_out_max = new RamTensor({1}); Requantization_Range(out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max); //Requantize - Tensor reqnt_out(out_c.getShape()); - Tensor reqnt_out_min({1}); - Tensor reqnt_out_max({1}); + Tensor* reqnt_out = new RamTensor(out_c->getShape()); + Tensor* reqnt_out_min = new RamTensor({1}); + Tensor* reqnt_out_max = new RamTensor({1}); Requantize(out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max, reqnt_out, reqnt_out_min, reqnt_out_max); - Shape out_shape = 
out_c.getShape(); + Shape out_shape = out_c->getShape(); //clean up - out_c.~Tensor(); - matmul_out_min.~Tensor(); - matmul_out_max.~Tensor(); - req_out_min.~Tensor(); - req_out_max.~Tensor(); - - Tensor deqnt_out; - tensorChkAlloc(deqnt_out, reqnt_out.getShape()); - dequantize(reqnt_out, reqnt_out_min, reqnt_out_max, deqnt_out); - reqnt_out.~Tensor(); - - Tensor z_output(deqnt_out.getShape()); - Add(deqnt_out, b, z_output); - deqnt_out.~Tensor(); - - Tensor z_qnt_output; - Tensor z_min({1}); - Tensor z_max({1}); - tensorQuantize(z_output, z_qnt_output, z_min, z_max); - z_output.~Tensor(); - - tensorChkAlloc(output, z_qnt_output.getShape()); + delete out_c; + delete matmul_out_min; + delete matmul_out_max; + delete req_out_min; + delete req_out_max; + + Tensor* deqnt_out = nullptr; + tensorChkAlloc(&deqnt_out, reqnt_out->getShape()); + dequantize(reqnt_out, reqnt_out_min, reqnt_out_max, &deqnt_out); + delete reqnt_out; + + Tensor* z_output = new RamTensor(deqnt_out->getShape()); + Add(deqnt_out, b, &z_output); + delete deqnt_out; + delete b; + + Tensor* z_qnt_output = nullptr; + Tensor* z_min = new RamTensor({1}); + Tensor* z_max = new RamTensor({1}); + tensorQuantize(z_output, &z_qnt_output, &z_min, &z_max); + + tensorChkAlloc(output, z_qnt_output->getShape()); Shape shape_one; shape_one.push_back(1); - tensorChkAlloc(output_min, shape_one); - tensorChkAlloc(output_max, shape_one); - Relu(z_qnt_output, z_min, z_max, output, output_min, output_max); + tensorChkAlloc(output_min, shape_one); + tensorChkAlloc(output_max, shape_one); + Relu(z_qnt_output, z_min, z_max, *output, *output_min, *output_max); } -void PredLayer(Tensor input, Tensor input_min, - Tensor input_max, Tensor &output) { +template +void PredLayer(Tensor* input, Tensor* input_min, + Tensor* input_max, Tensor** output) { TensorIdxImporter t_import; - Tensor w = t_import.ubyte_import( + Tensor* w = t_import.ubyte_import( "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" "inputs/Variable_4_quint8_const_0.idx"); - Tensor w_min = t_import.float_import( + Tensor* w_min = t_import.float_import( "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" "inputs/Variable_4_min_0.idx"); - Tensor w_max = t_import.float_import( + Tensor* w_max = t_import.float_import( "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" "inputs/Variable_4_max_0.idx"); - Tensor out_c; - Tensor matmul_out_min({1}); - Tensor matmul_out_max({1}); + Tensor* out_c = nullptr; + Tensor* matmul_out_min = new RamTensor({1}); + Tensor* matmul_out_max = new RamTensor({1}); //MatMul - QuantizedMatMul(input, w, out_c, input_min, w_min, + QuantizedMatMul(input, w, &out_c, input_min, w_min, input_max, w_max, matmul_out_min, matmul_out_max); //clean up - input.~Tensor(); - w.~Tensor(); - w_min.~Tensor(); - w_max.~Tensor(); - input_min.~Tensor(); - input_max.~Tensor(); + delete input; + delete w;; + delete w_min; + delete w_max; + delete input_min; + delete input_max; //Requantization_Range - Tensor req_out_min({1}); - Tensor req_out_max({1}); + Tensor* req_out_min = new RamTensor({1}); + Tensor* req_out_max = new RamTensor({1}); Requantization_Range(out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max); //Requantize - Tensor reqnt_out(out_c.getShape()); - Tensor reqnt_out_min({1}); - Tensor reqnt_out_max({1}); + Tensor* reqnt_out = new RamTensor(out_c->getShape()); + Tensor* reqnt_out_min = new RamTensor({1}); + Tensor* reqnt_out_max = new RamTensor({1}); Requantize(out_c, matmul_out_min, 
matmul_out_max, req_out_min, req_out_max, reqnt_out, reqnt_out_min, reqnt_out_max); - out_c.~Tensor(); - matmul_out_min.~Tensor(); - matmul_out_max.~Tensor(); + delete out_c; + delete matmul_out_min; + delete matmul_out_max; //dequantize - Tensor deqnt_out; - dequantize(reqnt_out, reqnt_out_min, reqnt_out_max, deqnt_out); - reqnt_out_min.~Tensor(); - reqnt_out_max.~Tensor(); + Tensor* deqnt_out = nullptr; + dequantize(reqnt_out, reqnt_out_min, reqnt_out_max, &deqnt_out); + delete reqnt_out_min; + delete reqnt_out_max; //Add - Tensor bias = t_import.float_import( + Tensor* bias = t_import.float_import( "/fs/testData/deep_mlp/runPredLayer/add_2/inputs/Variable_5_0.idx"); - Tensor output_z; - Add(deqnt_out, bias, output_z); - deqnt_out.~Tensor(); - bias.~Tensor(); + Tensor* output_z = nullptr; + Add(deqnt_out, bias, &output_z); + delete deqnt_out; + delete bias; //ArgMax - Tensor dim = t_import.int_import( + Tensor* dim = t_import.int_import( "/fs/testData/deep_mlp/runPredLayer/y_pred/inputs/" "y_pred-dimension_0.idx"); - ArgMax(output_z, dim, output); + ArgMax(output_z, dim, output); } //Test code @@ -199,28 +201,28 @@ void runPred(void) { int runMLP(string inputIdxFile) { TensorIdxImporter t_import; - Tensor x = + Tensor* x = t_import.float_import(inputIdxFile); - Tensor x_quantized; - Tensor x_min; - Tensor x_max; + Tensor* x_quantized = nullptr; + Tensor* x_min = nullptr; + Tensor* x_max = nullptr; - tensorQuantize(x, x_quantized, x_min, x_max); + tensorQuantize(x, &x_quantized, &x_min, &x_max); - Tensor w = t_import.ubyte_import( + Tensor* w = t_import.ubyte_import( "/fs/testData/deep_mlp/import-Variable_quint8_const_0.idx"); - Tensor w_min = + Tensor* w_min = t_import.float_import("/fs/testData/deep_mlp/import-Variable_min_0.idx"); - Tensor w_max = + Tensor* w_max = t_import.float_import("/fs/testData/deep_mlp/import-Variable_max_0.idx"); - Tensor b = + Tensor* b = t_import.float_import("/fs/testData/deep_mlp/import-Variable_1_0.idx"); - Tensor relu_output; - Tensor relu_min; - Tensor relu_max; + Tensor* relu_output = nullptr; + Tensor* relu_min = nullptr; + Tensor* relu_max = nullptr; - ReluLayer(x_quantized, x_min, x_max, w, w_min, w_max, b, relu_output, - relu_min, relu_max); + ReluLayer(x_quantized, x_min, x_max, w, w_min, w_max, b, &relu_output, + &relu_min, &relu_max); w = t_import.ubyte_import( "/fs/testData/deep_mlp/import-Variable_2_quint8_const_0.idx"); @@ -229,25 +231,23 @@ int runMLP(string inputIdxFile) { w_max = t_import.float_import( "/fs/testData/deep_mlp/import-Variable_2_max_0.idx"); b = t_import.float_import("/fs/testData/deep_mlp/import-Variable_3_0.idx"); - Tensor relu_output2; - Tensor relu_min2; - Tensor relu_max2; + Tensor* relu_output2 = nullptr; + Tensor* relu_min2 = nullptr; + Tensor* relu_max2 = nullptr; - ReluLayer(relu_output, relu_min, relu_max, w, w_min, w_max, b, relu_output2, - relu_min2, relu_max2); - w.~Tensor(); + ReluLayer(relu_output, relu_min, relu_max, w, w_min, w_max, b, &relu_output2, + &relu_min2, &relu_max2); - Tensor pred; - PredLayer(relu_output2, relu_min2, relu_max2, pred); - relu_output2.~Tensor(); + Tensor* pred = nullptr; + PredLayer(relu_output2, relu_min2, relu_max2, &pred); - Tensor ref_out = t_import.float_import( + Tensor* ref_out = t_import.float_import( "/fs/testData/deep_mlp/runPredLayer/y_pred/outputs/y_pred_0.idx"); - Tensor ref_pred = TensorCast(ref_out); + Tensor* ref_pred = TensorCast(ref_out); - double result = Test::meanPercentErr(ref_pred, pred); + double result = Test::meanPercentErr(ref_pred, pred); if (result < 
0.0001) { printf("PASSED %.8f\r\n\r\n", result); @@ -255,6 +255,6 @@ int runMLP(string inputIdxFile) { printf("FAILED %.8f\r\n\r\n", result); } - return *(pred.getPointer({0})); + return *(pred->read(0, 0)); // output layer } diff --git a/main.cpp b/main.cpp index f180f080..a725b876 100644 --- a/main.cpp +++ b/main.cpp @@ -13,6 +13,7 @@ #include "tensor_test.hpp" #include "context_test.hpp" #include "mlp_test.hpp" +#include "deep_mnist_mlp.hpp" //#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); @@ -27,9 +28,9 @@ int main(int argc, char** argv) { printf("Deep MLP on Mbed (Trained with Tensorflow)\r\n\r\n"); printf("running deep-mlp...\r\n"); - // int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); - // printf("prediction: %d\r\n", prediction); -/* idxImporterTest idxTest; +// int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); +// printf("prediction: %d\r\n", prediction); + idxImporterTest idxTest; idxTest.runAll(); @@ -68,14 +69,14 @@ int main(int argc, char** argv) { contextTest ctxTest; ctxTest.runAll(); printf("Context result...\r\n"); - ctxTest.printSummary();*/ + ctxTest.printSummary(); - printf("mlp test: \r\n"); +/* printf("mlp test: \r\n"); mlpTest mlpt; mlpt.runAll(); printf("mlp result...\r\n"); - mlpt.printSummary(); + mlpt.printSummary();*/ //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]}) //Out[24]: array([7]) diff --git a/mlp_test.hpp b/mlp_test.hpp index d9db7945..b2faec3d 100644 --- a/mlp_test.hpp +++ b/mlp_test.hpp @@ -108,7 +108,7 @@ class mlpTest : public Test { Tensor* matmul_out_min = new RamTensor({1}); Tensor* matmul_out_max = new RamTensor({1}); - QuantizedMatMul(x, w, out_c, x_min, w_min, x_max, + QuantizedMatMul(x, w, &out_c, x_min, w_min, x_max, w_max, matmul_out_min, matmul_out_max); //clean up delete x; @@ -197,7 +197,7 @@ class mlpTest : public Test { //output Tensor* deqnt_out = new RamTensor(out_c->getShape()); - dequantize(reqnt_out, reqnt_out_min, reqnt_out_max, deqnt_out); + dequantize(reqnt_out, reqnt_out_min, reqnt_out_max, &deqnt_out); delete out_c; delete reqnt_out_min; delete reqnt_out_max; @@ -229,7 +229,7 @@ class mlpTest : public Test { Tensor* bias = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-Variable_1_0.idx"); //output Tensor* output_z = new RamTensor(deqnt_out->getShape()); - Add(deqnt_out, bias, output_z); + Add(deqnt_out, bias, &output_z); delete deqnt_out; DEBUG("Add completed!\r\n"); diff --git a/tensor.hpp b/tensor.hpp index 837ceb5d..bd6f4bda 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -350,10 +350,13 @@ void printDim(Tensor* t) { template void tensorChkAlloc(Tensor** t, Shape dim) { - if ((*t)->getSize() == 0) { - *t = new RamTensor(dim); - } else if ((*t)->getShape() != dim) { + if (*t && (*t)->getSize() == 0) { + (*t)->init(dim); + } else if (*t && (*t)->getShape() != dim) { ERR_EXIT("Dim mismatched...\r\n"); + } else { + *t = new RamTensor(dim); } + } #endif From 13c2e45bab0d0d47872cc6aa61be3c01310bede8 Mon Sep 17 00:00:00 2001 From: kazami Date: Sun, 5 Nov 2017 19:44:39 +0800 Subject: [PATCH 36/80] 1. make reallocation for tensor 2. when the pointer is null, it would be reallocated. 
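For reference, the ownership contract this change settles on: an op that takes
Tensor** may be handed the address of a null pointer and will allocate the
output itself via tensorChkAlloc(); a pre-allocated tensor is reused only when
its shape already matches, and a mismatch is a hard error. A minimal
caller-side sketch against the post-patch QuantizedMatMul signature (the
tensor names here are illustrative):

  Tensor* out_c   = nullptr;                    // null: the op allocates it
  Tensor* out_min = new RamTensor<float>({1});  // pre-allocated: reused as-is
  Tensor* out_max = new RamTensor<float>({1});

  QuantizedMatMul<uint8_t, uint8_t, int>(a, b, &out_c,
                                         a_min, b_min, a_max, b_max,
                                         out_min, out_max);
  // out_c now points to a RamTensor<int> shaped {rows(a), cols(b)};
  // the caller is responsible for deleting all three outputs.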
--- MatrixOps.hpp | 8 ++++---- MatrixTests.hpp | 2 +- tensor.hpp | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/MatrixOps.hpp b/MatrixOps.hpp index 9be6ee19..6722fb8d 100644 --- a/MatrixOps.hpp +++ b/MatrixOps.hpp @@ -101,7 +101,7 @@ void QuantizationRangeForMultiplication(float min_a, float max_a, float min_b, } template -void QuantizedMatMul(Tensor* A, Tensor* B, Tensor* C, +void QuantizedMatMul(Tensor* A, Tensor* B, Tensor** C, Tensor* mina, Tensor* minb, Tensor* maxa, Tensor* maxb, Tensor* outmin, Tensor* outmax, bool transpose_a = false, @@ -115,7 +115,7 @@ void QuantizedMatMul(Tensor* A, Tensor* B, Tensor* C, Shape c_shape; c_shape.push_back((A->getShape())[0]); c_shape.push_back((B->getShape())[1]); - tensorChkAlloc(&C, c_shape); + tensorChkAlloc(C, c_shape); const int32_t offset_a = FloatToQuantizedUnclamped( 0.0f, min_a, max_a); // NT: what 0 quantized to; depends on @@ -133,7 +133,7 @@ void QuantizedMatMul(Tensor* A, Tensor* B, Tensor* C, T1* A_Data = A->read(0, 0); T2* B_Data = B->read(0, 0); - Toutput* C_Data = C->write(0, 0); + Toutput* C_Data = (*C)->write(0, 0); const bool transpose_c = false; const size_t m = A->getShape()[a_dim_remaining]; @@ -166,7 +166,7 @@ class QntMatMulOp : public Operator{ } virtual void compute() override { QuantizedMatMul(inputs[0], inputs[3], - outputs[0], inputs[1], inputs[4], inputs[2], inputs[5], + &(outputs[0]), inputs[1], inputs[4], inputs[2], inputs[5], outputs[1], outputs[2]); } }; diff --git a/MatrixTests.hpp b/MatrixTests.hpp index 04d8ea1c..a569b698 100644 --- a/MatrixTests.hpp +++ b/MatrixTests.hpp @@ -43,7 +43,7 @@ class matrixOpsTest : public Test { Tensor* out_min = new RamTensor(c_min->getShape()); Tensor* out_max = new RamTensor(c_max->getShape()); timer_start(); - QuantizedMatMul(a, b, out_c, a_min, b_min, a_max, + QuantizedMatMul(a, b, &out_c, a_min, b_min, a_max, b_max, out_min, out_max); timer_stop(); // diff --git a/tensor.hpp b/tensor.hpp index bd6f4bda..c7d2b88e 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -354,7 +354,7 @@ void tensorChkAlloc(Tensor** t, Shape dim) { (*t)->init(dim); } else if (*t && (*t)->getShape() != dim) { ERR_EXIT("Dim mismatched...\r\n"); - } else { + } else if (*t == nullptr){ *t = new RamTensor(dim); } From 90959800d8cc369bbfbbc2cd8d9fc5ee37e578df Mon Sep 17 00:00:00 2001 From: kazami Date: Sun, 5 Nov 2017 19:46:29 +0800 Subject: [PATCH 37/80] 1. when the code is compiled with release mode, the dequantize error would be 10-7. it will be bigger than 0, and failed 2. 10-7 should be ignorable. 
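For reference: the ~1e-7 residue is expected, not a regression. Release-mode
optimization may reassociate the floating-point accumulations in the
dequantize path, and float addition is not associative, so bit-exact agreement
with the TensorFlow reference cannot be guaranteed. A two-line illustration:

  float a = 1e8f, b = -1e8f, c = 1e-3f;
  printf("%.6f vs %.6f\r\n", (a + b) + c, a + (b + c));  // 0.001000 vs 0.000000

Hence the checks are relaxed from exact equality to a small epsilon.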
--- mlp_test.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mlp_test.hpp b/mlp_test.hpp index b2faec3d..c779c7fb 100644 --- a/mlp_test.hpp +++ b/mlp_test.hpp @@ -204,8 +204,8 @@ class mlpTest : public Test { delete reqnt_out; Tensor* ref_deqnt_out = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_0.idx"); - double temp; - if((temp = meanPercentErr(ref_deqnt_out, deqnt_out)) > 0) { + double temp = meanPercentErr(ref_deqnt_out, deqnt_out); + if(temp > 0.0001) { printf("dequantize failed (%.6f)\r\n", temp); float* ref_ptr = ref_deqnt_out->read(0, 0); float* test_ptr = deqnt_out->read(0, 0); @@ -240,8 +240,9 @@ class mlpTest : public Test { Tensor* ref_z = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-add_0.idx"); double result = meanPercentErr(ref_z, output_z); + std::cout << result << std::endl; - passed(result < 0.0001); + passed(result < 0.001); delete ref_z; delete output_z; delete bias; From 5a21dbb83a56bf3ef564848654c6ae656dde9630 Mon Sep 17 00:00:00 2001 From: kazami Date: Sun, 5 Nov 2017 19:48:21 +0800 Subject: [PATCH 38/80] 1. changed main function for testing run_mlp 2. mlp test passed --- main.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/main.cpp b/main.cpp index a725b876..1c1cdccd 100644 --- a/main.cpp +++ b/main.cpp @@ -28,8 +28,8 @@ int main(int argc, char** argv) { printf("Deep MLP on Mbed (Trained with Tensorflow)\r\n\r\n"); printf("running deep-mlp...\r\n"); -// int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); -// printf("prediction: %d\r\n", prediction); + int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); + printf("prediction: %d\r\n", prediction); idxImporterTest idxTest; idxTest.runAll(); @@ -72,11 +72,11 @@ int main(int argc, char** argv) { ctxTest.printSummary(); -/* printf("mlp test: \r\n"); + printf("mlp test: \r\n"); mlpTest mlpt; mlpt.runAll(); printf("mlp result...\r\n"); - mlpt.printSummary();*/ + mlpt.printSummary(); //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]}) //Out[24]: array([7]) From 0eb520bac18b88c74f6586d22720637fc73d32a9 Mon Sep 17 00:00:00 2001 From: kazami Date: Sun, 5 Nov 2017 19:53:33 +0800 Subject: [PATCH 39/80] fix typo error 1. runQntDeqntLayerZ error should be less than 0.0001 --- mlp_test.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlp_test.hpp b/mlp_test.hpp index c779c7fb..6c2611ff 100644 --- a/mlp_test.hpp +++ b/mlp_test.hpp @@ -242,7 +242,7 @@ class mlpTest : public Test { double result = meanPercentErr(ref_z, output_z); std::cout << result << std::endl; - passed(result < 0.001); + passed(result < 0.0001); delete ref_z; delete output_z; delete bias; From 6eda5dae12be528a941bff77b5b38bcf44013434 Mon Sep 17 00:00:00 2001 From: kazami Date: Mon, 6 Nov 2017 16:12:45 +0800 Subject: [PATCH 40/80] 1. 
add resize function and test case

---
 main.cpp        |  7 +++++++
 tensor.hpp      | 24 ++++++++++++++++++++++++
 tensor_test.hpp | 18 ++++++++++++++++++
 test.hpp        |  9 +++++++++
 4 files changed, 58 insertions(+)

diff --git a/main.cpp b/main.cpp
index 1c1cdccd..12342973 100644
--- a/main.cpp
+++ b/main.cpp
@@ -77,6 +77,13 @@ int main(int argc, char** argv) {
   mlpt.runAll();
   printf("mlp result...\r\n");
   mlpt.printSummary();
+
+
+  printf("tensor test: \r\n");
+  tensorTest tenT;
+  tenT.runAll();
+  printf("tensor result: \r\n");
+  tenT.printSummary();
 
   //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]})
   //Out[24]: array([7])
diff --git a/tensor.hpp b/tensor.hpp
index c7d2b88e..6708be8e 100644
--- a/tensor.hpp
+++ b/tensor.hpp
@@ -123,6 +123,30 @@ class Tensor : public uTensor {
 
   }
 
+  template
+  void resize(std::vector& v) {
+    uint32_t size = 0;
+    s->shape.clear();
+    for (auto i : v) {
+      s->shape.push_back(i);
+      if (size == 0) {
+        size = i;
+      } else {
+        size *= i;
+      }
+    }
+    if (size == s->total_size) {
+      return;
+    } else {
+      free(s->data);
+      s->total_size = size;
+      s->data = (void*)malloc(unit_size() * s->total_size);
+    }
+
+    if (s->data == NULL)
+      ERR_EXIT("ran out of memory for %lu malloc", unit_size() * s->total_size);
+  }
+
   std::vector getShape(void) { return s->shape; }
 
   uint32_t getSize(void) { return s->total_size; }
diff --git a/tensor_test.hpp b/tensor_test.hpp
index f65370f1..14b84cd6 100644
--- a/tensor_test.hpp
+++ b/tensor_test.hpp
@@ -7,6 +7,24 @@
 #include "tensorIdxImporter.hpp"
 #include "test.hpp"
 
+
+class tensorTest : public Test {
+ public:
+  void runResize() {
+    testStart("tensortest");
+    Tensor* a = new RamTensor({3, 2, 3});
+    std::vector v({1, 5, 8});
+    a->resize(v);
+    bool res = testsize(1 * 5 * 8, a->getSize());
+    passed(res);
+    delete a;
+  }
+
+  void runAll() {
+    runResize();
+  }
+};
+
 class transTest : public Test {
  public:
   void runShapeTest() {
diff --git a/test.hpp b/test.hpp
index 55229b7b..2a610e1a 100644
--- a/test.hpp
+++ b/test.hpp
@@ -125,6 +125,15 @@ class Test {
     }
     return pass;
   }
+
+  bool testsize(uint32_t src, uint32_t res) {
+    bool pass = true;
+    if (src != res) {
+      pass = false;
+      return pass;
+    }
+    return pass;
+  }
 
   template
   bool testval(T src, T res) {
     bool pass = true;

From b0f251da7433b527aab02ba84f0030ec82940766 Mon Sep 17 00:00:00 2001
From: kazami
Date: Mon, 6 Nov 2017 17:45:38 +0800
Subject: [PATCH 41/80] 1. change read interface from T* read() to const T*
 read() 2. this avoids the error of changing input values unintentionally

---
 ArrayOps.hpp          | 14 +++++++-------
 MathOps.hpp           | 10 +++++-----
 MathTests.hpp         | 28 ++++++++++++++--------------
 MatrixOps.hpp         |  8 ++++----
 NnOps.hpp             |  2 +-
 deep_mnist_mlp.hpp    |  4 ++--
 mlp_test.hpp          |  4 ++--
 tensor.hpp            | 10 +++++-----
 tensorIdxImporter.hpp |  2 +-
 test.hpp              | 10 +++++-----
 10 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/ArrayOps.hpp b/ArrayOps.hpp
index e6b41f7d..b5208c69 100644
--- a/ArrayOps.hpp
+++ b/ArrayOps.hpp
@@ -28,10 +28,10 @@ void QuantizeV2(Tensor* input, Tensor* _min_range, Tensor* _max_range,
 
   FloatToQuantizedStruct f2q(min_range, max_range);  //quantization_utils.h:149
 
-  float* input_ptr = input->read(0, 0);
+  const float* input_ptr = input->read(0, 0);
   T* output_ptr = output->write(0, 0);
-  float* output_min_ptr = output_min->read(0, 0);
-  float* output_max_ptr = output_max->read(0, 0);
+  float* output_min_ptr = output_min->write(0, 0);
+  float* output_max_ptr = output_max->write(0, 0);
 
   ///NT: need error checking at some point...
for(uint32_t i = 0; i < input->getSize(); i++) { @@ -59,7 +59,7 @@ void dequantize(Tensor* input, Tensor* min_range, Tensor* max_range, Tensor** ou Shape out_shape; tensorChkAlloc(output, input->getShape()); - T* input_ptr = input->read(0, 0); + const T* input_ptr = input->read(0, 0); float* output_ptr = (*output)->write(0, 0); //quantization_utils.h: 771 @@ -100,7 +100,7 @@ void reshape(Tensor* input, Tensor* shape, Tensor** output) { //validating and inferring dimensions int infer_index = -1; uint32_t dim_rem = input->getSize(); - int* val = shape->read(0, 0); + const int* val = shape->read(0, 0); for(uint32_t i = 0; i < shape->getSize(); i++) { if(val[i] == -1) { if(infer_index == -1) { @@ -123,11 +123,11 @@ void reshape(Tensor* input, Tensor* shape, Tensor** output) { if(dim_rem != 1) ERR_EXIT("supplied shape does not match up to input"); - T* input_ptr = input->read(0, 0); + const T* input_ptr = input->read(0, 0); //check if the output dim is valid if(*output && (*output)->getSize() > 0 && dim == (*output)->getShape()) { //copy - T* output_ptr = (*output)->read(0, 0); + T* output_ptr = (*output)->write(0, 0); std::memcpy(output_ptr, input_ptr, (std::size_t) input->getSize_in_bytes()); } else if(*output && (*output)->getSize() > 0 && dim != (*output)->getShape()) { ERR_EXIT("output tensor dimension mismatches supplied shape") diff --git a/MathOps.hpp b/MathOps.hpp index 800c8678..2fb5749e 100644 --- a/MathOps.hpp +++ b/MathOps.hpp @@ -11,7 +11,7 @@ void CalculateUsedRange(Tensor* input, int32_t* used_min_quan, int32_t minimum = INT_MAX; int32_t maxmum = INT_MIN; uint32_t size = input->getSize(); - T1* in_ptr = input->read(0, 0); + const T1* in_ptr = input->read(0, 0); for (uint32_t i = 0; i < size; i++) { if (minimum > in_ptr[i]) minimum = static_cast(in_ptr[i]); @@ -50,7 +50,7 @@ void Requantize(Tensor* input, Tensor* in_min, Tensor* in_max, const float input_max = in_max->read(0, 0)[0]; const float r_output_min = r_min->read(0, 0)[0]; const float r_output_max = r_max->read(0, 0)[0]; - T1 *input_ptr = input->read(0, 0); + const T1 *input_ptr = input->read(0, 0); Toutput *out_ptr = output->write(0, 0); // RequantizeManyInNewRange(input, input.getSize(), input_min, @@ -84,7 +84,7 @@ template void Min(Tensor* input, Tensor* dim, Tensor* out) { const TIn* p_in = input->read(0, 0); const Td* p_in2 = dim->read(0, 0); - TOut* p_out = out->read(0, 0); + TOut* p_out = out->write(0, 0); Td n_dim = p_in2[0]; std::vector permute; @@ -116,7 +116,7 @@ template void Max(Tensor* input, Tensor* dim, Tensor* out) { const TIn* p_in = input->read(0, 0); const Td* p_in2 = dim->read(0, 0); - TOut* p_out = out->read(0, 0); + TOut* p_out = out->write(0, 0); Td n_dim = p_in2[0]; std::vector permute; @@ -181,7 +181,7 @@ void ArgMax(Tensor* input, Tensor* dim, Tensor** out) { // In this case, we are going backward. 
permuteIndexTransform trans(vOutShape, permute); - TIn* inPtr = input->read(0, 0); + const TIn* inPtr = input->read(0, 0); TOut* outPtr = (*out)->write(0, 0); size_t out_index = 0; diff --git a/MathTests.hpp b/MathTests.hpp index 8a79a4ea..c649ac59 100644 --- a/MathTests.hpp +++ b/MathTests.hpp @@ -121,8 +121,8 @@ class MathOpsTest : public Test { double result; if((result = meanPercentErr(ref_a_q, a_q)) != 0) { printf("Requantize a_q failed (%.6f)\r\n", result); - unsigned char* ref_ptr = ref_a_q->read(0, 0); - unsigned char* test_ptr = a_q->read(0, 0); + unsigned char* ref_ptr = ref_a_q->write(0, 0); + unsigned char* test_ptr = a_q->write(0, 0); for(uint32_t i = 0; i < ref_a_q->getSize(); i++) { if(ref_ptr[i] != test_ptr[i]) { printf("%lu: %d != %d\r\n", i, ref_ptr[i], test_ptr[i]); @@ -177,21 +177,21 @@ class MathOpsTest : public Test { void argmaxTest2(void) { // NT: WIP do not use t_import int 64 here testStart("argmax2"); Tensor* test_input = TensorConstant({10, 5}, 0.0f); - *(test_input->read(25, 0)) = 1.0f; - *(test_input->read(26, 0)) = 1.0f; - *(test_input->read(7, 0)) = 1.0f; - *(test_input->read(48, 0)) = 1.0f; - *(test_input->read(14, 0)) = 1.0f; + *(test_input->write(25, 0)) = 1.0f; + *(test_input->write(26, 0)) = 1.0f; + *(test_input->write(7, 0)) = 1.0f; + *(test_input->write(48, 0)) = 1.0f; + *(test_input->write(14, 0)) = 1.0f; Tensor* test_dim = new RamTensor({1}); - *(test_dim->read(0, 0)) = 0; + *(test_dim->write(0, 0)) = 0; Tensor* test_out_ref = new RamTensor({5}); - *(test_out_ref->read(0, 0)) = 5.0f; - *(test_out_ref->read(1, 0)) = 5.0f; - *(test_out_ref->read(2, 0)) = 1.0f; - *(test_out_ref->read(3, 0)) = 9.0f; - *(test_out_ref->read(4, 0)) = 2.0f; + *(test_out_ref->write(0, 0)) = 5.0f; + *(test_out_ref->write(1, 0)) = 5.0f; + *(test_out_ref->write(2, 0)) = 1.0f; + *(test_out_ref->write(3, 0)) = 9.0f; + *(test_out_ref->write(4, 0)) = 2.0f; Tensor* test_out = new RamTensor(test_out_ref->getShape()); timer_start(); @@ -286,7 +286,7 @@ class MathOpsTest : public Test { void runAll(void) { argmaxTest(); -// argmaxTest2(); + argmaxTest2(); requantization_rangeTest(); requantizeTest(); requantizeTest2(); diff --git a/MatrixOps.hpp b/MatrixOps.hpp index 6722fb8d..676f5725 100644 --- a/MatrixOps.hpp +++ b/MatrixOps.hpp @@ -131,8 +131,8 @@ void QuantizedMatMul(Tensor* A, Tensor* B, Tensor** C, int a_dim_remaining = 1 - first; int b_dim_remaining = 1 - second; - T1* A_Data = A->read(0, 0); - T2* B_Data = B->read(0, 0); + const T1* A_Data = A->read(0, 0); + const T2* B_Data = B->read(0, 0); Toutput* C_Data = (*C)->write(0, 0); const bool transpose_c = false; @@ -152,9 +152,9 @@ void QuantizedMatMul(Tensor* A, Tensor* B, Tensor** C, QuantizationRangeForMultiplication( min_a, max_a, min_b, max_b, &min_c_value, &max_c_value); - float* c_min = outmin->read(0, 0); + float* c_min = outmin->write(0, 0); *c_min = min_c_value; - float* c_max = outmax->read(0, 0); + float* c_max = outmax->write(0, 0); *c_max = max_c_value; } diff --git a/NnOps.hpp b/NnOps.hpp index ad45f15a..33744c50 100644 --- a/NnOps.hpp +++ b/NnOps.hpp @@ -9,7 +9,7 @@ void Relu(Tensor* input, Tensor* in_min, Tensor* in_max, Tensor* output, Tensor* out_min, Tensor* out_max) { const float input_min = in_min->read(0, 0)[0]; const float input_max = in_max->read(0, 0)[0]; - TIn* in = input->read(0, 0); + const TIn* in = input->read(0, 0); const TOut min_as_quantized = FloatToQuantized(0.0f, input_min, input_max); diff --git a/deep_mnist_mlp.hpp b/deep_mnist_mlp.hpp index a4f8d4b2..5c61597e 100644 --- a/deep_mnist_mlp.hpp 
+++ b/deep_mnist_mlp.hpp @@ -18,8 +18,8 @@ void tensorQuantize(Tensor* input, Tensor** output, Shape input_shape = input->getShape(); Tensor* reshape_out = nullptr; - *(reshape_shape->read(0, 0)) = -1; - *(reduce_dim->read(0, 0)) = 0; + *(reshape_shape->write(0, 0)) = -1; + *(reduce_dim->write(0, 0)) = 0; reshape(input, reshape_shape, &reshape_out); diff --git a/mlp_test.hpp b/mlp_test.hpp index 6c2611ff..3c9c696b 100644 --- a/mlp_test.hpp +++ b/mlp_test.hpp @@ -207,8 +207,8 @@ class mlpTest : public Test { double temp = meanPercentErr(ref_deqnt_out, deqnt_out); if(temp > 0.0001) { printf("dequantize failed (%.6f)\r\n", temp); - float* ref_ptr = ref_deqnt_out->read(0, 0); - float* test_ptr = deqnt_out->read(0, 0); + const float* ref_ptr = ref_deqnt_out->read(0, 0); + const float* test_ptr = deqnt_out->read(0, 0); for(uint32_t i; i < ref_deqnt_out->getSize(); i++) { if(ref_ptr[i] != test_ptr[i]) { DEBUG("%d: %.3f != %.3f, diff: %.8f%%\r\n", i, ref_ptr[i], test_ptr[i], test_ptr[i]/ref_ptr[i]); diff --git a/tensor.hpp b/tensor.hpp index 6708be8e..4d6c5502 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -159,8 +159,8 @@ class Tensor : public uTensor { size_t getDim(void) { return s->shape.size(); } template - T* read(size_t offset, size_t ele) { - return (T*)read(offset, ele); + const T* read(size_t offset, size_t ele) { + return (const T*)read(offset, ele); } template @@ -239,8 +239,8 @@ class RamTensor : public Tensor { template Tensor* TensorCast(Tensor* input) { Tensor* output = new RamTensor(input->getShape()); - Tin* inputPrt = input->read(0, 0); - Tout* outputPrt = output->read(0, 0); + const Tin* inputPrt = input->read(0, 0); + Tout* outputPrt = output->write(0, 0); for (uint32_t i = 0; i < input->getSize(); i++) { outputPrt[i] = static_cast(inputPrt[i]); @@ -252,7 +252,7 @@ Tensor* TensorCast(Tensor* input) { template Tensor* TensorConstant(std::vector shape, T c) { Tensor* output = new RamTensor(shape); - T* outPrt = output->read(0, 0); + T* outPrt = output->write(0, 0); for (uint32_t i = 0; i < output->getSize(); i++) { outPrt[i] = c; diff --git a/tensorIdxImporter.hpp b/tensorIdxImporter.hpp index 98011fab..632454a9 100644 --- a/tensorIdxImporter.hpp +++ b/tensorIdxImporter.hpp @@ -136,7 +136,7 @@ Tensor* TensorIdxImporter::loader(string& filename, IDX_DTYPE idx_type) { const uint8_t unit_size = t->unit_size(); U* val = (U*)malloc(unit_size); - U* data = t->read(0, 0); + U* data = t->write(0, 0); for (uint32_t i = 0; i < t->getSize(); i++) { fread(val, unit_size, 1, fp); diff --git a/test.hpp b/test.hpp index 2a610e1a..77c7b808 100644 --- a/test.hpp +++ b/test.hpp @@ -106,7 +106,7 @@ class Test { template double sum(Tensor* input) { - U* elem = input->read(0, 0); + const U* elem = input->read(0, 0); double accm = 0.0; for (uint32_t i = 0; i < input->getSize(); i++) { accm += (double)elem[i]; @@ -149,8 +149,8 @@ class Test { ERR_EXIT("Test.meanAbsErr(): dimension mismatch\r\n"); } - U* elemA = A->read(0, 0); - U* elemB = B->read(0, 0); + const U* elemA = A->read(0, 0); + const U* elemB = B->read(0, 0); double accm = 0.0; for (uint32_t i = 0; i < A->getSize(); i++) { @@ -167,8 +167,8 @@ class Test { ERR_EXIT("Test.sumPercentErr(): dimension mismatch\r\n"); } - U* elemA = A->read(0, 0); - U* elemB = B->read(0, 0); + const U* elemA = A->read(0, 0); + const U* elemB = B->read(0, 0); double accm = 0.0; for (uint32_t i = 0; i < A->getSize(); i++) { From 4e1e49ed1b61c3463ec44d952838865c6d27ebae Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Fri, 10 Nov 2017 01:06:29 +0900 Subject: [PATCH 42/80] 
context MatMalTest passed --- MatrixOps.hpp | 66 ++++++++++++++++++++++-- MatrixTests.hpp | 2 + context.hpp | 132 ++++++++++++++++++++++++++++++++++------------- context_test.hpp | 100 +++++++++++++++++++++++++---------- main.cpp | 10 ++-- tensor.hpp | 45 +--------------- uTensorBase.hpp | 21 +++++--- 7 files changed, 254 insertions(+), 122 deletions(-) diff --git a/MatrixOps.hpp b/MatrixOps.hpp index 676f5725..e6543a91 100644 --- a/MatrixOps.hpp +++ b/MatrixOps.hpp @@ -158,6 +158,66 @@ void QuantizedMatMul(Tensor* A, Tensor* B, Tensor** C, *c_max = max_c_value; } +////////////////////////////////////////////////////// +template +void QuantizedMatMul2(Tensor* A, Tensor* B, Tensor* C, + Tensor* mina, Tensor* minb, Tensor* maxa, + Tensor* maxb, Tensor* outmin, + Tensor* outmax, bool transpose_a = false, + bool transpose_b = false) { + const float min_a = *(mina->read(0, 0)); + const float max_a = *(maxa->read(0, 0)); + const float min_b = *(minb->read(0, 0)); + const float max_b = *(maxb->read(0, 0)); + + //auto tensor allocation + Shape c_shape; + c_shape.push_back((A->getShape())[0]); + c_shape.push_back((B->getShape())[1]); + //tensorChkAlloc2(C, c_shape); + //replace this with resize + + const int32_t offset_a = FloatToQuantizedUnclamped( + 0.0f, min_a, max_a); // NT: what 0 quantized to; depends on + // Eigen::NumTraits::lowest() + const int32_t offset_b = FloatToQuantizedUnclamped(0.0f, min_b, max_b); + const int32_t offset_c = 0; + const int32_t mult_c = 1; + const int32_t shift_c = 0; + + int first = transpose_a ? 0 : 1; + int second = transpose_b ? 1 : 0; + + int a_dim_remaining = 1 - first; + int b_dim_remaining = 1 - second; + + const T1* A_Data = A->read(0, 0); + const T2* B_Data = B->read(0, 0); + Toutput* C_Data = C->write(0, 0); + + const bool transpose_c = false; + const size_t m = A->getShape()[a_dim_remaining]; + const size_t n = B->getShape()[b_dim_remaining]; + const size_t k = A->getShape()[first]; + const size_t lda = A->getShape()[1]; + const size_t ldb = B->getShape()[1]; + const size_t ldc = n; + + ReferenceGemmuImpl( + transpose_a, transpose_b, transpose_c, m, n, k, A_Data, offset_a, lda, + B_Data, offset_b, ldb, C_Data, shift_c, offset_c, mult_c, ldc); + float min_c_value; + float max_c_value; + + QuantizationRangeForMultiplication( + min_a, max_a, min_b, max_b, &min_c_value, &max_c_value); + + float* c_min = outmin->write(0, 0); + *c_min = min_c_value; + float* c_max = outmax->write(0, 0); + *c_max = max_c_value; +} + class QntMatMulOp : public Operator{ public: QntMatMulOp() { @@ -165,9 +225,9 @@ class QntMatMulOp : public Operator{ n_outputs = 3; } virtual void compute() override { - QuantizedMatMul(inputs[0], inputs[3], - &(outputs[0]), inputs[1], inputs[4], inputs[2], inputs[5], - outputs[1], outputs[2]); + QuantizedMatMul2(inputs[0].get(), inputs[3].get(), + outputs[0].get(), inputs[1].get(), inputs[4].get(), inputs[2].get(), inputs[5].get(), + outputs[1].get(), outputs[2].get()); } }; diff --git a/MatrixTests.hpp b/MatrixTests.hpp index a569b698..7ef7385d 100644 --- a/MatrixTests.hpp +++ b/MatrixTests.hpp @@ -8,6 +8,7 @@ class matrixOpsTest : public Test { public: void qMatMul(void) { + testStart("Quantized Matrix Mul"); TensorIdxImporter t_import; @@ -55,6 +56,7 @@ class matrixOpsTest : public Test { meanPercentErr(c_max, out_max); // passed(result < 0.0001); passed(result == 0); + } void runAll(void) { qMatMul(); } diff --git a/context.hpp b/context.hpp index 35abd018..bfae7e10 100644 --- a/context.hpp +++ b/context.hpp @@ -1,34 +1,48 @@ #ifndef 
UTENSOR_CTX_H #define UTENSOR_CTX_H +#include +#include #include "uTensorBase.hpp" #include "stdio.h" - //#include -//TODO: how do we deal with dangling tensors? -// only allow pushing for exact number of inputs -// output reference count are initialized to 0, incremented only on input-push -// outputs are allocated in ops -// output lists can contain nullptr/empty-tensors -// tensors can be all pointers here, but destructors has to set data to nullptr -// push(op, input_t_list, output_t_list) or push(op, init-list, init-list) -// TensorListModifierOp +class Ref_Record { +public: + uint8_t count; + bool allow_incr; + std::shared_ptr sptr; + + Ref_Record() { + count = 0; + allow_incr = true; + sptr.reset(); + } +}; + class Context : public uTensor { protected: vector op_list; bool del_onsight; - //std::unordered_map TensorList; //all tensors alive //kill all unused if malloc failed? + + std::unordered_map rTable; //all tensors alive //kill all unused if malloc failed? //uint32_t m_size; //remaining memory size //void registerTensor(Tensor* t); //void gc(void); //garbage collector, delete any tracked unreferenced tensor - void initTensors(const TList &t_list); - void deinitTensors(const TList &t_list); - void updateInputTensorRef(const TList &t_list); - void dcrRefCount(TList t_list); + void initTensors(const S_TList &t_list); + void deinitTensors(const S_TList &t_list); + + void incrTListRef(const TList &t_list); + void dcrListRef(S_TList t_list); + void delTensor(Tensor* t); + //uint16_t incrRef(std::shared_ptr sptr); + uint8_t dcrRef(Tensor* t); + bool isTracked(Tensor* t); + //uint16_t getRef(); public: + std::weak_ptr add(Tensor* t, uint8_t init_count = 0); void push(Operator *op, TList &_inputs, TList &_outputs); int eval(void); @@ -37,52 +51,100 @@ class Context : public uTensor { } }; - -void Context::push(Operator *op, TList &_inputs, TList &_outputs) { - if(op->getNumInputs() != _inputs.size()) { - ERR_EXIT("valid number of inputs\r\n"); +std::weak_ptr Context::add(Tensor* t, uint8_t init_count) { + if(rTable.find(t) != rTable.end()) { + ERR_EXIT("tensor pointer address already exist in rTable"); } - if(op->getNumOutputs() != _outputs.size()) { - ERR_EXIT("valid number of output\r\n"); + + shared_ptr _sptr(t); + + Ref_Record record; + + if(init_count != 0) { + record.count = init_count; + record.allow_incr = false; } + record.sptr = _sptr; + + rTable[t] = record; + + auto wptr = _sptr; + + return wptr; +} + +void Context::push(Operator *op, TList &_inputs, TList &_outputs) { + //error checking in the Op class op->setInputs(_inputs); op->setOutputs(_outputs); op_list.push_back(op); - updateInputTensorRef(_inputs); + incrTListRef(_inputs); } -void Context::updateInputTensorRef(const TList &t_list) { +void Context::incrTListRef(const TList &t_list) { for(auto t:t_list) { - t->incrRef(); //if an initial ref value is supplied to the tensor at compile time + Tensor* ptr = t.lock().get(); + if(rTable.find(ptr) == rTable.end()) { + ERR_EXIT("tensor not registered"); + } + + Ref_Record record = rTable[ptr]; + if(record.allow_incr) { + record.count++; + rTable[ptr] = record; + } + + //if an initial ref value is supplied to the tensor at compile time //then this function does nothing - //otherwise, it increment the internal ref count of the tensor - //in internal count is init to 0 by the tensor constructor + //otherwise, it increment the ref count of the tensor + //count is init to 0 by the record constructor } } -void Context::initTensors(const TList &t_list) { +void 
Context::initTensors(const S_TList &t_list) { for(auto t:t_list) { t->inFocus(); } } -void Context::deinitTensors(const TList &t_list) { +void Context::deinitTensors(const S_TList &t_list) { for(auto t:t_list) { t->deFocus(); } } -void Context::dcrRefCount(TList t_list) { +void Context::delTensor(Tensor* t) { + Ref_Record record = rTable[t]; + record.sptr.reset(); + rTable.erase(t); +} + +void Context::dcrListRef(S_TList t_list) { for(auto t:t_list) { - t->dcrRef(); - if(t->getRef() < 1 && del_onsight) { - delete t; + if(dcrRef(t.get()) < 1) { + delTensor(t.get()); } } } +uint8_t Context::dcrRef(Tensor* t) { + if(!isTracked(t)) { + ERR_EXIT("Tensor not registered"); + } + + Ref_Record record = rTable[t]; + if(record.count > 0) record.count -= 1; + rTable[t] = record; + + return record.count; +} + +bool Context::isTracked(Tensor* t) { + return (rTable.find(t) != rTable.end()); +} + int Context::eval(void) { //unref2nullTensors(); @@ -97,12 +159,10 @@ int Context::eval(void) { deinitTensors(op->getInputs()); deinitTensors(op->getOutputs()); - dcrRefCount(op->getInputs()); + dcrListRef(op->getInputs()); + + delete op; - op->dcrRef(); - if(op->getRef() < 1 && del_onsight) { - delete op; - } } return 0; diff --git a/context_test.hpp b/context_test.hpp index c9efb24d..d5cfa1bb 100644 --- a/context_test.hpp +++ b/context_test.hpp @@ -19,51 +19,99 @@ class contextTest : public Test { void MatMalTest(void) { testStart("Context QntMatMal Op"); + Context ctx; //inputs - Tensor* a = - t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx"); - Tensor* a_min = - t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx"); - Tensor* a_max = - t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx"); - Tensor* b = - t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx"); - Tensor* b_min = - t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx"); - Tensor* b_max = - t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx"); + auto a = + ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx")); + auto a_min = + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx")); + auto a_max = + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx")); + auto b = + ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx")); + auto b_min = + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx")); + auto b_max = + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx")); // reference outputs - Tensor* c = - t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx"); - Tensor* c_min = - t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx"); - Tensor* c_max = - t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx"); + auto c = + ctx.add(t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx")); + auto c_min = + ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx")); + auto c_max = + ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx")); - Tensor* out_c = new RamTensor(c->getShape()); - Tensor* out_min = new RamTensor(c_min->getShape()); - Tensor* out_max = new RamTensor(c_max->getShape()); + auto out_c = ctx.add(new RamTensor(c.lock()->getShape())); + auto out_min = ctx.add(new RamTensor(c_min.lock()->getShape())); + auto out_max = ctx.add(new RamTensor(c_max.lock()->getShape())); TList inputs = {a, a_min, a_max, b, b_min, b_max}; TList outputs = {out_c, out_min, out_max}; - Operator* matMal = new QntMatMulOp(); - Context ctx; + //set which tensors to keep alive + auto ref_c_rptr = c.lock(); + 
auto ref_min_rptr = c_min.lock(); + auto ref_max_rptr = c_max.lock(); + auto out_c_rptr = out_c.lock(); + auto out_min_rptr = out_min.lock(); + auto out_max_rptr = out_max.lock(); + + timer_start(); - ctx.push(matMal, inputs, outputs); + ctx.push(new QntMatMulOp(), inputs, outputs); ctx.eval(); timer_stop(); - double result = meanPercentErr(c, out_c) + meanPercentErr(c_min, out_min) + - meanPercentErr(c_max, out_max); + double result = meanPercentErr(ref_c_rptr.get(), out_c_rptr.get()) + meanPercentErr(ref_min_rptr.get(), out_min_rptr.get()) + + meanPercentErr(ref_max_rptr.get(), out_max_rptr.get()); passed(result == 0); } +/* + void RefCountTest(void) { + testStart("Context Ref Count"); + //inputs + Tensor* a = new RamTensor({1,1,1}); + Tensor* b = new RamTensor({1,1,1}); + Tensor* c = new RamTensor({1,1,1}); + + // reference outputs + Tensor* out = new RamTensor({1,1,1}); + out->keep_alive(true); + + + Context ctx; + timer_start(); + + TList input0 = {a, b}; + TList output0 = {c}; + ctx.push(new AddOp(), inputs0, outputs0); + + TList input1 = {c, a}; + TList output1 = {b}; + ctx.push(new AddOp(), inputs1, outputs1); + + TList input2 = {a, b}; + TList output2 = {out}; + ctx.push(new AddOp(), inputs2, outputs2); + ctx.eval(); + timer_stop(); + + if(a != nullptr || b != nullptr || c != nullptr) { + failed(); + return; + } + + passed(out->read(0, 0) != 1); + + } + */ void runAll(void) { MatMalTest(); + //RefCountTest(); } }; diff --git a/main.cpp b/main.cpp index 12342973..8789333c 100644 --- a/main.cpp +++ b/main.cpp @@ -72,11 +72,11 @@ int main(int argc, char** argv) { ctxTest.printSummary(); - printf("mlp test: \r\n"); - mlpTest mlpt; - mlpt.runAll(); - printf("mlp result...\r\n"); - mlpt.printSummary(); + // printf("mlp test: \r\n"); + // mlpTest mlpt; + // mlpt.runAll(); + // printf("mlp result...\r\n"); + // mlpt.printSummary(); printf("tesnor test: \r\n"); diff --git a/tensor.hpp b/tensor.hpp index 4d6c5502..e34b49f7 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -18,57 +18,14 @@ // }; class uTensor { -protected: - uint16_t ref_count; - bool static_ref_flag; //to support compile-time ref count public: - uTensor() { - ref_count = 0; - static_ref_flag = false; - } virtual void inFocus(){}; virtual void deFocus(){}; - uint16_t incrRef(); - uint16_t dcrRef(); - uint16_t getRef(); - void setStaticRef(uint16_t c); - bool is_static_ref(void); + virtual ~uTensor() = 0; }; -uint16_t uTensor::incrRef() { - if(!static_ref_flag) { - ref_count += 1; - } - - return ref_count; -} - -uint16_t uTensor::dcrRef() { - ref_count -= 1; - return ref_count; -} - -uint16_t uTensor::getRef() { - return ref_count; -} - -bool uTensor::is_static_ref(void) { - return static_ref_flag; -} - -void uTensor::setStaticRef(uint16_t c) { - if(ref_count == 0) { - ref_count = c; - static_ref_flag = true; - } else { - ERR_EXIT("None-zero ref_count"); - } -} - - - uTensor::~uTensor() {} class TensorBase { public: diff --git a/uTensorBase.hpp b/uTensorBase.hpp index f8c27bd2..ce1a893e 100644 --- a/uTensorBase.hpp +++ b/uTensorBase.hpp @@ -3,27 +3,28 @@ #include "tensor.hpp" -typedef vector TList; +typedef vector> TList; +typedef vector> S_TList; //isType() https://stackoverflow.com/questions/9974596/how-to-check-whether-two-pointers-point-to-the-same-object-or-not //double dispatch //new vs stack -class Operator : public uTensor{ +class Operator : public uTensor { protected: //setup input/output info in derived constructors //ref count? 
- TList inputs; + S_TList inputs; uint8_t n_inputs; - TList outputs; + S_TList outputs; uint8_t n_outputs; public: virtual void compute() = 0; void setInputs(TList &_inputs); void setOutputs(TList &_outputs); - TList getInputs(void) { return inputs; } - TList getOutputs(void) { return outputs;} + S_TList getInputs(void) { return inputs; } + S_TList getOutputs(void) { return outputs;} uint8_t getNumInputs(void) { return n_inputs; } uint8_t getNumOutputs(void) { return n_outputs; } @@ -36,13 +37,17 @@ class Operator : public uTensor{ void Operator::setInputs(TList &_inputs) { if(_inputs.size() != n_inputs) ERR_EXIT("Input Tensor list mismatched..."); - inputs = _inputs; + for(uint8_t i=0; i < _inputs.size(); i++) { + inputs.push_back(_inputs[i].lock()); + } } void Operator::setOutputs(TList &_outputs) { if(_outputs.size() != n_outputs) ERR_EXIT("Input Tensor list mismatched..."); - outputs = _outputs; + for(uint8_t i=0; i < _outputs.size(); i++) { + outputs.push_back(_outputs[i].lock()); + } } #endif //UTENSOR_BASE_H From 537606c232f9fcbf8739a65ac44d1ad6304972db Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Fri, 10 Nov 2017 14:53:35 +0900 Subject: [PATCH 43/80] polished up the syntax --- MatrixOps.hpp | 14 +++++++------- context.hpp | 8 ++++---- context_test.hpp | 39 ++++++++++++++++++++------------------- tensor.hpp | 7 +++++++ uTensorBase.hpp | 3 --- 5 files changed, 38 insertions(+), 33 deletions(-) diff --git a/MatrixOps.hpp b/MatrixOps.hpp index e6543a91..84e0a12c 100644 --- a/MatrixOps.hpp +++ b/MatrixOps.hpp @@ -160,10 +160,10 @@ void QuantizedMatMul(Tensor* A, Tensor* B, Tensor** C, ////////////////////////////////////////////////////// template -void QuantizedMatMul2(Tensor* A, Tensor* B, Tensor* C, - Tensor* mina, Tensor* minb, Tensor* maxa, - Tensor* maxb, Tensor* outmin, - Tensor* outmax, bool transpose_a = false, +void QuantizedMatMul2(S_TENSOR A, S_TENSOR B, S_TENSOR C, + S_TENSOR mina, S_TENSOR minb, S_TENSOR maxa, + S_TENSOR maxb, S_TENSOR outmin, + S_TENSOR outmax, bool transpose_a = false, bool transpose_b = false) { const float min_a = *(mina->read(0, 0)); const float max_a = *(maxa->read(0, 0)); @@ -225,9 +225,9 @@ class QntMatMulOp : public Operator{ n_outputs = 3; } virtual void compute() override { - QuantizedMatMul2(inputs[0].get(), inputs[3].get(), - outputs[0].get(), inputs[1].get(), inputs[4].get(), inputs[2].get(), inputs[5].get(), - outputs[1].get(), outputs[2].get()); + QuantizedMatMul2(inputs[0], inputs[3], + outputs[0], inputs[1], inputs[4], inputs[2], inputs[5], + outputs[1], outputs[2]); } }; diff --git a/context.hpp b/context.hpp index bfae7e10..f7437bfd3 100644 --- a/context.hpp +++ b/context.hpp @@ -11,7 +11,7 @@ class Ref_Record { public: uint8_t count; bool allow_incr; - std::shared_ptr sptr; + S_TENSOR sptr; Ref_Record() { count = 0; @@ -42,7 +42,7 @@ class Context : public uTensor { //uint16_t getRef(); public: - std::weak_ptr add(Tensor* t, uint8_t init_count = 0); + W_TENSOR add(Tensor* t, uint8_t init_count = 0); void push(Operator *op, TList &_inputs, TList &_outputs); int eval(void); @@ -51,12 +51,12 @@ class Context : public uTensor { } }; -std::weak_ptr Context::add(Tensor* t, uint8_t init_count) { +W_TENSOR Context::add(Tensor* t, uint8_t init_count) { if(rTable.find(t) != rTable.end()) { ERR_EXIT("tensor pointer address already exist in rTable"); } - shared_ptr _sptr(t); + S_TENSOR _sptr(t); Ref_Record record; diff --git a/context_test.hpp b/context_test.hpp index d5cfa1bb..9c7219da 100644 --- a/context_test.hpp +++ b/context_test.hpp @@ 
-21,42 +21,43 @@ class contextTest : public Test { testStart("Context QntMatMal Op"); Context ctx; //inputs - auto a = + W_TENSOR a = ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx")); - auto a_min = + W_TENSOR a_min = ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx")); - auto a_max = + W_TENSOR a_max = ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx")); - auto b = + W_TENSOR b = ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx")); - auto b_min = + W_TENSOR b_min = ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx")); - auto b_max = + W_TENSOR b_max = ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx")); // reference outputs - auto c = + W_TENSOR c = ctx.add(t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx")); - auto c_min = + W_TENSOR c_min = ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx")); - auto c_max = + W_TENSOR c_max = ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx")); - auto out_c = ctx.add(new RamTensor(c.lock()->getShape())); - auto out_min = ctx.add(new RamTensor(c_min.lock()->getShape())); - auto out_max = ctx.add(new RamTensor(c_max.lock()->getShape())); + W_TENSOR out_c = ctx.add(new RamTensor(c.lock()->getShape())); + W_TENSOR out_min = ctx.add(new RamTensor(c_min.lock()->getShape())); + W_TENSOR out_max = ctx.add(new RamTensor(c_max.lock()->getShape())); TList inputs = {a, a_min, a_max, b, b_min, b_max}; TList outputs = {out_c, out_min, out_max}; - //set which tensors to keep alive - auto ref_c_rptr = c.lock(); - auto ref_min_rptr = c_min.lock(); - auto ref_max_rptr = c_max.lock(); - auto out_c_rptr = out_c.lock(); - auto out_min_rptr = out_min.lock(); - auto out_max_rptr = out_max.lock(); + //if you want tensors to be alive after .eval() + //copies of the share_pointer needs to be here + S_TENSOR ref_c_rptr = c.lock(); + S_TENSOR ref_min_rptr = c_min.lock(); + S_TENSOR ref_max_rptr = c_max.lock(); + S_TENSOR out_c_rptr = out_c.lock(); + S_TENSOR out_min_rptr = out_min.lock(); + S_TENSOR out_max_rptr = out_max.lock(); timer_start(); diff --git a/tensor.hpp b/tensor.hpp index e34b49f7..e9f023c7 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -17,6 +17,13 @@ // dbl, // }; +class Tensor; + +typedef std::shared_ptr S_TENSOR; +typedef std::weak_ptr W_TENSOR; +typedef vector TList; +typedef vector S_TList; + class uTensor { public: virtual void inFocus(){}; diff --git a/uTensorBase.hpp b/uTensorBase.hpp index ce1a893e..e8ea0bb1 100644 --- a/uTensorBase.hpp +++ b/uTensorBase.hpp @@ -3,9 +3,6 @@ #include "tensor.hpp" -typedef vector> TList; -typedef vector> S_TList; - //isType() https://stackoverflow.com/questions/9974596/how-to-check-whether-two-pointers-point-to-the-same-object-or-not //double dispatch From 7e278d44df37f698b7fdbd4d6400af669b472863 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Fri, 10 Nov 2017 15:10:57 +0900 Subject: [PATCH 44/80] Op should use resize() for output tensors; syntax updates --- MatrixOps.hpp | 3 +-- context.hpp | 4 ++-- context_test.hpp | 26 ++++++++++++++------------ tensor.hpp | 4 ++-- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/MatrixOps.hpp b/MatrixOps.hpp index 84e0a12c..7b07d972 100644 --- a/MatrixOps.hpp +++ b/MatrixOps.hpp @@ -174,8 +174,7 @@ void QuantizedMatMul2(S_TENSOR A, S_TENSOR B, S_TENSOR C, Shape c_shape; c_shape.push_back((A->getShape())[0]); c_shape.push_back((B->getShape())[1]); - //tensorChkAlloc2(C, c_shape); - //replace this with resize + C->resize(c_shape); const 
int32_t offset_a = FloatToQuantizedUnclamped( 0.0f, min_a, max_a); // NT: what 0 quantized to; depends on diff --git a/context.hpp b/context.hpp index f7437bfd3..5b327fca 100644 --- a/context.hpp +++ b/context.hpp @@ -42,7 +42,7 @@ class Context : public uTensor { //uint16_t getRef(); public: - W_TENSOR add(Tensor* t, uint8_t init_count = 0); + TENSOR add(Tensor* t, uint8_t init_count = 0); void push(Operator *op, TList &_inputs, TList &_outputs); int eval(void); @@ -51,7 +51,7 @@ class Context : public uTensor { } }; -W_TENSOR Context::add(Tensor* t, uint8_t init_count) { +TENSOR Context::add(Tensor* t, uint8_t init_count) { if(rTable.find(t) != rTable.end()) { ERR_EXIT("tensor pointer address already exist in rTable"); } diff --git a/context_test.hpp b/context_test.hpp index 9c7219da..2894a820 100644 --- a/context_test.hpp +++ b/context_test.hpp @@ -21,31 +21,33 @@ class contextTest : public Test { testStart("Context QntMatMal Op"); Context ctx; //inputs - W_TENSOR a = + TENSOR a = ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx")); - W_TENSOR a_min = + TENSOR a_min = ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx")); - W_TENSOR a_max = + TENSOR a_max = ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx")); - W_TENSOR b = + TENSOR b = ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx")); - W_TENSOR b_min = + TENSOR b_min = ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx")); - W_TENSOR b_max = + TENSOR b_max = ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx")); // reference outputs - W_TENSOR c = + TENSOR c = ctx.add(t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx")); - W_TENSOR c_min = + TENSOR c_min = ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx")); - W_TENSOR c_max = + TENSOR c_max = ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx")); - W_TENSOR out_c = ctx.add(new RamTensor(c.lock()->getShape())); - W_TENSOR out_min = ctx.add(new RamTensor(c_min.lock()->getShape())); - W_TENSOR out_max = ctx.add(new RamTensor(c_max.lock()->getShape())); + //we need default constructor here + //so we can get ride of the shapes here + TENSOR out_c = ctx.add(new RamTensor(c.lock()->getShape())); + TENSOR out_min = ctx.add(new RamTensor(c_min.lock()->getShape())); + TENSOR out_max = ctx.add(new RamTensor(c_max.lock()->getShape())); TList inputs = {a, a_min, a_max, b, b_min, b_max}; TList outputs = {out_c, out_min, out_max}; diff --git a/tensor.hpp b/tensor.hpp index e9f023c7..bf070c62 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -20,8 +20,8 @@ class Tensor; typedef std::shared_ptr S_TENSOR; -typedef std::weak_ptr W_TENSOR; -typedef vector TList; +typedef std::weak_ptr TENSOR; +typedef vector TList; typedef vector S_TList; class uTensor { From 1b1a0711dce7f6b4a2dd215dbaed7504d33f249d Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Fri, 10 Nov 2017 21:57:55 +0900 Subject: [PATCH 45/80] RefCountTest bugged --- MathOps.hpp | 27 ++++++++++++++++++++++++++ context.hpp | 2 +- context_test.hpp | 49 +++++++++++++++++++++++++----------------------- tensor.hpp | 2 +- 4 files changed, 55 insertions(+), 25 deletions(-) diff --git a/MathOps.hpp b/MathOps.hpp index 2fb5749e..c3694ec4 100644 --- a/MathOps.hpp +++ b/MathOps.hpp @@ -201,4 +201,31 @@ void ArgMax(Tensor* input, Tensor* dim, Tensor** out) { out_index++; } } + +template +void Add(S_TENSOR input, S_TENSOR input2, S_TENSOR out) { + const TIn* p_in = input->read(0, 0); + const TIn* p_in2 = input2->read(0, 0); + + 
//auto shape + out->resize(input->getShape()); + + TOut* p_out = out->write(0, 0); + + const uint32_t size = out->getSize(); + for (uint32_t i = 0; i < size; i++) { + p_out[i] = p_in[i] + p_in2[i]; + } +} + +class AddOp : public Operator{ +public: + AddOp() { + n_inputs = 2; + n_outputs = 1; + } + virtual void compute() override { + Add(inputs[0], inputs[1], outputs[0]); + } +}; #endif // UTENSOR_MATH_OPS diff --git a/context.hpp b/context.hpp index 5b327fca..045e3969 100644 --- a/context.hpp +++ b/context.hpp @@ -68,7 +68,7 @@ TENSOR Context::add(Tensor* t, uint8_t init_count) { rTable[t] = record; - auto wptr = _sptr; + TENSOR wptr = _sptr; return wptr; } diff --git a/context_test.hpp b/context_test.hpp index 2894a820..f2cfc9dc 100644 --- a/context_test.hpp +++ b/context_test.hpp @@ -14,12 +14,12 @@ class contextTest : public Test { TensorIdxImporter t_import; + Context ctx; public: void MatMalTest(void) { testStart("Context QntMatMal Op"); - Context ctx; //inputs TENSOR a = ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx")); @@ -72,49 +72,52 @@ class contextTest : public Test { passed(result == 0); } -/* + void RefCountTest(void) { testStart("Context Ref Count"); + timer_start(); //inputs - Tensor* a = new RamTensor({1,1,1}); - Tensor* b = new RamTensor({1,1,1}); - Tensor* c = new RamTensor({1,1,1}); - - // reference outputs - Tensor* out = new RamTensor({1,1,1}); - out->keep_alive(true); + TENSOR a = ctx.add(new RamTensor({1,1,1})); + TENSOR b = ctx.add(new RamTensor({1,1,1})); + TENSOR c = ctx.add(new RamTensor({1,1,1})); + //init values + *(a.lock()->write(0, 0)) = 1; + *(b.lock()->write(0, 0)) = 1; + *(c.lock()->write(0, 0)) = 1; - Context ctx; - timer_start(); + // reference outputs + TENSOR out = ctx.add(new RamTensor({1,1,1})); + S_TENSOR shr_out = out.lock(); - TList input0 = {a, b}; - TList output0 = {c}; + TList inputs0 = {a, b}; + //TList outputs0 = {c}; //2 + TList outputs0 = {out}; ctx.push(new AddOp(), inputs0, outputs0); - TList input1 = {c, a}; - TList output1 = {b}; - ctx.push(new AddOp(), inputs1, outputs1); + // TList inputs1 = {c, a}; + // TList outputs1 = {b}; //3 + // ctx.push(new AddOp(), inputs1, outputs1); - TList input2 = {a, b}; - TList output2 = {out}; - ctx.push(new AddOp(), inputs2, outputs2); + // TList inputs2 = {a, b}; + // TList outputs2 = {out}; //4 + // ctx.push(new AddOp(), inputs2, outputs2); ctx.eval(); timer_stop(); - if(a != nullptr || b != nullptr || c != nullptr) { + if(a.lock() || b.lock() || c.lock()) { failed(); return; } - passed(out->read(0, 0) != 1); + passed(*(shr_out->read(0, 0)) != 4); } - */ + void runAll(void) { MatMalTest(); - //RefCountTest(); + RefCountTest(); } }; diff --git a/tensor.hpp b/tensor.hpp index bf070c62..c93411ee 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -88,7 +88,7 @@ class Tensor : public uTensor { } template - void resize(std::vector& v) { + void resize(std::vector v) { uint32_t size = 0; s->shape.clear(); for (auto i : v) { From 12f4d9b69370577299226e02c69e7fdeb0c6edb7 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sat, 11 Nov 2017 01:59:17 +0900 Subject: [PATCH 46/80] ref counting seems to be working; added support for UBLOX_EVK_ODIN_W2 --- context.hpp | 2 ++ context_test.hpp | 20 ++++++++++---------- mbed_app.json | 6 ++++++ 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/context.hpp b/context.hpp index 045e3969..50548928 100644 --- a/context.hpp +++ b/context.hpp @@ -165,6 +165,8 @@ int Context::eval(void) { } + op_list.clear(); + return 0; } diff --git a/context_test.hpp 
b/context_test.hpp index f2cfc9dc..a68e8885 100644 --- a/context_test.hpp +++ b/context_test.hpp @@ -91,26 +91,26 @@ class contextTest : public Test { S_TENSOR shr_out = out.lock(); TList inputs0 = {a, b}; - //TList outputs0 = {c}; //2 - TList outputs0 = {out}; + TList outputs0 = {c}; //2 ctx.push(new AddOp(), inputs0, outputs0); - // TList inputs1 = {c, a}; - // TList outputs1 = {b}; //3 - // ctx.push(new AddOp(), inputs1, outputs1); + TList inputs1 = {c, a}; + TList outputs1 = {b}; //3 + ctx.push(new AddOp(), inputs1, outputs1); - // TList inputs2 = {a, b}; - // TList outputs2 = {out}; //4 - // ctx.push(new AddOp(), inputs2, outputs2); + TList inputs2 = {a, b}; + TList outputs2 = {out}; //4 + ctx.push(new AddOp(), inputs2, outputs2); ctx.eval(); timer_stop(); - if(a.lock() || b.lock() || c.lock()) { + if(a.lock() || b.lock() || c.lock() || !out.lock()) { failed(); return; } - passed(*(shr_out->read(0, 0)) != 4); + int result = *(shr_out->read(0, 0)); + passed(result == 4); } diff --git a/mbed_app.json b/mbed_app.json index b9ba10d8..98ecd908 100644 --- a/mbed_app.json +++ b/mbed_app.json @@ -33,6 +33,12 @@ "sd-miso": "D12", "sd-clk": "D13", "sd-cs": "D10" + }, + "UBLOX_EVK_ODIN_W2": { + "sd-mosi": "D11", + "sd-miso": "D12", + "sd-clk": "D13", + "sd-cs": "D9" } } } \ No newline at end of file From 514ebc3261b2c5bdb8df5762ab9fadf36013124a Mon Sep 17 00:00:00 2001 From: kazami Date: Sat, 11 Nov 2017 15:53:44 +0800 Subject: [PATCH 47/80] 1. make copy and copy assignment constructor private 2. add new default constructor 3. extract malloc from predlayer for simplicity --- deep_mnist_mlp.hpp | 33 ++++++++++++++++----------------- tensor.hpp | 20 ++++++++++++++++---- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/deep_mnist_mlp.hpp b/deep_mnist_mlp.hpp index 5c61597e..af30ac02 100644 --- a/deep_mnist_mlp.hpp +++ b/deep_mnist_mlp.hpp @@ -104,17 +104,7 @@ void ReluLayer(Tensor* x, Tensor* x_min, Tensor* x_max, template void PredLayer(Tensor* input, Tensor* input_min, - Tensor* input_max, Tensor** output) { - TensorIdxImporter t_import; - Tensor* w = t_import.ubyte_import( - "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_quint8_const_0.idx"); - Tensor* w_min = t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_min_0.idx"); - Tensor* w_max = t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_max_0.idx"); + Tensor* input_max, Tensor** output, Tensor* w, Tensor* w_min, Tensor* w_max, Tensor* bias, Tensor* dim) { Tensor* out_c = nullptr; Tensor* matmul_out_min = new RamTensor({1}); @@ -157,17 +147,12 @@ void PredLayer(Tensor* input, Tensor* input_min, delete reqnt_out_max; //Add - Tensor* bias = t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/add_2/inputs/Variable_5_0.idx"); Tensor* output_z = nullptr; Add(deqnt_out, bias, &output_z); delete deqnt_out; delete bias; //ArgMax - Tensor* dim = t_import.int_import( - "/fs/testData/deep_mlp/runPredLayer/y_pred/inputs/" - "y_pred-dimension_0.idx"); ArgMax(output_z, dim, output); } @@ -238,9 +223,23 @@ int runMLP(string inputIdxFile) { ReluLayer(relu_output, relu_min, relu_max, w, w_min, w_max, b, &relu_output2, &relu_min2, &relu_max2); + Tensor* w2 = t_import.ubyte_import( + "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" + "inputs/Variable_4_quint8_const_0.idx"); + Tensor* w2_min = t_import.float_import( + 
"/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" + "inputs/Variable_4_min_0.idx"); + Tensor* w2_max = t_import.float_import( + "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" + "inputs/Variable_4_max_0.idx"); + Tensor* bias = t_import.float_import( + "/fs/testData/deep_mlp/runPredLayer/add_2/inputs/Variable_5_0.idx"); + Tensor* dim = t_import.int_import( + "/fs/testData/deep_mlp/runPredLayer/y_pred/inputs/" + "y_pred-dimension_0.idx"); Tensor* pred = nullptr; - PredLayer(relu_output2, relu_min2, relu_max2, &pred); + PredLayer(relu_output2, relu_min2, relu_max2, &pred, w2, w2_min, w2_max, bias, dim); Tensor* ref_out = t_import.float_import( diff --git a/tensor.hpp b/tensor.hpp index 4d6c5502..ef214e2f 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -87,11 +87,17 @@ class TensorBase { class Tensor : public uTensor { virtual void* read(size_t offset, size_t ele) { return nullptr; } virtual void* write(size_t offset, size_t ele) { return nullptr; } + Tensor(const Tensor&); + Tensor& operator=(const Tensor&); protected: std::shared_ptr s; // short for states public: - Tensor(void) {} + Tensor(void) { + s = std::make_shared(); + s->total_size = 0; + s->data = nullptr; + } // returns how far a given dimension is apart size_t getStride(size_t dim_index) { @@ -135,13 +141,16 @@ class Tensor : public uTensor { size *= i; } } + if (size == s->total_size) { return; - } else { + } + + if (s->data){ free(s->data); - s->total_size = size; - s->data = (void*)malloc(unit_size() * s->total_size); } + s->total_size = size; + s->data = (void*)malloc(unit_size() * s->total_size); if (s->data == NULL) ERR_EXIT("ran out of memory for %lu malloc", unit_size() * s->total_size); @@ -233,6 +242,9 @@ class RamTensor : public Tensor { return sizeof(T); } ~RamTensor() {} + private: + RamTensor(const RamTensor&); + RamTensor& operator=(const RamTensor&); }; From 65c3baaf270128e33d87181aefdaae999410ad1b Mon Sep 17 00:00:00 2001 From: kazami Date: Sat, 11 Nov 2017 23:33:19 +0800 Subject: [PATCH 48/80] 1. make arrayops pass test 2. 
fix context_test include error --- ArrayOps.hpp | 62 ++++++++++++++++++++++++++++++-------- ArrayTests.hpp | 78 +++++++++++++++++++++++++++++++----------------- context_test.hpp | 3 +- main.cpp | 20 +++++-------- 4 files changed, 109 insertions(+), 54 deletions(-) diff --git a/ArrayOps.hpp b/ArrayOps.hpp index b5208c69..a67de0d9 100644 --- a/ArrayOps.hpp +++ b/ArrayOps.hpp @@ -5,13 +5,14 @@ #include #include "uTensor_util.hpp" #include "quantization_utils.hpp" +#include "uTensorBase.hpp" //T = inferred //mode = MIN_FIRST //name = unspecified template -void QuantizeV2(Tensor* input, Tensor* _min_range, Tensor* _max_range, - Tensor* output, Tensor* output_min, Tensor* output_max) { +void QuantizeV2(S_TENSOR input, S_TENSOR _min_range, S_TENSOR _max_range, + S_TENSOR output, S_TENSOR output_min, S_TENSOR output_max) { float input_min_range = *(_min_range->read(0, 0)); float input_max_range = *(_max_range->read(0, 0)); @@ -48,19 +49,32 @@ void QuantizeV2(Tensor* input, Tensor* _min_range, Tensor* _max_range, } +class QuantizeV2Op : public Operator { + public: + QuantizeV2Op() { + n_inputs = 3; + n_outputs = 3; + } + + virtual void compute() override { + QuantizeV2(inputs[0], inputs[1], inputs[2], + outputs[0], outputs[1], outputs[2]); + } +}; + //mode = MIN_FIRST //name = unspecified //dequantize_op.cc: 87 template -void dequantize(Tensor* input, Tensor* min_range, Tensor* max_range, Tensor** output) { +void dequantize(S_TENSOR input, S_TENSOR min_range, S_TENSOR max_range, S_TENSOR output) { float min = *(min_range->read(0, 0)); float max = *(max_range->read(0, 0)); //auto tensor allocation Shape out_shape; - tensorChkAlloc(output, input->getShape()); + output->resize(input->getShape()); const T* input_ptr = input->read(0, 0); - float* output_ptr = (*output)->write(0, 0); + float* output_ptr = output->write(0, 0); //quantization_utils.h: 771 QuantizedToFloatStruct q2f(min, max); @@ -71,6 +85,19 @@ void dequantize(Tensor* input, Tensor* min_range, Tensor* max_range, Tensor** ou output_ptr[i] = ((q2f.range_min_rounded - q2f.lowest_quantized() * q2f.range_scale) + \ val * q2f.range_scale); } +} +class Dequantize2Op : public Operator { + public: + Dequantize2Op() { + n_inputs = 3; + n_outputs = 1; + } + + virtual void compute() override { + dequantize(inputs[0], inputs[1], inputs[2], + outputs[0]); + } +}; /* number_of_steps = 1 << (# of bits in T) range_adjust = number_of_steps / (number_of_steps - 1) @@ -84,7 +111,6 @@ void dequantize(Tensor* input, Tensor* min_range, Tensor* max_range, Tensor** ou // output_ptr[i] = QuantizedToFloat(input_ptr[i], min, max); // } -} //Pre: //output.getShape == shape, or @@ -94,7 +120,7 @@ void dequantize(Tensor* input, Tensor* min_range, Tensor* max_range, Tensor** ou ///NT: This Op hasn't been tested extensively. We will have to increase the test-coverage for this function. 
template -void reshape(Tensor* input, Tensor* shape, Tensor** output) { +void reshape(S_TENSOR input, S_TENSOR shape, S_TENSOR output) { Shape dim; //validating and inferring dimensions @@ -125,20 +151,30 @@ void reshape(Tensor* input, Tensor* shape, Tensor** output) { const T* input_ptr = input->read(0, 0); //check if the output dim is valid - if(*output && (*output)->getSize() > 0 && dim == (*output)->getShape()) { + if(output && output->getSize() > 0 && dim == output->getShape()) { //copy - T* output_ptr = (*output)->write(0, 0); + T* output_ptr = output->write(0, 0); std::memcpy(output_ptr, input_ptr, (std::size_t) input->getSize_in_bytes()); - } else if(*output && (*output)->getSize() > 0 && dim != (*output)->getShape()) { + } else if(output && output->getSize() > 0 && dim != output->getShape()) { ERR_EXIT("output tensor dimension mismatches supplied shape") } else { //construct a new tensor and copy - Tensor* tmp = new RamTensor(dim); - T* output_ptr = tmp->write(0, 0); + output->resize(dim); + T* output_ptr = output->write(0, 0); std::memcpy(output_ptr, input_ptr, (std::size_t) input->getSize_in_bytes()); - *output = tmp; } } +class ReshapeOp : public Operator { + public: + ReshapeOp() { + n_inputs = 2; + n_outputs = 1; + } + + virtual void compute() override { + reshape(inputs[0], inputs[1], outputs[0]); + } +}; #endif //UTENSOR_ARRAY_OPS diff --git a/ArrayTests.hpp b/ArrayTests.hpp index 08949ae6..7a2a1d5a 100644 --- a/ArrayTests.hpp +++ b/ArrayTests.hpp @@ -4,30 +4,43 @@ #include "ArrayOps.hpp" #include "test.hpp" #include "tensorIdxImporter.hpp" +#include "context.hpp" +#include "tensor.hpp" class ArrayOpsTest : public Test { + TensorIdxImporter t_import; + Context ctx; public: void quantize_v2Test(void) { testStart("quantize_v2"); - TensorIdxImporter t_import; //reference inputs /Users/neitan01/Documents/mbed/uTensor.git/TESTS/scripts/PRE-GEN/qA - Tensor* b = t_import.float_import ("/fs/testData/qB/in/Cast_1_0.idx"); - Tensor* b_min = t_import.float_import("/fs/testData/qB/in/Min_1_0.idx"); - Tensor* b_max = t_import.float_import("/fs/testData/qB/in/Max_1_0.idx"); + TENSOR b = ctx.add(t_import.float_import ("/fs/testData/qB/in/Cast_1_0.idx")); + TENSOR b_min = ctx.add(t_import.float_import("/fs/testData/qB/in/Min_1_0.idx")); + TENSOR b_max = ctx.add(t_import.float_import("/fs/testData/qB/in/Max_1_0.idx")); //reference outputs - Tensor* b_q_ref = t_import.ubyte_import("/fs/testData/qB/out/qB_0.idx"); - Tensor* b_min_q_ref = t_import.float_import("/fs/testData/qB/out/qB_1.idx"); - Tensor* b_max_q_ref = t_import.float_import("/fs/testData/qB/out/qb_2.idx"); - - Tensor* b_q = new RamTensor(b_q_ref->getShape()); - Tensor* b_min_q = new RamTensor(b_min_q_ref->getShape()); - Tensor* b_max_q = new RamTensor(b_max_q_ref->getShape()); + TENSOR b_q_ref = ctx.add(t_import.ubyte_import("/fs/testData/qB/out/qB_0.idx")); + TENSOR b_min_q_ref = ctx.add(t_import.float_import("/fs/testData/qB/out/qB_1.idx")); + TENSOR b_max_q_ref = ctx.add(t_import.float_import("/fs/testData/qB/out/qb_2.idx")); + + TENSOR b_q = ctx.add(new RamTensor(b_q_ref.lock()->getShape())); + TENSOR b_min_q = ctx.add(new RamTensor(b_min_q_ref.lock()->getShape())); + TENSOR b_max_q = ctx.add(new RamTensor(b_max_q_ref.lock()->getShape())); + + TList inputs = {b, b_min, b_max}; + TList outputs = {b_q, b_min_q, b_max_q}; + S_TENSOR out_b_q = b_q.lock(); + S_TENSOR out_b_min_q = b_min_q.lock(); + S_TENSOR out_b_max_q = b_max_q.lock(); + S_TENSOR ref_b_q = b_q_ref.lock(); + S_TENSOR ref_b_min_q = b_min_q_ref.lock(); + S_TENSOR 
ref_b_max_q = b_max_q_ref.lock(); //Implementation goes here timer_start(); - QuantizeV2(b, b_min, b_max, b_q, b_min_q, b_max_q); + ctx.push(new QuantizeV2Op(), inputs, outputs); + ctx.eval(); timer_stop(); // printf("refMin is : %f \r\n", *(b_min_q_ref.getPointer({0}))); @@ -35,31 +48,36 @@ class ArrayOpsTest : public Test { // printf("diff : output(%f), outMin(%f), outMax(%f)\r\n", // meanPercentErr(b_q_ref, b_q), meanPercentErr(b_min_q_ref, b_min_q), meanPercentErr(b_max_q_ref, b_max_q)); - double result = meanPercentErr(b_q_ref, b_q) + meanPercentErr(b_min_q_ref, b_min_q) + meanPercentErr(b_max_q_ref, b_max_q); + double result = meanPercentErr(ref_b_q.get(), out_b_q.get()) + meanPercentErr(ref_b_min_q.get(), out_b_min_q.get()) + meanPercentErr(ref_b_max_q.get(), out_b_max_q.get()); //passed(result < 0.0001); passed(result == 0); } void dequantizeTest(void) { testStart("dequantize"); - TensorIdxImporter t_import; //reference inputs - Tensor* a = t_import.ubyte_import("/fs/testData/deQ/in/rQ_0.idx"); - Tensor* a_min = t_import.float_import("/fs/testData/deQ/in/rQ_1.idx"); - Tensor* a_max = t_import.float_import("/fs/testData/deQ/in/rQ_2.idx"); + TENSOR a = ctx.add(t_import.ubyte_import("/fs/testData/deQ/in/rQ_0.idx")); + TENSOR a_min = ctx.add(t_import.float_import("/fs/testData/deQ/in/rQ_1.idx")); + TENSOR a_max = ctx.add(t_import.float_import("/fs/testData/deQ/in/rQ_2.idx")); //reference outputs - Tensor* out_ref = t_import.float_import("/fs/testData/deQ/out/deQ_0.idx"); + TENSOR out_ref = ctx.add(t_import.float_import("/fs/testData/deQ/out/deQ_0.idx")); //modify the checks below: - Tensor* out = new RamTensor(out_ref->getShape()); + TENSOR out = ctx.add(new RamTensor(out_ref.lock()->getShape())); + TList inputs = {a, a_min, a_max}; + TList outputs = {out}; + + S_TENSOR out_val = out.lock(); + S_TENSOR ref_out = out_ref.lock(); timer_start(); - dequantize(a, a_min, a_max, &out); + ctx.push(new Dequantize2Op(), inputs, outputs); + ctx.eval(); timer_stop(); - double result = meanPercentErr(out_ref, out); + double result = meanPercentErr(out_val.get(), ref_out.get()); //passed(result < 0.0001); passed(result == 0); } @@ -69,20 +87,26 @@ class ArrayOpsTest : public Test { TensorIdxImporter t_import; //reference inputs - Tensor* ref_a = t_import.float_import("/fs/testData/ref_reshape/in/Const_0.idx"); - Tensor* ref_dim = t_import.int_import("/fs/testData/ref_reshape/in/Const_1_0.idx"); + TENSOR ref_a = ctx.add(t_import.float_import("/fs/testData/ref_reshape/in/Const_0.idx")); + TENSOR ref_dim = ctx.add(t_import.int_import("/fs/testData/ref_reshape/in/Const_1_0.idx")); //reference outputs - Tensor* out_ref = t_import.float_import("/fs/testData/ref_reshape/out/ref_reshape_0.idx"); + TENSOR out_ref = ctx.add(t_import.float_import("/fs/testData/ref_reshape/out/ref_reshape_0.idx")); //modify the checks below: - Tensor* out = new RamTensor(out_ref->getShape()); + TENSOR out = ctx.add(new RamTensor(out_ref.lock()->getShape())); + S_TENSOR out_val = out.lock(); + S_TENSOR ref_out = out_ref.lock(); + + TList inputs = {ref_a, ref_dim}; + TList outputs = {out}; timer_start(); - reshape(ref_a, ref_dim, &out); + ctx.push(new ReshapeOp(), inputs, outputs); + ctx.eval(); timer_stop(); - double result = meanPercentErr(out_ref, out); + double result = meanPercentErr(out_val.get(), ref_out.get()); //passed(result < 0.0001); passed(result == 0); } diff --git a/context_test.hpp b/context_test.hpp index a68e8885..eaffcb26 100644 --- a/context_test.hpp +++ b/context_test.hpp @@ -7,6 +7,7 @@ #include 
"context.hpp" #include "tensorIdxImporter.hpp" #include "MatrixOps.hpp" +#include "MathOps.hpp" #include "test.hpp" @@ -121,4 +122,4 @@ class contextTest : public Test { } }; -#endif // UTENSOR_IDX_IMPORTER_TESTS \ No newline at end of file +#endif // UTENSOR_IDX_IMPORTER_TESTS diff --git a/main.cpp b/main.cpp index 8789333c..7440092f 100644 --- a/main.cpp +++ b/main.cpp @@ -6,14 +6,8 @@ #include "tensor.hpp" #include "tensorIdxImporterTests.hpp" #include "context.hpp" -#include "MatrixTests.hpp" #include "ArrayTests.hpp" -#include "MathTests.hpp" -#include "NnTests.hpp" -#include "tensor_test.hpp" #include "context_test.hpp" -#include "mlp_test.hpp" -#include "deep_mnist_mlp.hpp" //#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); @@ -28,26 +22,26 @@ int main(int argc, char** argv) { printf("Deep MLP on Mbed (Trained with Tensorflow)\r\n\r\n"); printf("running deep-mlp...\r\n"); - int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); - printf("prediction: %d\r\n", prediction); +/* int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); + printf("prediction: %d\r\n", prediction);*/ idxImporterTest idxTest; idxTest.runAll(); - printf("running matrix test ...\r\n"); +/* printf("running matrix test ...\r\n"); matrixOpsTest matrixTests; - matrixTests.runAll(); + matrixTests.runAll();*/ printf("IDX import:\r\n"); idxTest.printSummary(); printf("Matrix: \r\n"); - matrixTests.printSummary(); +// matrixTests.printSummary(); ArrayOpsTest arrayTests; arrayTests.runAll(); printf("Array: \r\n"); arrayTests.printSummary(); - printf("Math: \r\n"); + /*printf("Math: \r\n"); MathOpsTest mathTests; mathTests.runAll(); printf("Math result...\r\n"); @@ -83,7 +77,7 @@ int main(int argc, char** argv) { tensorTest tenT; tenT.runAll(); printf("tensor result: \r\n"); - tenT.printSummary(); + tenT.printSummary();*/ //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]}) //Out[24]: array([7]) From 138285799f58c40ce55184ac2aba2a6d791f1850 Mon Sep 17 00:00:00 2001 From: kazami Date: Sun, 12 Nov 2017 13:20:05 +0800 Subject: [PATCH 49/80] 1. make math op test pass 2. 
because the argmax and add op have different type in test function, therefore should use template function --- MathOps.hpp | 97 +++++++++++++--- MathTests.hpp | 315 +++++++++++++++++++++++++++++--------------------- main.cpp | 15 +-- 3 files changed, 271 insertions(+), 156 deletions(-) diff --git a/MathOps.hpp b/MathOps.hpp index c3694ec4..72f667c3 100644 --- a/MathOps.hpp +++ b/MathOps.hpp @@ -4,6 +4,7 @@ #include #include "quantization_utils.hpp" #include "tensor.hpp" +#include "uTensorBase.hpp" template void CalculateUsedRange(Tensor* input, int32_t* used_min_quan, @@ -23,14 +24,14 @@ void CalculateUsedRange(Tensor* input, int32_t* used_min_quan, *used_max_quan = maxmum; } template -void Requantization_Range(Tensor* input, Tensor* min, Tensor* max, - Tensor* out_min, Tensor* out_max) { +void Requantization_Range(S_TENSOR input, S_TENSOR min, S_TENSOR max, + S_TENSOR out_min, S_TENSOR out_max) { const float input_min = *(min->read(0, 0)); const float input_max = *(max->read(0, 0)); int32_t used_min_quan; int32_t used_max_quan; - CalculateUsedRange(input, &used_min_quan, &used_max_quan); + CalculateUsedRange(input.get(), &used_min_quan, &used_max_quan); const float used_min = std::min(0.0f, QuantizedToFloat(used_min_quan, input_min, input_max)); @@ -42,10 +43,22 @@ void Requantization_Range(Tensor* input, Tensor* min, Tensor* max, *c_max = used_max; } +class Requantization_RangeOp : public Operator { + public: + Requantization_RangeOp() { + n_inputs = 3; + n_outputs = 2; + } + + virtual void compute() override { + Requantization_Range(inputs[0], inputs[1], + inputs[2], outputs[0], outputs[1]); + } +}; template -void Requantize(Tensor* input, Tensor* in_min, Tensor* in_max, - Tensor* r_min, Tensor* r_max, Tensor* output, - Tensor* out_min, Tensor* out_max) { +void Requantize(S_TENSOR input, S_TENSOR in_min, S_TENSOR in_max, + S_TENSOR r_min, S_TENSOR r_max, S_TENSOR output, + S_TENSOR out_min, S_TENSOR out_max) { const float input_min = in_min->read(0, 0)[0]; const float input_max = in_max->read(0, 0)[0]; const float r_output_min = r_min->read(0, 0)[0]; @@ -65,6 +78,21 @@ void Requantize(Tensor* input, Tensor* in_min, Tensor* in_max, *v_out_max = r_output_max; } + +class RequantizeOp : public Operator { + public: + RequantizeOp() { + n_inputs = 5; + n_outputs = 3; + } + + virtual void compute() override { + Requantize(inputs[0], inputs[1], + inputs[2], inputs[3], inputs[4], + outputs[0], outputs[1], outputs[2]); + } +}; + template void Add(Tensor* input, Tensor* input2, Tensor** out) { const TIn* p_in = input->read(0, 0); @@ -81,7 +109,7 @@ void Add(Tensor* input, Tensor* input2, Tensor** out) { } } template -void Min(Tensor* input, Tensor* dim, Tensor* out) { +void Min(S_TENSOR input, S_TENSOR dim, S_TENSOR out) { const TIn* p_in = input->read(0, 0); const Td* p_in2 = dim->read(0, 0); TOut* p_out = out->write(0, 0); @@ -112,8 +140,19 @@ void Min(Tensor* input, Tensor* dim, Tensor* out) { } } +class MinOp : public Operator { + public: + MinOp() { + n_inputs = 2; + n_outputs = 1; + } + + virtual void compute() override { + Min(inputs[0], inputs[1], outputs[0]); + } +}; template -void Max(Tensor* input, Tensor* dim, Tensor* out) { +void Max(S_TENSOR input, S_TENSOR dim, S_TENSOR out) { const TIn* p_in = input->read(0, 0); const Td* p_in2 = dim->read(0, 0); TOut* p_out = out->write(0, 0); @@ -144,15 +183,27 @@ void Max(Tensor* input, Tensor* dim, Tensor* out) { } } +class MaxOp : public Operator { + public: + MaxOp() { + n_inputs = 2; + n_outputs = 1; + } + + virtual void compute() override { 
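+    // compute() is the only hook an Operator subclass supplies: the
+    // constructor above declares how many tensors the op expects
+    // (n_inputs/n_outputs), Context binds the S_TENSOR handles when the
+    // op is pushed, and the body simply forwards them to the templated
+    // kernel defined earlier in this file.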
+ Max(inputs[0], inputs[1], outputs[0]); + } +}; + template -void ArgMax(Tensor* input, Tensor* dim, Tensor** out) { +void ArgMax(S_TENSOR input, S_TENSOR dim, S_TENSOR out) { int dim_reduce = *(dim->read(0, 0)); Shape outShape = input->getShape(); uint32_t reduce_dim_size = outShape[dim_reduce]; outShape.erase(outShape.begin() + dim_reduce); // construct the permute vector - vector permute; + std::vector permute; for (uint8_t i = 0; i < input->getShape().size(); i++) { permute.push_back(i); } @@ -160,15 +211,13 @@ void ArgMax(Tensor* input, Tensor* dim, Tensor** out) { permute.erase(permute.begin() + dim_reduce); // check dimensionality - if (*out && (*out)->getSize() != 0 && (*out)->getShape() != outShape) { + if (out && out->getSize() != 0 && out->getShape() != outShape) { ERR_EXIT("output shape mismatch"); } // allocate output tensor if empty - if (*out && (*out)->getSize() == 0) { - (*out)->init(outShape); - } else { - *out = new RamTensor(outShape); + if (out && out->getSize() == 0) { + out->resize(outShape); } @@ -182,7 +231,7 @@ void ArgMax(Tensor* input, Tensor* dim, Tensor** out) { permuteIndexTransform trans(vOutShape, permute); const TIn* inPtr = input->read(0, 0); - TOut* outPtr = (*out)->write(0, 0); + TOut* outPtr = out->write(0, 0); size_t out_index = 0; @@ -202,6 +251,19 @@ void ArgMax(Tensor* input, Tensor* dim, Tensor** out) { } } + +template +class ArgMaxOp : public Operator { + public: + ArgMaxOp() { + n_inputs = 2; + n_outputs = 1; + } + + virtual void compute() override { + ArgMax(inputs[0], inputs[1], outputs[0]); + } +}; template void Add(S_TENSOR input, S_TENSOR input2, S_TENSOR out) { const TIn* p_in = input->read(0, 0); @@ -218,6 +280,7 @@ void Add(S_TENSOR input, S_TENSOR input2, S_TENSOR out) { } } +template class AddOp : public Operator{ public: AddOp() { @@ -225,7 +288,7 @@ class AddOp : public Operator{ n_outputs = 1; } virtual void compute() override { - Add(inputs[0], inputs[1], outputs[0]); + Add(inputs[0], inputs[1], outputs[0]); } }; #endif // UTENSOR_MATH_OPS diff --git a/MathTests.hpp b/MathTests.hpp index c649ac59..af65cb85 100644 --- a/MathTests.hpp +++ b/MathTests.hpp @@ -4,126 +4,152 @@ #include "MathOps.hpp" #include "tensorIdxImporter.hpp" #include "test.hpp" +#include "context.hpp" class MathOpsTest : public Test { + TensorIdxImporter t_import; + Context ctx; public: void requantization_rangeTest(void) { testStart("requantization_range"); - TensorIdxImporter t_import; // reference inputs - Tensor* a = - t_import.int_import("/fs/testData/rqRange/in/qMatMul_0.idx"); - Tensor* a_min = - t_import.float_import("/fs/testData/rqRange/in/qMatMul_1.idx"); - Tensor* a_max = - t_import.float_import("/fs/testData/rqRange/in/qMatMul_2.idx"); + TENSOR a = + ctx.add(t_import.int_import("/fs/testData/rqRange/in/qMatMul_0.idx")); + TENSOR a_min = + ctx.add(t_import.float_import("/fs/testData/rqRange/in/qMatMul_1.idx")); + TENSOR a_max = + ctx.add(t_import.float_import("/fs/testData/rqRange/in/qMatMul_2.idx")); // reference outputs - Tensor* ref_min = - t_import.float_import("/fs/testData/rqRange/out/rqRange_0.idx"); - Tensor* ref_max = - t_import.float_import("/fs/testData/rqRange/out/rqRange_1.idx"); + TENSOR ref_min = + ctx.add(t_import.float_import("/fs/testData/rqRange/out/rqRange_0.idx")); + TENSOR ref_max = + ctx.add(t_import.float_import("/fs/testData/rqRange/out/rqRange_1.idx")); // Implementation goes here // modify the checks below: - Tensor* out_min = new RamTensor(ref_min->getShape()); - Tensor* out_max = new RamTensor(ref_max->getShape()); + 
TENSOR out_min = ctx.add(new RamTensor(ref_min.lock()->getShape())); + TENSOR out_max = ctx.add(new RamTensor(ref_max.lock()->getShape())); + TList inputs = {a, a_min, a_max}; + TList outputs = {out_min, out_max}; + + S_TENSOR ref_min_val = ref_min.lock(); + S_TENSOR ref_max_val = ref_max.lock(); + S_TENSOR out_min_val = out_min.lock(); + S_TENSOR out_max_val = out_max.lock(); + timer_start(); - Requantization_Range(a, a_min, a_max, out_min, out_max); + ctx.push(new Requantization_RangeOp(), inputs, outputs); + ctx.eval(); timer_stop(); double result = - meanPercentErr(ref_min, out_min) + meanPercentErr(ref_max, out_max); + meanPercentErr(ref_min_val.get(), out_min_val.get()) + meanPercentErr(ref_max_val.get(), out_max_val.get()); // passed(result < 0.0001); passed(result == 0); } void requantizeTest(void) { testStart("requantize"); - TensorIdxImporter t_import; // reference inputs - Tensor* a = t_import.int_import("/fs/testData/rQ/in/qMatMul_0.idx"); - Tensor* a_min = - t_import.float_import("/fs/testData/rQ/in/qMatMul_1.idx"); - Tensor* a_max = - t_import.float_import("/fs/testData/rQ/in/qMatMul_2.idx"); - Tensor* r_a_min = - t_import.float_import("/fs/testData/rQ/in/rqRange_0.idx"); - Tensor* r_a_max = - t_import.float_import("/fs/testData/rQ/in/rqRange_1.idx"); + TENSOR a = ctx.add(t_import.int_import("/fs/testData/rQ/in/qMatMul_0.idx")); + TENSOR a_min = + ctx.add(t_import.float_import("/fs/testData/rQ/in/qMatMul_1.idx")); + TENSOR a_max = + ctx.add(t_import.float_import("/fs/testData/rQ/in/qMatMul_2.idx")); + TENSOR r_a_min = + ctx.add(t_import.float_import("/fs/testData/rQ/in/rqRange_0.idx")); + TENSOR r_a_max = + ctx.add(t_import.float_import("/fs/testData/rQ/in/rqRange_1.idx")); // tf.quint8 // reference outputs - Tensor* ref_a_q = - t_import.ubyte_import("/fs/testData/rQ/out/rQ_0.idx"); - Tensor* ref_a_min = - t_import.float_import("/fs/testData/rQ/out/rQ_1.idx"); - Tensor* ref_a_max = - t_import.float_import("/fs/testData/rQ/out/rQ_2.idx"); + TENSOR ref_a_q = + ctx.add(t_import.ubyte_import("/fs/testData/rQ/out/rQ_0.idx")); + TENSOR ref_a_min = + ctx.add(t_import.float_import("/fs/testData/rQ/out/rQ_1.idx")); + TENSOR ref_a_max = + ctx.add(t_import.float_import("/fs/testData/rQ/out/rQ_2.idx")); // modify the checks below: - Tensor* a_q = new RamTensor(ref_a_q->getShape()); - Tensor* a_min_q = new RamTensor(ref_a_min->getShape()); - Tensor* a_max_q = new RamTensor(ref_a_max->getShape()); - + TENSOR a_q = ctx.add(new RamTensor(ref_a_q.lock()->getShape())); + TENSOR a_min_q = ctx.add(new RamTensor(ref_a_min.lock()->getShape())); + TENSOR a_max_q = ctx.add(new RamTensor(ref_a_max.lock()->getShape())); + + TList inputs = {a, a_min, a_max, r_a_min, r_a_max}; + TList outputs = {a_q, a_min_q, a_max_q}; + + S_TENSOR ref_a = ref_a_q.lock(); + S_TENSOR out_a = a_q.lock(); + S_TENSOR ref_min = ref_a_min.lock(); + S_TENSOR out_min = a_min_q.lock(); + S_TENSOR ref_max = ref_a_max.lock(); + S_TENSOR out_max = a_max_q.lock(); // Implementation goes here timer_start(); - Requantize(a, a_min, a_max, r_a_min, r_a_max, - a_q, a_min_q, a_max_q); + ctx.push(new RequantizeOp(), inputs, outputs); + ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_a_q, a_q) + - meanPercentErr(ref_a_min, a_min_q) + - meanPercentErr(ref_a_max, a_max_q); + double result = meanPercentErr(ref_a.get(), out_a.get()) + + meanPercentErr(ref_min.get(), out_min.get()) + + meanPercentErr(ref_max.get(), out_max.get()); // passed(result < 0.0001); passed(result == 0); } void requantizeTest2(void) { 
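    // All of these reworked tests follow the same Context pattern; a minimal
    // sketch for reference (SomeOp and the idx path are placeholders, not
    // real fixtures from this suite):
    //   TENSOR in  = ctx.add(t_import.float_import("/fs/testData/..."));
    //   TENSOR out = ctx.add(new RamTensor({1}));
    //   S_TENSOR held = out.lock();        // shared_ptr outlives ctx.eval()
    //   TList inputs = {in}; TList outputs = {out};
    //   ctx.push(new SomeOp(), inputs, outputs);
    //   ctx.eval();                        // run the graph, then compare held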
testStart("requantize2"); - TensorIdxImporter t_import; // reference inputs - Tensor* a = t_import.int_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_0.idx"); - Tensor* a_min = - t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_1.idx"); - Tensor* a_max = - t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_2.idx"); - Tensor* r_a_min = - t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_0.idx"); - Tensor* r_a_max = - t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_1.idx"); + TENSOR a = ctx.add(t_import.int_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_0.idx")); + TENSOR a_min = + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_1.idx")); + TENSOR a_max = + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_2.idx")); + TENSOR r_a_min = + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_0.idx")); + TENSOR r_a_max = + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_1.idx")); // tf.quint8 // reference outputs - Tensor* ref_a_q = - t_import.ubyte_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_0.idx"); - Tensor* ref_a_min = - t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_1.idx"); - Tensor* ref_a_max = - t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_2.idx"); + TENSOR ref_a_q = + ctx.add(t_import.ubyte_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_0.idx")); + TENSOR ref_a_min = + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_1.idx")); + TENSOR ref_a_max = + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_2.idx")); // modify the checks below: - Tensor* a_q = new RamTensor(ref_a_q->getShape()); - Tensor* a_min_q = new RamTensor(ref_a_min->getShape()); - Tensor* a_max_q = new RamTensor(ref_a_max->getShape()); + TENSOR a_q = ctx.add(new RamTensor(ref_a_q.lock()->getShape())); + TENSOR a_min_q = ctx.add(new RamTensor(ref_a_min.lock()->getShape())); + TENSOR a_max_q = ctx.add(new RamTensor(ref_a_max.lock()->getShape())); + TList inputs = {a, a_min, a_max, r_a_min, r_a_max}; + TList outputs = {a_q, a_min_q, a_max_q}; + S_TENSOR ref_val = ref_a_q.lock(); + S_TENSOR ref_min = ref_a_min.lock(); + S_TENSOR ref_max = ref_a_max.lock(); + S_TENSOR out_val = a_q.lock(); + S_TENSOR out_min = a_min_q.lock(); + S_TENSOR out_max = a_max_q.lock(); // Implementation goes here timer_start(); - Requantize(a, a_min, a_max, r_a_min, r_a_max, - a_q, a_min_q, a_max_q); + ctx.push(new RequantizeOp(), inputs, outputs); + ctx.eval(); timer_stop(); double result; - if((result = meanPercentErr(ref_a_q, a_q)) != 0) { + if((result = meanPercentErr(ref_val.get(), out_val.get())) != 0) { printf("Requantize a_q failed (%.6f)\r\n", result); - unsigned char* 
ref_ptr = ref_a_q->write(0, 0); - unsigned char* test_ptr = a_q->write(0, 0); - for(uint32_t i = 0; i < ref_a_q->getSize(); i++) { + unsigned char* ref_ptr = ref_val.get()->write(0, 0); + unsigned char* test_ptr = out_val.get()->write(0, 0); + for(uint32_t i = 0; i < ref_val->getSize(); i++) { if(ref_ptr[i] != test_ptr[i]) { printf("%lu: %d != %d\r\n", i, ref_ptr[i], test_ptr[i]); } else { @@ -133,42 +159,47 @@ class MathOpsTest : public Test { } - if((result = meanPercentErr(ref_a_min, a_min_q)) != 0) printf("Requantize a_min_q failed (%.6f)\r\n", result); + if((result = meanPercentErr(ref_min.get(), out_min.get())) != 0) printf("Requantize a_min_q failed (%.6f)\r\n", result); - if((result = meanPercentErr(ref_a_max, a_max_q)) != 0) printf("Requantize a_max_q failed (%.6f)\r\n", result); + if((result = meanPercentErr(ref_max.get(), out_max.get())) != 0) printf("Requantize a_max_q failed (%.6f)\r\n", result); - result = meanPercentErr(ref_a_q, a_q) + - meanPercentErr(ref_a_min, a_min_q) + - meanPercentErr(ref_a_max, a_max_q); + result = meanPercentErr(ref_val.get(), out_val.get()) + + meanPercentErr(ref_min.get(), out_min.get()) + + meanPercentErr(ref_max.get(), out_max.get()); // passed(result < 0.0001); passed(result == 0); } void argmaxTest(void) { // NT: WIP do not use t_import int 64 here testStart("argmax"); - TensorIdxImporter t_import; // reference inputs - Tensor* ref_a = t_import.float_import("/fs/testData/ArgMax/in/ArgMax-input_0.idx"); - Tensor* ref_dim = t_import.int_import("/fs/testData/ArgMax/in/ArgMax-dimension_0.idx"); + TENSOR ref_a = ctx.add(t_import.float_import("/fs/testData/ArgMax/in/ArgMax-input_0.idx")); + TENSOR ref_dim = ctx.add(t_import.int_import("/fs/testData/ArgMax/in/ArgMax-dimension_0.idx")); // reference outputs /// NT: FIXME: argmax outputs int64 tensor which isn't supported by /// int_import. 
- Tensor* ref_out = t_import.float_import("/fs/testData/ArgMax/out/ArgMax_0.idx"); + TENSOR ref_out = ctx.add(t_import.float_import("/fs/testData/ArgMax/out/ArgMax_0.idx")); // Implementation goes here // modify the checks below: - Tensor* out = new RamTensor(ref_out->getShape()); + TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); + TList inputs = {ref_a, ref_dim}; + TList outputs = {out}; + + S_TENSOR ref_val = ref_out.lock(); + S_TENSOR out_val = out.lock(); timer_start(); - ArgMax(ref_a, ref_dim, &out); + ctx.push(new ArgMaxOp(), inputs, outputs); + ctx.eval(); timer_stop(); - Tensor* out_float = TensorCast(out); + Tensor* out_float = TensorCast(out_val.get()); - double result = meanPercentErr(ref_out, out_float); + double result = meanPercentErr(ref_val.get(), out_float); // passed(result < 0.0001); passed(result == 0); @@ -176,110 +207,130 @@ class MathOpsTest : public Test { void argmaxTest2(void) { // NT: WIP do not use t_import int 64 here testStart("argmax2"); - Tensor* test_input = TensorConstant({10, 5}, 0.0f); - *(test_input->write(25, 0)) = 1.0f; - *(test_input->write(26, 0)) = 1.0f; - *(test_input->write(7, 0)) = 1.0f; - *(test_input->write(48, 0)) = 1.0f; - *(test_input->write(14, 0)) = 1.0f; - - Tensor* test_dim = new RamTensor({1}); - *(test_dim->write(0, 0)) = 0; - - Tensor* test_out_ref = new RamTensor({5}); - *(test_out_ref->write(0, 0)) = 5.0f; - *(test_out_ref->write(1, 0)) = 5.0f; - *(test_out_ref->write(2, 0)) = 1.0f; - *(test_out_ref->write(3, 0)) = 9.0f; - *(test_out_ref->write(4, 0)) = 2.0f; - - Tensor* test_out = new RamTensor(test_out_ref->getShape()); + TENSOR test_input = ctx.add(TensorConstant({10, 5}, 0.0f)); + *(test_input.lock()->write(25, 0)) = 1.0f; + *(test_input.lock()->write(26, 0)) = 1.0f; + *(test_input.lock()->write(7, 0)) = 1.0f; + *(test_input.lock()->write(48, 0)) = 1.0f; + *(test_input.lock()->write(14, 0)) = 1.0f; + + TENSOR test_dim = ctx.add(new RamTensor({1})); + *(test_dim.lock()->write(0, 0)) = 0; + + TENSOR test_out_ref = ctx.add(new RamTensor({5})); + *(test_out_ref.lock()->write(0, 0)) = 5.0f; + *(test_out_ref.lock()->write(1, 0)) = 5.0f; + *(test_out_ref.lock()->write(2, 0)) = 1.0f; + *(test_out_ref.lock()->write(3, 0)) = 9.0f; + *(test_out_ref.lock()->write(4, 0)) = 2.0f; + + TENSOR test_out = ctx.add(new RamTensor(test_out_ref.lock()->getShape())); + TList inputs = {test_input, test_dim}; + TList outputs = {test_out}; + S_TENSOR ref_val = test_out_ref.lock(); + S_TENSOR out_val = test_out.lock(); + timer_start(); - ArgMax(test_input, test_dim, &test_out); + ctx.push(new ArgMaxOp(), inputs, outputs); + ctx.eval(); timer_stop(); - double result = meanPercentErr(test_out_ref, test_out); - // passed(result < 0.0001); - passed(result == 0); + double result = meanPercentErr(ref_val.get(), out_val.get()); + std::cout << result << std::endl; + passed(result < 0.0001); + //passed(result == 0); } void addTest(void) { testStart("add"); - TensorIdxImporter t_import; // reference inputs - Tensor* a = - t_import.float_import("/fs/testData/ref_add/in/Const_5_0.idx"); - Tensor* b = - t_import.float_import("/fs/testData/ref_add/in/Const_6_0.idx"); + TENSOR a = + ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_5_0.idx")); + TENSOR b = + ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_6_0.idx")); // reference outputs - Tensor* ref_out = - t_import.float_import("/fs/testData/ref_add/out/ref_add_0.idx"); + TENSOR ref_out = + ctx.add(t_import.float_import("/fs/testData/ref_add/out/ref_add_0.idx")); // 
Implementation goes here // modify the checks below: - Tensor* out = new RamTensor(ref_out->getShape()); + TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); + S_TENSOR out_vxx = out.lock(); + S_TENSOR ref_vxx = ref_out.lock(); + TList inputs = {a, b}; + TList outputs = {out}; timer_start(); - Add(a, b, &out); + ctx.push(new AddOp(), inputs, outputs); + ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_out, out); - // passed(result < 0.0001); - passed(result == 0); + double result = meanPercentErr(ref_vxx.get(), out_vxx.get()); + std::cout << result << std::endl; + passed(result < 0.0001); + //passed(result == 0); } void minTest(void) { testStart("min"); - TensorIdxImporter t_import; // reference inputs - Tensor* a = - t_import.float_import("/fs/testData/ref_min/in/Const_2_0.idx"); - Tensor* dim = - t_import.int_import("/fs/testData/ref_min/in/Const_3_0.idx"); + TENSOR a = + ctx.add(t_import.float_import("/fs/testData/ref_min/in/Const_2_0.idx")); + TENSOR dim = + ctx.add(t_import.int_import("/fs/testData/ref_min/in/Const_3_0.idx")); // reference outputs - Tensor* ref_out = - t_import.float_import("/fs/testData/ref_min/out/ref_min_0.idx"); + TENSOR ref_out = + ctx.add(t_import.float_import("/fs/testData/ref_min/out/ref_min_0.idx")); // Implementation goes here // modify the checks below: - Tensor* out = new RamTensor(ref_out->getShape()); + TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); + TList inputs = {a, dim}; + TList outputs = {out}; + S_TENSOR ref_val = ref_out.lock(); + S_TENSOR out_val = out.lock(); timer_start(); - Min(a, dim, out); + ctx.push(new MinOp(), inputs, outputs); + ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_out, out); + double result = meanPercentErr(ref_val.get(), out_val.get()); // passed(result < 0.0001); passed(result == 0); } void maxTest(void) { testStart("max"); - TensorIdxImporter t_import; // reference inputs - Tensor* a = - t_import.float_import("/fs/testData/ref_max/in/Const_2_0.idx"); - Tensor* dim = - t_import.int_import("/fs/testData/ref_max/in/Const_4_0.idx"); + TENSOR a = + ctx.add(t_import.float_import("/fs/testData/ref_max/in/Const_2_0.idx")); + TENSOR dim = + ctx.add(t_import.int_import("/fs/testData/ref_max/in/Const_4_0.idx")); // reference outputs - Tensor* ref_out = - t_import.float_import("/fs/testData/ref_max/out/ref_max_0.idx"); + TENSOR ref_out = + ctx.add(t_import.float_import("/fs/testData/ref_max/out/ref_max_0.idx")); // Implementation goes here // modify the checks below: - Tensor *out = new RamTensor(ref_out->getShape()); + TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); + TList inputs = {a, dim}; + TList outputs = {out}; + S_TENSOR ref_val = ref_out.lock(); + S_TENSOR out_val = out.lock(); timer_start(); - Max(a, dim, out); + ctx.push(new MaxOp(), inputs, outputs); + ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_out, out); + double result = meanPercentErr(ref_val.get(), out_val.get()); // passed(result < 0.0001); passed(result == 0); } diff --git a/main.cpp b/main.cpp index 7440092f..ca6efc18 100644 --- a/main.cpp +++ b/main.cpp @@ -8,6 +8,7 @@ #include "context.hpp" #include "ArrayTests.hpp" #include "context_test.hpp" +#include "MathTests.hpp" //#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); @@ -41,13 +42,18 @@ int main(int argc, char** argv) { printf("Array: \r\n"); arrayTests.printSummary(); - /*printf("Math: \r\n"); + printf("Math: \r\n"); MathOpsTest mathTests; mathTests.runAll(); printf("Math result...\r\n"); 
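  // each suite presumably tallies pass/fail via passed() in the Test base
  // class (test.hpp); printSummary() then reports the totals after runAll()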
mathTests.printSummary(); - printf("NnOpS: \r\n"); + printf("Context test: \r\n"); + contextTest ctxTest; + ctxTest.runAll(); + printf("Context result...\r\n"); + ctxTest.printSummary(); + /* printf("NnOpS: \r\n"); NnOpsTest nnTest; nnTest.runAll(); printf("Nn Ops result...\r\n"); @@ -59,11 +65,6 @@ int main(int argc, char** argv) { printf("Transformation result...\r\n"); tTest.printSummary(); - printf("Context test: \r\n"); - contextTest ctxTest; - ctxTest.runAll(); - printf("Context result...\r\n"); - ctxTest.printSummary(); // printf("mlp test: \r\n"); From c95da6e5f979545f7da3d4dc01abb40c02920fe5 Mon Sep 17 00:00:00 2001 From: kazami Date: Sun, 12 Nov 2017 13:24:52 +0800 Subject: [PATCH 50/80] 1. add function have different type to mathtest, so make addop have type --- context_test.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/context_test.hpp b/context_test.hpp index eaffcb26..36562eed 100644 --- a/context_test.hpp +++ b/context_test.hpp @@ -93,15 +93,15 @@ class contextTest : public Test { TList inputs0 = {a, b}; TList outputs0 = {c}; //2 - ctx.push(new AddOp(), inputs0, outputs0); + ctx.push(new AddOp(), inputs0, outputs0); TList inputs1 = {c, a}; TList outputs1 = {b}; //3 - ctx.push(new AddOp(), inputs1, outputs1); + ctx.push(new AddOp(), inputs1, outputs1); TList inputs2 = {a, b}; TList outputs2 = {out}; //4 - ctx.push(new AddOp(), inputs2, outputs2); + ctx.push(new AddOp(), inputs2, outputs2); ctx.eval(); timer_stop(); From b385766f5e9cb279cd7493424aa0589ed4bffc7e Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sun, 12 Nov 2017 15:12:33 +0900 Subject: [PATCH 51/80] transformation test seems passing --- main.cpp | 5 ++++- tensor_test.hpp | 39 ++++++++++++++++++++------------------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/main.cpp b/main.cpp index ca6efc18..9d991c0e 100644 --- a/main.cpp +++ b/main.cpp @@ -9,6 +9,7 @@ #include "ArrayTests.hpp" #include "context_test.hpp" #include "MathTests.hpp" +#include "tensor_test.hpp" //#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); @@ -58,6 +59,8 @@ int main(int argc, char** argv) { nnTest.runAll(); printf("Nn Ops result...\r\n"); nnTest.printSummary(); + */ + printf("Transformation test: \r\n"); transTest tTest; @@ -66,7 +69,7 @@ int main(int argc, char** argv) { tTest.printSummary(); - +/* // printf("mlp test: \r\n"); // mlpTest mlpt; // mlpt.runAll(); diff --git a/tensor_test.hpp b/tensor_test.hpp index 14b84cd6..d5a07a72 100644 --- a/tensor_test.hpp +++ b/tensor_test.hpp @@ -26,26 +26,27 @@ class tensorTest : public Test { }; class transTest : public Test { + Context ctx; public: void runShapeTest() { bool res = false; + testStart("transtest"); for (int i = 0; i < 10; i++) { - testStart("transtest"); std::default_random_engine gen; vector tmp({2, 3, 4, 5}); - Tensor* inputTensor = new RamTensor(tmp); + TENSOR inputTensor = ctx.add(new RamTensor(tmp)); vector permute = {2, 3, 1, 0}; - vector g = inputTensor->getShape(); + vector g = inputTensor.lock()->getShape(); std::shuffle(permute.begin(), permute.end(), gen); - permuteIndexTransform trans(inputTensor->getShape(), permute); + permuteIndexTransform trans(inputTensor.lock()->getShape(), permute); - Tensor* output = new RamTensor(trans.getNewShape()); - vector s = output->getShape(); + TENSOR output = ctx.add(new RamTensor(trans.getNewShape())); + vector s = output.lock()->getShape(); res = testshape(g, s, permute); if (!res) { - passed(res); + break; } } passed(res); @@ -58,19 +59,19 @@ class transTest : public Test { 
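   // permuteIndexTransform maps an element's flat index in the original
   // layout to its flat index once the axes are permuted; the hand-worked
   // vectors below check input_1[i] == output_1[trans[i]] element by element.
   // Sketched usage (the shape and permutation are illustrative only):
   //   permuteIndexTransform trans(input.lock()->getShape(), {0, 2, 1});
   //   TENSOR output = ctx.add(new RamTensor(trans.getNewShape()));
   //   size_t j = trans[i];  // where element i lands after the permute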
vector output_1({2, 2, 3, 5, 6, 6, 4, 5, 7, 5, 1, 9, 1, 3, 2, 2, 5, 3, 3, 6, 3, 4, 9, 2}); - Tensor* inputTensor = new RamTensor({2, 3, 4}); + TENSOR inputTensor = ctx.add(new RamTensor({2, 3, 4})); vector permute = {0, 2, 1}; - permuteIndexTransform trans(inputTensor->getShape(), permute); + permuteIndexTransform trans(inputTensor.lock()->getShape(), permute); size_t out_index = 0; bool res = false; + testStart("test vec 1 for transform"); for (uint32_t i = 0; i < input_1.size(); i++) { - testStart("test vec 1 for transform"); out_index = trans[i]; res = testval(input_1[i], output_1[out_index]); if (!res) { - passed(res); + break; } } passed(res); @@ -84,15 +85,15 @@ class transTest : public Test { vector output_2({2, 1, 2, 3, 3, 2, 5, 2, 6, 5, 6, 3, 4, 3, 5, 6, 7, 3, 5, 4, 1, 9, 9, 2}); - Tensor* inputTensor2 = new RamTensor({2, 4, 3}); + TENSOR inputTensor2 = ctx.add(new RamTensor({2, 4, 3})); vector permute2 = {1, 2, 0}; - permuteIndexTransform trans2(inputTensor2->getShape(), permute2); + permuteIndexTransform trans2(inputTensor2.lock()->getShape(), permute2); + testStart("test vec 2 for transform"); for (uint32_t i = 0; i < input_2.size(); i++) { - testStart("test vec 2 for transform"); out_index = trans2[i]; res = testval(input_2[i], output_2[out_index]); if (!res) { - passed(res); + break; } } passed(res); @@ -104,15 +105,15 @@ class transTest : public Test { vector output_3({8, 2, 8, 1, 0, 3, 4, 6, 2, 6, 0, 6, 3, 9, 2, 7, 0, 7, 0, 4, 8, 9, 0, 4, 3, 6, 8}); - Tensor* inputTensor3 = new RamTensor({1, 3, 3, 3}); + TENSOR inputTensor3 = ctx.add(new RamTensor({1, 3, 3, 3})); vector permute3 = {0, 3, 2, 1}; - permuteIndexTransform trans3(inputTensor3->getShape(), permute3); + permuteIndexTransform trans3(inputTensor3.lock()->getShape(), permute3); + testStart("test vec 4d for transform"); for (uint32_t i = 0; i < input_3.size(); i++) { - testStart("test vec 4d for transform"); out_index = trans3[i]; res = testval(input_3[i], output_3[out_index]); if (!res) { - passed(res); + break; } } passed(res); From 0cdd92e8183dac37b50e0f6131eafa348d4aaecd Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sun, 12 Nov 2017 15:40:49 +0900 Subject: [PATCH 52/80] NnTest passed --- NnOps.hpp | 16 +++++++++++-- NnTests.hpp | 65 ++++++++++++++++++++++++++++------------------------- main.cpp | 6 +++-- 3 files changed, 52 insertions(+), 35 deletions(-) diff --git a/NnOps.hpp b/NnOps.hpp index 33744c50..a8f1f456 100644 --- a/NnOps.hpp +++ b/NnOps.hpp @@ -5,8 +5,8 @@ #include "tensor.hpp" template -void Relu(Tensor* input, Tensor* in_min, Tensor* in_max, - Tensor* output, Tensor* out_min, Tensor* out_max) { +void Relu(S_TENSOR input, S_TENSOR in_min, S_TENSOR in_max, + S_TENSOR output, S_TENSOR out_min, S_TENSOR out_max) { const float input_min = in_min->read(0, 0)[0]; const float input_max = in_max->read(0, 0)[0]; const TIn* in = input->read(0, 0); @@ -26,4 +26,16 @@ void Relu(Tensor* input, Tensor* in_min, Tensor* in_max, T2* v_out_max = out_max->write(0, 0); *v_out_max = input_max; } + +template +class ReluOp : public Operator { + public: + ReluOp() { + n_inputs = 3; + n_outputs = 3; + } + virtual void compute() override { + Relu(inputs[0], inputs[1], inputs[2], outputs[0], outputs[1], outputs[2]); + } +}; #endif // UTENSOR_NN_OPS diff --git a/NnTests.hpp b/NnTests.hpp index d3ee3833..ad867a77 100644 --- a/NnTests.hpp +++ b/NnTests.hpp @@ -6,51 +6,54 @@ #include "test.hpp" class NnOpsTest : public Test { + Context ctx; + TensorIdxImporter t_import; + public: void reluTest(void) { testStart("quantized_relu"); - 
TensorIdxImporter t_import; - // reference inputs - Tensor* a = - t_import.ubyte_import("/fs/testData/ref_qRelu/in/QuantizeV2_0.idx"); - Tensor* min = - t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_1.idx"); - Tensor* max = - t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_2.idx"); + TENSOR a = + ctx.add(t_import.ubyte_import("/fs/testData/ref_qRelu/in/QuantizeV2_0.idx")); + TENSOR min = + ctx.add(t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_1.idx")); + TENSOR max = + ctx.add(t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_2.idx")); // reference outputs - Tensor* ref_out = - t_import.ubyte_import("/fs/testData/ref_qRelu/out/ref_qRelu_0.idx"); - Tensor* ref_min = - t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_1.idx"); - Tensor* ref_max = - t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_2.idx"); + TENSOR ref_out = + ctx.add(t_import.ubyte_import("/fs/testData/ref_qRelu/out/ref_qRelu_0.idx")); + TENSOR ref_min = + ctx.add(t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_1.idx")); + TENSOR ref_max = + ctx.add(t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_2.idx")); // modify the checks below: - Tensor* out = new RamTensor(ref_out->getShape()); - Tensor* out_min = new RamTensor(ref_min->getShape()); - Tensor* out_max = new RamTensor(ref_max->getShape()); + TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); + TENSOR out_min = ctx.add(new RamTensor(ref_min.lock()->getShape())); + TENSOR out_max = ctx.add(new RamTensor(ref_max.lock()->getShape())); + + //lock on to required output tensors + S_TENSOR ref_out_s = ref_out.lock(); + S_TENSOR ref_min_s = ref_min.lock(); + S_TENSOR ref_max_s = ref_max.lock(); + S_TENSOR out_s = out.lock(); + S_TENSOR out_min_s = out_min.lock(); + S_TENSOR out_max_s = out_max.lock(); + + TList inputs = {a, min, max}; + TList outputs = {out, out_min, out_max}; timer_start(); - Relu(a, min, max, out, out_min, - out_max); + ctx.push(new ReluOp(), inputs, outputs); + ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_out, out) + - meanPercentErr(ref_min, out_min) + - meanPercentErr(ref_max, out_max); + double result = meanPercentErr(ref_out_s.get(), out_s.get()) + + meanPercentErr(ref_min_s.get(), out_min_s.get()) + + meanPercentErr(ref_max_s.get(), out_max_s.get()); // passed(result < 0.0001); passed(result == 0); - delete a; - delete min; - delete max; - delete ref_out; - delete ref_min; - delete ref_max; - delete out; - delete out_min; - delete out_max; } void runAll(void) { reluTest(); } diff --git a/main.cpp b/main.cpp index 9d991c0e..0c8acb37 100644 --- a/main.cpp +++ b/main.cpp @@ -10,6 +10,7 @@ #include "context_test.hpp" #include "MathTests.hpp" #include "tensor_test.hpp" +#include "NnTests.hpp" //#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); @@ -54,12 +55,13 @@ int main(int argc, char** argv) { ctxTest.runAll(); printf("Context result...\r\n"); ctxTest.printSummary(); - /* printf("NnOpS: \r\n"); + + printf("NnOpS: \r\n"); NnOpsTest nnTest; nnTest.runAll(); printf("Nn Ops result...\r\n"); nnTest.printSummary(); - */ + printf("Transformation test: \r\n"); From e12c231b7f8ca9d456ba33a091dfc8510d582dc3 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sun, 12 Nov 2017 15:55:15 +0900 Subject: [PATCH 53/80] matrix test passing, moved from context tests --- MatrixOps.hpp | 5 ++- MatrixTests.hpp | 85 +++++++++++++++++++++------------------ context_test.hpp | 102 +++++++++++++++++++++++------------------------ main.cpp | 11 ++--- 4 
files changed, 106 insertions(+), 97 deletions(-) diff --git a/MatrixOps.hpp b/MatrixOps.hpp index 7b07d972..3b8016ad 100644 --- a/MatrixOps.hpp +++ b/MatrixOps.hpp @@ -217,14 +217,15 @@ void QuantizedMatMul2(S_TENSOR A, S_TENSOR B, S_TENSOR C, *c_max = max_c_value; } -class QntMatMulOp : public Operator{ +template +class QntMatMulOp : public Operator { public: QntMatMulOp() { n_inputs = 6; n_outputs = 3; } virtual void compute() override { - QuantizedMatMul2(inputs[0], inputs[3], + QuantizedMatMul2(inputs[0], inputs[3], outputs[0], inputs[1], inputs[4], inputs[2], inputs[5], outputs[1], outputs[2]); } diff --git a/MatrixTests.hpp b/MatrixTests.hpp index 7ef7385d..a0bf8d22 100644 --- a/MatrixTests.hpp +++ b/MatrixTests.hpp @@ -6,57 +6,64 @@ #include "tensorIdxImporter.hpp" class matrixOpsTest : public Test { + TensorIdxImporter t_import; + Context ctx; + public: void qMatMul(void) { testStart("Quantized Matrix Mul"); - TensorIdxImporter t_import; - - // reference inputs - Tensor* a = - t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx"); - Tensor* a_min = - t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx"); - Tensor* a_max = - t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx"); - Tensor* b = - t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx"); - Tensor* b_min = - t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx"); - Tensor* b_max = - t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx"); + //inputs + TENSOR a = + ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx")); + TENSOR a_min = + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx")); + TENSOR a_max = + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx")); + TENSOR b = + ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx")); + TENSOR b_min = + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx")); + TENSOR b_max = + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx")); // reference outputs - Tensor* c = - t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx"); - Tensor* c_min = - t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx"); - Tensor* c_max = - t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx"); + TENSOR c = + ctx.add(t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx")); + TENSOR c_min = + ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx")); + TENSOR c_max = + ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx")); - // actual implementation, uses ReferenceGemm() - // See gen_math_op.py:1619 - // See quantized_matmul_ops.cc:171, 178 - // Sub-functions: QuantizationRangeForMultiplication, - // QuantizationRangeForMultiplication, FloatForOneQuantizedLevel - Tensor* out_c = new RamTensor(c->getShape()); - Tensor* out_min = new RamTensor(c_min->getShape()); - Tensor* out_max = new RamTensor(c_max->getShape()); + //we need default constructor here + //so we can get ride of the shapes here + TENSOR out_c = ctx.add(new RamTensor(c.lock()->getShape())); + TENSOR out_min = ctx.add(new RamTensor(c_min.lock()->getShape())); + TENSOR out_max = ctx.add(new RamTensor(c_max.lock()->getShape())); + + TList inputs = {a, a_min, a_max, b, b_min, b_max}; + TList outputs = {out_c, out_min, out_max}; + + //if you want tensors to be alive after .eval() + //copies of the share_pointer needs to be here + S_TENSOR ref_c_rptr = c.lock(); + S_TENSOR ref_min_rptr = c_min.lock(); + S_TENSOR ref_max_rptr = c_max.lock(); + S_TENSOR out_c_rptr = out_c.lock(); + 
S_TENSOR out_min_rptr = out_min.lock(); + S_TENSOR out_max_rptr = out_max.lock(); + + timer_start(); - QuantizedMatMul(a, b, &out_c, a_min, b_min, a_max, - b_max, out_min, out_max); - timer_stop(); - // - // transpose_a=None, transpose_b=None + ctx.push(new QntMatMulOp(), inputs, outputs); + ctx.eval(); + timer_stop(); - // modify the checks below: + double result = meanPercentErr(ref_c_rptr.get(), out_c_rptr.get()) + meanPercentErr(ref_min_rptr.get(), out_min_rptr.get()) + + meanPercentErr(ref_max_rptr.get(), out_max_rptr.get()); - double result = meanPercentErr(c, out_c) + meanPercentErr(c_min, out_min) + - meanPercentErr(c_max, out_max); - // passed(result < 0.0001); passed(result == 0); - } void runAll(void) { qMatMul(); } diff --git a/context_test.hpp b/context_test.hpp index 36562eed..5d1ff4c2 100644 --- a/context_test.hpp +++ b/context_test.hpp @@ -19,60 +19,60 @@ class contextTest : public Test { public: - void MatMalTest(void) { - testStart("Context QntMatMal Op"); - //inputs - TENSOR a = - ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx")); - TENSOR a_min = - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx")); - TENSOR a_max = - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx")); - TENSOR b = - ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx")); - TENSOR b_min = - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx")); - TENSOR b_max = - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx")); - - // reference outputs - TENSOR c = - ctx.add(t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx")); - TENSOR c_min = - ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx")); - TENSOR c_max = - ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx")); - - - //we need default constructor here - //so we can get ride of the shapes here - TENSOR out_c = ctx.add(new RamTensor(c.lock()->getShape())); - TENSOR out_min = ctx.add(new RamTensor(c_min.lock()->getShape())); - TENSOR out_max = ctx.add(new RamTensor(c_max.lock()->getShape())); - - TList inputs = {a, a_min, a_max, b, b_min, b_max}; - TList outputs = {out_c, out_min, out_max}; - - //if you want tensors to be alive after .eval() - //copies of the share_pointer needs to be here - S_TENSOR ref_c_rptr = c.lock(); - S_TENSOR ref_min_rptr = c_min.lock(); - S_TENSOR ref_max_rptr = c_max.lock(); - S_TENSOR out_c_rptr = out_c.lock(); - S_TENSOR out_min_rptr = out_min.lock(); - S_TENSOR out_max_rptr = out_max.lock(); +// void MatMalTest(void) { +// testStart("Context QntMatMal Op"); +// //inputs +// TENSOR a = +// ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx")); +// TENSOR a_min = +// ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx")); +// TENSOR a_max = +// ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx")); +// TENSOR b = +// ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx")); +// TENSOR b_min = +// ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx")); +// TENSOR b_max = +// ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx")); + +// // reference outputs +// TENSOR c = +// ctx.add(t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx")); +// TENSOR c_min = +// ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx")); +// TENSOR c_max = +// ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx")); + + +// //we need default constructor here +// //so we can get ride of the shapes here +// 
TENSOR out_c = ctx.add(new RamTensor(c.lock()->getShape())); +// TENSOR out_min = ctx.add(new RamTensor(c_min.lock()->getShape())); +// TENSOR out_max = ctx.add(new RamTensor(c_max.lock()->getShape())); + +// TList inputs = {a, a_min, a_max, b, b_min, b_max}; +// TList outputs = {out_c, out_min, out_max}; + +// //if you want tensors to be alive after .eval() +// //copies of the share_pointer needs to be here +// S_TENSOR ref_c_rptr = c.lock(); +// S_TENSOR ref_min_rptr = c_min.lock(); +// S_TENSOR ref_max_rptr = c_max.lock(); +// S_TENSOR out_c_rptr = out_c.lock(); +// S_TENSOR out_min_rptr = out_min.lock(); +// S_TENSOR out_max_rptr = out_max.lock(); - timer_start(); - ctx.push(new QntMatMulOp(), inputs, outputs); - ctx.eval(); - timer_stop(); +// timer_start(); +// ctx.push(new QntMatMulOp(), inputs, outputs); +// ctx.eval(); +// timer_stop(); - double result = meanPercentErr(ref_c_rptr.get(), out_c_rptr.get()) + meanPercentErr(ref_min_rptr.get(), out_min_rptr.get()) + - meanPercentErr(ref_max_rptr.get(), out_max_rptr.get()); +// double result = meanPercentErr(ref_c_rptr.get(), out_c_rptr.get()) + meanPercentErr(ref_min_rptr.get(), out_min_rptr.get()) + +// meanPercentErr(ref_max_rptr.get(), out_max_rptr.get()); - passed(result == 0); - } +// passed(result == 0); +// } void RefCountTest(void) { testStart("Context Ref Count"); @@ -117,7 +117,7 @@ class contextTest : public Test { void runAll(void) { - MatMalTest(); + // MatMalTest(); RefCountTest(); } }; diff --git a/main.cpp b/main.cpp index 0c8acb37..44b9336b 100644 --- a/main.cpp +++ b/main.cpp @@ -11,6 +11,7 @@ #include "MathTests.hpp" #include "tensor_test.hpp" #include "NnTests.hpp" +#include "MatrixTests.hpp" //#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); @@ -31,13 +32,8 @@ int main(int argc, char** argv) { idxTest.runAll(); -/* printf("running matrix test ...\r\n"); - matrixOpsTest matrixTests; - matrixTests.runAll();*/ printf("IDX import:\r\n"); idxTest.printSummary(); - printf("Matrix: \r\n"); -// matrixTests.printSummary(); ArrayOpsTest arrayTests; arrayTests.runAll(); @@ -50,6 +46,11 @@ int main(int argc, char** argv) { printf("Math result...\r\n"); mathTests.printSummary(); + printf("running matrix test ...\r\n"); + matrixOpsTest matrixTests; + matrixTests.runAll(); + matrixTests.printSummary(); + printf("Context test: \r\n"); contextTest ctxTest; ctxTest.runAll(); From c9d219e97b66d00dea6652d8442cd85e0b0523d4 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sun, 12 Nov 2017 16:01:14 +0900 Subject: [PATCH 54/80] enable tensor tests as it is not dependent on Context --- main.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/main.cpp b/main.cpp index 44b9336b..e33cc724 100644 --- a/main.cpp +++ b/main.cpp @@ -71,6 +71,12 @@ int main(int argc, char** argv) { printf("Transformation result...\r\n"); tTest.printSummary(); + printf("tesnor test: \r\n"); + tensorTest tenT; + tenT.runAll(); + printf("tensor result: \r\n"); + tenT.printSummary(); + /* // printf("mlp test: \r\n"); @@ -80,11 +86,7 @@ int main(int argc, char** argv) { // mlpt.printSummary(); - printf("tesnor test: \r\n"); - tensorTest tenT; - tenT.runAll(); - printf("tensor result: \r\n"); - tenT.printSummary();*/ +*/ //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]}) //Out[24]: array([7]) From d05aa3d01bcab6f8b461c210d340305cf91df4b2 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sun, 12 Nov 2017 16:19:52 +0900 Subject: [PATCH 55/80] context.add() now support 
initializer_list --- MatrixTests.hpp | 10 +++++++--- context.hpp | 17 +++++++++++++++++ main.cpp | 2 -- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/MatrixTests.hpp b/MatrixTests.hpp index a0bf8d22..d72815a9 100644 --- a/MatrixTests.hpp +++ b/MatrixTests.hpp @@ -42,8 +42,8 @@ class matrixOpsTest : public Test { TENSOR out_min = ctx.add(new RamTensor(c_min.lock()->getShape())); TENSOR out_max = ctx.add(new RamTensor(c_max.lock()->getShape())); - TList inputs = {a, a_min, a_max, b, b_min, b_max}; - TList outputs = {out_c, out_min, out_max}; + //TList inputs = {a, a_min, a_max, b, b_min, b_max}; + //TList outputs = {out_c, out_min, out_max}; //if you want tensors to be alive after .eval() //copies of the share_pointer needs to be here @@ -56,7 +56,11 @@ class matrixOpsTest : public Test { timer_start(); - ctx.push(new QntMatMulOp(), inputs, outputs); + //ctx.push(new QntMatMulOp(), inputs, outputs); + ctx.push(new QntMatMulOp(), + {a, a_min, a_max, b, b_min, b_max}, + {out_c, out_min, out_max}); + ctx.eval(); timer_stop(); diff --git a/context.hpp b/context.hpp index 50548928..dabf2258 100644 --- a/context.hpp +++ b/context.hpp @@ -3,6 +3,7 @@ #include #include +#include #include "uTensorBase.hpp" #include "stdio.h" //#include @@ -44,6 +45,7 @@ class Context : public uTensor { public: TENSOR add(Tensor* t, uint8_t init_count = 0); void push(Operator *op, TList &_inputs, TList &_outputs); + void push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs); int eval(void); Context() { @@ -83,6 +85,21 @@ void Context::push(Operator *op, TList &_inputs, TList &_outputs) { } +void Context::push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs) { + TList inputs; + TList outputs; + + for(auto i:_inputs) { + inputs.push_back(i); + } + + for(auto o:_outputs) { + outputs.push_back(o); + } + + push(op, inputs, outputs); +} + void Context::incrTListRef(const TList &t_list) { for(auto t:t_list) { Tensor* ptr = t.lock().get(); diff --git a/main.cpp b/main.cpp index e33cc724..fd18954b 100644 --- a/main.cpp +++ b/main.cpp @@ -63,8 +63,6 @@ int main(int argc, char** argv) { printf("Nn Ops result...\r\n"); nnTest.printSummary(); - - printf("Transformation test: \r\n"); transTest tTest; tTest.runAll(); From 115eb55394e7b70df3cf8b0b8555c1f1a0fa3527 Mon Sep 17 00:00:00 2001 From: kazami Date: Mon, 13 Nov 2017 16:15:03 +0800 Subject: [PATCH 56/80] fix tensor constructor bug 1. when call tensor constructor, it just create empty shared_ptr 2. init function is responsible for malloc --- tensor.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensor.hpp b/tensor.hpp index 713bfe8e..c22f2c19 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -75,8 +75,6 @@ class Tensor : public uTensor { } template void init(std::vector& v) { - s = std::make_shared(); - s->total_size = 0; for (auto i : v) { s->shape.push_back(i); @@ -151,6 +149,7 @@ template class RamTensor : public Tensor { // need deep copy public: + RamTensor() : Tensor() {} RamTensor(std::initializer_list l) : Tensor() { std::vector v; From 1c0392d0e50284f626091d9f0d70b0d768decdc0 Mon Sep 17 00:00:00 2001 From: kazami Date: Mon, 13 Nov 2017 16:16:10 +0800 Subject: [PATCH 57/80] 1. fix the name of DequantizeOp 2. 
add resize of output ptr in QuantizeV2 --- ArrayOps.hpp | 7 +++++-- ArrayTests.hpp | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ArrayOps.hpp b/ArrayOps.hpp index a67de0d9..aca7d3ff 100644 --- a/ArrayOps.hpp +++ b/ArrayOps.hpp @@ -22,6 +22,9 @@ void QuantizeV2(S_TENSOR input, S_TENSOR _min_range, S_TENSOR _max_range, float min_range = std::min(0.0f, input_min_range); const float epsilon = std::max(1.0f, std::max(fabsf(input_min_range), fabsf(input_max_range))) / 100.0f; + if(output && output->getSize() == 0) { + output->resize(input->getShape()); + } float max_range = std::max(input_max_range, min_range + epsilon); max_range = std::max(0.0f, max_range); @@ -86,9 +89,9 @@ void dequantize(S_TENSOR input, S_TENSOR min_range, S_TENSOR max_range, S_TENSOR val * q2f.range_scale); } } -class Dequantize2Op : public Operator { +class DequantizeOp : public Operator { public: - Dequantize2Op() { + DequantizeOp() { n_inputs = 3; n_outputs = 1; } diff --git a/ArrayTests.hpp b/ArrayTests.hpp index 7a2a1d5a..c5cbcb18 100644 --- a/ArrayTests.hpp +++ b/ArrayTests.hpp @@ -73,7 +73,7 @@ class ArrayOpsTest : public Test { S_TENSOR ref_out = out_ref.lock(); timer_start(); - ctx.push(new Dequantize2Op(), inputs, outputs); + ctx.push(new DequantizeOp(), inputs, outputs); ctx.eval(); timer_stop(); From 1e23677dffe4cba60445d00e7a3a7e7e4d746ca7 Mon Sep 17 00:00:00 2001 From: kazami Date: Mon, 13 Nov 2017 16:20:33 +0800 Subject: [PATCH 58/80] 1. add resize for output ptr in Relu 2. modify to pass mlp test --- NnOps.hpp | 3 + main.cpp | 15 +-- mlp_test.hpp | 324 +++++++++++++++++++++++++-------------------------- 3 files changed, 172 insertions(+), 170 deletions(-) diff --git a/NnOps.hpp b/NnOps.hpp index a8f1f456..985b9b5f 100644 --- a/NnOps.hpp +++ b/NnOps.hpp @@ -13,6 +13,9 @@ void Relu(S_TENSOR input, S_TENSOR in_min, S_TENSOR in_max, const TOut min_as_quantized = FloatToQuantized(0.0f, input_min, input_max); + if (output && output->getSize() == 0) { + output->resize(input->getShape()); + } TOut* out = output->write(0, 0); for (uint32_t i = 0; i < output->getSize(); i++) { if (in[i] > min_as_quantized) { diff --git a/main.cpp b/main.cpp index fd18954b..e0413ac4 100644 --- a/main.cpp +++ b/main.cpp @@ -12,6 +12,7 @@ #include "tensor_test.hpp" #include "NnTests.hpp" #include "MatrixTests.hpp" +#include "mlp_test.hpp" //#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); @@ -76,15 +77,15 @@ int main(int argc, char** argv) { tenT.printSummary(); -/* - // printf("mlp test: \r\n"); - // mlpTest mlpt; - // mlpt.runAll(); - // printf("mlp result...\r\n"); - // mlpt.printSummary(); + + printf("mlp test: \r\n"); + mlpTest mlpt; + mlpt.runAll(); + printf("mlp result...\r\n"); + mlpt.printSummary(); + -*/ //In [24]: tf.get_default_graph().get_tensor_by_name("import/y_pred:0").eval(feed_dict={x: mnist.test.images[0:1]}) //Out[24]: array([7]) diff --git a/mlp_test.hpp b/mlp_test.hpp index 3c9c696b..be390b9b 100644 --- a/mlp_test.hpp +++ b/mlp_test.hpp @@ -5,10 +5,14 @@ #include "ArrayOps.hpp" #include "MathOps.hpp" #include "MatrixOps.hpp" +#include "uTensorBase.hpp" +#include "context.hpp" +#include "test.hpp" class mlpTest : public Test { public: TensorIdxImporter t_import; + Context ctx; void runQuantization() { @@ -17,58 +21,56 @@ class mlpTest : public Test { //reshape //input - Tensor* mnist_input = t_import.float_import("/fs/testData/mlpTest/runQuantization/in/import-Placeholder_0.idx"); - Tensor* reshape_dim = 
t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reshape_dims_0.idx"); + TENSOR mnist_input = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQuantization/in/import-Placeholder_0.idx")); + TENSOR reshape_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reshape_dims_0.idx")); //output - Tensor* reshape_out = nullptr; - reshape(mnist_input, reshape_dim, &reshape_out); - delete mnist_input; - delete reshape_dim; + TENSOR reshape_out = ctx.add(new RamTensor()); +// S_TENSOR out_reshape_out = reshape_out.lock(); + ctx.push(new ReshapeOp(), {mnist_input, reshape_dim}, {reshape_out}); //min //input - Tensor* min_reduce_dim = t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_min.idx"); + TENSOR min_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_min.idx")); //output - Tensor* min_out = new RamTensor({1}); - Min(reshape_out, min_reduce_dim, min_out); - delete min_reduce_dim; + TENSOR min_out = ctx.add(new RamTensor({1})); + // S_TENSOR out_min_out = min_out.lock(); + ctx.push(new MinOp(), {reshape_out, min_reduce_dim}, {min_out}); //max //input - Tensor* max_reduce_dim = t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_max.idx"); + TENSOR max_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_max.idx")); //output - Tensor* max_out = new RamTensor({1}); - Max(reshape_out, max_reduce_dim, max_out); - delete max_reduce_dim; + TENSOR max_out = ctx.add(new RamTensor({1})); + // S_TENSOR out_max_out = max_out.lock(); + ctx.push(new MaxOp(), {reshape_out, max_reduce_dim}, {max_out}); //quantization //output - Tensor* qnt_out = new RamTensor(reshape_out->getShape()); - Tensor* qnt_min = new RamTensor({1}); - Tensor* qnt_max = new RamTensor({1}); - QuantizeV2(reshape_out, min_out, max_out, qnt_out, qnt_min, qnt_max); - delete reshape_out; + TENSOR qnt_out = ctx.add(new RamTensor()); + TENSOR qnt_min = ctx.add(new RamTensor({1})); + TENSOR qnt_max = ctx.add(new RamTensor({1})); - timer_stop(); + S_TENSOR out_qnt = qnt_out.lock(); + S_TENSOR out_min = qnt_min.lock(); + S_TENSOR out_max = qnt_max.lock(); + + TENSOR qnt_ref = ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_0.idx")); + TENSOR qnt_min_ref = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_1.idx")); + TENSOR qnt_max_ref = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_2.idx")); + S_TENSOR ref_qnt = qnt_ref.lock(); + S_TENSOR ref_max = qnt_max_ref.lock(); + S_TENSOR ref_min = qnt_min_ref.lock(); - Tensor* qnt_ref = t_import.ubyte_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_0.idx"); - Tensor* qnt_min_ref = t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_1.idx"); - Tensor* qnt_max_ref = t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_2.idx"); + ctx.push(new QuantizeV2Op(), {reshape_out, min_out, max_out}, {qnt_out, qnt_min, qnt_max}); + ctx.eval(); - double result = meanPercentErr(qnt_ref, qnt_out); - result += meanPercentErr(qnt_min_ref, qnt_min); - 
result += meanPercentErr(qnt_max_ref, qnt_max); + timer_stop(); + double result = meanPercentErr(ref_qnt.get(), out_qnt.get()); + result += meanPercentErr(ref_min.get(), out_min.get()); + result += meanPercentErr(ref_max.get(), out_max.get()); passed(result == 0); - delete qnt_ref; - delete qnt_min_ref; - delete qnt_max_ref; - delete qnt_out; - delete qnt_min; - delete qnt_max; - delete max_out; - delete min_out; } //quantized matmul dequant add @@ -80,52 +82,53 @@ class mlpTest : public Test { //quantized matrix multiplication //input - Tensor* x = - t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_0.idx"); - Tensor* x_min = - t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_1.idx"); - Tensor* x_max = - t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_2.idx"); - Tensor* w = - t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_quint8_const_0.idx"); - Tensor* w_min = - t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_min_0.idx"); - Tensor* w_max = - t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_max_0.idx"); + TENSOR x = + ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_0.idx")); + TENSOR x_min = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_1.idx")); + TENSOR x_max = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_2.idx")); + TENSOR w = + ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_quint8_const_0.idx")); + TENSOR w_min = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_min_0.idx")); + TENSOR w_max = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_max_0.idx")); DEBUG("all QuantizedMatMul input imported...\r\n"); //output - uint32_t out_col = (x->getShape())[0]; - uint32_t out_row = (w->getShape())[1]; - Tensor* out_c = new RamTensor({out_col, out_row}); + uint32_t out_col = (x.lock()->getShape())[0]; + uint32_t out_row = (w.lock()->getShape())[1]; + TENSOR out_c = ctx.add(new RamTensor({out_col, out_row})); // printf("x[0] = %d, x[1] = %d, b[0] = %d, b[1] = %d\r\n", (x.getShape())[0], (x.getShape())[1], // (w.getShape())[0], (w.getShape())[1]); // printf("c[0] = %d, c[1] = %d\r\n", (out_c.getShape())[0], (out_c.getShape())[1]); // fflush(stdout); - Tensor* matmul_out_min = new RamTensor({1}); - Tensor* matmul_out_max = new RamTensor({1}); + TENSOR matmul_out_min = ctx.add(new RamTensor({1})); + TENSOR matmul_out_max = ctx.add(new RamTensor({1})); - QuantizedMatMul(x, w, &out_c, x_min, w_min, x_max, - w_max, matmul_out_min, matmul_out_max); + TList inputs = {x, x_min, x_max, w, w_min, w_max}; + TList outputs = {out_c, matmul_out_min, matmul_out_max}; + S_TENSOR out_val = out_c.lock(); + S_TENSOR out_min = matmul_out_min.lock(); + S_TENSOR out_max = matmul_out_max.lock(); + ctx.push(new QntMatMulOp(), inputs, outputs); //clean up - delete x; - delete w; - delete x_min; - delete w_min; - delete x_max; - delete w_max; - - Tensor* ref_out_c = - t_import.int_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_quantized_mat_mul_0.idx"); - Tensor* ref_matmul_out_min = - 
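// Note: ref_out_c is imported with int_import because QuantizedMatMul keeps a
// 32-bit accumulator: each product of two 8-bit codes needs up to 16 bits, and
// the sum over the inner dimension needs the extra headroom before the
// Requantize stage narrows it back to quint8. Schematic inner loop (strides and
// zero-point handling simplified; not the real kernel):
#include <cstddef>
#include <cstdint>
static int32_t dotRowCol(const uint8_t* a, const uint8_t* b,
                         size_t k, size_t b_stride) {
  int32_t acc = 0;
  for (size_t i = 0; i < k; ++i) {
    acc += static_cast<int32_t>(a[i]) * static_cast<int32_t>(b[i * b_stride]);
  }
  return acc;  // requantized to 8 bits by the Requantize stage below
}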
t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_quantized_mat_mul_1.idx"); - Tensor* ref_matmul_out_max = - t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_quantized_mat_mul_2.idx"); - - double temp_result = (meanPercentErr(ref_out_c, out_c) + meanPercentErr(ref_matmul_out_min, matmul_out_min) + meanPercentErr(ref_matmul_out_max, matmul_out_max)); + + TENSOR ref_out_c = + ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_quantized_mat_mul_0.idx")); + TENSOR ref_matmul_out_min = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_quantized_mat_mul_1.idx")); + TENSOR ref_matmul_out_max = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_quantized_mat_mul_2.idx")); + + S_TENSOR ref_val = ref_out_c.lock(); + S_TENSOR ref_min = ref_matmul_out_min.lock(); + S_TENSOR ref_max = ref_matmul_out_max.lock(); + /* double temp_result = (meanPercentErr(ref_val.get(), out_val.get()) + meanPercentErr(ref_min.get(), out_min.get()) + meanPercentErr(ref_max.get(), out_max.get())); if(temp_result > 0) { DEBUG("matrix mul failed\r\n"); failed(); @@ -133,23 +136,24 @@ class mlpTest : public Test { } else { DEBUG("matrix mul passed\r\n"); } - delete ref_out_c; - delete ref_matmul_out_max; - delete ref_matmul_out_min; - +*/ DEBUG("QuantizedMatMul completed!\r\n"); //output - Tensor* req_out_min = new RamTensor({1}); - Tensor* req_out_max = new RamTensor({1}); - Requantization_Range(out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max); - - Tensor* ref_req_out_min = - t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_requant_range_0.idx"); - Tensor* ref_req_out_max = - t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_requant_range_1.idx"); - - temp_result = (meanPercentErr(ref_req_out_min, req_out_min) + meanPercentErr(ref_req_out_max, req_out_max)); + TENSOR req_out_min = ctx.add(new RamTensor({1})); + TENSOR req_out_max = ctx.add(new RamTensor({1})); + S_TENSOR out_req_min = req_out_min.lock(); + S_TENSOR out_req_max = req_out_max.lock(); + ctx.push(new Requantization_RangeOp(), {out_c, matmul_out_min, matmul_out_max}, {req_out_min, req_out_max}); + + TENSOR ref_req_out_min = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_requant_range_0.idx")); + TENSOR ref_req_out_max = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_requant_range_1.idx")); + S_TENSOR ref_req_min = ref_req_out_min.lock(); + S_TENSOR ref_req_max = ref_req_out_max.lock(); +/* + temp_result = (meanPercentErr(ref_req_min.get(), out_req_min.get()) + meanPercentErr(ref_req_max.get(), out_req_max.get())); if(temp_result > 0) { DEBUG("Requantization_Range failed\r\n"); failed(); @@ -157,31 +161,30 @@ class mlpTest : public Test { } else { DEBUG("Requantization_Range passed\r\n"); } - delete ref_req_out_min; - delete ref_req_out_max; - DEBUG("Requantization_Range completed!\r\n"); + DEBUG("Requantization_Range completed!\r\n");*/ //output - Tensor* reqnt_out = new RamTensor(out_c->getShape()); - Tensor* reqnt_out_min = new RamTensor({1}); - Tensor* reqnt_out_max = new RamTensor({1}); - Requantize(out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max, - reqnt_out, reqnt_out_min, reqnt_out_max); + TENSOR reqnt_out = ctx.add(new 
RamTensor(out_c.lock()->getShape())); + TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); + TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); + S_TENSOR out_reqnt = reqnt_out.lock(); + S_TENSOR out_reqnt_min = reqnt_out_min.lock(); + S_TENSOR out_reqnt_max = reqnt_out_max.lock(); + ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, {reqnt_out, reqnt_out_min, reqnt_out_max}); //clean up - delete matmul_out_min; - delete matmul_out_max; - delete req_out_min; - delete req_out_max; - - Tensor* ref_reqnt_out = - t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_0.idx"); - Tensor* ref_reqnt_out_min = - t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_1.idx"); - Tensor* ref_reqnt_out_max = - t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_2.idx"); - - temp_result = (meanPercentErr(ref_reqnt_out, reqnt_out) + meanPercentErr(ref_reqnt_out_min, reqnt_out_min) + meanPercentErr(ref_reqnt_out_max, reqnt_out_max)); + + TENSOR ref_reqnt_out = + ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_0.idx")); + TENSOR ref_reqnt_out_min = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_1.idx")); + TENSOR ref_reqnt_out_max = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_2.idx")); + S_TENSOR ref_reqnt = ref_reqnt_out.lock(); + S_TENSOR ref_reqnt_min = ref_reqnt_out_min.lock(); + S_TENSOR ref_reqnt_max = ref_reqnt_out_max.lock(); +/* + temp_result = (meanPercentErr(ref_reqnt.get(), out_reqnt.get()) + meanPercentErr(ref_reqnt_min.get(), out_reqnt_min.get()) + meanPercentErr(ref_reqnt_max.get(), out_reqnt_max.get())); if(temp_result > 0) { DEBUG("Requantize failed\r\n"); failed(); @@ -189,27 +192,22 @@ class mlpTest : public Test { } else { DEBUG("Requantize passed\r\n"); } - delete ref_reqnt_out; - delete ref_reqnt_out_min; - delete ref_reqnt_out_max; - DEBUG("Requantize completed!\r\n"); + DEBUG("Requantize completed!\r\n");*/ //output - Tensor* deqnt_out = new RamTensor(out_c->getShape()); - dequantize(reqnt_out, reqnt_out_min, reqnt_out_max, &deqnt_out); - delete out_c; - delete reqnt_out_min; - delete reqnt_out_max; - delete reqnt_out; - - Tensor* ref_deqnt_out = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_0.idx"); - double temp = meanPercentErr(ref_deqnt_out, deqnt_out); + TENSOR deqnt_out = ctx.add(new RamTensor(out_c.lock()->getShape())); + S_TENSOR out_deqnt = deqnt_out.lock(); + ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); + + TENSOR ref_deqnt_out = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_0.idx")); + S_TENSOR ref_deqnt = ref_deqnt_out.lock(); + /*double temp = meanPercentErr(ref_deqnt.get(), out_deqnt.get()); if(temp > 0.0001) { printf("dequantize failed (%.6f)\r\n", temp); - const float* ref_ptr = ref_deqnt_out->read(0, 0); - const float* test_ptr = deqnt_out->read(0, 0); - for(uint32_t i; i < ref_deqnt_out->getSize(); i++) { + const float* ref_ptr = ref_deqnt.get()->read(0, 0); + const float* test_ptr = out_deqnt.get()->read(0, 0); + for(uint32_t i; i < ref_deqnt->getSize(); i++) { if(ref_ptr[i] != test_ptr[i]) { DEBUG("%d: %.3f != %.3f, diff: %.8f%%\r\n", i, ref_ptr[i], test_ptr[i], test_ptr[i]/ref_ptr[i]); } else { @@ -220,32 
+218,29 @@ class mlpTest : public Test { return; } else { DEBUG("dequantize passed\r\n"); - } - delete ref_deqnt_out; + }*/ DEBUG("dequantize completed!\r\n"); //input - Tensor* bias = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-Variable_1_0.idx"); + TENSOR bias = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-Variable_1_0.idx")); //output - Tensor* output_z = new RamTensor(deqnt_out->getShape()); - Add(deqnt_out, bias, &output_z); - delete deqnt_out; + TENSOR output_z = ctx.add(new RamTensor(deqnt_out.lock()->getShape())); + S_TENSOR out_z = output_z.lock(); + ctx.push(new AddOp(), {deqnt_out, bias}, {output_z}); + ctx.eval(); DEBUG("Add completed!\r\n"); timer_stop(); //load reference - Tensor* ref_z = t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-add_0.idx"); + TENSOR ref_z = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-add_0.idx")); + S_TENSOR ref_z_v = ref_z.lock(); - double result = meanPercentErr(ref_z, output_z); - std::cout << result << std::endl; + double result = meanPercentErr(ref_z_v.get(), out_z.get()); passed(result < 0.0001); - delete ref_z; - delete output_z; - delete bias; } @@ -253,56 +248,59 @@ class mlpTest : public Test { testStart("runQntRelu"); - Tensor* input_z = t_import.float_import("/fs/testData/mlpTest/runQntRelu/in/import-add_0.idx"); - Tensor* reshape_dim = t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reshape_dims_0.idx"); - Tensor* reshape_out = nullptr; + TENSOR input_z = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntRelu/in/import-add_0.idx")); + TENSOR reshape_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reshape_dims_0.idx")); + TENSOR reshape_out = ctx.add(new RamTensor()); timer_start(); - reshape(input_z, reshape_dim, &reshape_out); + ctx.push(new ReshapeOp(), {input_z, reshape_dim}, {reshape_out}); //min //input - Tensor* min_reduce_dim = t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_min.idx"); + TENSOR min_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_min.idx")); //output - Tensor* min_out = new RamTensor({1}); - Min(reshape_out, min_reduce_dim, min_out); - delete min_reduce_dim; + TENSOR min_out = ctx.add(new RamTensor({1})); + ctx.push(new MinOp(), {reshape_out, min_reduce_dim}, {min_out}); //max //input - Tensor* max_reduce_dim = t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_max.idx"); + TENSOR max_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_max.idx")); //output - Tensor* max_out = new RamTensor({1}); - Max(reshape_out, max_reduce_dim, max_out); - delete max_reduce_dim; + TENSOR max_out = ctx.add(new RamTensor({1})); + ctx.push(new MaxOp(), {reshape_out, max_reduce_dim}, {max_out}); //quantization //output - Tensor* qnt_out = new RamTensor(reshape_out->getShape()); - Tensor* qnt_min = new RamTensor({1}); - Tensor* qnt_max = new RamTensor({1}); - QuantizeV2(reshape_out, min_out, max_out, qnt_out, qnt_min, qnt_max); - delete reshape_out; + TENSOR qnt_out = ctx.add(new RamTensor()); + TENSOR qnt_min = ctx.add(new RamTensor({1})); + TENSOR qnt_max = ctx.add(new RamTensor({1})); + ctx.push(new QuantizeV2Op(), {reshape_out, min_out, max_out}, {qnt_out, qnt_min, qnt_max}); - Tensor* out = new 
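// Note: the ReluOp exercised here stays in the quantized domain end to end: it
// converts the float threshold 0.0f into the input's code space once, then
// clamps codes against it (see the min_as_quantized logic added to NnOps.hpp
// above). A sketch of that kernel shape, with FloatToQuantized as assumed there:
#include <cstddef>
template <typename TIn, typename TOut>
static void quantizedRelu(const TIn* in, TOut* out, size_t n,
                          float in_min, float in_max) {
  const TOut zero_code = FloatToQuantized<TOut>(0.0f, in_min, in_max);
  for (size_t i = 0; i < n; ++i) {
    out[i] = (in[i] > static_cast<TIn>(zero_code))
                 ? static_cast<TOut>(in[i])
                 : zero_code;  // anything below float-0 clamps to its code
  }
}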
RamTensor(qnt_out->getShape()); - Tensor* out_min = new RamTensor({1}); - Tensor* out_max = new RamTensor({1}); - Relu(qnt_out, qnt_min, qnt_max, out, out_min, - out_max); + TENSOR out = ctx.add(new RamTensor()); + TENSOR out_min = ctx.add(new RamTensor({1})); + TENSOR out_max = ctx.add(new RamTensor({1})); - timer_stop(); + S_TENSOR out_val = out.lock(); + S_TENSOR out_min_val = out_min.lock(); + S_TENSOR out_max_val = out_max.lock(); + ctx.push(new ReluOp(), {qnt_out, qnt_min, qnt_max}, {out, out_min, out_max}); + ctx.eval(); - Tensor* ref_out = - t_import.ubyte_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_0.idx"); - Tensor* ref_out_min = - t_import.float_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_1.idx"); - Tensor* ref_out_max = - t_import.float_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_2.idx"); + timer_stop(); - double result = meanPercentErr(ref_out, out); - result += meanPercentErr(ref_out_min, out_min); - result += meanPercentErr(ref_out_max, out_max); + TENSOR ref_out = + ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_0.idx")); + TENSOR ref_out_min = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_1.idx")); + TENSOR ref_out_max = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_2.idx")); + + S_TENSOR ref_val = ref_out.lock(); + S_TENSOR ref_min_val = ref_out_min.lock(); + S_TENSOR ref_max_val = ref_out_max.lock(); + double result = meanPercentErr(ref_val.get(), out_val.get()); + result += meanPercentErr(ref_min_val.get(), out_min_val.get()); + result += meanPercentErr(ref_max_val.get(), out_max_val.get()); passed(result == 0); From e3316f36cea169fca4327d1ddc67172f8de98430 Mon Sep 17 00:00:00 2001 From: kazami Date: Tue, 14 Nov 2017 02:24:59 +0800 Subject: [PATCH 59/80] 1. 
for bug test --- deep_mnist_mlp.hpp | 294 ++++++++++++++++++++------------------------- main.cpp | 6 +- 2 files changed, 132 insertions(+), 168 deletions(-) diff --git a/deep_mnist_mlp.hpp b/deep_mnist_mlp.hpp index af30ac02..6e29d145 100644 --- a/deep_mnist_mlp.hpp +++ b/deep_mnist_mlp.hpp @@ -7,153 +7,109 @@ #include "NnOps.hpp" #include "ArrayOps.hpp" #include "uTensor_util.hpp" +#include "uTensorBase.hpp" +#include "context.hpp" -template -void tensorQuantize(Tensor* input, Tensor** output, - Tensor** out_min, Tensor** out_max) { +void tensorQuantize(Context& ctx, TENSOR input, TENSOR output, + TENSOR out_min, TENSOR out_max) { //reshape - Tensor* reshape_shape = new RamTensor({1}); - Tensor* reduce_dim = new RamTensor({1}); - Shape input_shape = input->getShape(); - Tensor* reshape_out = nullptr; + TENSOR reduce_dim = ctx.add(new RamTensor({1})); + TENSOR reshape_out = ctx.add(new RamTensor()); + TENSOR reshape_shape = ctx.add(new RamTensor({2})); - *(reshape_shape->write(0, 0)) = -1; - *(reduce_dim->write(0, 0)) = 0; + reshape_shape.lock()->write(0, 0)[1] = -1; + reshape_shape.lock()->write(0, 0)[0] = input.lock()->getSize(); + *(reduce_dim.lock()->write(0, 0)) = 0; + ctx.push(new ReshapeOp(), {input, reshape_shape}, {reshape_out}); - reshape(input, reshape_shape, &reshape_out); //Min and Max of (reshaped) input - Tensor* min_out = new RamTensor({1}); - Tensor* max_out = new RamTensor({1}); - Min(reshape_out, reduce_dim, min_out); - Max(reshape_out, reduce_dim, max_out); - - tensorChkAlloc(output, input->getShape()); - delete input; - Shape shape_one; - shape_one.push_back(1); - tensorChkAlloc(out_min, shape_one); - tensorChkAlloc(out_max, shape_one); - - QuantizeV2(reshape_out, min_out, max_out, *output, *out_min, *out_max); + TENSOR min_out = ctx.add(new RamTensor({1})); + TENSOR max_out = ctx.add(new RamTensor({1})); + ctx.push(new MinOp(), {reshape_out, reduce_dim}, {min_out}); + ctx.push(new MaxOp(), {reshape_out, reduce_dim}, {max_out}); + + output.lock()->resize(input.lock()->getShape()); + + ctx.push(new QuantizeV2Op(), {reshape_out, min_out, max_out}, {output, out_min, out_max}); } -template -void ReluLayer(Tensor* x, Tensor* x_min, Tensor* x_max, - Tensor* w, Tensor* w_min, Tensor* w_max, Tensor* b, - Tensor** output, Tensor** output_min, Tensor** output_max) { +void ReluLayer(Context& ctx, TENSOR x, TENSOR x_min, TENSOR x_max, + TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR b, + TENSOR output, TENSOR output_min, TENSOR output_max) { //quantized matmul - Tensor* out_c = nullptr; - Tensor* matmul_out_min = new RamTensor({1}); - Tensor* matmul_out_max = new RamTensor({1}); - QuantizedMatMul(x, w, &out_c, x_min, w_min, x_max, - w_max, matmul_out_min, matmul_out_max); - //clean up - delete x; - delete w; - delete x_min; - delete w_min; - delete x_max; - delete w_max; + TENSOR out_c = ctx.add(new RamTensor()); + + TENSOR matmul_out_min = ctx.add(new RamTensor({1})); + TENSOR matmul_out_max = ctx.add(new RamTensor({1})); + + ctx.push(new QntMatMulOp(), {x, x_min, x_max, w, w_min, w_max}, {out_c, matmul_out_min, matmul_out_max}); //Requantization_Range - Tensor* req_out_min = new RamTensor({1}); - Tensor* req_out_max = new RamTensor({1}); - Requantization_Range(out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max); + TENSOR req_out_min = ctx.add(new RamTensor({1})); + TENSOR req_out_max = ctx.add(new RamTensor({1})); + ctx.push(new Requantization_RangeOp(), {out_c, matmul_out_min, matmul_out_max}, {req_out_min, req_out_max}); //Requantize - Tensor* reqnt_out = new 
RamTensor(out_c->getShape()); - Tensor* reqnt_out_min = new RamTensor({1}); - Tensor* reqnt_out_max = new RamTensor({1}); - Requantize(out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max, - reqnt_out, reqnt_out_min, reqnt_out_max); + TENSOR reqnt_out = ctx.add(new RamTensor()); + TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); + TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); + ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, {reqnt_out, reqnt_out_min, reqnt_out_max}); - Shape out_shape = out_c->getShape(); + Shape out_shape = out_c.lock()->getShape(); //clean up - delete out_c; - delete matmul_out_min; - delete matmul_out_max; - delete req_out_min; - delete req_out_max; - - Tensor* deqnt_out = nullptr; - tensorChkAlloc(&deqnt_out, reqnt_out->getShape()); - dequantize(reqnt_out, reqnt_out_min, reqnt_out_max, &deqnt_out); - delete reqnt_out; - - Tensor* z_output = new RamTensor(deqnt_out->getShape()); - Add(deqnt_out, b, &z_output); - delete deqnt_out; - delete b; - - Tensor* z_qnt_output = nullptr; - Tensor* z_min = new RamTensor({1}); - Tensor* z_max = new RamTensor({1}); - tensorQuantize(z_output, &z_qnt_output, &z_min, &z_max); - - tensorChkAlloc(output, z_qnt_output->getShape()); - Shape shape_one; - shape_one.push_back(1); - tensorChkAlloc(output_min, shape_one); - tensorChkAlloc(output_max, shape_one); - Relu(z_qnt_output, z_min, z_max, *output, *output_min, *output_max); + + TENSOR deqnt_out = ctx.add(new RamTensor()); + ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); + + TENSOR z_output = ctx.add(new RamTensor()); + ctx.push(new AddOp(), {deqnt_out, b}, {z_output}); + + TENSOR z_qnt_output = ctx.add(new RamTensor()); + TENSOR z_min = ctx.add(new RamTensor({1})); + TENSOR z_max = ctx.add(new RamTensor({1})); + tensorQuantize(ctx, z_output, z_qnt_output, z_min, z_max); + + ctx.push(new ReluOp(), {z_qnt_output, z_min, z_max}, {output, output_min, output_max}); } -template -void PredLayer(Tensor* input, Tensor* input_min, - Tensor* input_max, Tensor** output, Tensor* w, Tensor* w_min, Tensor* w_max, Tensor* bias, Tensor* dim) { +void PredLayer(Context &ctx, TENSOR input, TENSOR input_min, + TENSOR input_max, TENSOR output, TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR bias, TENSOR dim) { - Tensor* out_c = nullptr; - Tensor* matmul_out_min = new RamTensor({1}); - Tensor* matmul_out_max = new RamTensor({1}); + TENSOR out_c = ctx.add(new RamTensor()); + TENSOR matmul_out_min = ctx.add(new RamTensor({1})); + TENSOR matmul_out_max = ctx.add(new RamTensor({1})); //MatMul - QuantizedMatMul(input, w, &out_c, input_min, w_min, - input_max, w_max, matmul_out_min, - matmul_out_max); - //clean up - delete input; - delete w;; - delete w_min; - delete w_max; - delete input_min; - delete input_max; + ctx.push(new QntMatMulOp(), {input, input_min, input_max, w, w_min, w_max}, + {out_c, matmul_out_min, matmul_out_max}); //Requantization_Range - Tensor* req_out_min = new RamTensor({1}); - Tensor* req_out_max = new RamTensor({1}); - Requantization_Range(out_c, matmul_out_min, matmul_out_max, - req_out_min, req_out_max); + TENSOR req_out_min = ctx.add(new RamTensor({1})); + TENSOR req_out_max = ctx.add(new RamTensor({1})); + ctx.push(new Requantization_RangeOp(), {out_c, matmul_out_min, matmul_out_max}, + {req_out_min, req_out_max}); //Requantize - Tensor* reqnt_out = new RamTensor(out_c->getShape()); - Tensor* reqnt_out_min = new RamTensor({1}); - Tensor* reqnt_out_max = new RamTensor({1}); - 
Requantize(out_c, matmul_out_min, matmul_out_max, - req_out_min, req_out_max, reqnt_out, - reqnt_out_min, reqnt_out_max); - - delete out_c; - delete matmul_out_min; - delete matmul_out_max; + TENSOR reqnt_out = ctx.add(new RamTensor(out_c.lock()->getShape())); + TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); + TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); + ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, + {reqnt_out, reqnt_out_min, reqnt_out_max}); //dequantize - Tensor* deqnt_out = nullptr; - dequantize(reqnt_out, reqnt_out_min, reqnt_out_max, &deqnt_out); - delete reqnt_out_min; - delete reqnt_out_max; + TENSOR deqnt_out = ctx.add(new RamTensor(out_c.lock()->getShape())); + ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); //Add - Tensor* output_z = nullptr; - Add(deqnt_out, bias, &output_z); - delete deqnt_out; - delete bias; + TENSOR output_z = ctx.add(new RamTensor(deqnt_out.lock()->getShape())); + ctx.push(new AddOp(), {deqnt_out, bias}, {output_z}); //ArgMax - ArgMax(output_z, dim, output); + ctx.push(new ArgMaxOp(), {output_z, dim}, {output}); } //Test code @@ -184,69 +140,77 @@ void runPred(void) { } */ + int runMLP(string inputIdxFile) { TensorIdxImporter t_import; - Tensor* x = - t_import.float_import(inputIdxFile); - Tensor* x_quantized = nullptr; - Tensor* x_min = nullptr; - Tensor* x_max = nullptr; - - tensorQuantize(x, &x_quantized, &x_min, &x_max); - - Tensor* w = t_import.ubyte_import( - "/fs/testData/deep_mlp/import-Variable_quint8_const_0.idx"); - Tensor* w_min = - t_import.float_import("/fs/testData/deep_mlp/import-Variable_min_0.idx"); - Tensor* w_max = - t_import.float_import("/fs/testData/deep_mlp/import-Variable_max_0.idx"); - Tensor* b = - t_import.float_import("/fs/testData/deep_mlp/import-Variable_1_0.idx"); - Tensor* relu_output = nullptr; - Tensor* relu_min = nullptr; - Tensor* relu_max = nullptr; - - ReluLayer(x_quantized, x_min, x_max, w, w_min, w_max, b, &relu_output, - &relu_min, &relu_max); - - w = t_import.ubyte_import( - "/fs/testData/deep_mlp/import-Variable_2_quint8_const_0.idx"); - w_min = t_import.float_import( - "/fs/testData/deep_mlp/import-Variable_2_min_0.idx"); - w_max = t_import.float_import( - "/fs/testData/deep_mlp/import-Variable_2_max_0.idx"); - b = t_import.float_import("/fs/testData/deep_mlp/import-Variable_3_0.idx"); - Tensor* relu_output2 = nullptr; - Tensor* relu_min2 = nullptr; - Tensor* relu_max2 = nullptr; - - ReluLayer(relu_output, relu_min, relu_max, w, w_min, w_max, b, &relu_output2, - &relu_min2, &relu_max2); - - Tensor* w2 = t_import.ubyte_import( + Context ctx; + TENSOR x = + ctx.add(t_import.float_import(inputIdxFile)); + TENSOR x_quantized = ctx.add(new RamTensor()); + TENSOR x_min = ctx.add(new RamTensor({1})); + TENSOR x_max = ctx.add(new RamTensor({1})); + + tensorQuantize(ctx, x, x_quantized, x_min, x_max); + ctx.eval(); + + TENSOR w = ctx.add(t_import.ubyte_import( + "/fs/testData/deep_mlp/import-Variable_quint8_const_0.idx")); + TENSOR w_min = + ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_min_0.idx")); + TENSOR w_max = + ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_max_0.idx")); + TENSOR b = + ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_1_0.idx")); + TENSOR relu_output = ctx.add(new RamTensor()); + TENSOR relu_min = ctx.add(new RamTensor({1})); + TENSOR relu_max = ctx.add(new RamTensor({1})); + + ReluLayer(ctx, x_quantized, x_min, x_max, w, w_min, 
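// Note: everything in runMLP is staged, not executed: ctx.add() registers
// tensors, ctx.push() queues operators, and the work (plus the ref-count-driven
// frees) happens only when ctx.eval() drains the queue. A minimal standalone
// example of the same pattern, mirroring how the tests lock outputs before
// eval so they survive cleanup:
//
//   Context ctx;
//   TENSOR a = ctx.add(new RamTensor({1}));
//   TENSOR b = ctx.add(new RamTensor({1}));
//   TENSOR c = ctx.add(new RamTensor({1}));
//   *(a.lock()->write(0, 0)) = 1.0f;
//   *(b.lock()->write(0, 0)) = 2.0f;
//   S_TENSOR c_s = c.lock();              // hold c across the eval
//   ctx.push(new AddOp(), {a, b}, {c});
//   ctx.eval();                           // computes now; a and b are freed
//   float sum = *(c_s->read(0, 0));       // 3.0f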
w_max, b, relu_output, + relu_min, relu_max); + ctx.eval(); + + TENSOR w2 = ctx.add(t_import.ubyte_import( + "/fs/testData/deep_mlp/import-Variable_2_quint8_const_0.idx")); + TENSOR w_min2 = ctx.add(t_import.float_import( + "/fs/testData/deep_mlp/import-Variable_2_min_0.idx")); + TENSOR w_max2 = ctx.add(t_import.float_import( + "/fs/testData/deep_mlp/import-Variable_2_max_0.idx")); + TENSOR b2 = ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_3_0.idx")); + TENSOR relu_output2 = ctx.add(new RamTensor()); + TENSOR relu_min2 = ctx.add(new RamTensor({1})); + TENSOR relu_max2 = ctx.add(new RamTensor({1})); + + + ReluLayer(ctx, relu_output, relu_min, relu_max, w, w_min, w_max, b, relu_output2, + relu_min2, relu_max2); + ctx.eval(); + + TENSOR w3 = ctx.add(t_import.ubyte_import( "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_quint8_const_0.idx"); - Tensor* w2_min = t_import.float_import( + "inputs/Variable_4_quint8_const_0.idx")); + TENSOR w2_min = ctx.add(t_import.float_import( "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_min_0.idx"); - Tensor* w2_max = t_import.float_import( + "inputs/Variable_4_min_0.idx")); + TENSOR w2_max = ctx.add(t_import.float_import( "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_max_0.idx"); - Tensor* bias = t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/add_2/inputs/Variable_5_0.idx"); - Tensor* dim = t_import.int_import( + "inputs/Variable_4_max_0.idx")); + TENSOR bias2 = ctx.add(t_import.float_import( + "/fs/testData/deep_mlp/runPredLayer/add_2/inputs/Variable_5_0.idx")); + TENSOR dim = ctx.add(t_import.int_import( "/fs/testData/deep_mlp/runPredLayer/y_pred/inputs/" - "y_pred-dimension_0.idx"); + "y_pred-dimension_0.idx")); - Tensor* pred = nullptr; - PredLayer(relu_output2, relu_min2, relu_max2, &pred, w2, w2_min, w2_max, bias, dim); + TENSOR pred = ctx.add(new RamTensor()); + PredLayer(ctx, relu_output2, relu_min2, relu_max2, pred, w3, w2_min, w2_max, bias2, dim); + S_TENSOR pred_val = pred.lock(); + ctx.eval(); Tensor* ref_out = t_import.float_import( "/fs/testData/deep_mlp/runPredLayer/y_pred/outputs/y_pred_0.idx"); Tensor* ref_pred = TensorCast(ref_out); - double result = Test::meanPercentErr(ref_pred, pred); + double result = Test::meanPercentErr(ref_pred, pred_val.get()); if (result < 0.0001) { printf("PASSED %.8f\r\n\r\n", result); @@ -254,6 +218,6 @@ int runMLP(string inputIdxFile) { printf("FAILED %.8f\r\n\r\n", result); } - return *(pred->read(0, 0)); + return *(pred.lock()->read(0, 0)); // output layer } diff --git a/main.cpp b/main.cpp index e0413ac4..fbc0a164 100644 --- a/main.cpp +++ b/main.cpp @@ -13,7 +13,7 @@ #include "NnTests.hpp" #include "MatrixTests.hpp" #include "mlp_test.hpp" -//#include "deep_mnist_mlp.hpp" +#include "deep_mnist_mlp.hpp" Serial pc(USBTX, USBRX, 115200); SDBlockDevice bd(MBED_CONF_APP_SD_MOSI, MBED_CONF_APP_SD_MISO, @@ -27,8 +27,8 @@ int main(int argc, char** argv) { printf("Deep MLP on Mbed (Trained with Tensorflow)\r\n\r\n"); printf("running deep-mlp...\r\n"); -/* int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); - printf("prediction: %d\r\n", prediction);*/ + int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); + printf("prediction: %d\r\n", prediction); idxImporterTest idxTest; idxTest.runAll(); From bdfd14599e9842ad3a4219cc204619746b44fab2 Mon Sep 17 00:00:00 2001 From: kazami Date: Tue, 14 Nov 
2017 16:52:05 +0800 Subject: [PATCH 60/80] sounds like run mlp work (draft) --- ArrayOps.hpp | 22 ++++++++- MathOps.hpp | 3 ++ deep_mnist_mlp.hpp | 110 +++++++++++++++++++++++++++++++++++++-------- 3 files changed, 116 insertions(+), 19 deletions(-) diff --git a/ArrayOps.hpp b/ArrayOps.hpp index aca7d3ff..20cb7ac5 100644 --- a/ArrayOps.hpp +++ b/ArrayOps.hpp @@ -22,8 +22,15 @@ void QuantizeV2(S_TENSOR input, S_TENSOR _min_range, S_TENSOR _max_range, float min_range = std::min(0.0f, input_min_range); const float epsilon = std::max(1.0f, std::max(fabsf(input_min_range), fabsf(input_max_range))) / 100.0f; + std::vector v; + + std::vector org = input->getShape(); + for (int i = org.size() - 1; i >= 0; i--) { + v.push_back(org[i]); + } + if(output && output->getSize() == 0) { - output->resize(input->getShape()); + output->resize(v); } float max_range = std::max(input_max_range, min_range + epsilon); @@ -125,6 +132,19 @@ class DequantizeOp : public Operator { template void reshape(S_TENSOR input, S_TENSOR shape, S_TENSOR output) { Shape dim; + uint32_t t = input->getShape().size(); + if (t == 0) { + t = 1; + } + + shape->resize({t}); + + if (t == 1) { + shape->write(0, 0)[0] = -1; + } else { + shape->write(0, 0)[0] = input->getSize(); + shape->write(0, 0)[1] = -1; + } //validating and inferring dimensions int infer_index = -1; diff --git a/MathOps.hpp b/MathOps.hpp index 72f667c3..bea3b6d3 100644 --- a/MathOps.hpp +++ b/MathOps.hpp @@ -64,6 +64,9 @@ void Requantize(S_TENSOR input, S_TENSOR in_min, S_TENSOR in_max, const float r_output_min = r_min->read(0, 0)[0]; const float r_output_max = r_max->read(0, 0)[0]; const T1 *input_ptr = input->read(0, 0); + if (output && output->getSize() == 0) { + output->resize(input->getShape()); + } Toutput *out_ptr = output->write(0, 0); // RequantizeManyInNewRange(input, input.getSize(), input_min, diff --git a/deep_mnist_mlp.hpp b/deep_mnist_mlp.hpp index 6e29d145..97e1f0eb 100644 --- a/deep_mnist_mlp.hpp +++ b/deep_mnist_mlp.hpp @@ -16,10 +16,9 @@ void tensorQuantize(Context& ctx, TENSOR input, TENSOR output, //reshape TENSOR reduce_dim = ctx.add(new RamTensor({1})); TENSOR reshape_out = ctx.add(new RamTensor()); - TENSOR reshape_shape = ctx.add(new RamTensor({2})); - reshape_shape.lock()->write(0, 0)[1] = -1; - reshape_shape.lock()->write(0, 0)[0] = input.lock()->getSize(); + TENSOR reshape_shape = ctx.add(new RamTensor()); + *(reduce_dim.lock()->write(0, 0)) = 0; ctx.push(new ReshapeOp(), {input, reshape_shape}, {reshape_out}); @@ -30,14 +29,12 @@ void tensorQuantize(Context& ctx, TENSOR input, TENSOR output, ctx.push(new MinOp(), {reshape_out, reduce_dim}, {min_out}); ctx.push(new MaxOp(), {reshape_out, reduce_dim}, {max_out}); - output.lock()->resize(input.lock()->getShape()); - ctx.push(new QuantizeV2Op(), {reshape_out, min_out, max_out}, {output, out_min, out_max}); } void ReluLayer(Context& ctx, TENSOR x, TENSOR x_min, TENSOR x_max, TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR b, - TENSOR output, TENSOR output_min, TENSOR output_max) { + TENSOR z_output) { //quantized matmul @@ -65,15 +62,14 @@ void ReluLayer(Context& ctx, TENSOR x, TENSOR x_min, TENSOR x_max, TENSOR deqnt_out = ctx.add(new RamTensor()); ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); - TENSOR z_output = ctx.add(new RamTensor()); ctx.push(new AddOp(), {deqnt_out, b}, {z_output}); - TENSOR z_qnt_output = ctx.add(new RamTensor()); +/* TENSOR z_qnt_output = ctx.add(new RamTensor()); TENSOR z_min = ctx.add(new RamTensor({1})); TENSOR z_max = 
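// Note: tensorQuantize now hands ReshapeOp an unsized shape tensor and relies
// on the auto-fill added to reshape() in ArrayOps.hpp above, which writes {-1}
// (or {size, -1}) and then resolves the -1 through the existing infer_index
// pass. A minimal sketch of that inference over plain vectors:
#include <cstddef>
#include <cstdint>
#include <vector>
static std::vector<int> inferShape(std::vector<int> want, uint32_t total) {
  int infer_index = -1;
  uint32_t known = 1;
  for (size_t i = 0; i < want.size(); ++i) {
    if (want[i] == -1) infer_index = static_cast<int>(i);  // at most one -1
    else known *= static_cast<uint32_t>(want[i]);
  }
  if (infer_index >= 0) want[infer_index] = static_cast<int>(total / known);
  return want;  // e.g. inferShape({784, -1}, 784) yields {784, 1}
}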
ctx.add(new RamTensor({1})); tensorQuantize(ctx, z_output, z_qnt_output, z_min, z_max); - ctx.push(new ReluOp(), {z_qnt_output, z_min, z_max}, {output, output_min, output_max}); + ctx.push(new ReluOp(), {z_qnt_output, z_min, z_max}, {output, output_min, output_max});*/ } void PredLayer(Context &ctx, TENSOR input, TENSOR input_min, @@ -94,18 +90,18 @@ void PredLayer(Context &ctx, TENSOR input, TENSOR input_min, {req_out_min, req_out_max}); //Requantize - TENSOR reqnt_out = ctx.add(new RamTensor(out_c.lock()->getShape())); + TENSOR reqnt_out = ctx.add(new RamTensor()); TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, {reqnt_out, reqnt_out_min, reqnt_out_max}); //dequantize - TENSOR deqnt_out = ctx.add(new RamTensor(out_c.lock()->getShape())); + TENSOR deqnt_out = ctx.add(new RamTensor()); ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); //Add - TENSOR output_z = ctx.add(new RamTensor(deqnt_out.lock()->getShape())); + TENSOR output_z = ctx.add(new RamTensor()); ctx.push(new AddOp(), {deqnt_out, bias}, {output_z}); //ArgMax @@ -144,15 +140,19 @@ void runPred(void) { int runMLP(string inputIdxFile) { TensorIdxImporter t_import; Context ctx; - TENSOR x = - ctx.add(t_import.float_import(inputIdxFile)); TENSOR x_quantized = ctx.add(new RamTensor()); TENSOR x_min = ctx.add(new RamTensor({1})); TENSOR x_max = ctx.add(new RamTensor({1})); + TENSOR x = ctx.add(t_import.float_import(inputIdxFile)); + S_TENSOR xs_quantized = x_quantized.lock(); + S_TENSOR xs_min = x_min.lock(); + S_TENSOR xs_max = x_max.lock(); tensorQuantize(ctx, x, x_quantized, x_min, x_max); ctx.eval(); + //relu layer first + TENSOR w = ctx.add(t_import.ubyte_import( "/fs/testData/deep_mlp/import-Variable_quint8_const_0.idx")); TENSOR w_min = @@ -164,11 +164,48 @@ int runMLP(string inputIdxFile) { TENSOR relu_output = ctx.add(new RamTensor()); TENSOR relu_min = ctx.add(new RamTensor({1})); TENSOR relu_max = ctx.add(new RamTensor({1})); + S_TENSOR relus_output = relu_output.lock(); + S_TENSOR relus_min = relu_min.lock(); + S_TENSOR relus_max = relu_max.lock(); + TENSOR z_output = ctx.add(new RamTensor()); + + ReluLayer(ctx, x_quantized, x_min, x_max, w, w_min, w_max, b, z_output); +/* TENSOR out_c = ctx.add(new RamTensor()); + + TENSOR matmul_out_min = ctx.add(new RamTensor({1})); + TENSOR matmul_out_max = ctx.add(new RamTensor({1})); + + ctx.push(new QntMatMulOp(), {x_quantized, x_min, x_max, w, w_min, w_max}, {out_c, matmul_out_min, matmul_out_max}); + + //Requantization_Range + TENSOR req_out_min = ctx.add(new RamTensor({1})); + TENSOR req_out_max = ctx.add(new RamTensor({1})); + ctx.push(new Requantization_RangeOp(), {out_c, matmul_out_min, matmul_out_max}, {req_out_min, req_out_max}); + + //Requantize + TENSOR reqnt_out = ctx.add(new RamTensor()); + TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); + TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); + ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, {reqnt_out, reqnt_out_min, reqnt_out_max}); + + + TENSOR deqnt_out = ctx.add(new RamTensor()); + ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); + + TENSOR z_output = ctx.add(new RamTensor()); + TENSOR sz_output = z_output.lock(); + ctx.push(new AddOp(), {deqnt_out, b}, {z_output});*/ + + TENSOR z_qnt_output = ctx.add(new RamTensor()); + TENSOR z_min = 
ctx.add(new RamTensor({1})); + TENSOR z_max = ctx.add(new RamTensor({1})); + tensorQuantize(ctx, z_output, z_qnt_output, z_min, z_max); + + ctx.push(new ReluOp(), {z_qnt_output, z_min, z_max}, {relu_output, relu_min, relu_max}); - ReluLayer(ctx, x_quantized, x_min, x_max, w, w_min, w_max, b, relu_output, - relu_min, relu_max); ctx.eval(); + //relu layer 2 TENSOR w2 = ctx.add(t_import.ubyte_import( "/fs/testData/deep_mlp/import-Variable_2_quint8_const_0.idx")); TENSOR w_min2 = ctx.add(t_import.float_import( @@ -180,9 +217,46 @@ int runMLP(string inputIdxFile) { TENSOR relu_min2 = ctx.add(new RamTensor({1})); TENSOR relu_max2 = ctx.add(new RamTensor({1})); + S_TENSOR relus_output2 = relu_output2.lock(); + S_TENSOR relus_min2 = relu_min2.lock(); + S_TENSOR relus_max2 = relu_max2.lock(); + TENSOR z_output2 = ctx.add(new RamTensor()); + ReluLayer(ctx, relu_output, relu_min, relu_max, w2, w_min2, w_max2, b2, z_output2); + +/* TENSOR out_c2 = ctx.add(new RamTensor()); + + TENSOR matmul_out_min2 = ctx.add(new RamTensor({1})); + TENSOR matmul_out_max2 = ctx.add(new RamTensor({1})); + + ctx.push(new QntMatMulOp(), {relu_output, relu_min, relu_max, w2, w_min2, w_max2}, {out_c2, matmul_out_min2, matmul_out_max2}); + + //Requantization_Range + TENSOR req_out_min2 = ctx.add(new RamTensor({1})); + TENSOR req_out_max2 = ctx.add(new RamTensor({1})); + ctx.push(new Requantization_RangeOp(), {out_c2, matmul_out_min2, matmul_out_max2}, {req_out_min2, req_out_max2}); + + //Requantize + TENSOR reqnt_out2 = ctx.add(new RamTensor()); + TENSOR reqnt_out_min2 = ctx.add(new RamTensor({1})); + TENSOR reqnt_out_max2 = ctx.add(new RamTensor({1})); + ctx.push(new RequantizeOp(), {out_c2, matmul_out_min2, matmul_out_max2, req_out_min2, req_out_max2}, {reqnt_out2, reqnt_out_min2, reqnt_out_max2}); + + + TENSOR deqnt_out2 = ctx.add(new RamTensor()); + ctx.push(new DequantizeOp(), {reqnt_out2, reqnt_out_min2, reqnt_out_max2}, {deqnt_out2}); + + TENSOR z_output2 = ctx.add(new RamTensor()); + ctx.push(new AddOp(), {deqnt_out2, b2}, {z_output2});*/ + + TENSOR z_qnt_output2 = ctx.add(new RamTensor()); + TENSOR z_min2 = ctx.add(new RamTensor({1})); + TENSOR z_max2 = ctx.add(new RamTensor({1})); + tensorQuantize(ctx, z_output2, z_qnt_output2, z_min2, z_max2); + + ctx.push(new ReluOp(), {z_qnt_output2, z_min2, z_max2}, {relu_output2, relu_min2, relu_max2}); - ReluLayer(ctx, relu_output, relu_min, relu_max, w, w_min, w_max, b, relu_output2, - relu_min2, relu_max2); + //ReluLayer(ctx, relu_output, relu_min, relu_max, w, w_min, w_max, b, relu_output2, + // relu_min2, relu_max2); ctx.eval(); TENSOR w3 = ctx.add(t_import.ubyte_import( From 5174ababb25c93b36b5961a200ea2316d21d3dcd Mon Sep 17 00:00:00 2001 From: kazami Date: Tue, 14 Nov 2017 17:30:03 +0800 Subject: [PATCH 61/80] 1. remove comment for deep_mnist 2. 
remove debug code for math test --- MathTests.hpp | 2 -- deep_mnist_mlp.hpp | 86 ---------------------------------------------- 2 files changed, 88 deletions(-) diff --git a/MathTests.hpp b/MathTests.hpp index af65cb85..f4dcf057 100644 --- a/MathTests.hpp +++ b/MathTests.hpp @@ -236,7 +236,6 @@ class MathOpsTest : public Test { timer_stop(); double result = meanPercentErr(ref_val.get(), out_val.get()); - std::cout << result << std::endl; passed(result < 0.0001); //passed(result == 0); } @@ -268,7 +267,6 @@ class MathOpsTest : public Test { timer_stop(); double result = meanPercentErr(ref_vxx.get(), out_vxx.get()); - std::cout << result << std::endl; passed(result < 0.0001); //passed(result == 0); } diff --git a/deep_mnist_mlp.hpp b/deep_mnist_mlp.hpp index 97e1f0eb..d1c7fbe4 100644 --- a/deep_mnist_mlp.hpp +++ b/deep_mnist_mlp.hpp @@ -64,12 +64,6 @@ void ReluLayer(Context& ctx, TENSOR x, TENSOR x_min, TENSOR x_max, ctx.push(new AddOp(), {deqnt_out, b}, {z_output}); -/* TENSOR z_qnt_output = ctx.add(new RamTensor()); - TENSOR z_min = ctx.add(new RamTensor({1})); - TENSOR z_max = ctx.add(new RamTensor({1})); - tensorQuantize(ctx, z_output, z_qnt_output, z_min, z_max); - - ctx.push(new ReluOp(), {z_qnt_output, z_min, z_max}, {output, output_min, output_max});*/ } void PredLayer(Context &ctx, TENSOR input, TENSOR input_min, @@ -108,35 +102,6 @@ void PredLayer(Context &ctx, TENSOR input, TENSOR input_min, ctx.push(new ArgMaxOp(), {output_z, dim}, {output}); } -//Test code -/* -void runPred(void) { - TensorIdxImporter t_import; - Tensor x = t_import.ubyte_import( - "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Relu_1_eightbit_quantized_0.idx"); - Tensor x_min = t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Relu_1_eightbit_quantized_1.idx"); - Tensor x_max = t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Relu_1_eightbit_quantized_2.idx"); - Tensor ref_out = t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/y_pred/outputs/y_pred_0.idx"); - Tensor out(ref_out.getShape()); - - PredLayer(x, x_min, x_max, out); - Tensor out_float = TensorCast(out); - double result = Test::meanPercentErr(ref_out, out_float); - if (result < 0.0001) { - printf("PASSED %.8f\r\n\r\n", result); - } else { - printf("FAILED %.8f\r\n\r\n", result); - } -} -*/ - - int runMLP(string inputIdxFile) { TensorIdxImporter t_import; Context ctx; @@ -170,31 +135,6 @@ int runMLP(string inputIdxFile) { TENSOR z_output = ctx.add(new RamTensor()); ReluLayer(ctx, x_quantized, x_min, x_max, w, w_min, w_max, b, z_output); -/* TENSOR out_c = ctx.add(new RamTensor()); - - TENSOR matmul_out_min = ctx.add(new RamTensor({1})); - TENSOR matmul_out_max = ctx.add(new RamTensor({1})); - - ctx.push(new QntMatMulOp(), {x_quantized, x_min, x_max, w, w_min, w_max}, {out_c, matmul_out_min, matmul_out_max}); - - //Requantization_Range - TENSOR req_out_min = ctx.add(new RamTensor({1})); - TENSOR req_out_max = ctx.add(new RamTensor({1})); - ctx.push(new Requantization_RangeOp(), {out_c, matmul_out_min, matmul_out_max}, {req_out_min, req_out_max}); - - //Requantize - TENSOR reqnt_out = ctx.add(new RamTensor()); - TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); - TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); - ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, {reqnt_out, reqnt_out_min, reqnt_out_max}); - - - TENSOR deqnt_out = 
ctx.add(new RamTensor()); - ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); - - TENSOR z_output = ctx.add(new RamTensor()); - TENSOR sz_output = z_output.lock(); - ctx.push(new AddOp(), {deqnt_out, b}, {z_output});*/ TENSOR z_qnt_output = ctx.add(new RamTensor()); TENSOR z_min = ctx.add(new RamTensor({1})); @@ -223,30 +163,6 @@ int runMLP(string inputIdxFile) { TENSOR z_output2 = ctx.add(new RamTensor()); ReluLayer(ctx, relu_output, relu_min, relu_max, w2, w_min2, w_max2, b2, z_output2); -/* TENSOR out_c2 = ctx.add(new RamTensor()); - - TENSOR matmul_out_min2 = ctx.add(new RamTensor({1})); - TENSOR matmul_out_max2 = ctx.add(new RamTensor({1})); - - ctx.push(new QntMatMulOp(), {relu_output, relu_min, relu_max, w2, w_min2, w_max2}, {out_c2, matmul_out_min2, matmul_out_max2}); - - //Requantization_Range - TENSOR req_out_min2 = ctx.add(new RamTensor({1})); - TENSOR req_out_max2 = ctx.add(new RamTensor({1})); - ctx.push(new Requantization_RangeOp(), {out_c2, matmul_out_min2, matmul_out_max2}, {req_out_min2, req_out_max2}); - - //Requantize - TENSOR reqnt_out2 = ctx.add(new RamTensor()); - TENSOR reqnt_out_min2 = ctx.add(new RamTensor({1})); - TENSOR reqnt_out_max2 = ctx.add(new RamTensor({1})); - ctx.push(new RequantizeOp(), {out_c2, matmul_out_min2, matmul_out_max2, req_out_min2, req_out_max2}, {reqnt_out2, reqnt_out_min2, reqnt_out_max2}); - - - TENSOR deqnt_out2 = ctx.add(new RamTensor()); - ctx.push(new DequantizeOp(), {reqnt_out2, reqnt_out_min2, reqnt_out_max2}, {deqnt_out2}); - - TENSOR z_output2 = ctx.add(new RamTensor()); - ctx.push(new AddOp(), {deqnt_out2, b2}, {z_output2});*/ TENSOR z_qnt_output2 = ctx.add(new RamTensor()); TENSOR z_min2 = ctx.add(new RamTensor({1})); @@ -255,8 +171,6 @@ int runMLP(string inputIdxFile) { ctx.push(new ReluOp(), {z_qnt_output2, z_min2, z_max2}, {relu_output2, relu_min2, relu_max2}); - //ReluLayer(ctx, relu_output, relu_min, relu_max, w, w_min, w_max, b, relu_output2, - // relu_min2, relu_max2); ctx.eval(); TENSOR w3 = ctx.add(t_import.ubyte_import( From b4a78234e475bafebbaa0cf2cd8580832e8fbfa9 Mon Sep 17 00:00:00 2001 From: Michael Bartling Date: Fri, 17 Nov 2017 08:24:36 -0600 Subject: [PATCH 62/80] Refactor non template functions to cpp files --- context.cpp | 135 +++++++++++++++++++++++++++ context.hpp | 134 +-------------------------- deep_mnist_mlp.cpp | 201 +++++++++++++++++++++++++++++++++++++++++ deep_mnist_mlp.hpp | 198 ++-------------------------------------- quantization_utils.cpp | 44 +++++++++ quantization_utils.hpp | 41 +-------- tensor.hpp | 6 +- tensorIdxImporter.cpp | 55 +++++++++++ tensorIdxImporter.hpp | 53 ----------- test.cpp | 15 +++ test.hpp | 14 +-- uTensorBase.cpp | 17 ++++ uTensorBase.hpp | 15 --- uTensor_util.cpp | 53 +++++++++++ uTensor_util.hpp | 54 +---------- 15 files changed, 539 insertions(+), 496 deletions(-) create mode 100644 context.cpp create mode 100644 deep_mnist_mlp.cpp create mode 100644 quantization_utils.cpp create mode 100644 tensorIdxImporter.cpp create mode 100644 test.cpp create mode 100644 uTensorBase.cpp create mode 100644 uTensor_util.cpp diff --git a/context.cpp b/context.cpp new file mode 100644 index 00000000..357c4fe7 --- /dev/null +++ b/context.cpp @@ -0,0 +1,135 @@ +#include "context.hpp" + +TENSOR Context::add(Tensor* t, uint8_t init_count) { + if(rTable.find(t) != rTable.end()) { + ERR_EXIT("tensor pointer address already exist in rTable"); + } + + S_TENSOR _sptr(t); + + Ref_Record record; + + if(init_count != 0) { + record.count = init_count; + 
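// A non-zero init_count pins the reference count: allow_incr is cleared just
// below, so later push() calls do not bump it, and eval() deletes the tensor
// once that many consuming ops have run. With the default of 0, each push()
// naming the tensor as an input increments the count instead. For example
// (OpA/OpB are placeholders for any two ops consuming t):
//
//   TENSOR t = ctx.add(new RamTensor({1}), 2);  // freed after two uses
//   ctx.push(new OpA(), {t}, {u});              // count stays 2
//   ctx.push(new OpB(), {t}, {v});
//   ctx.eval();  // after OpA count 2->1, after OpB 1->0 -> delTensor(t)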
record.allow_incr = false; + } + record.sptr = _sptr; + + rTable[t] = record; + + TENSOR wptr = _sptr; + + return wptr; +} + + +void Context::push(Operator *op, TList &_inputs, TList &_outputs) { + //error checking in the Op class + op->setInputs(_inputs); + op->setOutputs(_outputs); + op_list.push_back(op); + incrTListRef(_inputs); + +} + +void Context::push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs) { + TList inputs; + TList outputs; + + for(auto i:_inputs) { + inputs.push_back(i); + } + + for(auto o:_outputs) { + outputs.push_back(o); + } + + push(op, inputs, outputs); +} + +void Context::incrTListRef(const TList &t_list) { + for(auto t:t_list) { + Tensor* ptr = t.lock().get(); + if(rTable.find(ptr) == rTable.end()) { + ERR_EXIT("tensor not registered"); + } + + Ref_Record record = rTable[ptr]; + if(record.allow_incr) { + record.count++; + rTable[ptr] = record; + } + + //if an initial ref value is supplied to the tensor at compile time + //then this function does nothing + //otherwise, it increment the ref count of the tensor + //count is init to 0 by the record constructor + } +} + +void Context::initTensors(const S_TList &t_list) { + for(auto t:t_list) { + t->inFocus(); + } +} + +void Context::deinitTensors(const S_TList &t_list) { + for(auto t:t_list) { + t->deFocus(); + } +} + +void Context::delTensor(Tensor* t) { + Ref_Record record = rTable[t]; + record.sptr.reset(); + rTable.erase(t); +} + +void Context::dcrListRef(S_TList t_list) { + for(auto t:t_list) { + if(dcrRef(t.get()) < 1) { + delTensor(t.get()); + } + } +} + +uint8_t Context::dcrRef(Tensor* t) { + if(!isTracked(t)) { + ERR_EXIT("Tensor not registered"); + } + + Ref_Record record = rTable[t]; + if(record.count > 0) record.count -= 1; + rTable[t] = record; + + return record.count; +} + +bool Context::isTracked(Tensor* t) { + return (rTable.find(t) != rTable.end()); +} + +int Context::eval(void) { + //unref2nullTensors(); + + for(auto op:op_list) { + initTensors(op->getInputs()); + initTensors(op->getOutputs()); + + op->inFocus(); + op->compute(); + op->deFocus(); + + deinitTensors(op->getInputs()); + deinitTensors(op->getOutputs()); + + dcrListRef(op->getInputs()); + + delete op; + + } + + op_list.clear(); + + return 0; +} \ No newline at end of file diff --git a/context.hpp b/context.hpp index dabf2258..ce715a9a 100644 --- a/context.hpp +++ b/context.hpp @@ -23,7 +23,7 @@ class Ref_Record { class Context : public uTensor { protected: - vector op_list; + std::vector op_list; bool del_onsight; std::unordered_map rTable; //all tensors alive //kill all unused if malloc failed? 
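// For orientation, a minimal Operator that fits the eval() loop above: the
// constructor declares its arity, and compute() reads inputs[0] and fills
// outputs[0]. ScaleOp is an illustrative name, not an op in this tree; it
// follows the n_inputs/n_outputs convention seen in ArrayOps.hpp and assumes
// the base class stores its I/O as the S_TList that getInputs()/getOutputs()
// return:
class ScaleOp : public Operator {
 public:
  ScaleOp(float s) : scale(s) { n_inputs = 1; n_outputs = 1; }
  virtual void compute() override {
    S_TENSOR in = inputs[0];
    S_TENSOR out = outputs[0];
    if (out->getSize() == 0) out->resize(in->getShape());  // lazy alloc
    const float* src = in->read(0, 0);
    float* dst = out->write(0, 0);
    for (uint32_t i = 0; i < in->getSize(); i++) dst[i] = src[i] * scale;
  }
 private:
  float scale;
};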
@@ -53,138 +53,6 @@ class Context : public uTensor { } }; -TENSOR Context::add(Tensor* t, uint8_t init_count) { - if(rTable.find(t) != rTable.end()) { - ERR_EXIT("tensor pointer address already exist in rTable"); - } - - S_TENSOR _sptr(t); - - Ref_Record record; - - if(init_count != 0) { - record.count = init_count; - record.allow_incr = false; - } - record.sptr = _sptr; - - rTable[t] = record; - - TENSOR wptr = _sptr; - - return wptr; -} - - -void Context::push(Operator *op, TList &_inputs, TList &_outputs) { - //error checking in the Op class - op->setInputs(_inputs); - op->setOutputs(_outputs); - op_list.push_back(op); - incrTListRef(_inputs); - -} - -void Context::push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs) { - TList inputs; - TList outputs; - - for(auto i:_inputs) { - inputs.push_back(i); - } - - for(auto o:_outputs) { - outputs.push_back(o); - } - - push(op, inputs, outputs); -} - -void Context::incrTListRef(const TList &t_list) { - for(auto t:t_list) { - Tensor* ptr = t.lock().get(); - if(rTable.find(ptr) == rTable.end()) { - ERR_EXIT("tensor not registered"); - } - - Ref_Record record = rTable[ptr]; - if(record.allow_incr) { - record.count++; - rTable[ptr] = record; - } - - //if an initial ref value is supplied to the tensor at compile time - //then this function does nothing - //otherwise, it increment the ref count of the tensor - //count is init to 0 by the record constructor - } -} - -void Context::initTensors(const S_TList &t_list) { - for(auto t:t_list) { - t->inFocus(); - } -} - -void Context::deinitTensors(const S_TList &t_list) { - for(auto t:t_list) { - t->deFocus(); - } -} - -void Context::delTensor(Tensor* t) { - Ref_Record record = rTable[t]; - record.sptr.reset(); - rTable.erase(t); -} - -void Context::dcrListRef(S_TList t_list) { - for(auto t:t_list) { - if(dcrRef(t.get()) < 1) { - delTensor(t.get()); - } - } -} - -uint8_t Context::dcrRef(Tensor* t) { - if(!isTracked(t)) { - ERR_EXIT("Tensor not registered"); - } - - Ref_Record record = rTable[t]; - if(record.count > 0) record.count -= 1; - rTable[t] = record; - - return record.count; -} - -bool Context::isTracked(Tensor* t) { - return (rTable.find(t) != rTable.end()); -} - -int Context::eval(void) { - //unref2nullTensors(); - - for(auto op:op_list) { - initTensors(op->getInputs()); - initTensors(op->getOutputs()); - - op->inFocus(); - op->compute(); - op->deFocus(); - - deinitTensors(op->getInputs()); - deinitTensors(op->getOutputs()); - - dcrListRef(op->getInputs()); - - delete op; - - } - - op_list.clear(); - return 0; -} #endif // UTENSOR_CTX_H diff --git a/deep_mnist_mlp.cpp b/deep_mnist_mlp.cpp new file mode 100644 index 00000000..19f262e9 --- /dev/null +++ b/deep_mnist_mlp.cpp @@ -0,0 +1,201 @@ +#include "deep_mnist_mlp.hpp" + +void tensorQuantize(Context& ctx, TENSOR input, TENSOR output, + TENSOR out_min, TENSOR out_max) { + + //reshape + TENSOR reduce_dim = ctx.add(new RamTensor({1})); + TENSOR reshape_out = ctx.add(new RamTensor()); + + TENSOR reshape_shape = ctx.add(new RamTensor()); + + *(reduce_dim.lock()->write(0, 0)) = 0; + ctx.push(new ReshapeOp(), {input, reshape_shape}, {reshape_out}); + + + //Min and Max of (reshaped) input + TENSOR min_out = ctx.add(new RamTensor({1})); + TENSOR max_out = ctx.add(new RamTensor({1})); + ctx.push(new MinOp(), {reshape_out, reduce_dim}, {min_out}); + ctx.push(new MaxOp(), {reshape_out, reduce_dim}, {max_out}); + + ctx.push(new QuantizeV2Op(), {reshape_out, min_out, max_out}, {output, out_min, out_max}); +} + +void 
ReluLayer(Context& ctx, TENSOR x, TENSOR x_min, TENSOR x_max, + TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR b, + TENSOR z_output) { + + //quantized matmul + + TENSOR out_c = ctx.add(new RamTensor()); + + TENSOR matmul_out_min = ctx.add(new RamTensor({1})); + TENSOR matmul_out_max = ctx.add(new RamTensor({1})); + + ctx.push(new QntMatMulOp(), {x, x_min, x_max, w, w_min, w_max}, {out_c, matmul_out_min, matmul_out_max}); + + //Requantization_Range + TENSOR req_out_min = ctx.add(new RamTensor({1})); + TENSOR req_out_max = ctx.add(new RamTensor({1})); + ctx.push(new Requantization_RangeOp(), {out_c, matmul_out_min, matmul_out_max}, {req_out_min, req_out_max}); + + //Requantize + TENSOR reqnt_out = ctx.add(new RamTensor()); + TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); + TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); + ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, {reqnt_out, reqnt_out_min, reqnt_out_max}); + + Shape out_shape = out_c.lock()->getShape(); + //clean up + + TENSOR deqnt_out = ctx.add(new RamTensor()); + ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); + + ctx.push(new AddOp(), {deqnt_out, b}, {z_output}); + +} + +void PredLayer(Context &ctx, TENSOR input, TENSOR input_min, + TENSOR input_max, TENSOR output, TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR bias, TENSOR dim) { + + TENSOR out_c = ctx.add(new RamTensor()); + TENSOR matmul_out_min = ctx.add(new RamTensor({1})); + TENSOR matmul_out_max = ctx.add(new RamTensor({1})); + + //MatMul + ctx.push(new QntMatMulOp(), {input, input_min, input_max, w, w_min, w_max}, + {out_c, matmul_out_min, matmul_out_max}); + + //Requantization_Range + TENSOR req_out_min = ctx.add(new RamTensor({1})); + TENSOR req_out_max = ctx.add(new RamTensor({1})); + ctx.push(new Requantization_RangeOp(), {out_c, matmul_out_min, matmul_out_max}, + {req_out_min, req_out_max}); + + //Requantize + TENSOR reqnt_out = ctx.add(new RamTensor()); + TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); + TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); + ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, + {reqnt_out, reqnt_out_min, reqnt_out_max}); + + //dequantize + TENSOR deqnt_out = ctx.add(new RamTensor()); + ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); + + //Add + TENSOR output_z = ctx.add(new RamTensor()); + ctx.push(new AddOp(), {deqnt_out, bias}, {output_z}); + + //ArgMax + ctx.push(new ArgMaxOp(), {output_z, dim}, {output}); +} + +int runMLP(string inputIdxFile) { + TensorIdxImporter t_import; + Context ctx; + TENSOR x_quantized = ctx.add(new RamTensor()); + TENSOR x_min = ctx.add(new RamTensor({1})); + TENSOR x_max = ctx.add(new RamTensor({1})); + TENSOR x = ctx.add(t_import.float_import(inputIdxFile)); + S_TENSOR xs_quantized = x_quantized.lock(); + S_TENSOR xs_min = x_min.lock(); + S_TENSOR xs_max = x_max.lock(); + + tensorQuantize(ctx, x, x_quantized, x_min, x_max); + ctx.eval(); + + //relu layer first + + TENSOR w = ctx.add(t_import.ubyte_import( + "/fs/testData/deep_mlp/import-Variable_quint8_const_0.idx")); + TENSOR w_min = + ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_min_0.idx")); + TENSOR w_max = + ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_max_0.idx")); + TENSOR b = + ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_1_0.idx")); + TENSOR relu_output = ctx.add(new RamTensor()); + TENSOR relu_min = 
ctx.add(new RamTensor({1})); + TENSOR relu_max = ctx.add(new RamTensor({1})); + S_TENSOR relus_output = relu_output.lock(); + S_TENSOR relus_min = relu_min.lock(); + S_TENSOR relus_max = relu_max.lock(); + TENSOR z_output = ctx.add(new RamTensor()); + + ReluLayer(ctx, x_quantized, x_min, x_max, w, w_min, w_max, b, z_output); + + TENSOR z_qnt_output = ctx.add(new RamTensor()); + TENSOR z_min = ctx.add(new RamTensor({1})); + TENSOR z_max = ctx.add(new RamTensor({1})); + tensorQuantize(ctx, z_output, z_qnt_output, z_min, z_max); + + ctx.push(new ReluOp(), {z_qnt_output, z_min, z_max}, {relu_output, relu_min, relu_max}); + + ctx.eval(); + + //relu layer 2 + TENSOR w2 = ctx.add(t_import.ubyte_import( + "/fs/testData/deep_mlp/import-Variable_2_quint8_const_0.idx")); + TENSOR w_min2 = ctx.add(t_import.float_import( + "/fs/testData/deep_mlp/import-Variable_2_min_0.idx")); + TENSOR w_max2 = ctx.add(t_import.float_import( + "/fs/testData/deep_mlp/import-Variable_2_max_0.idx")); + TENSOR b2 = ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_3_0.idx")); + TENSOR relu_output2 = ctx.add(new RamTensor()); + TENSOR relu_min2 = ctx.add(new RamTensor({1})); + TENSOR relu_max2 = ctx.add(new RamTensor({1})); + + S_TENSOR relus_output2 = relu_output2.lock(); + S_TENSOR relus_min2 = relu_min2.lock(); + S_TENSOR relus_max2 = relu_max2.lock(); + TENSOR z_output2 = ctx.add(new RamTensor()); + ReluLayer(ctx, relu_output, relu_min, relu_max, w2, w_min2, w_max2, b2, z_output2); + + + TENSOR z_qnt_output2 = ctx.add(new RamTensor()); + TENSOR z_min2 = ctx.add(new RamTensor({1})); + TENSOR z_max2 = ctx.add(new RamTensor({1})); + tensorQuantize(ctx, z_output2, z_qnt_output2, z_min2, z_max2); + + ctx.push(new ReluOp(), {z_qnt_output2, z_min2, z_max2}, {relu_output2, relu_min2, relu_max2}); + + ctx.eval(); + + TENSOR w3 = ctx.add(t_import.ubyte_import( + "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" + "inputs/Variable_4_quint8_const_0.idx")); + TENSOR w2_min = ctx.add(t_import.float_import( + "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" + "inputs/Variable_4_min_0.idx")); + TENSOR w2_max = ctx.add(t_import.float_import( + "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" + "inputs/Variable_4_max_0.idx")); + TENSOR bias2 = ctx.add(t_import.float_import( + "/fs/testData/deep_mlp/runPredLayer/add_2/inputs/Variable_5_0.idx")); + TENSOR dim = ctx.add(t_import.int_import( + "/fs/testData/deep_mlp/runPredLayer/y_pred/inputs/" + "y_pred-dimension_0.idx")); + + TENSOR pred = ctx.add(new RamTensor()); + PredLayer(ctx, relu_output2, relu_min2, relu_max2, pred, w3, w2_min, w2_max, bias2, dim); + S_TENSOR pred_val = pred.lock(); + ctx.eval(); + + + Tensor* ref_out = t_import.float_import( + "/fs/testData/deep_mlp/runPredLayer/y_pred/outputs/y_pred_0.idx"); + Tensor* ref_pred = TensorCast(ref_out); + + double result = Test::meanPercentErr(ref_pred, pred_val.get()); + + if (result < 0.0001) { + printf("PASSED %.8f\r\n\r\n", result); + } else { + printf("FAILED %.8f\r\n\r\n", result); + } + + return *(pred.lock()->read(0, 0)); + // output layer +} diff --git a/deep_mnist_mlp.hpp b/deep_mnist_mlp.hpp index d1c7fbe4..dd649cd9 100644 --- a/deep_mnist_mlp.hpp +++ b/deep_mnist_mlp.hpp @@ -1,3 +1,5 @@ +#ifndef __DEEP_MNIST_MLP_HPP__ +#define __DEEP_MNIST_MLP_HPP__ #include "mbed.h" #include "tensor.hpp" #include "test.hpp" @@ -11,201 +13,15 @@ #include "context.hpp" void tensorQuantize(Context& ctx, TENSOR input, TENSOR output, - TENSOR out_min, 
TENSOR out_max) { - - //reshape - TENSOR reduce_dim = ctx.add(new RamTensor({1})); - TENSOR reshape_out = ctx.add(new RamTensor()); - - TENSOR reshape_shape = ctx.add(new RamTensor()); - - *(reduce_dim.lock()->write(0, 0)) = 0; - ctx.push(new ReshapeOp(), {input, reshape_shape}, {reshape_out}); - - - //Min and Max of (reshaped) input - TENSOR min_out = ctx.add(new RamTensor({1})); - TENSOR max_out = ctx.add(new RamTensor({1})); - ctx.push(new MinOp(), {reshape_out, reduce_dim}, {min_out}); - ctx.push(new MaxOp(), {reshape_out, reduce_dim}, {max_out}); - - ctx.push(new QuantizeV2Op(), {reshape_out, min_out, max_out}, {output, out_min, out_max}); -} + TENSOR out_min, TENSOR out_max); void ReluLayer(Context& ctx, TENSOR x, TENSOR x_min, TENSOR x_max, TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR b, - TENSOR z_output) { - - //quantized matmul - - TENSOR out_c = ctx.add(new RamTensor()); - - TENSOR matmul_out_min = ctx.add(new RamTensor({1})); - TENSOR matmul_out_max = ctx.add(new RamTensor({1})); - - ctx.push(new QntMatMulOp(), {x, x_min, x_max, w, w_min, w_max}, {out_c, matmul_out_min, matmul_out_max}); - - //Requantization_Range - TENSOR req_out_min = ctx.add(new RamTensor({1})); - TENSOR req_out_max = ctx.add(new RamTensor({1})); - ctx.push(new Requantization_RangeOp(), {out_c, matmul_out_min, matmul_out_max}, {req_out_min, req_out_max}); - - //Requantize - TENSOR reqnt_out = ctx.add(new RamTensor()); - TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); - TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); - ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, {reqnt_out, reqnt_out_min, reqnt_out_max}); - - Shape out_shape = out_c.lock()->getShape(); - //clean up - - TENSOR deqnt_out = ctx.add(new RamTensor()); - ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); - - ctx.push(new AddOp(), {deqnt_out, b}, {z_output}); - -} + TENSOR z_output); void PredLayer(Context &ctx, TENSOR input, TENSOR input_min, - TENSOR input_max, TENSOR output, TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR bias, TENSOR dim) { - - TENSOR out_c = ctx.add(new RamTensor()); - TENSOR matmul_out_min = ctx.add(new RamTensor({1})); - TENSOR matmul_out_max = ctx.add(new RamTensor({1})); - - //MatMul - ctx.push(new QntMatMulOp(), {input, input_min, input_max, w, w_min, w_max}, - {out_c, matmul_out_min, matmul_out_max}); - - //Requantization_Range - TENSOR req_out_min = ctx.add(new RamTensor({1})); - TENSOR req_out_max = ctx.add(new RamTensor({1})); - ctx.push(new Requantization_RangeOp(), {out_c, matmul_out_min, matmul_out_max}, - {req_out_min, req_out_max}); - - //Requantize - TENSOR reqnt_out = ctx.add(new RamTensor()); - TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); - TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); - ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, - {reqnt_out, reqnt_out_min, reqnt_out_max}); - - //dequantize - TENSOR deqnt_out = ctx.add(new RamTensor()); - ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); - - //Add - TENSOR output_z = ctx.add(new RamTensor()); - ctx.push(new AddOp(), {deqnt_out, bias}, {output_z}); - - //ArgMax - ctx.push(new ArgMaxOp(), {output_z, dim}, {output}); -} - -int runMLP(string inputIdxFile) { - TensorIdxImporter t_import; - Context ctx; - TENSOR x_quantized = ctx.add(new RamTensor()); - TENSOR x_min = ctx.add(new RamTensor({1})); - TENSOR x_max = ctx.add(new RamTensor({1})); - TENSOR x = 
ctx.add(t_import.float_import(inputIdxFile)); - S_TENSOR xs_quantized = x_quantized.lock(); - S_TENSOR xs_min = x_min.lock(); - S_TENSOR xs_max = x_max.lock(); - - tensorQuantize(ctx, x, x_quantized, x_min, x_max); - ctx.eval(); - - //relu layer first - - TENSOR w = ctx.add(t_import.ubyte_import( - "/fs/testData/deep_mlp/import-Variable_quint8_const_0.idx")); - TENSOR w_min = - ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_min_0.idx")); - TENSOR w_max = - ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_max_0.idx")); - TENSOR b = - ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_1_0.idx")); - TENSOR relu_output = ctx.add(new RamTensor()); - TENSOR relu_min = ctx.add(new RamTensor({1})); - TENSOR relu_max = ctx.add(new RamTensor({1})); - S_TENSOR relus_output = relu_output.lock(); - S_TENSOR relus_min = relu_min.lock(); - S_TENSOR relus_max = relu_max.lock(); - TENSOR z_output = ctx.add(new RamTensor()); - - ReluLayer(ctx, x_quantized, x_min, x_max, w, w_min, w_max, b, z_output); - - TENSOR z_qnt_output = ctx.add(new RamTensor()); - TENSOR z_min = ctx.add(new RamTensor({1})); - TENSOR z_max = ctx.add(new RamTensor({1})); - tensorQuantize(ctx, z_output, z_qnt_output, z_min, z_max); - - ctx.push(new ReluOp(), {z_qnt_output, z_min, z_max}, {relu_output, relu_min, relu_max}); - - ctx.eval(); - - //relu layer 2 - TENSOR w2 = ctx.add(t_import.ubyte_import( - "/fs/testData/deep_mlp/import-Variable_2_quint8_const_0.idx")); - TENSOR w_min2 = ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/import-Variable_2_min_0.idx")); - TENSOR w_max2 = ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/import-Variable_2_max_0.idx")); - TENSOR b2 = ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_3_0.idx")); - TENSOR relu_output2 = ctx.add(new RamTensor()); - TENSOR relu_min2 = ctx.add(new RamTensor({1})); - TENSOR relu_max2 = ctx.add(new RamTensor({1})); - - S_TENSOR relus_output2 = relu_output2.lock(); - S_TENSOR relus_min2 = relu_min2.lock(); - S_TENSOR relus_max2 = relu_max2.lock(); - TENSOR z_output2 = ctx.add(new RamTensor()); - ReluLayer(ctx, relu_output, relu_min, relu_max, w2, w_min2, w_max2, b2, z_output2); - - - TENSOR z_qnt_output2 = ctx.add(new RamTensor()); - TENSOR z_min2 = ctx.add(new RamTensor({1})); - TENSOR z_max2 = ctx.add(new RamTensor({1})); - tensorQuantize(ctx, z_output2, z_qnt_output2, z_min2, z_max2); - - ctx.push(new ReluOp(), {z_qnt_output2, z_min2, z_max2}, {relu_output2, relu_min2, relu_max2}); - - ctx.eval(); - - TENSOR w3 = ctx.add(t_import.ubyte_import( - "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_quint8_const_0.idx")); - TENSOR w2_min = ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_min_0.idx")); - TENSOR w2_max = ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_max_0.idx")); - TENSOR bias2 = ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/add_2/inputs/Variable_5_0.idx")); - TENSOR dim = ctx.add(t_import.int_import( - "/fs/testData/deep_mlp/runPredLayer/y_pred/inputs/" - "y_pred-dimension_0.idx")); - - TENSOR pred = ctx.add(new RamTensor()); - PredLayer(ctx, relu_output2, relu_min2, relu_max2, pred, w3, w2_min, w2_max, bias2, dim); - S_TENSOR pred_val = pred.lock(); - ctx.eval(); - - - Tensor* ref_out = t_import.float_import( - 
"/fs/testData/deep_mlp/runPredLayer/y_pred/outputs/y_pred_0.idx"); - Tensor* ref_pred = TensorCast(ref_out); + TENSOR input_max, TENSOR output, TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR bias, TENSOR dim); - double result = Test::meanPercentErr(ref_pred, pred_val.get()); - - if (result < 0.0001) { - printf("PASSED %.8f\r\n\r\n", result); - } else { - printf("FAILED %.8f\r\n\r\n", result); - } +int runMLP(string inputIdxFile); - return *(pred.lock()->read(0, 0)); - // output layer -} +#endif diff --git a/quantization_utils.cpp b/quantization_utils.cpp new file mode 100644 index 00000000..c5ca397f --- /dev/null +++ b/quantization_utils.cpp @@ -0,0 +1,44 @@ +#include "quantization_utils.hpp" + +void RequantizeManyInNewRangeReference(const int* input, int32_t count, + float min_input, float max_input, + float min_output, + float max_output, + unsigned char* output) { + // Initially we calculate all the constants we need once, before we go into + // the inner loop. If this is updated, also update the Eigen version. + const int fp_shift = 16; + const float input_range = max_input - min_input; + const float output_range = max_output - min_output; + const float recip_output_range = + output_range == 0.0 ? 0.0 : (255.0 / output_range); + const float input_rezero = (min_input + max_input) / 2.0; + const int64_t range_scale_fp = + output_range == 0.0 ? 0.0 + : static_cast(255.0 * (1 << fp_shift) * + input_range / output_range); + const int64_t input_offset_fp = + static_cast(input_rezero * recip_output_range * (1 << fp_shift)); + const int64_t output_offset_fp = + output_range == 0.0 + ? 0 + : static_cast((1 << fp_shift) * (min_output * 255.0) / + output_range); + const int64_t rounding_delta = 1 << (fp_shift - 1); + + // Inside this loop we just do minimal adds, multiplies, and shifts, in a way + // that could be easily adapted for a SIMD implementation. It should also be + // possible to perform all the calculations in 32-bit rather than 64, but + // that's not been implemented yet. + for (int32_t index = 0; index < count; ++index) { + const int64_t input_value = static_cast(input[index]); + const int64_t fp_value = + ((input_value * range_scale_fp) >> 32) + input_offset_fp; + const int64_t offset_intermediate = fp_value - output_offset_fp; + const int64_t round_intermediate = offset_intermediate + rounding_delta; + int64_t quantized_int64 = round_intermediate >> fp_shift; + quantized_int64 = std::max(quantized_int64, 0LL); + quantized_int64 = std::min(quantized_int64, 255LL); + output[index] = static_cast(static_cast(quantized_int64)); + } +} diff --git a/quantization_utils.hpp b/quantization_utils.hpp index 6352cb77..79252e11 100644 --- a/quantization_utils.hpp +++ b/quantization_utils.hpp @@ -79,48 +79,11 @@ inline void RequantizeManyInNewRange(Tensor* input, uint32_t count, } //quantization_utils.h : 239 -inline void RequantizeManyInNewRangeReference(const int* input, int32_t count, +void RequantizeManyInNewRangeReference(const int* input, int32_t count, float min_input, float max_input, float min_output, float max_output, - unsigned char* output) { - // Initially we calculate all the constants we need once, before we go into - // the inner loop. If this is updated, also update the Eigen version. - const int fp_shift = 16; - const float input_range = max_input - min_input; - const float output_range = max_output - min_output; - const float recip_output_range = - output_range == 0.0 ? 
0.0 : (255.0 / output_range); - const float input_rezero = (min_input + max_input) / 2.0; - const int64_t range_scale_fp = - output_range == 0.0 ? 0.0 - : static_cast(255.0 * (1 << fp_shift) * - input_range / output_range); - const int64_t input_offset_fp = - static_cast(input_rezero * recip_output_range * (1 << fp_shift)); - const int64_t output_offset_fp = - output_range == 0.0 - ? 0 - : static_cast((1 << fp_shift) * (min_output * 255.0) / - output_range); - const int64_t rounding_delta = 1 << (fp_shift - 1); - - // Inside this loop we just do minimal adds, multiplies, and shifts, in a way - // that could be easily adapted for a SIMD implementation. It should also be - // possible to perform all the calculations in 32-bit rather than 64, but - // that's not been implemented yet. - for (int32_t index = 0; index < count; ++index) { - const int64_t input_value = static_cast(input[index]); - const int64_t fp_value = - ((input_value * range_scale_fp) >> 32) + input_offset_fp; - const int64_t offset_intermediate = fp_value - output_offset_fp; - const int64_t round_intermediate = offset_intermediate + rounding_delta; - int64_t quantized_int64 = round_intermediate >> fp_shift; - quantized_int64 = std::max(quantized_int64, 0LL); - quantized_int64 = std::min(quantized_int64, 255LL); - output[index] = static_cast(static_cast(quantized_int64)); - } -} + unsigned char* output); template struct FloatToQuantizedStruct { diff --git a/tensor.hpp b/tensor.hpp index c22f2c19..91d99741 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -21,8 +21,8 @@ class Tensor; typedef std::shared_ptr S_TENSOR; typedef std::weak_ptr TENSOR; -typedef vector TList; -typedef vector S_TList; +typedef std::vector TList; +typedef std::vector S_TList; class uTensor { public: @@ -33,7 +33,7 @@ class uTensor { }; -uTensor::~uTensor() {} +inline uTensor::~uTensor() {} class TensorBase { public: std::vector shape; diff --git a/tensorIdxImporter.cpp b/tensorIdxImporter.cpp new file mode 100644 index 00000000..b74cb090 --- /dev/null +++ b/tensorIdxImporter.cpp @@ -0,0 +1,55 @@ +#include "tensorIdxImporter.hpp" + +uint8_t TensorIdxImporter::getIdxDTypeSize(IDX_DTYPE dtype) { + switch (dtype) { + case idx_ubyte: + return 1; + case idx_byte: + return 1; + case idx_short: + return 2; + case idx_int: + return 4; + case idx_float: + return 4; + case idx_double: + return 8; + } + + return 0; +} + +uint32_t TensorIdxImporter::getMagicNumber(unsigned char dtype, + unsigned char dim) { + uint32_t magic = 0; + + magic = (magic | dtype) << 8; + magic = magic | dim; + + return magic; +} + +HeaderMeta TensorIdxImporter::parseHeader(void) { + unsigned char* buf = (unsigned char*)malloc(sizeof(unsigned char) * 4); + + fread(buf, 1, 4, fp); + if (buf[0] != 0 || buf[0] != 0) { + printf("Error, header magic number invalid\r\n"); + } + + HeaderMeta header; + header.dataType = static_cast(buf[2]); + header.numDim = buf[3]; + + for (int i = 0; i < header.numDim; i++) { + fread(buf, 1, 4, fp); + uint32_t dimSize = ntoh32(*(uint32_t*)buf); + header.dim.push_back(dimSize); + } + + free(buf); + + header.dataPos = ftell(fp); + + return header; +} diff --git a/tensorIdxImporter.hpp b/tensorIdxImporter.hpp index 632454a9..3fcaf5cd 100644 --- a/tensorIdxImporter.hpp +++ b/tensorIdxImporter.hpp @@ -63,59 +63,6 @@ class TensorIdxImporter { // header = parseHeader(); // } -uint8_t TensorIdxImporter::getIdxDTypeSize(IDX_DTYPE dtype) { - switch (dtype) { - case idx_ubyte: - return 1; - case idx_byte: - return 1; - case idx_short: - return 2; - case idx_int: - return 4; - 
case idx_float: - return 4; - case idx_double: - return 8; - } - - return 0; -} - -uint32_t TensorIdxImporter::getMagicNumber(unsigned char dtype, - unsigned char dim) { - uint32_t magic = 0; - - magic = (magic | dtype) << 8; - magic = magic | dim; - - return magic; -} - -HeaderMeta TensorIdxImporter::parseHeader(void) { - unsigned char* buf = (unsigned char*)malloc(sizeof(unsigned char) * 4); - - fread(buf, 1, 4, fp); - if (buf[0] != 0 || buf[0] != 0) { - printf("Error, header magic number invalid\r\n"); - } - - HeaderMeta header; - header.dataType = static_cast(buf[2]); - header.numDim = buf[3]; - - for (int i = 0; i < header.numDim; i++) { - fread(buf, 1, 4, fp); - uint32_t dimSize = ntoh32(*(uint32_t*)buf); - header.dim.push_back(dimSize); - } - - free(buf); - - header.dataPos = ftell(fp); - - return header; -} template Tensor* TensorIdxImporter::loader(string& filename, IDX_DTYPE idx_type) { diff --git a/test.cpp b/test.cpp new file mode 100644 index 00000000..88fb56c1 --- /dev/null +++ b/test.cpp @@ -0,0 +1,15 @@ +#include "test.hpp" + +void printBits(size_t const size, void const* const ptr) { + unsigned char* b = (unsigned char*)ptr; + unsigned char byte; + int i, j; + + for (i = size - 1; i >= 0; i--) { + for (j = 7; j >= 0; j--) { + byte = (b[i] >> j) & 1; + printf("%d", byte); + } + } + puts(""); +} diff --git a/test.hpp b/test.hpp index 77c7b808..2630b823 100644 --- a/test.hpp +++ b/test.hpp @@ -191,18 +191,6 @@ class Test { }; // https://stackoverflow.com/questions/111928/is-there-a-printf-converter-to-print-in-binary-format -void printBits(size_t const size, void const* const ptr) { - unsigned char* b = (unsigned char*)ptr; - unsigned char byte; - int i, j; - - for (i = size - 1; i >= 0; i--) { - for (j = 7; j >= 0; j--) { - byte = (b[i] >> j) & 1; - printf("%d", byte); - } - } - puts(""); -} +void printBits(size_t const size, void const* const ptr); #endif diff --git a/uTensorBase.cpp b/uTensorBase.cpp new file mode 100644 index 00000000..5f1e3b6e --- /dev/null +++ b/uTensorBase.cpp @@ -0,0 +1,17 @@ +#include "uTensorBase.hpp" + +void Operator::setInputs(TList &_inputs) { + if(_inputs.size() != n_inputs) ERR_EXIT("Input Tensor list mismatched..."); + + for(uint8_t i=0; i < _inputs.size(); i++) { + inputs.push_back(_inputs[i].lock()); + } +} + +void Operator::setOutputs(TList &_outputs) { + if(_outputs.size() != n_outputs) ERR_EXIT("Input Tensor list mismatched..."); + + for(uint8_t i=0; i < _outputs.size(); i++) { + outputs.push_back(_outputs[i].lock()); + } +} diff --git a/uTensorBase.hpp b/uTensorBase.hpp index e8ea0bb1..8616aad4 100644 --- a/uTensorBase.hpp +++ b/uTensorBase.hpp @@ -31,20 +31,5 @@ class Operator : public uTensor { } }; -void Operator::setInputs(TList &_inputs) { - if(_inputs.size() != n_inputs) ERR_EXIT("Input Tensor list mismatched..."); - - for(uint8_t i=0; i < _inputs.size(); i++) { - inputs.push_back(_inputs[i].lock()); - } -} - -void Operator::setOutputs(TList &_outputs) { - if(_outputs.size() != n_outputs) ERR_EXIT("Input Tensor list mismatched..."); - - for(uint8_t i=0; i < _outputs.size(); i++) { - outputs.push_back(_outputs[i].lock()); - } -} #endif //UTENSOR_BASE_H diff --git a/uTensor_util.cpp b/uTensor_util.cpp new file mode 100644 index 00000000..2710f071 --- /dev/null +++ b/uTensor_util.cpp @@ -0,0 +1,53 @@ +#include "uTensor_util.hpp" +#include + +void return_error(int ret_val) { + if (ret_val) { + printf(" [**Failure**] %d\r\n", ret_val); + printf("Exiting...\r\n"); + fflush(stdout); + exit(-1); + } else { + printf(" [DONE]\r\n"); + } +} 
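// ---------------------------------------------------------------------------
// A minimal, self-contained sketch of the byte-order logic behind the
// helpers that follow: IDX test-data files store multi-byte fields
// big-endian, while the Cortex-M targets here read little-endian.
// ntoh32_sketch is a stand-in name (it is not the helper defined below),
// and the magic-number values are the standard IDX ones:
//
//   #include <cassert>
//   #include <cstdint>
//
//   static uint32_t ntoh32_sketch(uint32_t v) {
//     return (v >> 24) | ((v >> 8) & 0x0000ff00u) |
//            ((v << 8) & 0x00ff0000u) | (v << 24);  // reverse all four bytes
//   }
//
//   int main(void) {
//     // 0x00000803 is the IDX magic for a ubyte tensor with 3 dimensions;
//     // a little-endian load of those file bytes yields 0x03080000.
//     assert(ntoh32_sketch(0x03080000u) == 0x00000803u);
//     return 0;
//   }
// ---------------------------------------------------------------------------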
+void printVector(std::vector vec) { + printf("vector: \r\n"); + for (uint32_t i : vec) { + printf("%d ", (unsigned int)i); + } + + printf("\r\n"); +} +uint32_t htonl(uint32_t& val) { + const uint32_t mask = 0b11111111; + uint32_t ret = 0; + + ret |= val >> 24; + ret |= (val & (mask << 16)) >> 8; + ret |= (val & (mask << 8)) << 8; + ret |= val << 24; + + return ret; +} + +uint16_t ntoh16(uint16_t val) { + uint16_t ret = 0; + + ret |= val >> 8; + ret |= val << 8; + + return ret; + } + +uint32_t ntoh32(uint32_t val) { + const uint32_t mask = 0b11111111; + uint32_t ret = 0; + + ret |= val >> 24; + ret |= (val & (mask << 16)) >> 8; + ret |= (val & (mask << 8)) << 8; + ret |= val << 24; + + return ret; +} diff --git a/uTensor_util.hpp b/uTensor_util.hpp index dafccb6a..a9374eef 100644 --- a/uTensor_util.hpp +++ b/uTensor_util.hpp @@ -6,17 +6,8 @@ // #define MAX(A, B) ((A > B)? A:B) +void return_error(int ret_val); #if MBED_CONF_APP_DEBUG_MSG -void return_error(int ret_val) { - if (ret_val) { - printf(" [**Failure**] %d\r\n", ret_val); - printf("Exiting...\r\n"); - fflush(stdout); - exit(-1); - } else { - printf(" [DONE]\r\n"); - } -} // void errno_error(void* ret_val) { // if (ret_val == NULL) { @@ -62,48 +53,13 @@ void return_error(int ret_val) { typedef std::vector Shape; -void printVector(std::vector vec) { - printf("vector: \r\n"); - for (uint32_t i : vec) { - printf("%d ", (unsigned int)i); - } - - printf("\r\n"); -} +void printVector(std::vector vec); // little endian to big endian -uint32_t htonl(uint32_t& val) { - const uint32_t mask = 0b11111111; - uint32_t ret = 0; - - ret |= val >> 24; - ret |= (val & (mask << 16)) >> 8; - ret |= (val & (mask << 8)) << 8; - ret |= val << 24; - - return ret; -} +uint32_t htonl(uint32_t& val); // big endian to little endian -uint16_t ntoh16(uint16_t val) { - uint16_t ret = 0; - - ret |= val >> 8; - ret |= val << 8; - - return ret; -} - -uint32_t ntoh32(uint32_t val) { - const uint32_t mask = 0b11111111; - uint32_t ret = 0; - - ret |= val >> 24; - ret |= (val & (mask << 16)) >> 8; - ret |= (val & (mask << 8)) << 8; - ret |= val << 24; - - return ret; -} +uint16_t ntoh16(uint16_t val); +uint32_t ntoh32(uint32_t val); #endif From 0fe89c532cf21cb289128bb7d46ab9b8de969d6f Mon Sep 17 00:00:00 2001 From: Michael Bartling Date: Fri, 17 Nov 2017 08:25:31 -0600 Subject: [PATCH 63/80] Add vim to .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 12881705..bbcccbd5 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,6 @@ #Project generated test files TESTS/scripts/testData + +#Vim stuff +*.swp From 838dae12e92de8894d6677b0802d54dfae4db370 Mon Sep 17 00:00:00 2001 From: kazami Date: Sat, 18 Nov 2017 15:57:34 +0800 Subject: [PATCH 64/80] 1. tensor have the name to perform lookup 2. 
modify tensorIdxTest --- tensor.hpp | 18 ++++++++++++------ tensorIdxImporter.hpp | 26 +++++++++++++------------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/tensor.hpp b/tensor.hpp index 91d99741..20c61d96 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -19,17 +19,22 @@ class Tensor; +typedef std::string TName; +typedef std::vector TList; typedef std::shared_ptr S_TENSOR; -typedef std::weak_ptr TENSOR; -typedef std::vector TList; typedef std::vector S_TList; class uTensor { public: virtual void inFocus(){}; virtual void deFocus(){}; + virtual std::string getName() { return name; } + virtual void setName(std::string _name) { name = _name; } + virtual ~uTensor() = 0; +private: + std::string name; }; @@ -57,10 +62,11 @@ class Tensor : public uTensor { protected: std::shared_ptr s; // short for states public: - Tensor(void) { + Tensor(TName &_name) { s = std::make_shared(); s->total_size = 0; s->data = nullptr; + setName(_name); } // returns how far a given dimension is apart @@ -149,9 +155,9 @@ template class RamTensor : public Tensor { // need deep copy public: - RamTensor() : Tensor() {} + RamTensor(TName _name) : Tensor(_name) {} - RamTensor(std::initializer_list l) : Tensor() { + RamTensor(std::initializer_list l, TName _name) : Tensor(_name) { std::vector v; for (auto i : l) { v.push_back(i); @@ -160,7 +166,7 @@ class RamTensor : public Tensor { Tensor::init(v); } - RamTensor(std::vector v) : Tensor() { + RamTensor(std::vector v, TName _name) : Tensor(_name) { Tensor::init(v); } diff --git a/tensorIdxImporter.hpp b/tensorIdxImporter.hpp index 3fcaf5cd..a5f61ab7 100644 --- a/tensorIdxImporter.hpp +++ b/tensorIdxImporter.hpp @@ -33,25 +33,25 @@ class TensorIdxImporter { HeaderMeta header; HeaderMeta parseHeader(void); template - Tensor* loader(string& filename, IDX_DTYPE idx_type); + Tensor* loader(string& filename, IDX_DTYPE idx_type, string name); void open(string filename); // void open(FILE *fp); public: - Tensor* ubyte_import(string filename) { - return loader(filename, IDX_DTYPE::idx_ubyte); + Tensor* ubyte_import(string filename, string name) { + return loader(filename, IDX_DTYPE::idx_ubyte, name); } - Tensor* byte_import(string filename) { - return loader(filename, IDX_DTYPE::idx_byte); + Tensor* byte_import(string filename, string name) { + return loader(filename, IDX_DTYPE::idx_byte, name); } - Tensor* short_import(string filename) { - return loader(filename, IDX_DTYPE::idx_short); + Tensor* short_import(string filename, string name) { + return loader(filename, IDX_DTYPE::idx_short, name); } - Tensor* int_import(string filename) { - return loader(filename, IDX_DTYPE::idx_int); + Tensor* int_import(string filename, string name) { + return loader(filename, IDX_DTYPE::idx_int, name); } - Tensor* float_import(string filename) { - return loader(filename, IDX_DTYPE::idx_float); + Tensor* float_import(string filename, string name) { + return loader(filename, IDX_DTYPE::idx_float, name); } uint32_t getMagicNumber(unsigned char dtype, unsigned char dim); uint8_t getIdxDTypeSize(IDX_DTYPE dtype); @@ -65,7 +65,7 @@ class TensorIdxImporter { template -Tensor* TensorIdxImporter::loader(string& filename, IDX_DTYPE idx_type) { +Tensor* TensorIdxImporter::loader(string& filename, IDX_DTYPE idx_type, string name) { fp = fopen(filename.c_str(), "r"); DEBUG("Opening file %s ", filename.c_str()); @@ -79,7 +79,7 @@ Tensor* TensorIdxImporter::loader(string& filename, IDX_DTYPE idx_type) { fseek(fp, header.dataPos, SEEK_SET); // need error handling - Tensor* t = new 
RamTensor(header.dim); // tensor allocated + Tensor* t = new RamTensor(header.dim, name); // tensor allocated const uint8_t unit_size = t->unit_size(); U* val = (U*)malloc(unit_size); From 4c6de655eff7b56519357bdbfa21f21a9ba12fd0 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sat, 18 Nov 2017 16:00:13 +0800 Subject: [PATCH 65/80] modifying context class to use TName --- context.cpp | 66 ++++++++++++++++++++++++++++++------------------- context.hpp | 15 +++++------ tensor.hpp | 6 ++--- uTensorBase.cpp | 13 ++++------ uTensorBase.hpp | 4 +-- 5 files changed, 58 insertions(+), 46 deletions(-) diff --git a/context.cpp b/context.cpp index 357c4fe7..ab841940 100644 --- a/context.cpp +++ b/context.cpp @@ -1,7 +1,8 @@ #include "context.hpp" -TENSOR Context::add(Tensor* t, uint8_t init_count) { - if(rTable.find(t) != rTable.end()) { +S_TENSOR Context::add(Tensor* t, uint8_t init_count) { + if(t == nullptr) { ERR_EXIT("null pointer tensor"); } + if(rTable.find(t->getName()) != rTable.end()) { ERR_EXIT("tensor pointer address already exist in rTable"); } @@ -13,18 +14,31 @@ TENSOR Context::add(Tensor* t, uint8_t init_count) { record.count = init_count; record.allow_incr = false; } - record.sptr = _sptr; - rTable[t] = record; + record.sptr = _sptr; - TENSOR wptr = _sptr; + rTable[t->getName()] = record; - return wptr; + return _sptr; } -void Context::push(Operator *op, TList &_inputs, TList &_outputs) { +void Context::push(Operator *op, TNameList &in_names, TNameList &out_names) { //error checking in the Op class + S_TList _inputs; + for(auto in:in_names) { + Ref_Record r = rTable[in]; + if(r == rTable.end()) { ERROR_EXIT("Tensor \"%s\" not found", in.c_str()); } + _inputs.push_back(r.sptr); + } + + S_TList _outputs; + for(auto out:out_names) { + Ref_Record r = rTable[out]; + if(r == rTable.end()) { ERROR_EXIT("Tensor \"%s\" not found", in.c_str()); } + _outputs.push_back(r.sptr); + } + op->setInputs(_inputs); op->setOutputs(_outputs); op_list.push_back(op); @@ -32,9 +46,9 @@ void Context::push(Operator *op, TList &_inputs, TList &_outputs) { } -void Context::push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs) { - TList inputs; - TList outputs; +void Context::push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs) { + TNameList inputs; + TNameList outputs; for(auto i:_inputs) { inputs.push_back(i); @@ -47,17 +61,17 @@ void Context::push(Operator *op, std::initializer_list _inputs, std::ini push(op, inputs, outputs); } -void Context::incrTListRef(const TList &t_list) { +void Context::incrTListRef(const S_TList &t_list) { for(auto t:t_list) { - Tensor* ptr = t.lock().get(); - if(rTable.find(ptr) == rTable.end()) { + TName t_name = t->getName(); + if(rTable.find(t_name) == rTable.end()) { ERR_EXIT("tensor not registered"); } - Ref_Record record = rTable[ptr]; + Ref_Record record = rTable[t_name]; if(record.allow_incr) { record.count++; - rTable[ptr] = record; + rTable[t_name] = record; } //if an initial ref value is supplied to the tensor at compile time @@ -79,34 +93,34 @@ void Context::deinitTensors(const S_TList &t_list) { } } -void Context::delTensor(Tensor* t) { - Ref_Record record = rTable[t]; +void Context::delTensor(TName t_name) { + Ref_Record record = rTable[t_name]; record.sptr.reset(); - rTable.erase(t); + rTable.erase(t_name); } void Context::dcrListRef(S_TList t_list) { for(auto t:t_list) { - if(dcrRef(t.get()) < 1) { - delTensor(t.get()); + if(dcrRef(t->getName()) < 1) { + delTensor(t->getName()); } } } -uint8_t Context::dcrRef(Tensor* 
t) { - if(!isTracked(t)) { +uint8_t Context::dcrRef(TName t_name) { + if(!isTracked(t_name)) { ERR_EXIT("Tensor not registered"); } - Ref_Record record = rTable[t]; + Ref_Record record = rTable[t_name]; if(record.count > 0) record.count -= 1; - rTable[t] = record; + rTable[t_name] = record; return record.count; } -bool Context::isTracked(Tensor* t) { - return (rTable.find(t) != rTable.end()); +bool Context::isTracked(TName t_name) { + return (rTable.find(t_name) != rTable.end()); } int Context::eval(void) { diff --git a/context.hpp b/context.hpp index ce715a9a..d80945d4 100644 --- a/context.hpp +++ b/context.hpp @@ -26,7 +26,7 @@ class Context : public uTensor { std::vector op_list; bool del_onsight; - std::unordered_map rTable; //all tensors alive //kill all unused if malloc failed? + std::unordered_map rTable; //all tensors alive //kill all unused if malloc failed? //uint32_t m_size; //remaining memory size //void registerTensor(Tensor* t); //void gc(void); //garbage collector, delete any tracked unreferenced tensor @@ -34,18 +34,19 @@ class Context : public uTensor { void initTensors(const S_TList &t_list); void deinitTensors(const S_TList &t_list); - void incrTListRef(const TList &t_list); + void incrTNameListRef(const TNameList &t_list); void dcrListRef(S_TList t_list); - void delTensor(Tensor* t); + void delTensor(TName t); //uint16_t incrRef(std::shared_ptr sptr); - uint8_t dcrRef(Tensor* t); - bool isTracked(Tensor* t); + uint8_t dcrRef(TName name); + bool isTracked(TName name); + bool isTracked(TName name); //uint16_t getRef(); public: TENSOR add(Tensor* t, uint8_t init_count = 0); - void push(Operator *op, TList &_inputs, TList &_outputs); - void push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs); + void push(Operator *op, TNameList &_inputs, TNameList &_outputs); + void push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs); int eval(void); Context() { diff --git a/tensor.hpp b/tensor.hpp index 91d99741..32c0298c 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -18,10 +18,10 @@ // }; class Tensor; - +typedef std::string TName; +typedef std::string OpName; +typedef std::vector TNameList; typedef std::shared_ptr S_TENSOR; -typedef std::weak_ptr TENSOR; -typedef std::vector TList; typedef std::vector S_TList; class uTensor { diff --git a/uTensorBase.cpp b/uTensorBase.cpp index 5f1e3b6e..57b0ec91 100644 --- a/uTensorBase.cpp +++ b/uTensorBase.cpp @@ -1,17 +1,14 @@ #include "uTensorBase.hpp" -void Operator::setInputs(TList &_inputs) { +void Operator::setInputs(S_TList &_inputs) { if(_inputs.size() != n_inputs) ERR_EXIT("Input Tensor list mismatched..."); - for(uint8_t i=0; i < _inputs.size(); i++) { - inputs.push_back(_inputs[i].lock()); - } + inputs = _inputs; } -void Operator::setOutputs(TList &_outputs) { +void Operator::setOutputs(S_TList &_outputs) { if(_outputs.size() != n_outputs) ERR_EXIT("Input Tensor list mismatched..."); - for(uint8_t i=0; i < _outputs.size(); i++) { - outputs.push_back(_outputs[i].lock()); - } + outputs = _outputs; + } diff --git a/uTensorBase.hpp b/uTensorBase.hpp index 8616aad4..7886705d 100644 --- a/uTensorBase.hpp +++ b/uTensorBase.hpp @@ -18,8 +18,8 @@ class Operator : public uTensor { public: virtual void compute() = 0; - void setInputs(TList &_inputs); - void setOutputs(TList &_outputs); + void setInputs(S_TList &_inputs); + void setOutputs(S_TList &_outputs); S_TList getInputs(void) { return inputs; } S_TList getOutputs(void) { return outputs;} uint8_t getNumInputs(void) { return n_inputs; } 
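Between the two context refactoring patches above and below, the call pattern
the series is converging on looks as follows. This is a minimal sketch,
assuming RamTensor takes its element type as a template parameter; the tensor
names and MyOp are placeholders rather than code from the series, and the
ArrayTests changes in PATCH 67 further below exercise the same pattern against
the real ops.

#include "context.hpp"
#include "tensor.hpp"

// MyOp stands in for any concrete Operator subclass (e.g. ReluOp).
int sketch(void) {
  Context ctx;
  // add() registers the raw tensor in rTable under its TName, wraps it in a
  // shared_ptr, and owns it from here on; init_count defaults to 0, so the
  // reference count only grows as the tensor is pushed as an op input. The
  // returned shared_ptr can be held to keep the tensor alive past eval().
  S_TENSOR x = ctx.add(new RamTensor<float>({2, 2}, "x"));
  S_TENSOR y = ctx.add(new RamTensor<float>({2, 2}, "y"));
  // push() resolves both name lists through rTable, hands the shared
  // pointers to the Operator, and bumps the input reference counts.
  ctx.push(new MyOp(), {"x"}, {"y"});
  // eval() runs the queued ops in order, decrements each input's reference
  // count, and deletes tensors whose count drops below one.
  return ctx.eval();
}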
From 7041775b0f009dc2697861dc2b9b17c6bc4ac4b1 Mon Sep 17 00:00:00 2001 From: kazami Date: Sat, 18 Nov 2017 16:32:01 +0800 Subject: [PATCH 66/80] 1. implement lookup for reference count 2. define guard for deep_mnist_mlp for avoid preprocessing non include file 3. modify main function to focus on refactor point --- context.cpp | 13 ++++++------- context.hpp | 3 +-- deep_mnist_mlp.cpp | 2 ++ main.cpp | 12 ++++++------ tensorIdxImporterTests.hpp | 8 ++++---- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/context.cpp b/context.cpp index ab841940..9111d900 100644 --- a/context.cpp +++ b/context.cpp @@ -27,22 +27,22 @@ void Context::push(Operator *op, TNameList &in_names, TNameList &out_names) { //error checking in the Op class S_TList _inputs; for(auto in:in_names) { + if(rTable.find(in) == rTable.end()) { ERR_EXIT("Tensor \"%s\" not found", in.c_str()); } Ref_Record r = rTable[in]; - if(r == rTable.end()) { ERROR_EXIT("Tensor \"%s\" not found", in.c_str()); } _inputs.push_back(r.sptr); } S_TList _outputs; for(auto out:out_names) { + if(rTable.find(out) == rTable.end()) { ERR_EXIT("Tensor \"%s\" not found", out.c_str()); } Ref_Record r = rTable[out]; - if(r == rTable.end()) { ERROR_EXIT("Tensor \"%s\" not found", in.c_str()); } _outputs.push_back(r.sptr); } op->setInputs(_inputs); op->setOutputs(_outputs); op_list.push_back(op); - incrTListRef(_inputs); + incrTNameListRef(in_names); } @@ -61,9 +61,8 @@ void Context::push(Operator *op, std::initializer_list _inputs, std::init push(op, inputs, outputs); } -void Context::incrTListRef(const S_TList &t_list) { - for(auto t:t_list) { - TName t_name = t->getName(); +void Context::incrTNameListRef(const TNameList &t_list) { + for(auto t_name:t_list) { if(rTable.find(t_name) == rTable.end()) { ERR_EXIT("tensor not registered"); } @@ -146,4 +145,4 @@ int Context::eval(void) { op_list.clear(); return 0; -} \ No newline at end of file +} diff --git a/context.hpp b/context.hpp index d80945d4..2adbcb63 100644 --- a/context.hpp +++ b/context.hpp @@ -40,11 +40,10 @@ class Context : public uTensor { //uint16_t incrRef(std::shared_ptr sptr); uint8_t dcrRef(TName name); bool isTracked(TName name); - bool isTracked(TName name); //uint16_t getRef(); public: - TENSOR add(Tensor* t, uint8_t init_count = 0); + S_TENSOR add(Tensor* t, uint8_t init_count = 0); void push(Operator *op, TNameList &_inputs, TNameList &_outputs); void push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs); int eval(void); diff --git a/deep_mnist_mlp.cpp b/deep_mnist_mlp.cpp index 19f262e9..a57789bb 100644 --- a/deep_mnist_mlp.cpp +++ b/deep_mnist_mlp.cpp @@ -1,3 +1,4 @@ +#ifdef __DEEP_MNIST_MLP_HPP__ #include "deep_mnist_mlp.hpp" void tensorQuantize(Context& ctx, TENSOR input, TENSOR output, @@ -199,3 +200,4 @@ int runMLP(string inputIdxFile) { return *(pred.lock()->read(0, 0)); // output layer } +#endif diff --git a/main.cpp b/main.cpp index fbc0a164..12541fe4 100644 --- a/main.cpp +++ b/main.cpp @@ -6,14 +6,14 @@ #include "tensor.hpp" #include "tensorIdxImporterTests.hpp" #include "context.hpp" -#include "ArrayTests.hpp" +/*#include "ArrayTests.hpp" #include "context_test.hpp" #include "MathTests.hpp" #include "tensor_test.hpp" #include "NnTests.hpp" #include "MatrixTests.hpp" #include "mlp_test.hpp" -#include "deep_mnist_mlp.hpp" +#include "deep_mnist_mlp.hpp"*/ Serial pc(USBTX, USBRX, 115200); SDBlockDevice bd(MBED_CONF_APP_SD_MOSI, MBED_CONF_APP_SD_MISO, @@ -27,8 +27,8 @@ int main(int argc, char** argv) { printf("Deep MLP on Mbed (Trained with 
Tensorflow)\r\n\r\n"); printf("running deep-mlp...\r\n"); - int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); - printf("prediction: %d\r\n", prediction); +// int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); +// printf("prediction: %d\r\n", prediction); idxImporterTest idxTest; idxTest.runAll(); @@ -36,7 +36,7 @@ int main(int argc, char** argv) { printf("IDX import:\r\n"); idxTest.printSummary(); - ArrayOpsTest arrayTests; +/* ArrayOpsTest arrayTests; arrayTests.runAll(); printf("Array: \r\n"); arrayTests.printSummary(); @@ -82,7 +82,7 @@ int main(int argc, char** argv) { mlpTest mlpt; mlpt.runAll(); printf("mlp result...\r\n"); - mlpt.printSummary(); + mlpt.printSummary();*/ diff --git a/tensorIdxImporterTests.hpp b/tensorIdxImporterTests.hpp index cf052073..833eb811 100644 --- a/tensorIdxImporterTests.hpp +++ b/tensorIdxImporterTests.hpp @@ -20,7 +20,7 @@ class idxImporterTest : public Test { TensorIdxImporter t_import; timer_start(); Tensor* t = - t_import.ubyte_import("/fs/testData/idxImport/uint8_4d_power2.idx"); + t_import.ubyte_import("/fs/testData/idxImport/uint8_4d_power2.idx", "uchar1"); timer_stop(); double result = sum(t); passed(result == 4518); @@ -32,7 +32,7 @@ class idxImporterTest : public Test { TensorIdxImporter t_import; timer_start(); Tensor* t = - t_import.short_import("/fs/testData/idxImport/int16_4d_power2.idx"); + t_import.short_import("/fs/testData/idxImport/int16_4d_power2.idx", "short1"); timer_stop(); double result = sum(t); passed(result == 270250); @@ -44,7 +44,7 @@ class idxImporterTest : public Test { TensorIdxImporter t_import; timer_start(); Tensor* t = - t_import.int_import("/fs/testData/idxImport/int32_4d_power2.idx"); + t_import.int_import("/fs/testData/idxImport/int32_4d_power2.idx", "int1"); timer_stop(); double result = sum(t); passed(result == 5748992600); @@ -56,7 +56,7 @@ class idxImporterTest : public Test { TensorIdxImporter t_import; timer_start(); Tensor* t = - t_import.float_import("/fs/testData/idxImport/float_4d_power2.idx"); + t_import.float_import("/fs/testData/idxImport/float_4d_power2.idx", "float1"); timer_stop(); double result = sum(t); From 1dba94be218dfe02c14152c08a4fb398b0e0ae79 Mon Sep 17 00:00:00 2001 From: kazami Date: Sat, 18 Nov 2017 21:22:53 +0800 Subject: [PATCH 67/80] 1. 
make array pass test for name lookup optimization --- ArrayTests.hpp | 70 +++++++++++++++++--------------------------------- main.cpp | 8 +++--- 2 files changed, 28 insertions(+), 50 deletions(-) diff --git a/ArrayTests.hpp b/ArrayTests.hpp index c5cbcb18..36d40d79 100644 --- a/ArrayTests.hpp +++ b/ArrayTests.hpp @@ -15,38 +15,25 @@ class ArrayOpsTest : public Test { testStart("quantize_v2"); //reference inputs /Users/neitan01/Documents/mbed/uTensor.git/TESTS/scripts/PRE-GEN/qA - TENSOR b = ctx.add(t_import.float_import ("/fs/testData/qB/in/Cast_1_0.idx")); - TENSOR b_min = ctx.add(t_import.float_import("/fs/testData/qB/in/Min_1_0.idx")); - TENSOR b_max = ctx.add(t_import.float_import("/fs/testData/qB/in/Max_1_0.idx")); + S_TENSOR b_q_ref = ctx.add(t_import.float_import ("/fs/testData/qB/in/Cast_1_0.idx", "b_q_ref")); + S_TENSOR b_min_q_ref = ctx.add(t_import.float_import("/fs/testData/qB/in/Min_1_0.idx", "b_min_q_ref")); + S_TENSOR b_max_q_ref = ctx.add(t_import.float_import("/fs/testData/qB/in/Max_1_0.idx", "b_max_q_ref")); //reference outputs - TENSOR b_q_ref = ctx.add(t_import.ubyte_import("/fs/testData/qB/out/qB_0.idx")); - TENSOR b_min_q_ref = ctx.add(t_import.float_import("/fs/testData/qB/out/qB_1.idx")); - TENSOR b_max_q_ref = ctx.add(t_import.float_import("/fs/testData/qB/out/qb_2.idx")); - - TENSOR b_q = ctx.add(new RamTensor(b_q_ref.lock()->getShape())); - TENSOR b_min_q = ctx.add(new RamTensor(b_min_q_ref.lock()->getShape())); - TENSOR b_max_q = ctx.add(new RamTensor(b_max_q_ref.lock()->getShape())); - - TList inputs = {b, b_min, b_max}; - TList outputs = {b_q, b_min_q, b_max_q}; - S_TENSOR out_b_q = b_q.lock(); - S_TENSOR out_b_min_q = b_min_q.lock(); - S_TENSOR out_b_max_q = b_max_q.lock(); - S_TENSOR ref_b_q = b_q_ref.lock(); - S_TENSOR ref_b_min_q = b_min_q_ref.lock(); - S_TENSOR ref_b_max_q = b_max_q_ref.lock(); + S_TENSOR ref_b_q = ctx.add(t_import.ubyte_import("/fs/testData/qB/out/qB_0.idx", "ref_b_q")); + S_TENSOR ref_b_min_q = ctx.add(t_import.float_import("/fs/testData/qB/out/qB_1.idx", "ref_b_min_q")); + S_TENSOR ref_b_max_q = ctx.add(t_import.float_import("/fs/testData/qB/out/qb_2.idx", "ref_b_max_q")); + + S_TENSOR out_b_q = ctx.add(new RamTensor(b_q_ref->getShape(), "b_q")); + S_TENSOR out_b_min_q = ctx.add(new RamTensor(b_min_q_ref->getShape(), "b_min_q")); + S_TENSOR out_b_max_q = ctx.add(new RamTensor(b_max_q_ref->getShape(), "b_max_q")); //Implementation goes here timer_start(); - ctx.push(new QuantizeV2Op(), inputs, outputs); + ctx.push(new QuantizeV2Op(), {"b_q_ref", "b_min_q_ref", "b_max_q_ref"}, {"b_q", "b_min_q", "b_max_q"}); ctx.eval(); timer_stop(); - // printf("refMin is : %f \r\n", *(b_min_q_ref.getPointer({0}))); - // printf("outMin is : %f \r\n", *(b_min_q.getPointer({0}))); - // printf("diff : output(%f), outMin(%f), outMax(%f)\r\n", - // meanPercentErr(b_q_ref, b_q), meanPercentErr(b_min_q_ref, b_min_q), meanPercentErr(b_max_q_ref, b_max_q)); double result = meanPercentErr(ref_b_q.get(), out_b_q.get()) + meanPercentErr(ref_b_min_q.get(), out_b_min_q.get()) + meanPercentErr(ref_b_max_q.get(), out_b_max_q.get()); //passed(result < 0.0001); @@ -57,27 +44,22 @@ class ArrayOpsTest : public Test { testStart("dequantize"); //reference inputs - TENSOR a = ctx.add(t_import.ubyte_import("/fs/testData/deQ/in/rQ_0.idx")); - TENSOR a_min = ctx.add(t_import.float_import("/fs/testData/deQ/in/rQ_1.idx")); - TENSOR a_max = ctx.add(t_import.float_import("/fs/testData/deQ/in/rQ_2.idx")); + S_TENSOR a = ctx.add(t_import.ubyte_import("/fs/testData/deQ/in/rQ_0.idx", 
"a")); + S_TENSOR a_min = ctx.add(t_import.float_import("/fs/testData/deQ/in/rQ_1.idx", "a_min")); + S_TENSOR a_max = ctx.add(t_import.float_import("/fs/testData/deQ/in/rQ_2.idx", "a_max")); //reference outputs - TENSOR out_ref = ctx.add(t_import.float_import("/fs/testData/deQ/out/deQ_0.idx")); + S_TENSOR out_ref = ctx.add(t_import.float_import("/fs/testData/deQ/out/deQ_0.idx", "out_ref")); //modify the checks below: - TENSOR out = ctx.add(new RamTensor(out_ref.lock()->getShape())); - TList inputs = {a, a_min, a_max}; - TList outputs = {out}; - - S_TENSOR out_val = out.lock(); - S_TENSOR ref_out = out_ref.lock(); + S_TENSOR out = ctx.add(new RamTensor(out_ref->getShape(), "out")); timer_start(); - ctx.push(new DequantizeOp(), inputs, outputs); + ctx.push(new DequantizeOp(), {"a", "a_min", "a_max"}, {"out"}); ctx.eval(); timer_stop(); - double result = meanPercentErr(out_val.get(), ref_out.get()); + double result = meanPercentErr(out.get(), out_ref.get()); //passed(result < 0.0001); passed(result == 0); } @@ -87,26 +69,22 @@ class ArrayOpsTest : public Test { TensorIdxImporter t_import; //reference inputs - TENSOR ref_a = ctx.add(t_import.float_import("/fs/testData/ref_reshape/in/Const_0.idx")); - TENSOR ref_dim = ctx.add(t_import.int_import("/fs/testData/ref_reshape/in/Const_1_0.idx")); + S_TENSOR ref_a = ctx.add(t_import.float_import("/fs/testData/ref_reshape/in/Const_0.idx", "ref_a")); + S_TENSOR ref_dim = ctx.add(t_import.int_import("/fs/testData/ref_reshape/in/Const_1_0.idx", "ref_dim")); //reference outputs - TENSOR out_ref = ctx.add(t_import.float_import("/fs/testData/ref_reshape/out/ref_reshape_0.idx")); + S_TENSOR out_ref_2 = ctx.add(t_import.float_import("/fs/testData/ref_reshape/out/ref_reshape_0.idx", "out_ref_2")); //modify the checks below: - TENSOR out = ctx.add(new RamTensor(out_ref.lock()->getShape())); - S_TENSOR out_val = out.lock(); - S_TENSOR ref_out = out_ref.lock(); + S_TENSOR out_2 = ctx.add(new RamTensor(out_ref_2->getShape(), "out_2")); - TList inputs = {ref_a, ref_dim}; - TList outputs = {out}; timer_start(); - ctx.push(new ReshapeOp(), inputs, outputs); + ctx.push(new ReshapeOp(), {"ref_a", "ref_dim"}, {"out_2"}); ctx.eval(); timer_stop(); - double result = meanPercentErr(out_val.get(), ref_out.get()); + double result = meanPercentErr(out_2.get(), out_ref_2.get()); //passed(result < 0.0001); passed(result == 0); } diff --git a/main.cpp b/main.cpp index 12541fe4..793ad00d 100644 --- a/main.cpp +++ b/main.cpp @@ -6,8 +6,8 @@ #include "tensor.hpp" #include "tensorIdxImporterTests.hpp" #include "context.hpp" -/*#include "ArrayTests.hpp" -#include "context_test.hpp" +#include "ArrayTests.hpp" +/*#include "context_test.hpp" #include "MathTests.hpp" #include "tensor_test.hpp" #include "NnTests.hpp" @@ -36,12 +36,12 @@ int main(int argc, char** argv) { printf("IDX import:\r\n"); idxTest.printSummary(); -/* ArrayOpsTest arrayTests; + ArrayOpsTest arrayTests; arrayTests.runAll(); printf("Array: \r\n"); arrayTests.printSummary(); - printf("Math: \r\n"); +/* printf("Math: \r\n"); MathOpsTest mathTests; mathTests.runAll(); printf("Math result...\r\n"); From 1bf0026f012261be7c62776e2edbfd928a8caf56 Mon Sep 17 00:00:00 2001 From: kazami Date: Sat, 18 Nov 2017 21:28:38 +0800 Subject: [PATCH 68/80] 1. 
make NnTests pass with the name-based tensor lookup

---
 NnTests.hpp | 48 +++++++++++++++++++-----------------------------
 1 file changed, 19 insertions(+), 29 deletions(-)

diff --git a/NnTests.hpp b/NnTests.hpp
index ad867a77..e77f603b 100644
--- a/NnTests.hpp
+++ b/NnTests.hpp
@@ -13,45 +13,35 @@ class NnOpsTest : public Test {
   void reluTest(void) {
     testStart("quantized_relu");
     // reference inputs
-    TENSOR a =
-        ctx.add(t_import.ubyte_import("/fs/testData/ref_qRelu/in/QuantizeV2_0.idx"));
-    TENSOR min =
-        ctx.add(t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_1.idx"));
-    TENSOR max =
-        ctx.add(t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_2.idx"));
+    S_TENSOR a =
+        ctx.add(t_import.ubyte_import("/fs/testData/ref_qRelu/in/QuantizeV2_0.idx", "a"));
+    S_TENSOR min =
+        ctx.add(t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_1.idx", "min"));
+    S_TENSOR max =
+        ctx.add(t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_2.idx", "max"));
 
     // reference outputs
-    TENSOR ref_out =
-        ctx.add(t_import.ubyte_import("/fs/testData/ref_qRelu/out/ref_qRelu_0.idx"));
-    TENSOR ref_min =
-        ctx.add(t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_1.idx"));
-    TENSOR ref_max =
-        ctx.add(t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_2.idx"));
+    S_TENSOR ref_out =
+        ctx.add(t_import.ubyte_import("/fs/testData/ref_qRelu/out/ref_qRelu_0.idx", "ref_out"));
+    S_TENSOR ref_min =
+        ctx.add(t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_1.idx", "ref_min"));
+    S_TENSOR ref_max =
+        ctx.add(t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_2.idx", "ref_max"));
 
     // modify the checks below:
-    TENSOR out = ctx.add(new RamTensor<unsigned char>(ref_out.lock()->getShape()));
-    TENSOR out_min = ctx.add(new RamTensor<float>(ref_min.lock()->getShape()));
-    TENSOR out_max = ctx.add(new RamTensor<float>(ref_max.lock()->getShape()));
+    S_TENSOR out = ctx.add(new RamTensor<unsigned char>(ref_out->getShape(), "out"));
+    S_TENSOR out_min = ctx.add(new RamTensor<float>(ref_min->getShape(), "out_min"));
+    S_TENSOR out_max = ctx.add(new RamTensor<float>(ref_max->getShape(), "out_max"));
 
-    //lock on to required output tensors
-    S_TENSOR ref_out_s = ref_out.lock();
-    S_TENSOR ref_min_s = ref_min.lock();
-    S_TENSOR ref_max_s = ref_max.lock();
-    S_TENSOR out_s = out.lock();
-    S_TENSOR out_min_s = out_min.lock();
-    S_TENSOR out_max_s = out_max.lock();
-
-    TList inputs = {a, min, max};
-    TList outputs = {out, out_min, out_max};
     timer_start();
-    ctx.push(new ReluOp(), inputs, outputs);
+    ctx.push(new ReluOp(), {"a", "min", "max"}, {"out", "out_min", "out_max"});
    ctx.eval();
     timer_stop();
 
-    double result = meanPercentErr(ref_out_s.get(), out_s.get()) +
-                    meanPercentErr(ref_min_s.get(), out_min_s.get()) +
-                    meanPercentErr(ref_max_s.get(), out_max_s.get());
+    double result = meanPercentErr(ref_out.get(), out.get()) +
+                    meanPercentErr(ref_min.get(), out_min.get()) +
+                    meanPercentErr(ref_max.get(), out_max.get());
     // passed(result < 0.0001);
     passed(result == 0);
   }
From cb3959771620f60d285e3e6f2db265973d3260eb Mon Sep 17 00:00:00 2001
From: kazami 
Date: Sat, 18 Nov 2017 22:01:33 +0800
Subject: [PATCH 69/80] 1. 
make tensor transform pass for name lookup --- main.cpp | 10 +++++----- tensor_test.hpp | 28 +++++++++++++++------------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/main.cpp b/main.cpp index 793ad00d..1407863f 100644 --- a/main.cpp +++ b/main.cpp @@ -7,10 +7,10 @@ #include "tensorIdxImporterTests.hpp" #include "context.hpp" #include "ArrayTests.hpp" +#include "NnTests.hpp" +#include "tensor_test.hpp" /*#include "context_test.hpp" #include "MathTests.hpp" -#include "tensor_test.hpp" -#include "NnTests.hpp" #include "MatrixTests.hpp" #include "mlp_test.hpp" #include "deep_mnist_mlp.hpp"*/ @@ -56,7 +56,7 @@ int main(int argc, char** argv) { contextTest ctxTest; ctxTest.runAll(); printf("Context result...\r\n"); - ctxTest.printSummary(); + ctxTest.printSummary();*/ printf("NnOpS: \r\n"); NnOpsTest nnTest; @@ -64,7 +64,7 @@ int main(int argc, char** argv) { printf("Nn Ops result...\r\n"); nnTest.printSummary(); - printf("Transformation test: \r\n"); + printf("Transformation test: \r\n"); transTest tTest; tTest.runAll(); printf("Transformation result...\r\n"); @@ -78,7 +78,7 @@ int main(int argc, char** argv) { - printf("mlp test: \r\n"); +/* printf("mlp test: \r\n"); mlpTest mlpt; mlpt.runAll(); printf("mlp result...\r\n"); diff --git a/tensor_test.hpp b/tensor_test.hpp index d5a07a72..8cab02b6 100644 --- a/tensor_test.hpp +++ b/tensor_test.hpp @@ -12,7 +12,7 @@ class tensorTest : public Test { public: void runResize() { testStart("tensortest"); - Tensor* a = new RamTensor({3, 2, 3}); + Tensor* a = new RamTensor({3, 2, 3}, "a"); std::vector v({1, 5, 8}); a->resize(v); bool res = testsize(1 * 5 * 8, a->getSize()); @@ -32,18 +32,20 @@ class transTest : public Test { bool res = false; testStart("transtest"); - for (int i = 0; i < 10; i++) { + for (int i = 0; i < 9; i++) { std::default_random_engine gen; vector tmp({2, 3, 4, 5}); - TENSOR inputTensor = ctx.add(new RamTensor(tmp)); + std::string a_s = "input" + std::to_string(i); + S_TENSOR inputTensor = ctx.add(new RamTensor(tmp, a_s)); vector permute = {2, 3, 1, 0}; - vector g = inputTensor.lock()->getShape(); + vector g = inputTensor->getShape(); std::shuffle(permute.begin(), permute.end(), gen); - permuteIndexTransform trans(inputTensor.lock()->getShape(), permute); + permuteIndexTransform trans(inputTensor->getShape(), permute); - TENSOR output = ctx.add(new RamTensor(trans.getNewShape())); - vector s = output.lock()->getShape(); + std::string a_o = "output" + std::to_string(i); + S_TENSOR output = ctx.add(new RamTensor(trans.getNewShape(), a_o)); + vector s = output->getShape(); res = testshape(g, s, permute); if (!res) { break; @@ -59,10 +61,10 @@ class transTest : public Test { vector output_1({2, 2, 3, 5, 6, 6, 4, 5, 7, 5, 1, 9, 1, 3, 2, 2, 5, 3, 3, 6, 3, 4, 9, 2}); - TENSOR inputTensor = ctx.add(new RamTensor({2, 3, 4})); + S_TENSOR inputTensor2 = ctx.add(new RamTensor({2, 3, 4}, "inputTensor2")); vector permute = {0, 2, 1}; - permuteIndexTransform trans(inputTensor.lock()->getShape(), permute); + permuteIndexTransform trans(inputTensor2->getShape(), permute); size_t out_index = 0; bool res = false; @@ -85,9 +87,9 @@ class transTest : public Test { vector output_2({2, 1, 2, 3, 3, 2, 5, 2, 6, 5, 6, 3, 4, 3, 5, 6, 7, 3, 5, 4, 1, 9, 9, 2}); - TENSOR inputTensor2 = ctx.add(new RamTensor({2, 4, 3})); + S_TENSOR inputTensor3 = ctx.add(new RamTensor({2, 4, 3}, "inputTensor3")); vector permute2 = {1, 2, 0}; - permuteIndexTransform trans2(inputTensor2.lock()->getShape(), permute2); + permuteIndexTransform 
trans2(inputTensor3->getShape(), permute2); testStart("test vec 2 for transform"); for (uint32_t i = 0; i < input_2.size(); i++) { out_index = trans2[i]; @@ -105,9 +107,9 @@ class transTest : public Test { vector output_3({8, 2, 8, 1, 0, 3, 4, 6, 2, 6, 0, 6, 3, 9, 2, 7, 0, 7, 0, 4, 8, 9, 0, 4, 3, 6, 8}); - TENSOR inputTensor3 = ctx.add(new RamTensor({1, 3, 3, 3})); + S_TENSOR inputTensor4 = ctx.add(new RamTensor({1, 3, 3, 3}, "inputTensor4")); vector permute3 = {0, 3, 2, 1}; - permuteIndexTransform trans3(inputTensor3.lock()->getShape(), permute3); + permuteIndexTransform trans3(inputTensor4->getShape(), permute3); testStart("test vec 4d for transform"); for (uint32_t i = 0; i < input_3.size(); i++) { out_index = trans3[i]; From 3d310f393541069cbcb7c031c085b6cb41c38ae5 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sat, 18 Nov 2017 22:19:28 +0800 Subject: [PATCH 70/80] porting MathTests.hpp; added ctx.get() and ctx.gc(); WIP --- MathTests.hpp | 464 ++++++++++++++++++++++++-------------------------- context.cpp | 25 ++- context.hpp | 3 + main.cpp | 21 +-- 4 files changed, 261 insertions(+), 252 deletions(-) diff --git a/MathTests.hpp b/MathTests.hpp index f4dcf057..d772f9c9 100644 --- a/MathTests.hpp +++ b/MathTests.hpp @@ -13,40 +13,35 @@ class MathOpsTest : public Test { void requantization_rangeTest(void) { testStart("requantization_range"); + ctx.gc(); + + //Note: raw pointers should be owned ONLY by the context. no copy of the raw pointer should exist elsewhere // reference inputs - TENSOR a = - ctx.add(t_import.int_import("/fs/testData/rqRange/in/qMatMul_0.idx")); - TENSOR a_min = - ctx.add(t_import.float_import("/fs/testData/rqRange/in/qMatMul_1.idx")); - TENSOR a_max = - ctx.add(t_import.float_import("/fs/testData/rqRange/in/qMatMul_2.idx")); + ctx.add(t_import.int_import("/fs/testData/rqRange/in/qMatMul_0.idx", "a")); + ctx.add(t_import.float_import("/fs/testData/rqRange/in/qMatMul_1.idx", "a_min")); + ctx.add(t_import.float_import("/fs/testData/rqRange/in/qMatMul_2.idx", "a_max")); - // reference outputs - TENSOR ref_min = - ctx.add(t_import.float_import("/fs/testData/rqRange/out/rqRange_0.idx")); - TENSOR ref_max = - ctx.add(t_import.float_import("/fs/testData/rqRange/out/rqRange_1.idx")); + // reference output + ctx.add(t_import.float_import("/fs/testData/rqRange/out/rqRange_0.idx", "ref_min")); + ctx.add(t_import.float_import("/fs/testData/rqRange/out/rqRange_1.idx", "ref_max")); // Implementation goes here // modify the checks below: - TENSOR out_min = ctx.add(new RamTensor(ref_min.lock()->getShape())); - TENSOR out_max = ctx.add(new RamTensor(ref_max.lock()->getShape())); - TList inputs = {a, a_min, a_max}; - TList outputs = {out_min, out_max}; - - S_TENSOR ref_min_val = ref_min.lock(); - S_TENSOR ref_max_val = ref_max.lock(); - S_TENSOR out_min_val = out_min.lock(); - S_TENSOR out_max_val = out_max.lock(); + ctx.add(new RamTensor(ctx.get("ref_min")->getShape(), "out_min")); + ctx.add(new RamTensor(ctx.get("ref_max")->getShape(), "out_max")); + TNameList inputs = {"a", "a_min", "a_max"}; + TNameList outputs = {"out_min", "out_max"}; timer_start(); ctx.push(new Requantization_RangeOp(), inputs, outputs); ctx.eval(); timer_stop(); + //Note: an output tensor will not be auto-deleted by context unless it has been used as an input double result = - meanPercentErr(ref_min_val.get(), out_min_val.get()) + meanPercentErr(ref_max_val.get(), out_max_val.get()); + meanPercentErr(ctx.get("ref_min").get(), ctx.get("out_min").get()) + + meanPercentErr(ctx.get("ref_max").get(), 
ctx.get("out_max").get()); // passed(result < 0.0001); passed(result == 0); } @@ -54,49 +49,42 @@ class MathOpsTest : public Test { void requantizeTest(void) { testStart("requantize"); + ctx.gc(); + // reference inputs - TENSOR a = ctx.add(t_import.int_import("/fs/testData/rQ/in/qMatMul_0.idx")); - TENSOR a_min = - ctx.add(t_import.float_import("/fs/testData/rQ/in/qMatMul_1.idx")); - TENSOR a_max = - ctx.add(t_import.float_import("/fs/testData/rQ/in/qMatMul_2.idx")); - TENSOR r_a_min = - ctx.add(t_import.float_import("/fs/testData/rQ/in/rqRange_0.idx")); - TENSOR r_a_max = - ctx.add(t_import.float_import("/fs/testData/rQ/in/rqRange_1.idx")); + ctx.add(t_import.int_import("/fs/testData/rQ/in/qMatMul_0.idx", "a")); + ctx.add(t_import.float_import("/fs/testData/rQ/in/qMatMul_1.idx", "a_min")); + ctx.add(t_import.float_import("/fs/testData/rQ/in/qMatMul_2.idx", "a_max")); + ctx.add(t_import.float_import("/fs/testData/rQ/in/rqRange_0.idx", "r_a_min")); + ctx.add(t_import.float_import("/fs/testData/rQ/in/rqRange_1.idx", "r_a_max")); // tf.quint8 + //Note: + //Instead of using ctx.get() to obtain a shared_ptr, you may also use the shared_ptr returned by ctx.add() + // reference outputs - TENSOR ref_a_q = - ctx.add(t_import.ubyte_import("/fs/testData/rQ/out/rQ_0.idx")); - TENSOR ref_a_min = - ctx.add(t_import.float_import("/fs/testData/rQ/out/rQ_1.idx")); - TENSOR ref_a_max = - ctx.add(t_import.float_import("/fs/testData/rQ/out/rQ_2.idx")); + S_TENSOR ref_a_q = ctx.add(t_import.ubyte_import("/fs/testData/rQ/out/rQ_0.idx", "ref_a_q")); + S_TENSOR ref_a_min = ctx.add(t_import.float_import("/fs/testData/rQ/out/rQ_1.idx", "ref_a_min")); + S_TENSOR ref_a_max = ctx.add(t_import.float_import("/fs/testData/rQ/out/rQ_2.idx", "ref_a_max")); // modify the checks below: - TENSOR a_q = ctx.add(new RamTensor(ref_a_q.lock()->getShape())); - TENSOR a_min_q = ctx.add(new RamTensor(ref_a_min.lock()->getShape())); - TENSOR a_max_q = ctx.add(new RamTensor(ref_a_max.lock()->getShape())); - - TList inputs = {a, a_min, a_max, r_a_min, r_a_max}; - TList outputs = {a_q, a_min_q, a_max_q}; - - S_TENSOR ref_a = ref_a_q.lock(); - S_TENSOR out_a = a_q.lock(); - S_TENSOR ref_min = ref_a_min.lock(); - S_TENSOR out_min = a_min_q.lock(); - S_TENSOR ref_max = ref_a_max.lock(); - S_TENSOR out_max = a_max_q.lock(); + S_TENSOR a_q = ctx.add(new RamTensor(ref_a_q->getShape(), "a_q")); + S_TENSOR a_min_q = ctx.add(new RamTensor(ref_a_min->getShape(), "a_min_q")); + S_TENSOR a_max_q = ctx.add(new RamTensor(ref_a_max->getShape(), "a_max_q")); + + + TNameList inputs = {"a", "a_min", "a_max", "r_a_min", "r_a_max"}; + TNameList outputs = {"a_q", "a_min_q", "a_max_q"}; + // Implementation goes here timer_start(); ctx.push(new RequantizeOp(), inputs, outputs); ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_a.get(), out_a.get()) + - meanPercentErr(ref_min.get(), out_min.get()) + - meanPercentErr(ref_max.get(), out_max.get()); + double result = meanPercentErr(ref_a_q.get(), a_q.get()) + + meanPercentErr(ref_a_min.get(), a_min_q.get()) + + meanPercentErr(ref_a_max.get(), a_max_q.get()); // passed(result < 0.0001); passed(result == 0); } @@ -104,48 +92,42 @@ class MathOpsTest : public Test { void requantizeTest2(void) { testStart("requantize2"); + ctx.gc(); + // reference inputs - TENSOR a = ctx.add(t_import.int_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_0.idx")); - TENSOR a_min = - 
ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_1.idx")); - TENSOR a_max = - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_2.idx")); - TENSOR r_a_min = - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_0.idx")); - TENSOR r_a_max = - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_1.idx")); + ctx.add(t_import.int_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_0.idx", "a")); + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_1.idx", "a_min")); + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_2.idx", "a_max")); + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_0.idx", "r_a_min")); + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_1.idx", "r_a_max")); // tf.quint8 // reference outputs - TENSOR ref_a_q = - ctx.add(t_import.ubyte_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_0.idx")); - TENSOR ref_a_min = - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_1.idx")); - TENSOR ref_a_max = - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_2.idx")); + ctx.add(t_import.ubyte_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_0.idx", "ref_a_q")); + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_1.idx", "ref_a_min")); + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_2.idx", "ref_a_max")); // modify the checks below: - TENSOR a_q = ctx.add(new RamTensor(ref_a_q.lock()->getShape())); - TENSOR a_min_q = ctx.add(new RamTensor(ref_a_min.lock()->getShape())); - TENSOR a_max_q = ctx.add(new RamTensor(ref_a_max.lock()->getShape())); - TList inputs = {a, a_min, a_max, r_a_min, r_a_max}; - TList outputs = {a_q, a_min_q, a_max_q}; - S_TENSOR ref_val = ref_a_q.lock(); - S_TENSOR ref_min = ref_a_min.lock(); - S_TENSOR ref_max = ref_a_max.lock(); - S_TENSOR out_val = a_q.lock(); - S_TENSOR out_min = a_min_q.lock(); - S_TENSOR out_max = a_max_q.lock(); + ctx.add(new RamTensor(ctx.get("ref_a_q")->getShape(), "a_q")); + ctx.add(new RamTensor(ctx.get("ref_a_min")->getShape(), "a_min_q")); + ctx.add(new RamTensor(ctx.get("ref_a_max")->getShape(), "a_max_q")); + + S_TENSOR ref_val = ctx.get("ref_a_q"); + S_TENSOR ref_min = ctx.get("ref_a_min"); + S_TENSOR ref_max = ctx.get("ref_a_max"); + S_TENSOR out_val = ctx.get("a_q"); + S_TENSOR out_min = ctx.get("a_min_q"); + S_TENSOR out_max = ctx.get("a_max_q"); // Implementation goes here timer_start(); - ctx.push(new RequantizeOp(), inputs, outputs); + ctx.push(new RequantizeOp(), {"a", "a_min", "a_max", "r_a_min", "r_a_max"}, {"a_q", "a_min_q", "a_max_q"}); ctx.eval(); timer_stop(); double result; - if((result = meanPercentErr(ref_val.get(), out_val.get())) != 0) { + if((result = 
meanPercentErr(ctx.get("ref_a_q").get(), out_val.get())) != 0) { printf("Requantize a_q failed (%.6f)\r\n", result); unsigned char* ref_ptr = ref_val.get()->write(0, 0); unsigned char* test_ptr = out_val.get()->write(0, 0); @@ -170,178 +152,178 @@ class MathOpsTest : public Test { passed(result == 0); } - void argmaxTest(void) { // NT: WIP do not use t_import int 64 here - testStart("argmax"); +// void argmaxTest(void) { // NT: WIP do not use t_import int 64 here +// testStart("argmax"); - // reference inputs - TENSOR ref_a = ctx.add(t_import.float_import("/fs/testData/ArgMax/in/ArgMax-input_0.idx")); - TENSOR ref_dim = ctx.add(t_import.int_import("/fs/testData/ArgMax/in/ArgMax-dimension_0.idx")); +// // reference inputs +// TENSOR ref_a = ctx.add(t_import.float_import("/fs/testData/ArgMax/in/ArgMax-input_0.idx")); +// TENSOR ref_dim = ctx.add(t_import.int_import("/fs/testData/ArgMax/in/ArgMax-dimension_0.idx")); - // reference outputs - /// NT: FIXME: argmax outputs int64 tensor which isn't supported by - /// int_import. - TENSOR ref_out = ctx.add(t_import.float_import("/fs/testData/ArgMax/out/ArgMax_0.idx")); +// // reference outputs +// /// NT: FIXME: argmax outputs int64 tensor which isn't supported by +// /// int_import. +// TENSOR ref_out = ctx.add(t_import.float_import("/fs/testData/ArgMax/out/ArgMax_0.idx")); - // Implementation goes here +// // Implementation goes here - // modify the checks below: - TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); +// // modify the checks below: +// TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); - TList inputs = {ref_a, ref_dim}; - TList outputs = {out}; - - S_TENSOR ref_val = ref_out.lock(); - S_TENSOR out_val = out.lock(); - timer_start(); - ctx.push(new ArgMaxOp(), inputs, outputs); - ctx.eval(); - timer_stop(); +// TList inputs = {ref_a, ref_dim}; +// TList outputs = {out}; + +// S_TENSOR ref_val = ref_out.lock(); +// S_TENSOR out_val = out.lock(); +// timer_start(); +// ctx.push(new ArgMaxOp(), inputs, outputs); +// ctx.eval(); +// timer_stop(); - Tensor* out_float = TensorCast(out_val.get()); - - double result = meanPercentErr(ref_val.get(), out_float); - - // passed(result < 0.0001); - passed(result == 0); - } - - void argmaxTest2(void) { // NT: WIP do not use t_import int 64 here - testStart("argmax2"); - TENSOR test_input = ctx.add(TensorConstant({10, 5}, 0.0f)); - *(test_input.lock()->write(25, 0)) = 1.0f; - *(test_input.lock()->write(26, 0)) = 1.0f; - *(test_input.lock()->write(7, 0)) = 1.0f; - *(test_input.lock()->write(48, 0)) = 1.0f; - *(test_input.lock()->write(14, 0)) = 1.0f; - - TENSOR test_dim = ctx.add(new RamTensor({1})); - *(test_dim.lock()->write(0, 0)) = 0; - - TENSOR test_out_ref = ctx.add(new RamTensor({5})); - *(test_out_ref.lock()->write(0, 0)) = 5.0f; - *(test_out_ref.lock()->write(1, 0)) = 5.0f; - *(test_out_ref.lock()->write(2, 0)) = 1.0f; - *(test_out_ref.lock()->write(3, 0)) = 9.0f; - *(test_out_ref.lock()->write(4, 0)) = 2.0f; - - TENSOR test_out = ctx.add(new RamTensor(test_out_ref.lock()->getShape())); - TList inputs = {test_input, test_dim}; - TList outputs = {test_out}; - S_TENSOR ref_val = test_out_ref.lock(); - S_TENSOR out_val = test_out.lock(); - - timer_start(); - ctx.push(new ArgMaxOp(), inputs, outputs); - ctx.eval(); - timer_stop(); - - double result = meanPercentErr(ref_val.get(), out_val.get()); - passed(result < 0.0001); - //passed(result == 0); - } - - void addTest(void) { - testStart("add"); - - // reference inputs - TENSOR a = - 
ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_5_0.idx")); - TENSOR b = - ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_6_0.idx")); - - // reference outputs - TENSOR ref_out = - ctx.add(t_import.float_import("/fs/testData/ref_add/out/ref_add_0.idx")); - - // Implementation goes here - - // modify the checks below: - TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); - S_TENSOR out_vxx = out.lock(); - S_TENSOR ref_vxx = ref_out.lock(); - TList inputs = {a, b}; - TList outputs = {out}; - timer_start(); - ctx.push(new AddOp(), inputs, outputs); - ctx.eval(); - timer_stop(); - - double result = meanPercentErr(ref_vxx.get(), out_vxx.get()); - passed(result < 0.0001); - //passed(result == 0); - } - - void minTest(void) { - testStart("min"); - - // reference inputs - TENSOR a = - ctx.add(t_import.float_import("/fs/testData/ref_min/in/Const_2_0.idx")); - TENSOR dim = - ctx.add(t_import.int_import("/fs/testData/ref_min/in/Const_3_0.idx")); - - // reference outputs - TENSOR ref_out = - ctx.add(t_import.float_import("/fs/testData/ref_min/out/ref_min_0.idx")); - - // Implementation goes here - - // modify the checks below: - TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); - TList inputs = {a, dim}; - TList outputs = {out}; - S_TENSOR ref_val = ref_out.lock(); - S_TENSOR out_val = out.lock(); - timer_start(); - ctx.push(new MinOp(), inputs, outputs); - ctx.eval(); - timer_stop(); - - double result = meanPercentErr(ref_val.get(), out_val.get()); - // passed(result < 0.0001); - passed(result == 0); - } - - void maxTest(void) { - testStart("max"); - - // reference inputs - TENSOR a = - ctx.add(t_import.float_import("/fs/testData/ref_max/in/Const_2_0.idx")); - TENSOR dim = - ctx.add(t_import.int_import("/fs/testData/ref_max/in/Const_4_0.idx")); - - // reference outputs - TENSOR ref_out = - ctx.add(t_import.float_import("/fs/testData/ref_max/out/ref_max_0.idx")); - - // Implementation goes here - - // modify the checks below: - TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); - TList inputs = {a, dim}; - TList outputs = {out}; - S_TENSOR ref_val = ref_out.lock(); - S_TENSOR out_val = out.lock(); - timer_start(); - ctx.push(new MaxOp(), inputs, outputs); - ctx.eval(); - timer_stop(); - - double result = meanPercentErr(ref_val.get(), out_val.get()); - // passed(result < 0.0001); - passed(result == 0); - } +// Tensor* out_float = TensorCast(out_val.get()); + +// double result = meanPercentErr(ref_val.get(), out_float); + +// // passed(result < 0.0001); +// passed(result == 0); +// } + +// void argmaxTest2(void) { // NT: WIP do not use t_import int 64 here +// testStart("argmax2"); +// TENSOR test_input = ctx.add(TensorConstant({10, 5}, 0.0f)); +// *(test_input.lock()->write(25, 0)) = 1.0f; +// *(test_input.lock()->write(26, 0)) = 1.0f; +// *(test_input.lock()->write(7, 0)) = 1.0f; +// *(test_input.lock()->write(48, 0)) = 1.0f; +// *(test_input.lock()->write(14, 0)) = 1.0f; + +// TENSOR test_dim = ctx.add(new RamTensor({1})); +// *(test_dim.lock()->write(0, 0)) = 0; + +// TENSOR test_out_ref = ctx.add(new RamTensor({5})); +// *(test_out_ref.lock()->write(0, 0)) = 5.0f; +// *(test_out_ref.lock()->write(1, 0)) = 5.0f; +// *(test_out_ref.lock()->write(2, 0)) = 1.0f; +// *(test_out_ref.lock()->write(3, 0)) = 9.0f; +// *(test_out_ref.lock()->write(4, 0)) = 2.0f; + +// TENSOR test_out = ctx.add(new RamTensor(test_out_ref.lock()->getShape())); +// TList inputs = {test_input, test_dim}; +// TList outputs = {test_out}; +// S_TENSOR ref_val = 
test_out_ref.lock(); +// S_TENSOR out_val = test_out.lock(); + +// timer_start(); +// ctx.push(new ArgMaxOp(), inputs, outputs); +// ctx.eval(); +// timer_stop(); + +// double result = meanPercentErr(ref_val.get(), out_val.get()); +// passed(result < 0.0001); +// //passed(result == 0); +// } + +// void addTest(void) { +// testStart("add"); + +// // reference inputs +// TENSOR a = +// ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_5_0.idx")); +// TENSOR b = +// ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_6_0.idx")); + +// // reference outputs +// TENSOR ref_out = +// ctx.add(t_import.float_import("/fs/testData/ref_add/out/ref_add_0.idx")); + +// // Implementation goes here + +// // modify the checks below: +// TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); +// S_TENSOR out_vxx = out.lock(); +// S_TENSOR ref_vxx = ref_out.lock(); +// TList inputs = {a, b}; +// TList outputs = {out}; +// timer_start(); +// ctx.push(new AddOp(), inputs, outputs); +// ctx.eval(); +// timer_stop(); + +// double result = meanPercentErr(ref_vxx.get(), out_vxx.get()); +// passed(result < 0.0001); +// //passed(result == 0); +// } + +// void minTest(void) { +// testStart("min"); + +// // reference inputs +// TENSOR a = +// ctx.add(t_import.float_import("/fs/testData/ref_min/in/Const_2_0.idx")); +// TENSOR dim = +// ctx.add(t_import.int_import("/fs/testData/ref_min/in/Const_3_0.idx")); + +// // reference outputs +// TENSOR ref_out = +// ctx.add(t_import.float_import("/fs/testData/ref_min/out/ref_min_0.idx")); + +// // Implementation goes here + +// // modify the checks below: +// TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); +// TList inputs = {a, dim}; +// TList outputs = {out}; +// S_TENSOR ref_val = ref_out.lock(); +// S_TENSOR out_val = out.lock(); +// timer_start(); +// ctx.push(new MinOp(), inputs, outputs); +// ctx.eval(); +// timer_stop(); + +// double result = meanPercentErr(ref_val.get(), out_val.get()); +// // passed(result < 0.0001); +// passed(result == 0); +// } + +// void maxTest(void) { +// testStart("max"); + +// // reference inputs +// TENSOR a = +// ctx.add(t_import.float_import("/fs/testData/ref_max/in/Const_2_0.idx")); +// TENSOR dim = +// ctx.add(t_import.int_import("/fs/testData/ref_max/in/Const_4_0.idx")); + +// // reference outputs +// TENSOR ref_out = +// ctx.add(t_import.float_import("/fs/testData/ref_max/out/ref_max_0.idx")); + +// // Implementation goes here + +// // modify the checks below: +// TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); +// TList inputs = {a, dim}; +// TList outputs = {out}; +// S_TENSOR ref_val = ref_out.lock(); +// S_TENSOR out_val = out.lock(); +// timer_start(); +// ctx.push(new MaxOp(), inputs, outputs); +// ctx.eval(); +// timer_stop(); + +// double result = meanPercentErr(ref_val.get(), out_val.get()); +// // passed(result < 0.0001); +// passed(result == 0); +// } void runAll(void) { - argmaxTest(); - argmaxTest2(); + //argmaxTest(); + // argmaxTest2(); requantization_rangeTest(); requantizeTest(); requantizeTest2(); - addTest(); - minTest(); - maxTest(); + // addTest(); + // minTest(); + // maxTest(); } }; diff --git a/context.cpp b/context.cpp index 9111d900..ddaebbca 100644 --- a/context.cpp +++ b/context.cpp @@ -3,7 +3,7 @@ S_TENSOR Context::add(Tensor* t, uint8_t init_count) { if(t == nullptr) { ERR_EXIT("null pointer tensor"); } if(rTable.find(t->getName()) != rTable.end()) { - ERR_EXIT("tensor pointer address already exist in rTable"); + ERR_EXIT("tensor with name 
\"%s\" address already exist in rTable", t->getName().c_str()); } S_TENSOR _sptr(t); @@ -22,6 +22,11 @@ S_TENSOR Context::add(Tensor* t, uint8_t init_count) { return _sptr; } +S_TENSOR Context::get(TName const &t_name) { + if(rTable.find(t_name) == rTable.end()) ERR_EXIT("No tensor with name: %s", t_name.c_str()); + return rTable[t_name].sptr; +} + void Context::push(Operator *op, TNameList &in_names, TNameList &out_names) { //error checking in the Op class @@ -146,3 +151,21 @@ int Context::eval(void) { return 0; } + +uint32_t Context::gc(void) { + TNameList nlist; + + for ( auto it : rTable) { + Ref_Record r = it.second; + if(r.count < 1) { + nlist.push_back(it.first); + } + } + + for(auto name:nlist) { + delTensor(name); + } + + return (uint32_t) nlist.size(); +} + diff --git a/context.hpp b/context.hpp index 2adbcb63..15076833 100644 --- a/context.hpp +++ b/context.hpp @@ -40,12 +40,15 @@ class Context : public uTensor { //uint16_t incrRef(std::shared_ptr sptr); uint8_t dcrRef(TName name); bool isTracked(TName name); + //bool isTracked(Tensor* t); //uint16_t getRef(); public: S_TENSOR add(Tensor* t, uint8_t init_count = 0); + S_TENSOR get(TName const &t_name); void push(Operator *op, TNameList &_inputs, TNameList &_outputs); void push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs); + uint32_t gc(void); int eval(void); Context() { diff --git a/main.cpp b/main.cpp index 12541fe4..6567db4c 100644 --- a/main.cpp +++ b/main.cpp @@ -6,9 +6,9 @@ #include "tensor.hpp" #include "tensorIdxImporterTests.hpp" #include "context.hpp" +#include "MathTests.hpp" /*#include "ArrayTests.hpp" #include "context_test.hpp" -#include "MathTests.hpp" #include "tensor_test.hpp" #include "NnTests.hpp" #include "MatrixTests.hpp" @@ -29,23 +29,24 @@ int main(int argc, char** argv) { // int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); // printf("prediction: %d\r\n", prediction); - idxImporterTest idxTest; - idxTest.runAll(); + idxImporterTest idxTest; + idxTest.runAll(); + + printf("IDX import:\r\n"); + idxTest.printSummary(); - printf("IDX import:\r\n"); - idxTest.printSummary(); + printf("Math: \r\n"); + MathOpsTest mathTests; + mathTests.runAll(); + printf("Math result...\r\n"); + mathTests.printSummary(); /* ArrayOpsTest arrayTests; arrayTests.runAll(); printf("Array: \r\n"); arrayTests.printSummary(); - printf("Math: \r\n"); - MathOpsTest mathTests; - mathTests.runAll(); - printf("Math result...\r\n"); - mathTests.printSummary(); printf("running matrix test ...\r\n"); matrixOpsTest matrixTests; From e12a69aecca9c9c794ce9ff6bd3e329b772031c3 Mon Sep 17 00:00:00 2001 From: kazami Date: Sun, 19 Nov 2017 13:59:45 +0800 Subject: [PATCH 71/80] 1. 
pass mlp test for name lookup --- main.cpp | 6 +- mlp_test.hpp | 324 ++++++++++++++++++++++----------------------------- 2 files changed, 142 insertions(+), 188 deletions(-) diff --git a/main.cpp b/main.cpp index 1407863f..d35ffb0f 100644 --- a/main.cpp +++ b/main.cpp @@ -9,10 +9,10 @@ #include "ArrayTests.hpp" #include "NnTests.hpp" #include "tensor_test.hpp" +#include "mlp_test.hpp" /*#include "context_test.hpp" #include "MathTests.hpp" #include "MatrixTests.hpp" -#include "mlp_test.hpp" #include "deep_mnist_mlp.hpp"*/ Serial pc(USBTX, USBRX, 115200); @@ -78,11 +78,11 @@ int main(int argc, char** argv) { -/* printf("mlp test: \r\n"); + printf("mlp test: \r\n"); mlpTest mlpt; mlpt.runAll(); printf("mlp result...\r\n"); - mlpt.printSummary();*/ + mlpt.printSummary(); diff --git a/mlp_test.hpp b/mlp_test.hpp index be390b9b..4c1fe189 100644 --- a/mlp_test.hpp +++ b/mlp_test.hpp @@ -21,54 +21,47 @@ class mlpTest : public Test { //reshape //input - TENSOR mnist_input = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQuantization/in/import-Placeholder_0.idx")); - TENSOR reshape_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reshape_dims_0.idx")); + S_TENSOR mnist_input = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQuantization/in/import-Placeholder_0.idx", "mnist_input")); + S_TENSOR reshape_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reshape_dims_0.idx", "reshape_dim")); //output - TENSOR reshape_out = ctx.add(new RamTensor()); + S_TENSOR reshape_out = ctx.add(new RamTensor("reshape_out")); // S_TENSOR out_reshape_out = reshape_out.lock(); - ctx.push(new ReshapeOp(), {mnist_input, reshape_dim}, {reshape_out}); + ctx.push(new ReshapeOp(), {"mnist_input", "reshape_dim"}, {"reshape_out"}); //min //input - TENSOR min_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_min.idx")); + S_TENSOR min_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_min.idx", "min_reduce_dim")); //output - TENSOR min_out = ctx.add(new RamTensor({1})); + S_TENSOR min_out = ctx.add(new RamTensor({1}, "min_out")); // S_TENSOR out_min_out = min_out.lock(); - ctx.push(new MinOp(), {reshape_out, min_reduce_dim}, {min_out}); + ctx.push(new MinOp(), {"reshape_out", "min_reduce_dim"}, {"min_out"}); //max //input - TENSOR max_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_max.idx")); + S_TENSOR max_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_max.idx", "max_reduce_dim")); //output - TENSOR max_out = ctx.add(new RamTensor({1})); + S_TENSOR max_out = ctx.add(new RamTensor({1}, "max_out")); // S_TENSOR out_max_out = max_out.lock(); - ctx.push(new MaxOp(), {reshape_out, max_reduce_dim}, {max_out}); + ctx.push(new MaxOp(), {"reshape_out", "max_reduce_dim"}, {"max_out"}); //quantization //output - TENSOR qnt_out = ctx.add(new RamTensor()); - TENSOR qnt_min = ctx.add(new RamTensor({1})); - TENSOR qnt_max = ctx.add(new RamTensor({1})); - - S_TENSOR out_qnt = qnt_out.lock(); - S_TENSOR out_min = qnt_min.lock(); - S_TENSOR out_max = qnt_max.lock(); - - TENSOR qnt_ref = ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_0.idx")); - TENSOR qnt_min_ref = 
ctx.add(t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_1.idx")); - TENSOR qnt_max_ref = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_2.idx")); - S_TENSOR ref_qnt = qnt_ref.lock(); - S_TENSOR ref_max = qnt_max_ref.lock(); - S_TENSOR ref_min = qnt_min_ref.lock(); - - ctx.push(new QuantizeV2Op(), {reshape_out, min_out, max_out}, {qnt_out, qnt_min, qnt_max}); + S_TENSOR qnt_out = ctx.add(new RamTensor("qnt_out")); + S_TENSOR qnt_min = ctx.add(new RamTensor({1}, "qnt_min")); + S_TENSOR qnt_max = ctx.add(new RamTensor({1}, "qnt_max")); + + S_TENSOR qnt_ref = ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_0.idx", "qnt_ref")); + S_TENSOR qnt_min_ref = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_1.idx", "qnt_min_ref")); + S_TENSOR qnt_max_ref = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_2.idx", "qnt_max_ref")); + + ctx.push(new QuantizeV2Op(), {"reshape_out", "min_out", "max_out"}, {"qnt_out", "qnt_min", "qnt_max"}); ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_qnt.get(), out_qnt.get()); - result += meanPercentErr(ref_min.get(), out_min.get()); - result += meanPercentErr(ref_max.get(), out_max.get()); + double result = meanPercentErr(qnt_ref.get(), qnt_out.get()); + result += meanPercentErr(qnt_min_ref.get(), qnt_min.get()); + result += meanPercentErr(qnt_max_ref.get(), qnt_max.get()); passed(result == 0); } @@ -82,152 +75,120 @@ class mlpTest : public Test { //quantized matrix multiplication //input - TENSOR x = - ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_0.idx")); - TENSOR x_min = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_1.idx")); - TENSOR x_max = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_2.idx")); - TENSOR w = - ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_quint8_const_0.idx")); - TENSOR w_min = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_min_0.idx")); - TENSOR w_max = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_max_0.idx")); + S_TENSOR x = + ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_0.idx", "x")); + S_TENSOR x_min = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_1.idx", "x_min")); + S_TENSOR x_max = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_2.idx", "x_max")); + S_TENSOR w = + ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_quint8_const_0.idx", "w")); + S_TENSOR w_min = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_min_0.idx", "w_min")); + S_TENSOR w_max = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_max_0.idx", "w_max")); DEBUG("all QuantizedMatMul input imported...\r\n"); //output - uint32_t out_col = 
(x.lock()->getShape())[0]; - uint32_t out_row = (w.lock()->getShape())[1]; - TENSOR out_c = ctx.add(new RamTensor({out_col, out_row})); + uint32_t out_col = (x->getShape())[0]; + uint32_t out_row = (w->getShape())[1]; + S_TENSOR out_c = ctx.add(new RamTensor({out_col, out_row}, "out_c")); // printf("x[0] = %d, x[1] = %d, b[0] = %d, b[1] = %d\r\n", (x.getShape())[0], (x.getShape())[1], // (w.getShape())[0], (w.getShape())[1]); // printf("c[0] = %d, c[1] = %d\r\n", (out_c.getShape())[0], (out_c.getShape())[1]); // fflush(stdout); - TENSOR matmul_out_min = ctx.add(new RamTensor({1})); - TENSOR matmul_out_max = ctx.add(new RamTensor({1})); + S_TENSOR matmul_out_min = ctx.add(new RamTensor({1}, "matmul_out_min")); + S_TENSOR matmul_out_max = ctx.add(new RamTensor({1}, "matmul_out_max")); - TList inputs = {x, x_min, x_max, w, w_min, w_max}; - TList outputs = {out_c, matmul_out_min, matmul_out_max}; - S_TENSOR out_val = out_c.lock(); - S_TENSOR out_min = matmul_out_min.lock(); - S_TENSOR out_max = matmul_out_max.lock(); - ctx.push(new QntMatMulOp(), inputs, outputs); + ctx.push(new QntMatMulOp(), {"x", "x_min", "x_max", "w", "w_min", "w_max"}, {"out_c", "matmul_out_min", "matmul_out_max"}); //clean up - TENSOR ref_out_c = - ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_quantized_mat_mul_0.idx")); - TENSOR ref_matmul_out_min = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_quantized_mat_mul_1.idx")); - TENSOR ref_matmul_out_max = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_quantized_mat_mul_2.idx")); - - S_TENSOR ref_val = ref_out_c.lock(); - S_TENSOR ref_min = ref_matmul_out_min.lock(); - S_TENSOR ref_max = ref_matmul_out_max.lock(); - /* double temp_result = (meanPercentErr(ref_val.get(), out_val.get()) + meanPercentErr(ref_min.get(), out_min.get()) + meanPercentErr(ref_max.get(), out_max.get())); - if(temp_result > 0) { - DEBUG("matrix mul failed\r\n"); - failed(); - return; - } else { - DEBUG("matrix mul passed\r\n"); - } -*/ + // double temp_result = (meanPercentErr(ref_val.get(), out_val.get()) + meanPercentErr(ref_min.get(), out_min.get()) + meanPercentErr(ref_max.get(), out_max.get())); + //if(temp_result > 0) { + // DEBUG("matrix mul failed\r\n"); + // failed(); + // return; + // } else { + // DEBUG("matrix mul passed\r\n"); + // } + DEBUG("QuantizedMatMul completed!\r\n"); //output - TENSOR req_out_min = ctx.add(new RamTensor({1})); - TENSOR req_out_max = ctx.add(new RamTensor({1})); - S_TENSOR out_req_min = req_out_min.lock(); - S_TENSOR out_req_max = req_out_max.lock(); - ctx.push(new Requantization_RangeOp(), {out_c, matmul_out_min, matmul_out_max}, {req_out_min, req_out_max}); - - TENSOR ref_req_out_min = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_requant_range_0.idx")); - TENSOR ref_req_out_max = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_requant_range_1.idx")); - S_TENSOR ref_req_min = ref_req_out_min.lock(); - S_TENSOR ref_req_max = ref_req_out_max.lock(); -/* - temp_result = (meanPercentErr(ref_req_min.get(), out_req_min.get()) + meanPercentErr(ref_req_max.get(), out_req_max.get())); - if(temp_result > 0) { - DEBUG("Requantization_Range failed\r\n"); - failed(); - return; - } else { - DEBUG("Requantization_Range passed\r\n"); - } - - DEBUG("Requantization_Range completed!\r\n");*/ + S_TENSOR req_out_min = ctx.add(new 
RamTensor({1}, "req_out_min")); + S_TENSOR req_out_max = ctx.add(new RamTensor({1}, "req_out_max")); + ctx.push(new Requantization_RangeOp(), {"out_c", "matmul_out_min", "matmul_out_max"}, {"req_out_min", "req_out_max"}); + + +// temp_result = (meanPercentErr(ref_req_min.get(), out_req_min.get()) + meanPercentErr(ref_req_max.get(), out_req_max.get())); +// if(temp_result > 0) { +// DEBUG("Requantization_Range failed\r\n"); +// failed(); +// return; +// } else { +// DEBUG("Requantization_Range passed\r\n"); +// } + +// DEBUG("Requantization_Range completed!\r\n"); //output - TENSOR reqnt_out = ctx.add(new RamTensor(out_c.lock()->getShape())); - TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); - TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); - S_TENSOR out_reqnt = reqnt_out.lock(); - S_TENSOR out_reqnt_min = reqnt_out_min.lock(); - S_TENSOR out_reqnt_max = reqnt_out_max.lock(); - ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, {reqnt_out, reqnt_out_min, reqnt_out_max}); + S_TENSOR reqnt_out = ctx.add(new RamTensor(out_c->getShape(), "reqnt_out")); + S_TENSOR reqnt_out_min = ctx.add(new RamTensor({1}, "reqnt_out_min")); + S_TENSOR reqnt_out_max = ctx.add(new RamTensor({1}, "reqnt_out_max")); + ctx.push(new RequantizeOp(), {"out_c", "matmul_out_min", "matmul_out_max", "req_out_min", "req_out_max"}, {"reqnt_out", "reqnt_out_min", "reqnt_out_max"}); //clean up - TENSOR ref_reqnt_out = - ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_0.idx")); - TENSOR ref_reqnt_out_min = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_1.idx")); - TENSOR ref_reqnt_out_max = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_2.idx")); - S_TENSOR ref_reqnt = ref_reqnt_out.lock(); - S_TENSOR ref_reqnt_min = ref_reqnt_out_min.lock(); - S_TENSOR ref_reqnt_max = ref_reqnt_out_max.lock(); -/* - temp_result = (meanPercentErr(ref_reqnt.get(), out_reqnt.get()) + meanPercentErr(ref_reqnt_min.get(), out_reqnt_min.get()) + meanPercentErr(ref_reqnt_max.get(), out_reqnt_max.get())); - if(temp_result > 0) { - DEBUG("Requantize failed\r\n"); - failed(); - return; - } else { - DEBUG("Requantize passed\r\n"); - } - - DEBUG("Requantize completed!\r\n");*/ + S_TENSOR ref_reqnt_out = + ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_0.idx", "ref_reqnt_out")); + S_TENSOR ref_reqnt_out_min = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_1.idx", "ref_reqnt_out_min")); + S_TENSOR ref_reqnt_out_max = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_2.idx", "ref_reqnt_out_max")); + +// temp_result = (meanPercentErr(ref_reqnt.get(), out_reqnt.get()) + meanPercentErr(ref_reqnt_min.get(), out_reqnt_min.get()) + meanPercentErr(ref_reqnt_max.get(), out_reqnt_max.get())); +// if(temp_result > 0) { +// DEBUG("Requantize failed\r\n"); +// failed(); +// return; +// } else { +// DEBUG("Requantize passed\r\n"); +// } + + DEBUG("Requantize completed!\r\n"); //output - TENSOR deqnt_out = ctx.add(new RamTensor(out_c.lock()->getShape())); - S_TENSOR out_deqnt = deqnt_out.lock(); - ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); - - TENSOR ref_deqnt_out = 
ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_0.idx")); - S_TENSOR ref_deqnt = ref_deqnt_out.lock(); - /*double temp = meanPercentErr(ref_deqnt.get(), out_deqnt.get()); - if(temp > 0.0001) { - printf("dequantize failed (%.6f)\r\n", temp); - const float* ref_ptr = ref_deqnt.get()->read(0, 0); - const float* test_ptr = out_deqnt.get()->read(0, 0); - for(uint32_t i; i < ref_deqnt->getSize(); i++) { - if(ref_ptr[i] != test_ptr[i]) { - DEBUG("%d: %.3f != %.3f, diff: %.8f%%\r\n", i, ref_ptr[i], test_ptr[i], test_ptr[i]/ref_ptr[i]); - } else { - DEBUG("%d: %.3f == %.3f\r\n", i, ref_ptr[i], test_ptr[i]); - } - } - failed(); - return; - } else { - DEBUG("dequantize passed\r\n"); - }*/ + S_TENSOR deqnt_out = ctx.add(new RamTensor(out_c->getShape(), "deqnt_out")); + ctx.push(new DequantizeOp(), {"reqnt_out", "reqnt_out_min", "reqnt_out_max"}, {"deqnt_out"}); + + S_TENSOR ref_deqnt_out = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_0.idx", "ref_deqnt_out")); + //double temp = meanPercentErr(ref_deqnt.get(), out_deqnt.get()); + //if(temp > 0.0001) { + // printf("dequantize failed (%.6f)\r\n", temp); + // const float* ref_ptr = ref_deqnt.get()->read(0, 0); + // const float* test_ptr = out_deqnt.get()->read(0, 0); + // for(uint32_t i; i < ref_deqnt->getSize(); i++) { + // if(ref_ptr[i] != test_ptr[i]) { + // DEBUG("%d: %.3f != %.3f, diff: %.8f%%\r\n", i, ref_ptr[i], test_ptr[i], test_ptr[i]/ref_ptr[i]); + // } else { + // DEBUG("%d: %.3f == %.3f\r\n", i, ref_ptr[i], test_ptr[i]); + // } + // } + // failed(); + // return; + // } else { + // DEBUG("dequantize passed\r\n"); + // } DEBUG("dequantize completed!\r\n"); //input - TENSOR bias = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-Variable_1_0.idx")); + S_TENSOR bias = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-Variable_1_0.idx", "bias")); //output - TENSOR output_z = ctx.add(new RamTensor(deqnt_out.lock()->getShape())); - S_TENSOR out_z = output_z.lock(); - ctx.push(new AddOp(), {deqnt_out, bias}, {output_z}); + S_TENSOR output_z = ctx.add(new RamTensor(deqnt_out->getShape(), "output_z")); + ctx.push(new AddOp(), {"deqnt_out", "bias"}, {"output_z"}); ctx.eval(); DEBUG("Add completed!\r\n"); @@ -235,10 +196,9 @@ class mlpTest : public Test { timer_stop(); //load reference - TENSOR ref_z = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-add_0.idx")); - S_TENSOR ref_z_v = ref_z.lock(); + S_TENSOR ref_z = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-add_0.idx", "ref_z")); - double result = meanPercentErr(ref_z_v.get(), out_z.get()); + double result = meanPercentErr(ref_z.get(), output_z.get()); passed(result < 0.0001); @@ -248,59 +208,53 @@ class mlpTest : public Test { testStart("runQntRelu"); - TENSOR input_z = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntRelu/in/import-add_0.idx")); - TENSOR reshape_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reshape_dims_0.idx")); - TENSOR reshape_out = ctx.add(new RamTensor()); + S_TENSOR input_z = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntRelu/in/import-add_0.idx", "input_z1")); + S_TENSOR reshape_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reshape_dims_0.idx", "reshape_dim1")); + S_TENSOR reshape_out = ctx.add(new RamTensor("reshape_out1")); timer_start(); - ctx.push(new 
ReshapeOp(), {input_z, reshape_dim}, {reshape_out}); + ctx.push(new ReshapeOp(), {"input_z1", "reshape_dim1"}, {"reshape_out1"}); //min //input - TENSOR min_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_min.idx")); + S_TENSOR min_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_min.idx", "min_reduce_dim1")); //output - TENSOR min_out = ctx.add(new RamTensor({1})); - ctx.push(new MinOp(), {reshape_out, min_reduce_dim}, {min_out}); + S_TENSOR min_out = ctx.add(new RamTensor({1}, "min_out1")); + ctx.push(new MinOp(), {"reshape_out1", "min_reduce_dim1"}, {"min_out1"}); //max //input - TENSOR max_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_max.idx")); + S_TENSOR max_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_max.idx", "max_reduce_dim1")); //output - TENSOR max_out = ctx.add(new RamTensor({1})); - ctx.push(new MaxOp(), {reshape_out, max_reduce_dim}, {max_out}); + S_TENSOR max_out = ctx.add(new RamTensor({1}, "max_out1")); + ctx.push(new MaxOp(), {"reshape_out1", "max_reduce_dim1"}, {"max_out1"}); //quantization //output - TENSOR qnt_out = ctx.add(new RamTensor()); - TENSOR qnt_min = ctx.add(new RamTensor({1})); - TENSOR qnt_max = ctx.add(new RamTensor({1})); - ctx.push(new QuantizeV2Op(), {reshape_out, min_out, max_out}, {qnt_out, qnt_min, qnt_max}); + S_TENSOR qnt_out = ctx.add(new RamTensor("qnt_out1")); + S_TENSOR qnt_min = ctx.add(new RamTensor({1}, "qnt_min1")); + S_TENSOR qnt_max = ctx.add(new RamTensor({1}, "qnt_max1")); + ctx.push(new QuantizeV2Op(), {"reshape_out1", "min_out1", "max_out1"}, {"qnt_out1", "qnt_min1", "qnt_max1"}); - TENSOR out = ctx.add(new RamTensor()); - TENSOR out_min = ctx.add(new RamTensor({1})); - TENSOR out_max = ctx.add(new RamTensor({1})); - - S_TENSOR out_val = out.lock(); - S_TENSOR out_min_val = out_min.lock(); - S_TENSOR out_max_val = out_max.lock(); - ctx.push(new ReluOp(), {qnt_out, qnt_min, qnt_max}, {out, out_min, out_max}); + S_TENSOR out = ctx.add(new RamTensor("out1")); + S_TENSOR out_min = ctx.add(new RamTensor({1}, "out_min1")); + S_TENSOR out_max = ctx.add(new RamTensor({1}, "out_max1")); + + ctx.push(new ReluOp(), {"qnt_out1", "qnt_min1", "qnt_max1"}, {"out1", "out_min1", "out_max1"}); ctx.eval(); timer_stop(); - TENSOR ref_out = - ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_0.idx")); - TENSOR ref_out_min = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_1.idx")); - TENSOR ref_out_max = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_2.idx")); - - S_TENSOR ref_val = ref_out.lock(); - S_TENSOR ref_min_val = ref_out_min.lock(); - S_TENSOR ref_max_val = ref_out_max.lock(); - double result = meanPercentErr(ref_val.get(), out_val.get()); - result += meanPercentErr(ref_min_val.get(), out_min_val.get()); - result += meanPercentErr(ref_max_val.get(), out_max_val.get()); + S_TENSOR ref_out = + ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_0.idx", "ref_out1")); + S_TENSOR ref_out_min = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_1.idx", "ref_out_min1")); + S_TENSOR ref_out_max = + 
ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_2.idx", "ref_out_max1")); + + double result = meanPercentErr(ref_out.get(), out.get()); + result += meanPercentErr(ref_out_min.get(), out_min.get()); + result += meanPercentErr(ref_out_max.get(), out_max.get()); passed(result == 0); From 13071ba65f3f29eec1edc5530b0bf801e3c335df Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sun, 19 Nov 2017 17:15:44 +0800 Subject: [PATCH 72/80] Math, Matrix, Context passed --- MathTests.hpp | 313 +++++++++++++++++++++++------------------------ MatrixTests.hpp | 51 +++----- context_test.hpp | 90 +++----------- main.cpp | 27 ++-- tensor.hpp | 12 +- 5 files changed, 208 insertions(+), 285 deletions(-) diff --git a/MathTests.hpp b/MathTests.hpp index d772f9c9..6d4fcab2 100644 --- a/MathTests.hpp +++ b/MathTests.hpp @@ -152,178 +152,169 @@ class MathOpsTest : public Test { passed(result == 0); } -// void argmaxTest(void) { // NT: WIP do not use t_import int 64 here -// testStart("argmax"); + void argmaxTest(void) { // NT: WIP do not use t_import int 64 here + testStart("argmax"); -// // reference inputs -// TENSOR ref_a = ctx.add(t_import.float_import("/fs/testData/ArgMax/in/ArgMax-input_0.idx")); -// TENSOR ref_dim = ctx.add(t_import.int_import("/fs/testData/ArgMax/in/ArgMax-dimension_0.idx")); + ctx.gc(); -// // reference outputs -// /// NT: FIXME: argmax outputs int64 tensor which isn't supported by -// /// int_import. -// TENSOR ref_out = ctx.add(t_import.float_import("/fs/testData/ArgMax/out/ArgMax_0.idx")); + // reference inputs + ctx.add(t_import.float_import("/fs/testData/ArgMax/in/ArgMax-input_0.idx", "ref_a")); + ctx.add(t_import.int_import("/fs/testData/ArgMax/in/ArgMax-dimension_0.idx", "ref_dim")); -// // Implementation goes here + // reference outputs + /// NT: FIXME: argmax outputs int64 tensor which isn't supported by + /// int_import. 
+ S_TENSOR ref_val = ctx.add(t_import.float_import("/fs/testData/ArgMax/out/ArgMax_0.idx", "ref_out")); + + // Implementation goes here -// // modify the checks below: -// TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); + // modify the checks below: + S_TENSOR out_val = ctx.add(new RamTensor(ref_val->getShape(), "out")); -// TList inputs = {ref_a, ref_dim}; -// TList outputs = {out}; - -// S_TENSOR ref_val = ref_out.lock(); -// S_TENSOR out_val = out.lock(); -// timer_start(); -// ctx.push(new ArgMaxOp(), inputs, outputs); -// ctx.eval(); -// timer_stop(); + TNameList inputs = {"ref_a", "ref_dim"}; + TNameList outputs = {"out"}; + + timer_start(); + ctx.push(new ArgMaxOp(), inputs, outputs); + ctx.eval(); + timer_stop(); -// Tensor* out_float = TensorCast(out_val.get()); - -// double result = meanPercentErr(ref_val.get(), out_float); - -// // passed(result < 0.0001); -// passed(result == 0); -// } - -// void argmaxTest2(void) { // NT: WIP do not use t_import int 64 here -// testStart("argmax2"); -// TENSOR test_input = ctx.add(TensorConstant({10, 5}, 0.0f)); -// *(test_input.lock()->write(25, 0)) = 1.0f; -// *(test_input.lock()->write(26, 0)) = 1.0f; -// *(test_input.lock()->write(7, 0)) = 1.0f; -// *(test_input.lock()->write(48, 0)) = 1.0f; -// *(test_input.lock()->write(14, 0)) = 1.0f; - -// TENSOR test_dim = ctx.add(new RamTensor({1})); -// *(test_dim.lock()->write(0, 0)) = 0; - -// TENSOR test_out_ref = ctx.add(new RamTensor({5})); -// *(test_out_ref.lock()->write(0, 0)) = 5.0f; -// *(test_out_ref.lock()->write(1, 0)) = 5.0f; -// *(test_out_ref.lock()->write(2, 0)) = 1.0f; -// *(test_out_ref.lock()->write(3, 0)) = 9.0f; -// *(test_out_ref.lock()->write(4, 0)) = 2.0f; - -// TENSOR test_out = ctx.add(new RamTensor(test_out_ref.lock()->getShape())); -// TList inputs = {test_input, test_dim}; -// TList outputs = {test_out}; -// S_TENSOR ref_val = test_out_ref.lock(); -// S_TENSOR out_val = test_out.lock(); - -// timer_start(); -// ctx.push(new ArgMaxOp(), inputs, outputs); -// ctx.eval(); -// timer_stop(); - -// double result = meanPercentErr(ref_val.get(), out_val.get()); -// passed(result < 0.0001); -// //passed(result == 0); -// } - -// void addTest(void) { -// testStart("add"); - -// // reference inputs -// TENSOR a = -// ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_5_0.idx")); -// TENSOR b = -// ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_6_0.idx")); - -// // reference outputs -// TENSOR ref_out = -// ctx.add(t_import.float_import("/fs/testData/ref_add/out/ref_add_0.idx")); - -// // Implementation goes here - -// // modify the checks below: -// TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); -// S_TENSOR out_vxx = out.lock(); -// S_TENSOR ref_vxx = ref_out.lock(); -// TList inputs = {a, b}; -// TList outputs = {out}; -// timer_start(); -// ctx.push(new AddOp(), inputs, outputs); -// ctx.eval(); -// timer_stop(); - -// double result = meanPercentErr(ref_vxx.get(), out_vxx.get()); -// passed(result < 0.0001); -// //passed(result == 0); -// } - -// void minTest(void) { -// testStart("min"); - -// // reference inputs -// TENSOR a = -// ctx.add(t_import.float_import("/fs/testData/ref_min/in/Const_2_0.idx")); -// TENSOR dim = -// ctx.add(t_import.int_import("/fs/testData/ref_min/in/Const_3_0.idx")); - -// // reference outputs -// TENSOR ref_out = -// ctx.add(t_import.float_import("/fs/testData/ref_min/out/ref_min_0.idx")); - -// // Implementation goes here - -// // modify the checks below: -// TENSOR out = ctx.add(new 
RamTensor(ref_out.lock()->getShape())); -// TList inputs = {a, dim}; -// TList outputs = {out}; -// S_TENSOR ref_val = ref_out.lock(); -// S_TENSOR out_val = out.lock(); -// timer_start(); -// ctx.push(new MinOp(), inputs, outputs); -// ctx.eval(); -// timer_stop(); - -// double result = meanPercentErr(ref_val.get(), out_val.get()); -// // passed(result < 0.0001); -// passed(result == 0); -// } - -// void maxTest(void) { -// testStart("max"); - -// // reference inputs -// TENSOR a = -// ctx.add(t_import.float_import("/fs/testData/ref_max/in/Const_2_0.idx")); -// TENSOR dim = -// ctx.add(t_import.int_import("/fs/testData/ref_max/in/Const_4_0.idx")); - -// // reference outputs -// TENSOR ref_out = -// ctx.add(t_import.float_import("/fs/testData/ref_max/out/ref_max_0.idx")); - -// // Implementation goes here - -// // modify the checks below: -// TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); -// TList inputs = {a, dim}; -// TList outputs = {out}; -// S_TENSOR ref_val = ref_out.lock(); -// S_TENSOR out_val = out.lock(); -// timer_start(); -// ctx.push(new MaxOp(), inputs, outputs); -// ctx.eval(); -// timer_stop(); - -// double result = meanPercentErr(ref_val.get(), out_val.get()); -// // passed(result < 0.0001); -// passed(result == 0); -// } + Tensor* out_float = TensorCast(out_val.get(), "out_float"); ///NT: /WIP how to handle the name? + + double result = meanPercentErr(ref_val.get(), out_float); + + // passed(result < 0.0001); + passed(result == 0); + } + + void argmaxTest2(void) { // NT: WIP do not use t_import int 64 here + testStart("argmax2"); + + ctx.gc(); + + S_TENSOR test_input = ctx.add(TensorConstant({10, 5}, 0.0f, "test_input")); + *(test_input->write(25, 0)) = 1.0f; + *(test_input->write(26, 0)) = 1.0f; + *(test_input->write(7, 0)) = 1.0f; + *(test_input->write(48, 0)) = 1.0f; + *(test_input->write(14, 0)) = 1.0f; + + S_TENSOR test_dim = ctx.add(new RamTensor({1}, "test_dim")); + *(test_dim->write(0, 0)) = 0; + + S_TENSOR ref_val = ctx.add(new RamTensor({5}, "test_out_ref")); + *(ref_val->write(0, 0)) = 5.0f; + *(ref_val->write(1, 0)) = 5.0f; + *(ref_val->write(2, 0)) = 1.0f; + *(ref_val->write(3, 0)) = 9.0f; + *(ref_val->write(4, 0)) = 2.0f; + + S_TENSOR out_val = ctx.add(new RamTensor(ref_val->getShape(), "test_out")); + TNameList inputs = {"test_input", "test_dim"}; + TNameList outputs = {"test_out"}; + + timer_start(); + ctx.push(new ArgMaxOp(), inputs, outputs); + ctx.eval(); + timer_stop(); + + double result = meanPercentErr(ref_val.get(), out_val.get()); + passed(result < 0.0001); + //passed(result == 0); + } + + void addTest(void) { + testStart("add"); + + // reference inputs + ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_5_0.idx", "a")); + ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_6_0.idx", "b")); + + // reference outputs + S_TENSOR ref_vxx = ctx.add(t_import.float_import("/fs/testData/ref_add/out/ref_add_0.idx", "ref_out")); + + // Implementation goes here + + // modify the checks below: + S_TENSOR out_vxx = ctx.add(new RamTensor(ref_vxx->getShape(), "out")); + TNameList inputs = {"a", "b"}; + TNameList outputs = {"out"}; + timer_start(); + ctx.push(new AddOp(), inputs, outputs); + ctx.eval(); + timer_stop(); + + double result = meanPercentErr(ref_vxx.get(), out_vxx.get()); + passed(result < 0.0001); + //passed(result == 0); + } + + void minTest(void) { + testStart("min"); + + ctx.gc(); + + // reference inputs + ctx.add(t_import.float_import("/fs/testData/ref_min/in/Const_2_0.idx", "a")); + 
ctx.add(t_import.int_import("/fs/testData/ref_min/in/Const_3_0.idx", "dim")); + + // reference outputs + S_TENSOR ref_val = ctx.add(t_import.float_import("/fs/testData/ref_min/out/ref_min_0.idx", "ref_out")); + + // Implementation goes here + + // modify the checks below: + S_TENSOR out_val = ctx.add(new RamTensor(ref_val->getShape(), "out")); + TNameList inputs = {"a", "dim"}; + TNameList outputs = {"out"}; + + timer_start(); + ctx.push(new MinOp(), inputs, outputs); + ctx.eval(); + timer_stop(); + + double result = meanPercentErr(ref_val.get(), out_val.get()); + // passed(result < 0.0001); + passed(result == 0); + } + + void maxTest(void) { + testStart("max"); + + ctx.gc(); + + // reference inputs + ctx.add(t_import.float_import("/fs/testData/ref_max/in/Const_2_0.idx", "a")); + ctx.add(t_import.int_import("/fs/testData/ref_max/in/Const_4_0.idx", "dim")); + + // reference outputs + S_TENSOR ref_val = ctx.add(t_import.float_import("/fs/testData/ref_max/out/ref_max_0.idx", "ref_out")); + + // Implementation goes here + + // modify the checks below: + S_TENSOR out_val = ctx.add(new RamTensor(ref_val->getShape(), "out")); + TNameList inputs = {"a", "dim"}; + TNameList outputs = {"out"}; + timer_start(); + ctx.push(new MaxOp(), inputs, outputs); + ctx.eval(); + timer_stop(); + + double result = meanPercentErr(ref_val.get(), out_val.get()); + // passed(result < 0.0001); + passed(result == 0); + } void runAll(void) { - //argmaxTest(); - // argmaxTest2(); + argmaxTest(); + argmaxTest2(); requantization_rangeTest(); requantizeTest(); requantizeTest2(); - // addTest(); - // minTest(); - // maxTest(); + addTest(); + minTest(); + maxTest(); } }; diff --git a/MatrixTests.hpp b/MatrixTests.hpp index d72815a9..b42fa0c4 100644 --- a/MatrixTests.hpp +++ b/MatrixTests.hpp @@ -13,59 +13,46 @@ class matrixOpsTest : public Test { void qMatMul(void) { testStart("Quantized Matrix Mul"); + + ctx.gc(); + //inputs - TENSOR a = - ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx")); - TENSOR a_min = - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx")); - TENSOR a_max = - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx")); - TENSOR b = - ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx")); - TENSOR b_min = - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx")); - TENSOR b_max = - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx")); + ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx", "a")); + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx", "a_min")); + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx", "a_max")); + ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx", "b")); + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx", "b_min")); + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx", "b_max")); // reference outputs - TENSOR c = - ctx.add(t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx")); - TENSOR c_min = - ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx")); - TENSOR c_max = - ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx")); + S_TENSOR c = ctx.add(t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx", "c")); + S_TENSOR c_min = ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx", "c_min")); + S_TENSOR c_max = ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx", "c_max")); //we need default constructor here //so we can get ride of 
the shapes here - TENSOR out_c = ctx.add(new RamTensor(c.lock()->getShape())); - TENSOR out_min = ctx.add(new RamTensor(c_min.lock()->getShape())); - TENSOR out_max = ctx.add(new RamTensor(c_max.lock()->getShape())); + S_TENSOR out_c = ctx.add(new RamTensor(c->getShape(), "out_c")); + S_TENSOR out_min = ctx.add(new RamTensor(c_min->getShape(), "out_min")); + S_TENSOR out_max = ctx.add(new RamTensor(c_max->getShape(), "out_max")); //TList inputs = {a, a_min, a_max, b, b_min, b_max}; //TList outputs = {out_c, out_min, out_max}; //if you want tensors to be alive after .eval() //copies of the share_pointer needs to be here - S_TENSOR ref_c_rptr = c.lock(); - S_TENSOR ref_min_rptr = c_min.lock(); - S_TENSOR ref_max_rptr = c_max.lock(); - S_TENSOR out_c_rptr = out_c.lock(); - S_TENSOR out_min_rptr = out_min.lock(); - S_TENSOR out_max_rptr = out_max.lock(); - timer_start(); //ctx.push(new QntMatMulOp(), inputs, outputs); ctx.push(new QntMatMulOp(), - {a, a_min, a_max, b, b_min, b_max}, - {out_c, out_min, out_max}); + {"a", "a_min", "a_max", "b", "b_min", "b_max"}, + {"out_c", "out_min", "out_max"}); ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_c_rptr.get(), out_c_rptr.get()) + meanPercentErr(ref_min_rptr.get(), out_min_rptr.get()) + - meanPercentErr(ref_max_rptr.get(), out_max_rptr.get()); + double result = meanPercentErr(c.get(), out_c.get()) + meanPercentErr(c_min.get(), out_min.get()) + + meanPercentErr(c_max.get(), out_max.get()); passed(result == 0); } diff --git a/context_test.hpp b/context_test.hpp index 5d1ff4c2..607628ed 100644 --- a/context_test.hpp +++ b/context_test.hpp @@ -18,99 +18,45 @@ class contextTest : public Test { Context ctx; public: - -// void MatMalTest(void) { -// testStart("Context QntMatMal Op"); -// //inputs -// TENSOR a = -// ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx")); -// TENSOR a_min = -// ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx")); -// TENSOR a_max = -// ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx")); -// TENSOR b = -// ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx")); -// TENSOR b_min = -// ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx")); -// TENSOR b_max = -// ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx")); - -// // reference outputs -// TENSOR c = -// ctx.add(t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx")); -// TENSOR c_min = -// ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx")); -// TENSOR c_max = -// ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx")); - - -// //we need default constructor here -// //so we can get ride of the shapes here -// TENSOR out_c = ctx.add(new RamTensor(c.lock()->getShape())); -// TENSOR out_min = ctx.add(new RamTensor(c_min.lock()->getShape())); -// TENSOR out_max = ctx.add(new RamTensor(c_max.lock()->getShape())); - -// TList inputs = {a, a_min, a_max, b, b_min, b_max}; -// TList outputs = {out_c, out_min, out_max}; - -// //if you want tensors to be alive after .eval() -// //copies of the share_pointer needs to be here -// S_TENSOR ref_c_rptr = c.lock(); -// S_TENSOR ref_min_rptr = c_min.lock(); -// S_TENSOR ref_max_rptr = c_max.lock(); -// S_TENSOR out_c_rptr = out_c.lock(); -// S_TENSOR out_min_rptr = out_min.lock(); -// S_TENSOR out_max_rptr = out_max.lock(); - - -// timer_start(); -// ctx.push(new QntMatMulOp(), inputs, outputs); -// ctx.eval(); -// timer_stop(); - -// double result = meanPercentErr(ref_c_rptr.get(), 
out_c_rptr.get()) + meanPercentErr(ref_min_rptr.get(), out_min_rptr.get()) + -// meanPercentErr(ref_max_rptr.get(), out_max_rptr.get()); - -// passed(result == 0); -// } - void RefCountTest(void) { testStart("Context Ref Count"); + + ctx.gc(); + timer_start(); //inputs - TENSOR a = ctx.add(new RamTensor({1,1,1})); - TENSOR b = ctx.add(new RamTensor({1,1,1})); - TENSOR c = ctx.add(new RamTensor({1,1,1})); + S_TENSOR a = ctx.add(new RamTensor({1,1,1}, "a")); + S_TENSOR b = ctx.add(new RamTensor({1,1,1}, "b")); + S_TENSOR c = ctx.add(new RamTensor({1,1,1}, "c")); //init values - *(a.lock()->write(0, 0)) = 1; - *(b.lock()->write(0, 0)) = 1; - *(c.lock()->write(0, 0)) = 1; + *(a->write(0, 0)) = 1; + *(b->write(0, 0)) = 1; + *(c->write(0, 0)) = 1; // reference outputs - TENSOR out = ctx.add(new RamTensor({1,1,1})); - S_TENSOR shr_out = out.lock(); + S_TENSOR out = ctx.add(new RamTensor({1,1,1}, "out")); - TList inputs0 = {a, b}; - TList outputs0 = {c}; //2 + TNameList inputs0 = {"a", "b"}; + TNameList outputs0 = {"c"}; //2 ctx.push(new AddOp(), inputs0, outputs0); - TList inputs1 = {c, a}; - TList outputs1 = {b}; //3 + TNameList inputs1 = {"c", "a"}; + TNameList outputs1 = {"b"}; //3 ctx.push(new AddOp(), inputs1, outputs1); - TList inputs2 = {a, b}; - TList outputs2 = {out}; //4 + TNameList inputs2 = {"a", "b"}; + TNameList outputs2 = {"out"}; //4 ctx.push(new AddOp(), inputs2, outputs2); ctx.eval(); timer_stop(); - if(a.lock() || b.lock() || c.lock() || !out.lock()) { + if(a.use_count() != 1 || b.use_count() != 1 || c.use_count() != 1 || out.use_count() != 2) { failed(); return; } - int result = *(shr_out->read(0, 0)); + int result = *(out->read(0, 0)); passed(result == 4); } diff --git a/main.cpp b/main.cpp index 6567db4c..2d906bd3 100644 --- a/main.cpp +++ b/main.cpp @@ -7,11 +7,11 @@ #include "tensorIdxImporterTests.hpp" #include "context.hpp" #include "MathTests.hpp" -/*#include "ArrayTests.hpp" +#include "MatrixTests.hpp" #include "context_test.hpp" +/*#include "ArrayTests.hpp" #include "tensor_test.hpp" #include "NnTests.hpp" -#include "MatrixTests.hpp" #include "mlp_test.hpp" #include "deep_mnist_mlp.hpp"*/ @@ -42,23 +42,22 @@ int main(int argc, char** argv) { printf("Math result...\r\n"); mathTests.printSummary(); + printf("running matrix test ...\r\n"); + matrixOpsTest matrixTests; + matrixTests.runAll(); + matrixTests.printSummary(); + + printf("Context test: \r\n"); + contextTest ctxTest; + ctxTest.runAll(); + printf("Context result...\r\n"); + ctxTest.printSummary(); + /* ArrayOpsTest arrayTests; arrayTests.runAll(); printf("Array: \r\n"); arrayTests.printSummary(); - - printf("running matrix test ...\r\n"); - matrixOpsTest matrixTests; - matrixTests.runAll(); - matrixTests.printSummary(); - - printf("Context test: \r\n"); - contextTest ctxTest; - ctxTest.runAll(); - printf("Context result...\r\n"); - ctxTest.printSummary(); - printf("NnOpS: \r\n"); NnOpsTest nnTest; nnTest.runAll(); diff --git a/tensor.hpp b/tensor.hpp index cd5719f0..76ee6040 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -218,8 +218,8 @@ class RamTensor : public Tensor { }; template -Tensor* TensorCast(Tensor* input) { - Tensor* output = new RamTensor(input->getShape()); +Tensor* TensorCast(Tensor* input, TName name) { + Tensor* output = new RamTensor(input->getShape(), name); const Tin* inputPrt = input->read(0, 0); Tout* outputPrt = output->write(0, 0); @@ -231,8 +231,8 @@ Tensor* TensorCast(Tensor* input) { } template -Tensor* TensorConstant(std::vector shape, T c) { - Tensor* output = new RamTensor(shape); 
+Tensor* TensorConstant(std::vector shape, T c, TName const &name) { + Tensor* output = new RamTensor(shape, name); T* outPrt = output->write(0, 0); for (uint32_t i = 0; i < output->getSize(); i++) { @@ -243,13 +243,13 @@ Tensor* TensorConstant(std::vector shape, T c) { } template -Tensor* TensorConstant(std::initializer_list l, T c) { +Tensor* TensorConstant(std::initializer_list l, T c, TName const &name) { std::vector v; for (auto i : l) { v.push_back(i); } - return TensorConstant(v, c); + return TensorConstant(v, c, name); } // From 13872d7e9bafc10d7d1fb92d31740b769b688344 Mon Sep 17 00:00:00 2001 From: kazami Date: Sun, 19 Nov 2017 18:13:09 +0800 Subject: [PATCH 73/80] 1. pass run deep mlp demo for name lookup 2. modify tensorCast for name lookup --- deep_mnist_mlp.cpp | 240 +++++++++++++++++++++------------------------ deep_mnist_mlp.hpp | 14 +-- main.cpp | 12 +-- tensor.hpp | 4 +- 4 files changed, 129 insertions(+), 141 deletions(-) diff --git a/deep_mnist_mlp.cpp b/deep_mnist_mlp.cpp index a57789bb..511bff7d 100644 --- a/deep_mnist_mlp.cpp +++ b/deep_mnist_mlp.cpp @@ -1,195 +1,184 @@ -#ifdef __DEEP_MNIST_MLP_HPP__ #include "deep_mnist_mlp.hpp" -void tensorQuantize(Context& ctx, TENSOR input, TENSOR output, - TENSOR out_min, TENSOR out_max) { +void tensorQuantize(Context& ctx, TName input, TName output, + TName out_min, TName out_max) { //reshape - TENSOR reduce_dim = ctx.add(new RamTensor({1})); - TENSOR reshape_out = ctx.add(new RamTensor()); + S_TENSOR reduce_dim = ctx.add(new RamTensor({1}, "reduce_dim")); + S_TENSOR reshape_out = ctx.add(new RamTensor("reshape_out")); - TENSOR reshape_shape = ctx.add(new RamTensor()); + S_TENSOR reshape_shape = ctx.add(new RamTensor("reshape_shape")); - *(reduce_dim.lock()->write(0, 0)) = 0; - ctx.push(new ReshapeOp(), {input, reshape_shape}, {reshape_out}); + *(reduce_dim->write(0, 0)) = 0; + ctx.push(new ReshapeOp(), {input, "reshape_shape"}, {"reshape_out"}); //Min and Max of (reshaped) input - TENSOR min_out = ctx.add(new RamTensor({1})); - TENSOR max_out = ctx.add(new RamTensor({1})); - ctx.push(new MinOp(), {reshape_out, reduce_dim}, {min_out}); - ctx.push(new MaxOp(), {reshape_out, reduce_dim}, {max_out}); + S_TENSOR min_out = ctx.add(new RamTensor({1}, "min_out")); + S_TENSOR max_out = ctx.add(new RamTensor({1}, "max_out")); + ctx.push(new MinOp(), {"reshape_out", "reduce_dim"}, {"min_out"}); + ctx.push(new MaxOp(), {"reshape_out", "reduce_dim"}, {"max_out"}); - ctx.push(new QuantizeV2Op(), {reshape_out, min_out, max_out}, {output, out_min, out_max}); + ctx.push(new QuantizeV2Op(), {"reshape_out", "min_out", "max_out"}, {output, out_min, out_max}); } -void ReluLayer(Context& ctx, TENSOR x, TENSOR x_min, TENSOR x_max, - TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR b, - TENSOR z_output) { +void ReluLayer(Context& ctx, TName x, TName x_min, TName x_max, + TName w, TName w_min, TName w_max, TName b, + TName z_output) { //quantized matmul - TENSOR out_c = ctx.add(new RamTensor()); + S_TENSOR out_c = ctx.add(new RamTensor("out_c")); - TENSOR matmul_out_min = ctx.add(new RamTensor({1})); - TENSOR matmul_out_max = ctx.add(new RamTensor({1})); + S_TENSOR matmul_out_min = ctx.add(new RamTensor({1}, "matmul_out_min")); + S_TENSOR matmul_out_max = ctx.add(new RamTensor({1}, "matmul_out_max")); - ctx.push(new QntMatMulOp(), {x, x_min, x_max, w, w_min, w_max}, {out_c, matmul_out_min, matmul_out_max}); + ctx.push(new QntMatMulOp(), {x, x_min, x_max, w, w_min, w_max}, {"out_c", "matmul_out_min", "matmul_out_max"}); //Requantization_Range - TENSOR 
req_out_min = ctx.add(new RamTensor({1})); - TENSOR req_out_max = ctx.add(new RamTensor({1})); - ctx.push(new Requantization_RangeOp(), {out_c, matmul_out_min, matmul_out_max}, {req_out_min, req_out_max}); + S_TENSOR req_out_min = ctx.add(new RamTensor({1}, "req_out_min")); + S_TENSOR req_out_max = ctx.add(new RamTensor({1}, "req_out_max")); + ctx.push(new Requantization_RangeOp(), {"out_c", "matmul_out_min", "matmul_out_max"}, {"req_out_min", "req_out_max"}); //Requantize - TENSOR reqnt_out = ctx.add(new RamTensor()); - TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); - TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); - ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, {reqnt_out, reqnt_out_min, reqnt_out_max}); + S_TENSOR reqnt_out = ctx.add(new RamTensor("reqnt_out")); + S_TENSOR reqnt_out_min = ctx.add(new RamTensor({1}, "reqnt_out_min")); + S_TENSOR reqnt_out_max = ctx.add(new RamTensor({1}, "reqnt_out_max")); + ctx.push(new RequantizeOp(), {"out_c", "matmul_out_min", "matmul_out_max", "req_out_min", "req_out_max"}, {"reqnt_out", "reqnt_out_min", "reqnt_out_max"}); - Shape out_shape = out_c.lock()->getShape(); + Shape out_shape = out_c->getShape(); //clean up - TENSOR deqnt_out = ctx.add(new RamTensor()); - ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); + S_TENSOR deqnt_out = ctx.add(new RamTensor("deqnt_out")); + ctx.push(new DequantizeOp(), {"reqnt_out", "reqnt_out_min", "reqnt_out_max"}, {"deqnt_out"}); - ctx.push(new AddOp(), {deqnt_out, b}, {z_output}); + ctx.push(new AddOp(), {"deqnt_out", b}, {z_output}); } -void PredLayer(Context &ctx, TENSOR input, TENSOR input_min, - TENSOR input_max, TENSOR output, TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR bias, TENSOR dim) { +void PredLayer(Context &ctx, TName input, TName input_min, + TName input_max, TName output, TName w, TName w_min, TName w_max, TName bias, TName dim) { - TENSOR out_c = ctx.add(new RamTensor()); - TENSOR matmul_out_min = ctx.add(new RamTensor({1})); - TENSOR matmul_out_max = ctx.add(new RamTensor({1})); + S_TENSOR out_mat_pred = ctx.add(new RamTensor("out_mat_pred")); + S_TENSOR matmul_out_min_pred = ctx.add(new RamTensor({1}, "matmul_out_min_pred")); + S_TENSOR matmul_out_max_pred = ctx.add(new RamTensor({1}, "matmul_out_max_pred")); //MatMul ctx.push(new QntMatMulOp(), {input, input_min, input_max, w, w_min, w_max}, - {out_c, matmul_out_min, matmul_out_max}); + {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred"}); //Requantization_Range - TENSOR req_out_min = ctx.add(new RamTensor({1})); - TENSOR req_out_max = ctx.add(new RamTensor({1})); - ctx.push(new Requantization_RangeOp(), {out_c, matmul_out_min, matmul_out_max}, - {req_out_min, req_out_max}); + S_TENSOR req_out_min = ctx.add(new RamTensor({1}, "req_out_min_pred")); + S_TENSOR req_out_max = ctx.add(new RamTensor({1}, "req_out_max_pred")); + ctx.push(new Requantization_RangeOp(), {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred"}, + {"req_out_min_pred", "req_out_max_pred"}); //Requantize - TENSOR reqnt_out = ctx.add(new RamTensor()); - TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); - TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); - ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, - {reqnt_out, reqnt_out_min, reqnt_out_max}); + S_TENSOR reqnt_out = ctx.add(new RamTensor("reqnt_out_pred")); + S_TENSOR reqnt_out_min = ctx.add(new RamTensor({1}, "reqnt_out_min_pred")); + S_TENSOR reqnt_out_max 
= ctx.add(new RamTensor({1}, "reqnt_out_max_pred")); + ctx.push(new RequantizeOp(), {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred", "req_out_min_pred", "req_out_max_pred"}, + {"reqnt_out_pred", "reqnt_out_min_pred", "reqnt_out_max_pred"}); //dequantize - TENSOR deqnt_out = ctx.add(new RamTensor()); - ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); + S_TENSOR deqnt_out = ctx.add(new RamTensor("deqnt_out_pred")); + ctx.push(new DequantizeOp(), {"reqnt_out_pred", "reqnt_out_min_pred", "reqnt_out_max_pred"}, {"deqnt_out_pred"}); //Add - TENSOR output_z = ctx.add(new RamTensor()); - ctx.push(new AddOp(), {deqnt_out, bias}, {output_z}); + S_TENSOR output_z = ctx.add(new RamTensor("output_z_pred")); + ctx.push(new AddOp(), {"deqnt_out_pred", bias}, {"output_z_pred"}); //ArgMax - ctx.push(new ArgMaxOp(), {output_z, dim}, {output}); + ctx.push(new ArgMaxOp(), {"output_z_pred", dim}, {output}); } int runMLP(string inputIdxFile) { TensorIdxImporter t_import; Context ctx; - TENSOR x_quantized = ctx.add(new RamTensor()); - TENSOR x_min = ctx.add(new RamTensor({1})); - TENSOR x_max = ctx.add(new RamTensor({1})); - TENSOR x = ctx.add(t_import.float_import(inputIdxFile)); - S_TENSOR xs_quantized = x_quantized.lock(); - S_TENSOR xs_min = x_min.lock(); - S_TENSOR xs_max = x_max.lock(); - - tensorQuantize(ctx, x, x_quantized, x_min, x_max); + S_TENSOR x_quantized = ctx.add(new RamTensor("x_quantized")); + S_TENSOR x_min = ctx.add(new RamTensor({1}, "x_min")); + S_TENSOR x_max = ctx.add(new RamTensor({1}, "x_max")); + S_TENSOR x = ctx.add(t_import.float_import(inputIdxFile, "x")); + + tensorQuantize(ctx, "x", "x_quantized", "x_min", "x_max"); ctx.eval(); //relu layer first - TENSOR w = ctx.add(t_import.ubyte_import( - "/fs/testData/deep_mlp/import-Variable_quint8_const_0.idx")); - TENSOR w_min = - ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_min_0.idx")); - TENSOR w_max = - ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_max_0.idx")); - TENSOR b = - ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_1_0.idx")); - TENSOR relu_output = ctx.add(new RamTensor()); - TENSOR relu_min = ctx.add(new RamTensor({1})); - TENSOR relu_max = ctx.add(new RamTensor({1})); - S_TENSOR relus_output = relu_output.lock(); - S_TENSOR relus_min = relu_min.lock(); - S_TENSOR relus_max = relu_max.lock(); - TENSOR z_output = ctx.add(new RamTensor()); - - ReluLayer(ctx, x_quantized, x_min, x_max, w, w_min, w_max, b, z_output); - - TENSOR z_qnt_output = ctx.add(new RamTensor()); - TENSOR z_min = ctx.add(new RamTensor({1})); - TENSOR z_max = ctx.add(new RamTensor({1})); - tensorQuantize(ctx, z_output, z_qnt_output, z_min, z_max); - - ctx.push(new ReluOp(), {z_qnt_output, z_min, z_max}, {relu_output, relu_min, relu_max}); + S_TENSOR w = ctx.add(t_import.ubyte_import( + "/fs/testData/deep_mlp/import-Variable_quint8_const_0.idx", "w")); + S_TENSOR w_min = + ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_min_0.idx", "w_min")); + S_TENSOR w_max = + ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_max_0.idx", "w_max")); + S_TENSOR b = + ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_1_0.idx", "b")); + S_TENSOR relu_output = ctx.add(new RamTensor("relu_output")); + S_TENSOR relu_min = ctx.add(new RamTensor({1}, "relu_min")); + S_TENSOR relu_max = ctx.add(new RamTensor({1}, "relu_max")); + S_TENSOR z_output = ctx.add(new RamTensor("z_output")); + + ReluLayer(ctx, 
"x_quantized", "x_min", "x_max", "w", "w_min", "w_max", "b", "z_output"); + + S_TENSOR z_qnt_output = ctx.add(new RamTensor("z_qnt_output")); + S_TENSOR z_min = ctx.add(new RamTensor({1}, "z_min")); + S_TENSOR z_max = ctx.add(new RamTensor({1}, "z_max")); + tensorQuantize(ctx, "z_output", "z_qnt_output", "z_min", "z_max"); + + ctx.push(new ReluOp(), {"z_qnt_output", "z_min", "z_max"}, {"relu_output", "relu_min", "relu_max"}); ctx.eval(); //relu layer 2 - TENSOR w2 = ctx.add(t_import.ubyte_import( - "/fs/testData/deep_mlp/import-Variable_2_quint8_const_0.idx")); - TENSOR w_min2 = ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/import-Variable_2_min_0.idx")); - TENSOR w_max2 = ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/import-Variable_2_max_0.idx")); - TENSOR b2 = ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_3_0.idx")); - TENSOR relu_output2 = ctx.add(new RamTensor()); - TENSOR relu_min2 = ctx.add(new RamTensor({1})); - TENSOR relu_max2 = ctx.add(new RamTensor({1})); - - S_TENSOR relus_output2 = relu_output2.lock(); - S_TENSOR relus_min2 = relu_min2.lock(); - S_TENSOR relus_max2 = relu_max2.lock(); - TENSOR z_output2 = ctx.add(new RamTensor()); - ReluLayer(ctx, relu_output, relu_min, relu_max, w2, w_min2, w_max2, b2, z_output2); - - - TENSOR z_qnt_output2 = ctx.add(new RamTensor()); - TENSOR z_min2 = ctx.add(new RamTensor({1})); - TENSOR z_max2 = ctx.add(new RamTensor({1})); - tensorQuantize(ctx, z_output2, z_qnt_output2, z_min2, z_max2); - - ctx.push(new ReluOp(), {z_qnt_output2, z_min2, z_max2}, {relu_output2, relu_min2, relu_max2}); + S_TENSOR w2 = ctx.add(t_import.ubyte_import( + "/fs/testData/deep_mlp/import-Variable_2_quint8_const_0.idx", "w2")); + S_TENSOR w_min2 = ctx.add(t_import.float_import( + "/fs/testData/deep_mlp/import-Variable_2_min_0.idx", "w_min2")); + S_TENSOR w_max2 = ctx.add(t_import.float_import( + "/fs/testData/deep_mlp/import-Variable_2_max_0.idx", "w_max2")); + S_TENSOR b2 = ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_3_0.idx", "b2")); + S_TENSOR relu_output2 = ctx.add(new RamTensor("relu_output2")); + S_TENSOR relu_min2 = ctx.add(new RamTensor({1}, "relu_min2")); + S_TENSOR relu_max2 = ctx.add(new RamTensor({1}, "relu_max2")); + + S_TENSOR z_output2 = ctx.add(new RamTensor("z_output2")); + ReluLayer(ctx, "relu_output", "relu_min", "relu_max", "w2", "w_min2", "w_max2", "b2", "z_output2"); + + + S_TENSOR z_qnt_output2 = ctx.add(new RamTensor("z_qnt_output2")); + S_TENSOR z_min2 = ctx.add(new RamTensor({1}, "z_min2")); + S_TENSOR z_max2 = ctx.add(new RamTensor({1}, "z_max2")); + tensorQuantize(ctx, "z_output2", "z_qnt_output2", "z_min2", "z_max2"); + + ctx.push(new ReluOp(), {"z_qnt_output2", "z_min2", "z_max2"}, {"relu_output2", "relu_min2", "relu_max2"}); ctx.eval(); - TENSOR w3 = ctx.add(t_import.ubyte_import( + S_TENSOR w3 = ctx.add(t_import.ubyte_import( "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_quint8_const_0.idx")); - TENSOR w2_min = ctx.add(t_import.float_import( + "inputs/Variable_4_quint8_const_0.idx", "w3")); + S_TENSOR w2_min = ctx.add(t_import.float_import( "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_min_0.idx")); - TENSOR w2_max = ctx.add(t_import.float_import( + "inputs/Variable_4_min_0.idx", "w2_min")); + S_TENSOR w2_max = ctx.add(t_import.float_import( "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_max_0.idx")); - TENSOR bias2 = 
ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/add_2/inputs/Variable_5_0.idx")); - TENSOR dim = ctx.add(t_import.int_import( + "inputs/Variable_4_max_0.idx", "w2_max")); + S_TENSOR bias2 = ctx.add(t_import.float_import( + "/fs/testData/deep_mlp/runPredLayer/add_2/inputs/Variable_5_0.idx", "bias2")); + S_TENSOR dim = ctx.add(t_import.int_import( "/fs/testData/deep_mlp/runPredLayer/y_pred/inputs/" - "y_pred-dimension_0.idx")); + "y_pred-dimension_0.idx", "dim")); - TENSOR pred = ctx.add(new RamTensor()); - PredLayer(ctx, relu_output2, relu_min2, relu_max2, pred, w3, w2_min, w2_max, bias2, dim); - S_TENSOR pred_val = pred.lock(); + S_TENSOR pred = ctx.add(new RamTensor("pred")); + PredLayer(ctx, "relu_output2", "relu_min2", "relu_max2", "pred", "w3", "w2_min", "w2_max", "bias2", "dim"); ctx.eval(); Tensor* ref_out = t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/y_pred/outputs/y_pred_0.idx"); - Tensor* ref_pred = TensorCast(ref_out); + "/fs/testData/deep_mlp/runPredLayer/y_pred/outputs/y_pred_0.idx", "ref_out"); + Tensor* ref_pred = TensorCast(ref_out, "ref_pred"); - double result = Test::meanPercentErr(ref_pred, pred_val.get()); + double result = Test::meanPercentErr(ref_pred, pred.get()); if (result < 0.0001) { printf("PASSED %.8f\r\n\r\n", result); @@ -197,7 +186,6 @@ int runMLP(string inputIdxFile) { printf("FAILED %.8f\r\n\r\n", result); } - return *(pred.lock()->read(0, 0)); + return *(pred->read(0, 0)); // output layer } -#endif diff --git a/deep_mnist_mlp.hpp b/deep_mnist_mlp.hpp index dd649cd9..39966b06 100644 --- a/deep_mnist_mlp.hpp +++ b/deep_mnist_mlp.hpp @@ -12,15 +12,15 @@ #include "uTensorBase.hpp" #include "context.hpp" -void tensorQuantize(Context& ctx, TENSOR input, TENSOR output, - TENSOR out_min, TENSOR out_max); +void tensorQuantize(Context& ctx, TName input, TName output, + TName out_min, TName out_max); -void ReluLayer(Context& ctx, TENSOR x, TENSOR x_min, TENSOR x_max, - TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR b, - TENSOR z_output); +void ReluLayer(Context& ctx, TName x, TName x_min, TName x_max, + TName w, TName w_min, TName w_max, TName b, + TName z_output); -void PredLayer(Context &ctx, TENSOR input, TENSOR input_min, - TENSOR input_max, TENSOR output, TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR bias, TENSOR dim); +void PredLayer(Context &ctx, TName input, TName input_min, + TName input_max, TName output, TName w, TName w_min, TName w_max, TName bias, TName dim); int runMLP(string inputIdxFile); diff --git a/main.cpp b/main.cpp index 9f7d6faf..79f20f60 100644 --- a/main.cpp +++ b/main.cpp @@ -11,10 +11,10 @@ #include "NnTests.hpp" #include "tensor_test.hpp" #include "mlp_test.hpp" +#include "deep_mnist_mlp.hpp" /*#include "context_test.hpp" #include "MathTests.hpp" -#include "MatrixTests.hpp" -#include "deep_mnist_mlp.hpp"*/ +#include "MatrixTests.hpp"*/ Serial pc(USBTX, USBRX, 115200); SDBlockDevice bd(MBED_CONF_APP_SD_MOSI, MBED_CONF_APP_SD_MISO, @@ -28,8 +28,8 @@ int main(int argc, char** argv) { printf("Deep MLP on Mbed (Trained with Tensorflow)\r\n\r\n"); printf("running deep-mlp...\r\n"); -// int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); -// printf("prediction: %d\r\n", prediction); + int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); + printf("prediction: %d\r\n", prediction); idxImporterTest idxTest; idxTest.runAll(); @@ -85,11 +85,11 @@ int main(int argc, char** argv) { - printf("mlp test: \r\n"); +/* printf("mlp test: \r\n"); mlpTest mlpt; mlpt.runAll(); 
printf("mlp result...\r\n"); - mlpt.printSummary(); + mlpt.printSummary();*/ diff --git a/tensor.hpp b/tensor.hpp index cd5719f0..8b9ccb0e 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -218,8 +218,8 @@ class RamTensor : public Tensor { }; template -Tensor* TensorCast(Tensor* input) { - Tensor* output = new RamTensor(input->getShape()); +Tensor* TensorCast(Tensor* input, TName name) { + Tensor* output = new RamTensor(input->getShape(), name); const Tin* inputPrt = input->read(0, 0); Tout* outputPrt = output->write(0, 0); From 1e58e90cf7731b5b3621f79166d937d56936f555 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Sun, 19 Nov 2017 18:33:03 +0800 Subject: [PATCH 74/80] updated readme --- README.md | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fec3ef50..d81149a0 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,14 @@ ## Introduction - uTensor is an extremely light-weight Deep-Learning Inference framework built on mbed and Tensorflow. + uTensor is an extremely light-weight Deep-Learning Inference framework built on Mbed and Tensorflow: + + - TensorFlow to uTensor exporter (planned) + - Tensor Classes + - Operators Classes + - Context, a resource and graph management class - This project is under going constant development. + This project is under going constant development. APIs are expected to update rapidly. ## Requirement @@ -55,6 +60,21 @@ prediction: 7 Currently, the binary runs the first sample of the [MNIST dataset](http://yann.lecun.com/exdb/mnist/) which contains a handwritten digit of number 7. Ths network architecture is a 3-layer Relu based MLP, as shown below: ![alt text](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mlp_mnist.png "mxnet Handwritten Digit Recognition") + + The related Tensorflow training script please refer to the [node-viewer](https://github.com/neil-tan/tf-node-viewer/blob/master/deep_mlp.py) project. + +##Exporting to uTensor + + At this time, exporting models to uTensor requires the [node-viewer](https://github.com/neil-tan/tf-node-viewer/) project. This tool is designed to view navigate TensorFlow graphs and export tensors to idx files. The idx files are then used by the uTensor. For further instruction, please refer to this [wiki-page](https://github.com/neil-tan/uTensor/wiki/Tensorflow-model-exporting-note). + + uTensor-CLI is a tool planned for the next releases. It would streamline the process of moving TensorFlow models to uTensor environment. + +##Reference + +- [TensorFlow](https://www.tensorflow.org) +- [Mbed](https://developer.mbed.org) +- [Node-Viewer](https://github.com/neil-tan/tf-node-viewer/) +- [How to Quantize Neural Networks with TensorFlow](https://petewarden.com/2016/05/03/how-to-quantize-neural-networks-with-tensorflow/) +- [mxnet Handwritten Digit Recognition](https://mxnet.incubator.apache.org/tutorials/python/mnist.html) + - - The related Tensorflow training script please refer to the [node-viewer](https://github.com/neil-tan/tf-node-viewer/blob/master/deep_mlp.py) project. 
From 0b884b6b87eedcb7f287371deb1f298755d17bde Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Wed, 29 Nov 2017 16:31:33 -0800 Subject: [PATCH 75/80] context stateful wip --- context.cpp | 7 ++++++- context_test.hpp | 16 +++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/context.cpp b/context.cpp index ddaebbca..d244ede2 100644 --- a/context.cpp +++ b/context.cpp @@ -3,6 +3,7 @@ S_TENSOR Context::add(Tensor* t, uint8_t init_count) { if(t == nullptr) { ERR_EXIT("null pointer tensor"); } if(rTable.find(t->getName()) != rTable.end()) { + ///NT: TODO: check stateful here ERR_EXIT("tensor with name \"%s\" address already exist in rTable", t->getName().c_str()); } @@ -143,7 +144,11 @@ int Context::eval(void) { dcrListRef(op->getInputs()); - delete op; + delete op; ///NT: TODO: replace this with a cleanupOp(op) method + ///context would require a new op record table + ///addStateful(Ops, name) + ///push(opName, ...) + ///Can you pass constructor as a reference? } diff --git a/context_test.hpp b/context_test.hpp index 607628ed..0e887de9 100644 --- a/context_test.hpp +++ b/context_test.hpp @@ -17,6 +17,16 @@ class contextTest : public Test { TensorIdxImporter t_import; Context ctx; +private: + + TName codeGenStatfulHelper(TName input) { + ctx.add(TensorConstant({1}, 1, "incr_val")); + ctx.add(new RamTensor(ref_out->getShape(), "out")); //gc problem? + ctx.push(new AddOp(), {"incr_val", input}, output); + + return output; + } + public: void RefCountTest(void) { testStart("Context Ref Count"); @@ -61,10 +71,14 @@ class contextTest : public Test { } + void codeGenTemplate(void) { + ctx.gc(); + } + void runAll(void) { - // MatMalTest(); RefCountTest(); + codeGenTemplate(); } }; From 5209e03706e4b93abc1740d02a726af41d925f96 Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Wed, 29 Nov 2017 17:22:31 -0800 Subject: [PATCH 76/80] codeGenTemplate test passed --- context_test.hpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/context_test.hpp b/context_test.hpp index 0e887de9..8db717b6 100644 --- a/context_test.hpp +++ b/context_test.hpp @@ -19,12 +19,11 @@ class contextTest : public Test { private: - TName codeGenStatfulHelper(TName input) { - ctx.add(TensorConstant({1}, 1, "incr_val")); - ctx.add(new RamTensor(ref_out->getShape(), "out")); //gc problem? - ctx.push(new AddOp(), {"incr_val", input}, output); - - return output; + void codeGenStatfulHelper(TName state) { + ctx.add(TensorConstant({1}, 1, "incr_val")); + ctx.add(new RamTensor({1}, "out")); //gc problem? 
+ ctx.push(new AddOp(), {"incr_val", state}, {"out"}); + ctx.eval(); } public: @@ -72,7 +71,20 @@ class contextTest : public Test { } void codeGenTemplate(void) { + testStart("codeGenTemplate"); ctx.gc(); + S_TENSOR state = ctx.add(TensorConstant({1}, 0, "state"), 255); + S_TENSOR out; + for(auto i = 0; i < 5; i++) { + codeGenStatfulHelper("state"); + out = ctx.get("out"); + *(state->write(0, 0)) = *(out->read(0, 0)); + ctx.gc(); + } + + int result = *(out->read(0, 0)); + passed(result == 5); + } From 66468d8b9ffa66160c082ec7a08e1b24fc1a668f Mon Sep 17 00:00:00 2001 From: Neil Tan Date: Fri, 1 Dec 2017 23:22:56 -0800 Subject: [PATCH 77/80] context lambda wip --- ArrayTests.hpp | 42 +++--- MathTests.hpp | 114 +++++++-------- context.cpp | 10 +- context.hpp | 9 +- context_test.hpp | 23 +-- deep_mnist_mlp.cpp | 350 ++++++++++++++++++++++----------------------- main.cpp | 43 +++--- tensor_test.hpp | 10 +- 8 files changed, 303 insertions(+), 298 deletions(-) diff --git a/ArrayTests.hpp b/ArrayTests.hpp index 36d40d79..95cb3200 100644 --- a/ArrayTests.hpp +++ b/ArrayTests.hpp @@ -15,22 +15,22 @@ class ArrayOpsTest : public Test { testStart("quantize_v2"); //reference inputs /Users/neitan01/Documents/mbed/uTensor.git/TESTS/scripts/PRE-GEN/qA - S_TENSOR b_q_ref = ctx.add(t_import.float_import ("/fs/testData/qB/in/Cast_1_0.idx", "b_q_ref")); - S_TENSOR b_min_q_ref = ctx.add(t_import.float_import("/fs/testData/qB/in/Min_1_0.idx", "b_min_q_ref")); - S_TENSOR b_max_q_ref = ctx.add(t_import.float_import("/fs/testData/qB/in/Max_1_0.idx", "b_max_q_ref")); + S_TENSOR b_q_ref = ctx.add(defer(t_import.float_import ("/fs/testData/qB/in/Cast_1_0.idx", "b_q_ref"))); + S_TENSOR b_min_q_ref = ctx.add(defer(t_import.float_import("/fs/testData/qB/in/Min_1_0.idx", "b_min_q_ref"))); + S_TENSOR b_max_q_ref = ctx.add(defer(t_import.float_import("/fs/testData/qB/in/Max_1_0.idx", "b_max_q_ref"))); //reference outputs - S_TENSOR ref_b_q = ctx.add(t_import.ubyte_import("/fs/testData/qB/out/qB_0.idx", "ref_b_q")); - S_TENSOR ref_b_min_q = ctx.add(t_import.float_import("/fs/testData/qB/out/qB_1.idx", "ref_b_min_q")); - S_TENSOR ref_b_max_q = ctx.add(t_import.float_import("/fs/testData/qB/out/qb_2.idx", "ref_b_max_q")); + S_TENSOR ref_b_q = ctx.add(defer(t_import.ubyte_import("/fs/testData/qB/out/qB_0.idx", "ref_b_q"))); + S_TENSOR ref_b_min_q = ctx.add(defer(t_import.float_import("/fs/testData/qB/out/qB_1.idx", "ref_b_min_q"))); + S_TENSOR ref_b_max_q = ctx.add(defer(t_import.float_import("/fs/testData/qB/out/qb_2.idx", "ref_b_max_q"))); - S_TENSOR out_b_q = ctx.add(new RamTensor(b_q_ref->getShape(), "b_q")); - S_TENSOR out_b_min_q = ctx.add(new RamTensor(b_min_q_ref->getShape(), "b_min_q")); - S_TENSOR out_b_max_q = ctx.add(new RamTensor(b_max_q_ref->getShape(), "b_max_q")); + S_TENSOR out_b_q = ctx.add(defer(new RamTensor(b_q_ref->getShape(), "b_q"))); + S_TENSOR out_b_min_q = ctx.add(defer(new RamTensor(b_min_q_ref->getShape(), "b_min_q"))); + S_TENSOR out_b_max_q = ctx.add(defer(new RamTensor(b_max_q_ref->getShape(), "b_max_q"))); //Implementation goes here timer_start(); - ctx.push(new QuantizeV2Op(), {"b_q_ref", "b_min_q_ref", "b_max_q_ref"}, {"b_q", "b_min_q", "b_max_q"}); + ctx.push(defer(new QuantizeV2Op()), {"b_q_ref", "b_min_q_ref", "b_max_q_ref"}, {"b_q", "b_min_q", "b_max_q"}); ctx.eval(); timer_stop(); @@ -44,18 +44,18 @@ class ArrayOpsTest : public Test { testStart("dequantize"); //reference inputs - S_TENSOR a = ctx.add(t_import.ubyte_import("/fs/testData/deQ/in/rQ_0.idx", "a")); - S_TENSOR a_min = 
ctx.add(t_import.float_import("/fs/testData/deQ/in/rQ_1.idx", "a_min")); - S_TENSOR a_max = ctx.add(t_import.float_import("/fs/testData/deQ/in/rQ_2.idx", "a_max")); + S_TENSOR a = ctx.add(defer(t_import.ubyte_import("/fs/testData/deQ/in/rQ_0.idx", "a"))); + S_TENSOR a_min = ctx.add(defer(t_import.float_import("/fs/testData/deQ/in/rQ_1.idx", "a_min"))); + S_TENSOR a_max = ctx.add(defer(t_import.float_import("/fs/testData/deQ/in/rQ_2.idx", "a_max"))); //reference outputs - S_TENSOR out_ref = ctx.add(t_import.float_import("/fs/testData/deQ/out/deQ_0.idx", "out_ref")); + S_TENSOR out_ref = ctx.add(defer(t_import.float_import("/fs/testData/deQ/out/deQ_0.idx", "out_ref"))); //modify the checks below: - S_TENSOR out = ctx.add(new RamTensor(out_ref->getShape(), "out")); + S_TENSOR out = ctx.add(defer(new RamTensor(out_ref->getShape(), "out"))); timer_start(); - ctx.push(new DequantizeOp(), {"a", "a_min", "a_max"}, {"out"}); + ctx.push(defer(new DequantizeOp()), {"a", "a_min", "a_max"}, {"out"}); ctx.eval(); timer_stop(); @@ -69,18 +69,18 @@ class ArrayOpsTest : public Test { TensorIdxImporter t_import; //reference inputs - S_TENSOR ref_a = ctx.add(t_import.float_import("/fs/testData/ref_reshape/in/Const_0.idx", "ref_a")); - S_TENSOR ref_dim = ctx.add(t_import.int_import("/fs/testData/ref_reshape/in/Const_1_0.idx", "ref_dim")); + S_TENSOR ref_a = ctx.add(defer(t_import.float_import("/fs/testData/ref_reshape/in/Const_0.idx", "ref_a"))); + S_TENSOR ref_dim = ctx.add(defer(t_import.int_import("/fs/testData/ref_reshape/in/Const_1_0.idx", "ref_dim"))); //reference outputs - S_TENSOR out_ref_2 = ctx.add(t_import.float_import("/fs/testData/ref_reshape/out/ref_reshape_0.idx", "out_ref_2")); + S_TENSOR out_ref_2 = ctx.add(defer(t_import.float_import("/fs/testData/ref_reshape/out/ref_reshape_0.idx", "out_ref_2"))); //modify the checks below: - S_TENSOR out_2 = ctx.add(new RamTensor(out_ref_2->getShape(), "out_2")); + S_TENSOR out_2 = ctx.add(defer(new RamTensor(out_ref_2->getShape(), "out_2"))); timer_start(); - ctx.push(new ReshapeOp(), {"ref_a", "ref_dim"}, {"out_2"}); + ctx.push(defer(new ReshapeOp()), {"ref_a", "ref_dim"}, {"out_2"}); ctx.eval(); timer_stop(); diff --git a/MathTests.hpp b/MathTests.hpp index 65714377..d117f4a5 100644 --- a/MathTests.hpp +++ b/MathTests.hpp @@ -17,24 +17,24 @@ class MathOpsTest : public Test { //Note: raw pointers should be owned ONLY by the context. 
no copy of the raw pointer should exist elsewhere // reference inputs - ctx.add(t_import.int_import("/fs/testData/rqRange/in/qMatMul_0.idx", "a")); - ctx.add(t_import.float_import("/fs/testData/rqRange/in/qMatMul_1.idx", "a_min")); - ctx.add(t_import.float_import("/fs/testData/rqRange/in/qMatMul_2.idx", "a_max")); + ctx.add(defer(t_import.int_import("/fs/testData/rqRange/in/qMatMul_0.idx", "a"))); + ctx.add(defer(t_import.float_import("/fs/testData/rqRange/in/qMatMul_1.idx", "a_min"))); + ctx.add(defer(t_import.float_import("/fs/testData/rqRange/in/qMatMul_2.idx", "a_max"))); // reference output - ctx.add(t_import.float_import("/fs/testData/rqRange/out/rqRange_0.idx", "ref_min")); - ctx.add(t_import.float_import("/fs/testData/rqRange/out/rqRange_1.idx", "ref_max")); + ctx.add(defer(t_import.float_import("/fs/testData/rqRange/out/rqRange_0.idx", "ref_min"))); + ctx.add(defer(t_import.float_import("/fs/testData/rqRange/out/rqRange_1.idx", "ref_max"))); // Implementation goes here // modify the checks below: - ctx.add(new RamTensor(ctx.get("ref_min")->getShape(), "out_min")); - ctx.add(new RamTensor(ctx.get("ref_max")->getShape(), "out_max")); + ctx.add(defer(new RamTensor(ctx.get("ref_min")->getShape(), "out_min"))); + ctx.add(defer(new RamTensor(ctx.get("ref_max")->getShape(), "out_max"))); TNameList inputs = {"a", "a_min", "a_max"}; TNameList outputs = {"out_min", "out_max"}; timer_start(); - ctx.push(new Requantization_RangeOp(), inputs, outputs); + ctx.push(defer(new Requantization_RangeOp()), inputs, outputs); ctx.eval(); timer_stop(); @@ -52,25 +52,25 @@ class MathOpsTest : public Test { ctx.gc(); // reference inputs - ctx.add(t_import.int_import("/fs/testData/rQ/in/qMatMul_0.idx", "a")); - ctx.add(t_import.float_import("/fs/testData/rQ/in/qMatMul_1.idx", "a_min")); - ctx.add(t_import.float_import("/fs/testData/rQ/in/qMatMul_2.idx", "a_max")); - ctx.add(t_import.float_import("/fs/testData/rQ/in/rqRange_0.idx", "r_a_min")); - ctx.add(t_import.float_import("/fs/testData/rQ/in/rqRange_1.idx", "r_a_max")); + ctx.add(defer(t_import.int_import("/fs/testData/rQ/in/qMatMul_0.idx", "a"))); + ctx.add(defer(t_import.float_import("/fs/testData/rQ/in/qMatMul_1.idx", "a_min"))); + ctx.add(defer(t_import.float_import("/fs/testData/rQ/in/qMatMul_2.idx", "a_max"))); + ctx.add(defer(t_import.float_import("/fs/testData/rQ/in/rqRange_0.idx", "r_a_min"))); + ctx.add(defer(t_import.float_import("/fs/testData/rQ/in/rqRange_1.idx", "r_a_max"))); // tf.quint8 //Note: //Instead of using ctx.get() to obtain a shared_ptr, you may also use the shared_ptr returned by ctx.add() // reference outputs - S_TENSOR ref_a_q = ctx.add(t_import.ubyte_import("/fs/testData/rQ/out/rQ_0.idx", "ref_a_q")); - S_TENSOR ref_a_min = ctx.add(t_import.float_import("/fs/testData/rQ/out/rQ_1.idx", "ref_a_min")); - S_TENSOR ref_a_max = ctx.add(t_import.float_import("/fs/testData/rQ/out/rQ_2.idx", "ref_a_max")); + S_TENSOR ref_a_q = ctx.add(defer(t_import.ubyte_import("/fs/testData/rQ/out/rQ_0.idx", "ref_a_q"))); + S_TENSOR ref_a_min = ctx.add(defer(t_import.float_import("/fs/testData/rQ/out/rQ_1.idx", "ref_a_min"))); + S_TENSOR ref_a_max = ctx.add(defer(t_import.float_import("/fs/testData/rQ/out/rQ_2.idx", "ref_a_max"))); // modify the checks below: - S_TENSOR a_q = ctx.add(new RamTensor(ref_a_q->getShape(), "a_q")); - S_TENSOR a_min_q = ctx.add(new RamTensor(ref_a_min->getShape(), "a_min_q")); - S_TENSOR a_max_q = ctx.add(new RamTensor(ref_a_max->getShape(), "a_max_q")); + S_TENSOR a_q = ctx.add(defer(new RamTensor(ref_a_q->getShape(), 
"a_q"))); + S_TENSOR a_min_q = ctx.add(defer(new RamTensor(ref_a_min->getShape(), "a_min_q"))); + S_TENSOR a_max_q = ctx.add(defer(new RamTensor(ref_a_max->getShape(), "a_max_q"))); TNameList inputs = {"a", "a_min", "a_max", "r_a_min", "r_a_max"}; @@ -78,7 +78,7 @@ class MathOpsTest : public Test { // Implementation goes here timer_start(); - ctx.push(new RequantizeOp(), inputs, outputs); + ctx.push(defer(new RequantizeOp()), inputs, outputs); ctx.eval(); timer_stop(); @@ -95,23 +95,23 @@ class MathOpsTest : public Test { ctx.gc(); // reference inputs - ctx.add(t_import.int_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_0.idx", "a")); - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_1.idx", "a_min")); - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_2.idx", "a_max")); - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_0.idx", "r_a_min")); - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_1.idx", "r_a_max")); + ctx.add(defer(t_import.int_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_0.idx", "a"))); + ctx.add(defer(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_1.idx", "a_min"))); + ctx.add(defer(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_2.idx", "a_max"))); + ctx.add(defer(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_0.idx", "r_a_min"))); + ctx.add(defer(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_1.idx", "r_a_max"))); // tf.quint8 // reference outputs - ctx.add(t_import.ubyte_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_0.idx", "ref_a_q")); - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_1.idx", "ref_a_min")); - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_2.idx", "ref_a_max")); + ctx.add(defer(t_import.ubyte_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_0.idx", "ref_a_q"))); + ctx.add(defer(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_1.idx", "ref_a_min"))); + ctx.add(defer(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_2.idx", "ref_a_max"))); // modify the checks below: - ctx.add(new RamTensor(ctx.get("ref_a_q")->getShape(), "a_q")); - ctx.add(new RamTensor(ctx.get("ref_a_min")->getShape(), "a_min_q")); - ctx.add(new RamTensor(ctx.get("ref_a_max")->getShape(), "a_max_q")); + ctx.add(defer(new RamTensor(ctx.get("ref_a_q")->getShape(), "a_q"))); + ctx.add(defer(new RamTensor(ctx.get("ref_a_min")->getShape(), "a_min_q"))); + ctx.add(defer(new RamTensor(ctx.get("ref_a_max")->getShape(), "a_max_q"))); S_TENSOR ref_val = ctx.get("ref_a_q"); S_TENSOR ref_min = ctx.get("ref_a_min"); @@ -122,7 +122,7 @@ class MathOpsTest : public Test { // Implementation goes here timer_start(); - ctx.push(new 
RequantizeOp(), {"a", "a_min", "a_max", "r_a_min", "r_a_max"}, {"a_q", "a_min_q", "a_max_q"}); + ctx.push(defer(new RequantizeOp()), {"a", "a_min", "a_max", "r_a_min", "r_a_max"}, {"a_q", "a_min_q", "a_max_q"}); ctx.eval(); timer_stop(); @@ -158,24 +158,24 @@ class MathOpsTest : public Test { ctx.gc(); // reference inputs - ctx.add(t_import.float_import("/fs/testData/ArgMax/in/ArgMax-input_0.idx", "ref_a")); - ctx.add(t_import.int_import("/fs/testData/ArgMax/in/ArgMax-dimension_0.idx", "ref_dim")); + ctx.add(defer(t_import.float_import("/fs/testData/ArgMax/in/ArgMax-input_0.idx", "ref_a"))); + ctx.add(defer(t_import.int_import("/fs/testData/ArgMax/in/ArgMax-dimension_0.idx", "ref_dim"))); // reference outputs /// NT: FIXME: argmax outputs int64 tensor which isn't supported by /// int_import. - S_TENSOR ref_out = ctx.add(t_import.float_import("/fs/testData/ArgMax/out/ArgMax_0.idx", "ref_out")); + S_TENSOR ref_out = ctx.add(defer(t_import.float_import("/fs/testData/ArgMax/out/ArgMax_0.idx", "ref_out"))); // Implementation goes here // modify the checks below: - S_TENSOR out = ctx.add(new RamTensor(ref_out->getShape(), "out")); + S_TENSOR out = ctx.add(defer(new RamTensor(ref_out->getShape(), "out"))); TNameList inputs = {"ref_a", "ref_dim"}; TNameList outputs = {"out"}; timer_start(); - ctx.push(new ArgMaxOp(), inputs, outputs); + ctx.push(defer(new ArgMaxOp()), inputs, outputs); ctx.eval(); timer_stop(); @@ -192,29 +192,29 @@ class MathOpsTest : public Test { ctx.gc(); - S_TENSOR test_input = ctx.add(TensorConstant({10, 5}, 0.0f, "test_input")); + S_TENSOR test_input = ctx.add(defer(TensorConstant({10, 5}, 0.0f, "test_input"))); *(test_input->write(25, 0)) = 1.0f; *(test_input->write(26, 0)) = 1.0f; *(test_input->write(7, 0)) = 1.0f; *(test_input->write(48, 0)) = 1.0f; *(test_input->write(14, 0)) = 1.0f; - S_TENSOR test_dim = ctx.add(new RamTensor({1}, "test_dim")); + S_TENSOR test_dim = ctx.add(defer(new RamTensor({1}, "test_dim"))); *(test_dim->write(0, 0)) = 0; - S_TENSOR test_out_ref = ctx.add(new RamTensor({5}, "test_out_ref")); + S_TENSOR test_out_ref = ctx.add(defer(new RamTensor({5}, "test_out_ref"))); *(test_out_ref->write(0, 0)) = 5.0f; *(test_out_ref->write(1, 0)) = 5.0f; *(test_out_ref->write(2, 0)) = 1.0f; *(test_out_ref->write(3, 0)) = 9.0f; *(test_out_ref->write(4, 0)) = 2.0f; - S_TENSOR test_out = ctx.add(new RamTensor(test_out_ref->getShape(), "test_out")); + S_TENSOR test_out = ctx.add(defer(new RamTensor(test_out_ref->getShape(), "test_out"))); TNameList inputs = {"test_input", "test_dim"}; TNameList outputs = {"test_out"}; timer_start(); - ctx.push(new ArgMaxOp(), inputs, outputs); + ctx.push(defer(new ArgMaxOp()), inputs, outputs); ctx.eval(); timer_stop(); @@ -227,20 +227,20 @@ class MathOpsTest : public Test { testStart("add"); // reference inputs - ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_5_0.idx", "a")); - ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_6_0.idx", "b")); + ctx.add(defer(t_import.float_import("/fs/testData/ref_add/in/Const_5_0.idx", "a"))); + ctx.add(defer(t_import.float_import("/fs/testData/ref_add/in/Const_6_0.idx", "b"))); // reference outputs - S_TENSOR ref_out = ctx.add(t_import.float_import("/fs/testData/ref_add/out/ref_add_0.idx", "ref_out")); + S_TENSOR ref_out = ctx.add(defer(t_import.float_import("/fs/testData/ref_add/out/ref_add_0.idx", "ref_out"))); // Implementation goes here // modify the checks below: - S_TENSOR out = ctx.add(new RamTensor(ref_out->getShape(), "out")); + S_TENSOR out = ctx.add(defer(new 
RamTensor(ref_out->getShape(), "out"))); TNameList inputs = {"a", "b"}; TNameList outputs = {"out"}; timer_start(); - ctx.push(new AddOp(), inputs, outputs); + ctx.push(defer(new AddOp()), inputs, outputs); ctx.eval(); timer_stop(); @@ -255,21 +255,21 @@ class MathOpsTest : public Test { ctx.gc(); // reference inputs - ctx.add(t_import.float_import("/fs/testData/ref_min/in/Const_2_0.idx", "a")); - ctx.add(t_import.int_import("/fs/testData/ref_min/in/Const_3_0.idx", "dim")); + ctx.add(defer(t_import.float_import("/fs/testData/ref_min/in/Const_2_0.idx", "a"))); + ctx.add(defer(t_import.int_import("/fs/testData/ref_min/in/Const_3_0.idx", "dim"))); // reference outputs - S_TENSOR ref_out = ctx.add(t_import.float_import("/fs/testData/ref_min/out/ref_min_0.idx", "ref_out")); + S_TENSOR ref_out = ctx.add(defer(t_import.float_import("/fs/testData/ref_min/out/ref_min_0.idx", "ref_out"))); // Implementation goes here // modify the checks below: - S_TENSOR out = ctx.add(new RamTensor(ref_out->getShape(), "out")); + S_TENSOR out = ctx.add(defer(new RamTensor(ref_out->getShape(), "out"))); TNameList inputs = {"a", "dim"}; TNameList outputs = {"out"}; timer_start(); - ctx.push(new MinOp(), inputs, outputs); + ctx.push(defer(new MinOp()), inputs, outputs); ctx.eval(); timer_stop(); @@ -284,20 +284,20 @@ class MathOpsTest : public Test { ctx.gc(); // reference inputs - ctx.add(t_import.float_import("/fs/testData/ref_max/in/Const_2_0.idx", "a")); - ctx.add(t_import.int_import("/fs/testData/ref_max/in/Const_4_0.idx", "dim")); + ctx.add(defer(t_import.float_import("/fs/testData/ref_max/in/Const_2_0.idx", "a"))); + ctx.add(defer(t_import.int_import("/fs/testData/ref_max/in/Const_4_0.idx", "dim"))); // reference outputs - S_TENSOR ref_out = ctx.add(t_import.float_import("/fs/testData/ref_max/out/ref_max_0.idx", "ref_out")); + S_TENSOR ref_out = ctx.add(defer(t_import.float_import("/fs/testData/ref_max/out/ref_max_0.idx", "ref_out"))); // Implementation goes here // modify the checks below: - S_TENSOR out = ctx.add(new RamTensor(ref_out->getShape(), "out")); + S_TENSOR out = ctx.add(defer(new RamTensor(ref_out->getShape(), "out"))); TNameList inputs = {"a", "dim"}; TNameList outputs = {"out"}; timer_start(); - ctx.push(new MaxOp(), inputs, outputs); + ctx.push(defer(new MaxOp()), inputs, outputs); ctx.eval(); timer_stop(); diff --git a/context.cpp b/context.cpp index d244ede2..5a6973a1 100644 --- a/context.cpp +++ b/context.cpp @@ -1,6 +1,7 @@ #include "context.hpp" -S_TENSOR Context::add(Tensor* t, uint8_t init_count) { +S_TENSOR Context::add(std::function func, uint8_t init_count) { + Tensor* t = (Tensor*) func(); if(t == nullptr) { ERR_EXIT("null pointer tensor"); } if(rTable.find(t->getName()) != rTable.end()) { ///NT: TODO: check stateful here @@ -29,7 +30,8 @@ S_TENSOR Context::get(TName const &t_name) { } -void Context::push(Operator *op, TNameList &in_names, TNameList &out_names) { +void Context::push(std::function func, TNameList &in_names, TNameList &out_names) { + Operator* op = (Operator*) func(); //error checking in the Op class S_TList _inputs; for(auto in:in_names) { @@ -52,7 +54,7 @@ void Context::push(Operator *op, TNameList &in_names, TNameList &out_names) { } -void Context::push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs) { +void Context::push(std::function func, std::initializer_list _inputs, std::initializer_list _outputs) { TNameList inputs; TNameList outputs; @@ -64,7 +66,7 @@ void Context::push(Operator *op, std::initializer_list _inputs, std::init 
outputs.push_back(o); } - push(op, inputs, outputs); + push(func, inputs, outputs); } void Context::incrTNameListRef(const TNameList &t_list) { diff --git a/context.hpp b/context.hpp index 15076833..959b5e71 100644 --- a/context.hpp +++ b/context.hpp @@ -6,6 +6,7 @@ #include #include "uTensorBase.hpp" #include "stdio.h" +#include //#include class Ref_Record { @@ -44,10 +45,11 @@ class Context : public uTensor { //uint16_t getRef(); public: - S_TENSOR add(Tensor* t, uint8_t init_count = 0); +//S_TENSOR addStateful(std::function func); + S_TENSOR add(std::function func, uint8_t init_count = 0); S_TENSOR get(TName const &t_name); - void push(Operator *op, TNameList &_inputs, TNameList &_outputs); - void push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs); + void push(std::function func, TNameList &_inputs, TNameList &_outputs); + void push(std::function func, std::initializer_list _inputs, std::initializer_list _outputs); uint32_t gc(void); int eval(void); @@ -57,5 +59,6 @@ class Context : public uTensor { }; +#define defer(...) ([&](){return (void*) (__VA_ARGS__);}) #endif // UTENSOR_CTX_H diff --git a/context_test.hpp b/context_test.hpp index 8db717b6..319b7140 100644 --- a/context_test.hpp +++ b/context_test.hpp @@ -20,9 +20,9 @@ class contextTest : public Test { private: void codeGenStatfulHelper(TName state) { - ctx.add(TensorConstant({1}, 1, "incr_val")); - ctx.add(new RamTensor({1}, "out")); //gc problem? - ctx.push(new AddOp(), {"incr_val", state}, {"out"}); + ctx.add(defer(TensorConstant({1}, 1, "incr_val"))); + ctx.add(defer(new RamTensor({1}, "out"))); //gc problem? + ctx.push(defer(new AddOp()), {"incr_val", state}, {"out"}); ctx.eval(); } @@ -34,9 +34,10 @@ class contextTest : public Test { timer_start(); //inputs - S_TENSOR a = ctx.add(new RamTensor({1,1,1}, "a")); - S_TENSOR b = ctx.add(new RamTensor({1,1,1}, "b")); - S_TENSOR c = ctx.add(new RamTensor({1,1,1}, "c")); + + S_TENSOR a = ctx.add(defer(new RamTensor({1,1,1}, "a"))); + S_TENSOR b = ctx.add(defer(new RamTensor({1,1,1}, "b"))); + S_TENSOR c = ctx.add(defer(new RamTensor({1,1,1}, "c"))); //init values *(a->write(0, 0)) = 1; @@ -44,19 +45,19 @@ class contextTest : public Test { *(c->write(0, 0)) = 1; // reference outputs - S_TENSOR out = ctx.add(new RamTensor({1,1,1}, "out")); + S_TENSOR out = ctx.add(defer(new RamTensor({1,1,1}, "out"))); TNameList inputs0 = {"a", "b"}; TNameList outputs0 = {"c"}; //2 - ctx.push(new AddOp(), inputs0, outputs0); + ctx.push(defer(new AddOp()), inputs0, outputs0); TNameList inputs1 = {"c", "a"}; TNameList outputs1 = {"b"}; //3 - ctx.push(new AddOp(), inputs1, outputs1); + ctx.push(defer(new AddOp()), inputs1, outputs1); TNameList inputs2 = {"a", "b"}; TNameList outputs2 = {"out"}; //4 - ctx.push(new AddOp(), inputs2, outputs2); + ctx.push(defer(new AddOp()), inputs2, outputs2); ctx.eval(); timer_stop(); @@ -73,7 +74,7 @@ class contextTest : public Test { void codeGenTemplate(void) { testStart("codeGenTemplate"); ctx.gc(); - S_TENSOR state = ctx.add(TensorConstant({1}, 0, "state"), 255); + S_TENSOR state = ctx.add(defer(TensorConstant({1}, 0, "state")), 255); S_TENSOR out; for(auto i = 0; i < 5; i++) { codeGenStatfulHelper("state"); diff --git a/deep_mnist_mlp.cpp b/deep_mnist_mlp.cpp index 511bff7d..3f94db2d 100644 --- a/deep_mnist_mlp.cpp +++ b/deep_mnist_mlp.cpp @@ -1,191 +1,191 @@ -#include "deep_mnist_mlp.hpp" +// #include "deep_mnist_mlp.hpp" -void tensorQuantize(Context& ctx, TName input, TName output, - TName out_min, TName out_max) { +// void 
tensorQuantize(Context& ctx, TName input, TName output, +// TName out_min, TName out_max) { - //reshape - S_TENSOR reduce_dim = ctx.add(new RamTensor({1}, "reduce_dim")); - S_TENSOR reshape_out = ctx.add(new RamTensor("reshape_out")); +// //reshape +// S_TENSOR reduce_dim = ctx.add(new RamTensor({1}, "reduce_dim")); +// S_TENSOR reshape_out = ctx.add(new RamTensor("reshape_out")); - S_TENSOR reshape_shape = ctx.add(new RamTensor("reshape_shape")); +// S_TENSOR reshape_shape = ctx.add(new RamTensor("reshape_shape")); - *(reduce_dim->write(0, 0)) = 0; - ctx.push(new ReshapeOp(), {input, "reshape_shape"}, {"reshape_out"}); +// *(reduce_dim->write(0, 0)) = 0; +// ctx.push(new ReshapeOp(), {input, "reshape_shape"}, {"reshape_out"}); - //Min and Max of (reshaped) input - S_TENSOR min_out = ctx.add(new RamTensor({1}, "min_out")); - S_TENSOR max_out = ctx.add(new RamTensor({1}, "max_out")); - ctx.push(new MinOp(), {"reshape_out", "reduce_dim"}, {"min_out"}); - ctx.push(new MaxOp(), {"reshape_out", "reduce_dim"}, {"max_out"}); +// //Min and Max of (reshaped) input +// S_TENSOR min_out = ctx.add(new RamTensor({1}, "min_out")); +// S_TENSOR max_out = ctx.add(new RamTensor({1}, "max_out")); +// ctx.push(new MinOp(), {"reshape_out", "reduce_dim"}, {"min_out"}); +// ctx.push(new MaxOp(), {"reshape_out", "reduce_dim"}, {"max_out"}); - ctx.push(new QuantizeV2Op(), {"reshape_out", "min_out", "max_out"}, {output, out_min, out_max}); -} +// ctx.push(new QuantizeV2Op(), {"reshape_out", "min_out", "max_out"}, {output, out_min, out_max}); +// } -void ReluLayer(Context& ctx, TName x, TName x_min, TName x_max, - TName w, TName w_min, TName w_max, TName b, - TName z_output) { +// void ReluLayer(Context& ctx, TName x, TName x_min, TName x_max, +// TName w, TName w_min, TName w_max, TName b, +// TName z_output) { - //quantized matmul +// //quantized matmul - S_TENSOR out_c = ctx.add(new RamTensor("out_c")); +// S_TENSOR out_c = ctx.add(new RamTensor("out_c")); - S_TENSOR matmul_out_min = ctx.add(new RamTensor({1}, "matmul_out_min")); - S_TENSOR matmul_out_max = ctx.add(new RamTensor({1}, "matmul_out_max")); +// S_TENSOR matmul_out_min = ctx.add(new RamTensor({1}, "matmul_out_min")); +// S_TENSOR matmul_out_max = ctx.add(new RamTensor({1}, "matmul_out_max")); - ctx.push(new QntMatMulOp(), {x, x_min, x_max, w, w_min, w_max}, {"out_c", "matmul_out_min", "matmul_out_max"}); - - //Requantization_Range - S_TENSOR req_out_min = ctx.add(new RamTensor({1}, "req_out_min")); - S_TENSOR req_out_max = ctx.add(new RamTensor({1}, "req_out_max")); - ctx.push(new Requantization_RangeOp(), {"out_c", "matmul_out_min", "matmul_out_max"}, {"req_out_min", "req_out_max"}); - - //Requantize - S_TENSOR reqnt_out = ctx.add(new RamTensor("reqnt_out")); - S_TENSOR reqnt_out_min = ctx.add(new RamTensor({1}, "reqnt_out_min")); - S_TENSOR reqnt_out_max = ctx.add(new RamTensor({1}, "reqnt_out_max")); - ctx.push(new RequantizeOp(), {"out_c", "matmul_out_min", "matmul_out_max", "req_out_min", "req_out_max"}, {"reqnt_out", "reqnt_out_min", "reqnt_out_max"}); - - Shape out_shape = out_c->getShape(); - //clean up - - S_TENSOR deqnt_out = ctx.add(new RamTensor("deqnt_out")); - ctx.push(new DequantizeOp(), {"reqnt_out", "reqnt_out_min", "reqnt_out_max"}, {"deqnt_out"}); +// ctx.push(new QntMatMulOp(), {x, x_min, x_max, w, w_min, w_max}, {"out_c", "matmul_out_min", "matmul_out_max"}); + +// //Requantization_Range +// S_TENSOR req_out_min = ctx.add(new RamTensor({1}, "req_out_min")); +// S_TENSOR req_out_max = ctx.add(new RamTensor({1}, "req_out_max")); 
+// ctx.push(new Requantization_RangeOp(), {"out_c", "matmul_out_min", "matmul_out_max"}, {"req_out_min", "req_out_max"}); + +// //Requantize +// S_TENSOR reqnt_out = ctx.add(new RamTensor("reqnt_out")); +// S_TENSOR reqnt_out_min = ctx.add(new RamTensor({1}, "reqnt_out_min")); +// S_TENSOR reqnt_out_max = ctx.add(new RamTensor({1}, "reqnt_out_max")); +// ctx.push(new RequantizeOp(), {"out_c", "matmul_out_min", "matmul_out_max", "req_out_min", "req_out_max"}, {"reqnt_out", "reqnt_out_min", "reqnt_out_max"}); + +// Shape out_shape = out_c->getShape(); +// //clean up + +// S_TENSOR deqnt_out = ctx.add(new RamTensor("deqnt_out")); +// ctx.push(new DequantizeOp(), {"reqnt_out", "reqnt_out_min", "reqnt_out_max"}, {"deqnt_out"}); - ctx.push(new AddOp(), {"deqnt_out", b}, {z_output}); +// ctx.push(new AddOp(), {"deqnt_out", b}, {z_output}); -} +// } -void PredLayer(Context &ctx, TName input, TName input_min, - TName input_max, TName output, TName w, TName w_min, TName w_max, TName bias, TName dim) { - - S_TENSOR out_mat_pred = ctx.add(new RamTensor("out_mat_pred")); - S_TENSOR matmul_out_min_pred = ctx.add(new RamTensor({1}, "matmul_out_min_pred")); - S_TENSOR matmul_out_max_pred = ctx.add(new RamTensor({1}, "matmul_out_max_pred")); - - //MatMul - ctx.push(new QntMatMulOp(), {input, input_min, input_max, w, w_min, w_max}, - {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred"}); - - //Requantization_Range - S_TENSOR req_out_min = ctx.add(new RamTensor({1}, "req_out_min_pred")); - S_TENSOR req_out_max = ctx.add(new RamTensor({1}, "req_out_max_pred")); - ctx.push(new Requantization_RangeOp(), {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred"}, - {"req_out_min_pred", "req_out_max_pred"}); - - //Requantize - S_TENSOR reqnt_out = ctx.add(new RamTensor("reqnt_out_pred")); - S_TENSOR reqnt_out_min = ctx.add(new RamTensor({1}, "reqnt_out_min_pred")); - S_TENSOR reqnt_out_max = ctx.add(new RamTensor({1}, "reqnt_out_max_pred")); - ctx.push(new RequantizeOp(), {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred", "req_out_min_pred", "req_out_max_pred"}, - {"reqnt_out_pred", "reqnt_out_min_pred", "reqnt_out_max_pred"}); - - //dequantize - S_TENSOR deqnt_out = ctx.add(new RamTensor("deqnt_out_pred")); - ctx.push(new DequantizeOp(), {"reqnt_out_pred", "reqnt_out_min_pred", "reqnt_out_max_pred"}, {"deqnt_out_pred"}); - - //Add - S_TENSOR output_z = ctx.add(new RamTensor("output_z_pred")); - ctx.push(new AddOp(), {"deqnt_out_pred", bias}, {"output_z_pred"}); - - //ArgMax - ctx.push(new ArgMaxOp(), {"output_z_pred", dim}, {output}); -} - -int runMLP(string inputIdxFile) { - TensorIdxImporter t_import; - Context ctx; - S_TENSOR x_quantized = ctx.add(new RamTensor("x_quantized")); - S_TENSOR x_min = ctx.add(new RamTensor({1}, "x_min")); - S_TENSOR x_max = ctx.add(new RamTensor({1}, "x_max")); - S_TENSOR x = ctx.add(t_import.float_import(inputIdxFile, "x")); - - tensorQuantize(ctx, "x", "x_quantized", "x_min", "x_max"); - ctx.eval(); - - //relu layer first - - S_TENSOR w = ctx.add(t_import.ubyte_import( - "/fs/testData/deep_mlp/import-Variable_quint8_const_0.idx", "w")); - S_TENSOR w_min = - ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_min_0.idx", "w_min")); - S_TENSOR w_max = - ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_max_0.idx", "w_max")); - S_TENSOR b = - ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_1_0.idx", "b")); - S_TENSOR relu_output = ctx.add(new RamTensor("relu_output")); - S_TENSOR relu_min = 
ctx.add(new RamTensor({1}, "relu_min")); - S_TENSOR relu_max = ctx.add(new RamTensor({1}, "relu_max")); - S_TENSOR z_output = ctx.add(new RamTensor("z_output")); - - ReluLayer(ctx, "x_quantized", "x_min", "x_max", "w", "w_min", "w_max", "b", "z_output"); - - S_TENSOR z_qnt_output = ctx.add(new RamTensor("z_qnt_output")); - S_TENSOR z_min = ctx.add(new RamTensor({1}, "z_min")); - S_TENSOR z_max = ctx.add(new RamTensor({1}, "z_max")); - tensorQuantize(ctx, "z_output", "z_qnt_output", "z_min", "z_max"); - - ctx.push(new ReluOp(), {"z_qnt_output", "z_min", "z_max"}, {"relu_output", "relu_min", "relu_max"}); - - ctx.eval(); - - //relu layer 2 - S_TENSOR w2 = ctx.add(t_import.ubyte_import( - "/fs/testData/deep_mlp/import-Variable_2_quint8_const_0.idx", "w2")); - S_TENSOR w_min2 = ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/import-Variable_2_min_0.idx", "w_min2")); - S_TENSOR w_max2 = ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/import-Variable_2_max_0.idx", "w_max2")); - S_TENSOR b2 = ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_3_0.idx", "b2")); - S_TENSOR relu_output2 = ctx.add(new RamTensor("relu_output2")); - S_TENSOR relu_min2 = ctx.add(new RamTensor({1}, "relu_min2")); - S_TENSOR relu_max2 = ctx.add(new RamTensor({1}, "relu_max2")); - - S_TENSOR z_output2 = ctx.add(new RamTensor("z_output2")); - ReluLayer(ctx, "relu_output", "relu_min", "relu_max", "w2", "w_min2", "w_max2", "b2", "z_output2"); - - - S_TENSOR z_qnt_output2 = ctx.add(new RamTensor("z_qnt_output2")); - S_TENSOR z_min2 = ctx.add(new RamTensor({1}, "z_min2")); - S_TENSOR z_max2 = ctx.add(new RamTensor({1}, "z_max2")); - tensorQuantize(ctx, "z_output2", "z_qnt_output2", "z_min2", "z_max2"); - - ctx.push(new ReluOp(), {"z_qnt_output2", "z_min2", "z_max2"}, {"relu_output2", "relu_min2", "relu_max2"}); - - ctx.eval(); - - S_TENSOR w3 = ctx.add(t_import.ubyte_import( - "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_quint8_const_0.idx", "w3")); - S_TENSOR w2_min = ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_min_0.idx", "w2_min")); - S_TENSOR w2_max = ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_max_0.idx", "w2_max")); - S_TENSOR bias2 = ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/add_2/inputs/Variable_5_0.idx", "bias2")); - S_TENSOR dim = ctx.add(t_import.int_import( - "/fs/testData/deep_mlp/runPredLayer/y_pred/inputs/" - "y_pred-dimension_0.idx", "dim")); - - S_TENSOR pred = ctx.add(new RamTensor("pred")); - PredLayer(ctx, "relu_output2", "relu_min2", "relu_max2", "pred", "w3", "w2_min", "w2_max", "bias2", "dim"); - ctx.eval(); - - - Tensor* ref_out = t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/y_pred/outputs/y_pred_0.idx", "ref_out"); - Tensor* ref_pred = TensorCast(ref_out, "ref_pred"); - - double result = Test::meanPercentErr(ref_pred, pred.get()); +// void PredLayer(Context &ctx, TName input, TName input_min, +// TName input_max, TName output, TName w, TName w_min, TName w_max, TName bias, TName dim) { + +// S_TENSOR out_mat_pred = ctx.add(new RamTensor("out_mat_pred")); +// S_TENSOR matmul_out_min_pred = ctx.add(new RamTensor({1}, "matmul_out_min_pred")); +// S_TENSOR matmul_out_max_pred = ctx.add(new RamTensor({1}, "matmul_out_max_pred")); + +// //MatMul +// ctx.push(new QntMatMulOp(), {input, input_min, input_max, w, w_min, 
w_max}, +// {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred"}); + +// //Requantization_Range +// S_TENSOR req_out_min = ctx.add(new RamTensor({1}, "req_out_min_pred")); +// S_TENSOR req_out_max = ctx.add(new RamTensor({1}, "req_out_max_pred")); +// ctx.push(new Requantization_RangeOp(), {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred"}, +// {"req_out_min_pred", "req_out_max_pred"}); + +// //Requantize +// S_TENSOR reqnt_out = ctx.add(new RamTensor("reqnt_out_pred")); +// S_TENSOR reqnt_out_min = ctx.add(new RamTensor({1}, "reqnt_out_min_pred")); +// S_TENSOR reqnt_out_max = ctx.add(new RamTensor({1}, "reqnt_out_max_pred")); +// ctx.push(new RequantizeOp(), {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred", "req_out_min_pred", "req_out_max_pred"}, +// {"reqnt_out_pred", "reqnt_out_min_pred", "reqnt_out_max_pred"}); + +// //dequantize +// S_TENSOR deqnt_out = ctx.add(new RamTensor("deqnt_out_pred")); +// ctx.push(new DequantizeOp(), {"reqnt_out_pred", "reqnt_out_min_pred", "reqnt_out_max_pred"}, {"deqnt_out_pred"}); + +// //Add +// S_TENSOR output_z = ctx.add(new RamTensor("output_z_pred")); +// ctx.push(new AddOp(), {"deqnt_out_pred", bias}, {"output_z_pred"}); + +// //ArgMax +// ctx.push(new ArgMaxOp(), {"output_z_pred", dim}, {output}); +// } + +// int runMLP(string inputIdxFile) { +// TensorIdxImporter t_import; +// Context ctx; +// S_TENSOR x_quantized = ctx.add(new RamTensor("x_quantized")); +// S_TENSOR x_min = ctx.add(new RamTensor({1}, "x_min")); +// S_TENSOR x_max = ctx.add(new RamTensor({1}, "x_max")); +// S_TENSOR x = ctx.add(t_import.float_import(inputIdxFile, "x")); + +// tensorQuantize(ctx, "x", "x_quantized", "x_min", "x_max"); +// ctx.eval(); + +// //relu layer first + +// S_TENSOR w = ctx.add(t_import.ubyte_import( +// "/fs/testData/deep_mlp/import-Variable_quint8_const_0.idx", "w")); +// S_TENSOR w_min = +// ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_min_0.idx", "w_min")); +// S_TENSOR w_max = +// ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_max_0.idx", "w_max")); +// S_TENSOR b = +// ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_1_0.idx", "b")); +// S_TENSOR relu_output = ctx.add(new RamTensor("relu_output")); +// S_TENSOR relu_min = ctx.add(new RamTensor({1}, "relu_min")); +// S_TENSOR relu_max = ctx.add(new RamTensor({1}, "relu_max")); +// S_TENSOR z_output = ctx.add(new RamTensor("z_output")); + +// ReluLayer(ctx, "x_quantized", "x_min", "x_max", "w", "w_min", "w_max", "b", "z_output"); + +// S_TENSOR z_qnt_output = ctx.add(new RamTensor("z_qnt_output")); +// S_TENSOR z_min = ctx.add(new RamTensor({1}, "z_min")); +// S_TENSOR z_max = ctx.add(new RamTensor({1}, "z_max")); +// tensorQuantize(ctx, "z_output", "z_qnt_output", "z_min", "z_max"); + +// ctx.push(new ReluOp(), {"z_qnt_output", "z_min", "z_max"}, {"relu_output", "relu_min", "relu_max"}); + +// ctx.eval(); + +// //relu layer 2 +// S_TENSOR w2 = ctx.add(t_import.ubyte_import( +// "/fs/testData/deep_mlp/import-Variable_2_quint8_const_0.idx", "w2")); +// S_TENSOR w_min2 = ctx.add(t_import.float_import( +// "/fs/testData/deep_mlp/import-Variable_2_min_0.idx", "w_min2")); +// S_TENSOR w_max2 = ctx.add(t_import.float_import( +// "/fs/testData/deep_mlp/import-Variable_2_max_0.idx", "w_max2")); +// S_TENSOR b2 = ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_3_0.idx", "b2")); +// S_TENSOR relu_output2 = ctx.add(new RamTensor("relu_output2")); +// S_TENSOR relu_min2 = ctx.add(new 
RamTensor({1}, "relu_min2")); +// S_TENSOR relu_max2 = ctx.add(new RamTensor({1}, "relu_max2")); + +// S_TENSOR z_output2 = ctx.add(new RamTensor("z_output2")); +// ReluLayer(ctx, "relu_output", "relu_min", "relu_max", "w2", "w_min2", "w_max2", "b2", "z_output2"); + + +// S_TENSOR z_qnt_output2 = ctx.add(new RamTensor("z_qnt_output2")); +// S_TENSOR z_min2 = ctx.add(new RamTensor({1}, "z_min2")); +// S_TENSOR z_max2 = ctx.add(new RamTensor({1}, "z_max2")); +// tensorQuantize(ctx, "z_output2", "z_qnt_output2", "z_min2", "z_max2"); + +// ctx.push(new ReluOp(), {"z_qnt_output2", "z_min2", "z_max2"}, {"relu_output2", "relu_min2", "relu_max2"}); + +// ctx.eval(); + +// S_TENSOR w3 = ctx.add(t_import.ubyte_import( +// "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" +// "inputs/Variable_4_quint8_const_0.idx", "w3")); +// S_TENSOR w2_min = ctx.add(t_import.float_import( +// "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" +// "inputs/Variable_4_min_0.idx", "w2_min")); +// S_TENSOR w2_max = ctx.add(t_import.float_import( +// "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" +// "inputs/Variable_4_max_0.idx", "w2_max")); +// S_TENSOR bias2 = ctx.add(t_import.float_import( +// "/fs/testData/deep_mlp/runPredLayer/add_2/inputs/Variable_5_0.idx", "bias2")); +// S_TENSOR dim = ctx.add(t_import.int_import( +// "/fs/testData/deep_mlp/runPredLayer/y_pred/inputs/" +// "y_pred-dimension_0.idx", "dim")); + +// S_TENSOR pred = ctx.add(new RamTensor("pred")); +// PredLayer(ctx, "relu_output2", "relu_min2", "relu_max2", "pred", "w3", "w2_min", "w2_max", "bias2", "dim"); +// ctx.eval(); + + +// Tensor* ref_out = t_import.float_import( +// "/fs/testData/deep_mlp/runPredLayer/y_pred/outputs/y_pred_0.idx", "ref_out"); +// Tensor* ref_pred = TensorCast(ref_out, "ref_pred"); + +// double result = Test::meanPercentErr(ref_pred, pred.get()); - if (result < 0.0001) { - printf("PASSED %.8f\r\n\r\n", result); - } else { - printf("FAILED %.8f\r\n\r\n", result); - } - - return *(pred->read(0, 0)); - // output layer -} +// if (result < 0.0001) { +// printf("PASSED %.8f\r\n\r\n", result); +// } else { +// printf("FAILED %.8f\r\n\r\n", result); +// } + +// return *(pred->read(0, 0)); +// // output layer +// } diff --git a/main.cpp b/main.cpp index 6d2ef839..f14dbd98 100644 --- a/main.cpp +++ b/main.cpp @@ -7,16 +7,15 @@ #include "tensorIdxImporterTests.hpp" #include "context.hpp" #include "ArrayTests.hpp" -#include "MathTests.hpp" -#include "MatrixTests.hpp" -#include "context_test.hpp" +// #include "MathTests.hpp" +// #include "MatrixTests.hpp" #include "tensor_test.hpp" -#include "NnTests.hpp" -#include "mlp_test.hpp" -#include "deep_mnist_mlp.hpp" +// #include "NnTests.hpp" +// #include "mlp_test.hpp" +// #include "deep_mnist_mlp.hpp" #include "context_test.hpp" #include "MathTests.hpp" -#include "MatrixTests.hpp" +// #include "MatrixTests.hpp" Serial pc(USBTX, USBRX, 115200); SDBlockDevice bd(MBED_CONF_APP_SD_MOSI, MBED_CONF_APP_SD_MISO, @@ -27,11 +26,11 @@ int main(int argc, char** argv) { ON_ERR(bd.init(), "SDBlockDevice init "); ON_ERR(fs.mount(&bd), "Mounting the filesystem on \"/fs\". 
"); - printf("Deep MLP on Mbed (Trained with Tensorflow)\r\n\r\n"); - printf("running deep-mlp...\r\n"); + // printf("Deep MLP on Mbed (Trained with Tensorflow)\r\n\r\n"); + // printf("running deep-mlp...\r\n"); - int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); - printf("prediction: %d\r\n\r\n\r\n\r\n", prediction); + // int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); + // printf("prediction: %d\r\n\r\n\r\n\r\n", prediction); printf("IDX import:\r\n"); idxImporterTest idxTest; @@ -69,17 +68,17 @@ int main(int argc, char** argv) { printf("Math result...\r\n"); mathTests.printSummary(); - printf("running matrix test:\r\n"); - matrixOpsTest matrixTests; - matrixTests.runAll(); - printf("running matrix result ...\r\n"); - matrixTests.printSummary(); - - printf("NnOpS: \r\n"); - NnOpsTest nnTest; - nnTest.runAll(); - printf("Nn Ops result...\r\n"); - nnTest.printSummary(); + // printf("running matrix test:\r\n"); + // matrixOpsTest matrixTests; + // matrixTests.runAll(); + // printf("running matrix result ...\r\n"); + // matrixTests.printSummary(); + + // printf("NnOpS: \r\n"); + // NnOpsTest nnTest; + // nnTest.runAll(); + // printf("Nn Ops result...\r\n"); + // nnTest.printSummary(); /* printf("mlp test: \r\n"); mlpTest mlpt; diff --git a/tensor_test.hpp b/tensor_test.hpp index 8cab02b6..49e0508f 100644 --- a/tensor_test.hpp +++ b/tensor_test.hpp @@ -36,7 +36,7 @@ class transTest : public Test { std::default_random_engine gen; vector tmp({2, 3, 4, 5}); std::string a_s = "input" + std::to_string(i); - S_TENSOR inputTensor = ctx.add(new RamTensor(tmp, a_s)); + S_TENSOR inputTensor = ctx.add(defer(new RamTensor(tmp, a_s))); vector permute = {2, 3, 1, 0}; vector g = inputTensor->getShape(); std::shuffle(permute.begin(), permute.end(), gen); @@ -44,7 +44,7 @@ class transTest : public Test { permuteIndexTransform trans(inputTensor->getShape(), permute); std::string a_o = "output" + std::to_string(i); - S_TENSOR output = ctx.add(new RamTensor(trans.getNewShape(), a_o)); + S_TENSOR output = ctx.add(defer(new RamTensor(trans.getNewShape(), a_o))); vector s = output->getShape(); res = testshape(g, s, permute); if (!res) { @@ -61,7 +61,7 @@ class transTest : public Test { vector output_1({2, 2, 3, 5, 6, 6, 4, 5, 7, 5, 1, 9, 1, 3, 2, 2, 5, 3, 3, 6, 3, 4, 9, 2}); - S_TENSOR inputTensor2 = ctx.add(new RamTensor({2, 3, 4}, "inputTensor2")); + S_TENSOR inputTensor2 = ctx.add(defer(new RamTensor({2, 3, 4}, "inputTensor2"))); vector permute = {0, 2, 1}; permuteIndexTransform trans(inputTensor2->getShape(), permute); @@ -87,7 +87,7 @@ class transTest : public Test { vector output_2({2, 1, 2, 3, 3, 2, 5, 2, 6, 5, 6, 3, 4, 3, 5, 6, 7, 3, 5, 4, 1, 9, 9, 2}); - S_TENSOR inputTensor3 = ctx.add(new RamTensor({2, 4, 3}, "inputTensor3")); + S_TENSOR inputTensor3 = ctx.add(defer(new RamTensor({2, 4, 3}, "inputTensor3"))); vector permute2 = {1, 2, 0}; permuteIndexTransform trans2(inputTensor3->getShape(), permute2); testStart("test vec 2 for transform"); @@ -107,7 +107,7 @@ class transTest : public Test { vector output_3({8, 2, 8, 1, 0, 3, 4, 6, 2, 6, 0, 6, 3, 9, 2, 7, 0, 7, 0, 4, 8, 9, 0, 4, 3, 6, 8}); - S_TENSOR inputTensor4 = ctx.add(new RamTensor({1, 3, 3, 3}, "inputTensor4")); + S_TENSOR inputTensor4 = ctx.add(defer(new RamTensor({1, 3, 3, 3}, "inputTensor4"))); vector permute3 = {0, 3, 2, 1}; permuteIndexTransform trans3(inputTensor4->getShape(), permute3); testStart("test vec 4d for transform"); From 1f96bc2b461e95f2c5d090e520370e7ae562bedb Mon Sep 17 00:00:00 
2001 From: Neil Tan Date: Sat, 2 Dec 2017 18:41:01 -0800 Subject: [PATCH 78/80] context: add_static, addCached, push_static; context internal gc wip --- ArrayTests.hpp | 42 ++--- MathTests.hpp | 116 ++++++------- MatrixTests.hpp | 24 +-- NnTests.hpp | 18 +- context.cpp | 105 ++++++++--- context.hpp | 22 ++- context_test.hpp | 22 +-- deep_mnist_mlp.cpp | 347 ++++++++++++++++++------------------- main.cpp | 38 ++-- tensor.hpp | 31 ++-- tensorIdxImporter.hpp | 26 +-- tensorIdxImporterTests.hpp | 10 +- tensor_test.hpp | 12 +- uTensorBase.cpp | 5 + uTensorBase.hpp | 1 + 15 files changed, 443 insertions(+), 376 deletions(-) diff --git a/ArrayTests.hpp b/ArrayTests.hpp index 95cb3200..24daf5f5 100644 --- a/ArrayTests.hpp +++ b/ArrayTests.hpp @@ -15,22 +15,22 @@ class ArrayOpsTest : public Test { testStart("quantize_v2"); //reference inputs /Users/neitan01/Documents/mbed/uTensor.git/TESTS/scripts/PRE-GEN/qA - S_TENSOR b_q_ref = ctx.add(defer(t_import.float_import ("/fs/testData/qB/in/Cast_1_0.idx", "b_q_ref"))); - S_TENSOR b_min_q_ref = ctx.add(defer(t_import.float_import("/fs/testData/qB/in/Min_1_0.idx", "b_min_q_ref"))); - S_TENSOR b_max_q_ref = ctx.add(defer(t_import.float_import("/fs/testData/qB/in/Max_1_0.idx", "b_max_q_ref"))); + S_TENSOR b_q_ref = ctx.addCached(hold(t_import.float_import ("/fs/testData/qB/in/Cast_1_0.idx")), "b_q_ref"); + S_TENSOR b_min_q_ref = ctx.addCached(hold(t_import.float_import("/fs/testData/qB/in/Min_1_0.idx")), "b_min_q_ref"); + S_TENSOR b_max_q_ref = ctx.addCached(hold(t_import.float_import("/fs/testData/qB/in/Max_1_0.idx")), "b_max_q_ref"); //reference outputs - S_TENSOR ref_b_q = ctx.add(defer(t_import.ubyte_import("/fs/testData/qB/out/qB_0.idx", "ref_b_q"))); - S_TENSOR ref_b_min_q = ctx.add(defer(t_import.float_import("/fs/testData/qB/out/qB_1.idx", "ref_b_min_q"))); - S_TENSOR ref_b_max_q = ctx.add(defer(t_import.float_import("/fs/testData/qB/out/qb_2.idx", "ref_b_max_q"))); + S_TENSOR ref_b_q = ctx.addCached(hold(t_import.ubyte_import("/fs/testData/qB/out/qB_0.idx")), "ref_b_q"); + S_TENSOR ref_b_min_q = ctx.addCached(hold(t_import.float_import("/fs/testData/qB/out/qB_1.idx")), "ref_b_min_q"); + S_TENSOR ref_b_max_q = ctx.addCached(hold(t_import.float_import("/fs/testData/qB/out/qb_2.idx")), "ref_b_max_q"); - S_TENSOR out_b_q = ctx.add(defer(new RamTensor(b_q_ref->getShape(), "b_q"))); - S_TENSOR out_b_min_q = ctx.add(defer(new RamTensor(b_min_q_ref->getShape(), "b_min_q"))); - S_TENSOR out_b_max_q = ctx.add(defer(new RamTensor(b_max_q_ref->getShape(), "b_max_q"))); + S_TENSOR out_b_q = ctx.addCached(hold(new RamTensor(b_q_ref->getShape())), "b_q"); + S_TENSOR out_b_min_q = ctx.addCached(hold(new RamTensor(b_min_q_ref->getShape())), "b_min_q"); + S_TENSOR out_b_max_q = ctx.addCached(hold(new RamTensor(b_max_q_ref->getShape())), "b_max_q"); //Implementation goes here timer_start(); - ctx.push(defer(new QuantizeV2Op()), {"b_q_ref", "b_min_q_ref", "b_max_q_ref"}, {"b_q", "b_min_q", "b_max_q"}); + ctx.push_static(hold(new QuantizeV2Op()), "QuantizeV2Op", {"b_q_ref", "b_min_q_ref", "b_max_q_ref"}, {"b_q", "b_min_q", "b_max_q"}); ctx.eval(); timer_stop(); @@ -44,18 +44,18 @@ class ArrayOpsTest : public Test { testStart("dequantize"); //reference inputs - S_TENSOR a = ctx.add(defer(t_import.ubyte_import("/fs/testData/deQ/in/rQ_0.idx", "a"))); - S_TENSOR a_min = ctx.add(defer(t_import.float_import("/fs/testData/deQ/in/rQ_1.idx", "a_min"))); - S_TENSOR a_max = ctx.add(defer(t_import.float_import("/fs/testData/deQ/in/rQ_2.idx", "a_max"))); + S_TENSOR a = 
ctx.addCached(hold(t_import.ubyte_import("/fs/testData/deQ/in/rQ_0.idx")), "a"); + S_TENSOR a_min = ctx.addCached(hold(t_import.float_import("/fs/testData/deQ/in/rQ_1.idx")), "a_min"); + S_TENSOR a_max = ctx.addCached(hold(t_import.float_import("/fs/testData/deQ/in/rQ_2.idx")), "a_max"); //reference outputs - S_TENSOR out_ref = ctx.add(defer(t_import.float_import("/fs/testData/deQ/out/deQ_0.idx", "out_ref"))); + S_TENSOR out_ref = ctx.addCached(hold(t_import.float_import("/fs/testData/deQ/out/deQ_0.idx")), "out_ref"); //modify the checks below: - S_TENSOR out = ctx.add(defer(new RamTensor(out_ref->getShape(), "out"))); + S_TENSOR out = ctx.addCached(hold(new RamTensor(out_ref->getShape())), "out"); timer_start(); - ctx.push(defer(new DequantizeOp()), {"a", "a_min", "a_max"}, {"out"}); + ctx.push_static(hold(new DequantizeOp()), "DequantizeOp", {"a", "a_min", "a_max"}, {"out"}); ctx.eval(); timer_stop(); @@ -69,18 +69,18 @@ class ArrayOpsTest : public Test { TensorIdxImporter t_import; //reference inputs - S_TENSOR ref_a = ctx.add(defer(t_import.float_import("/fs/testData/ref_reshape/in/Const_0.idx", "ref_a"))); - S_TENSOR ref_dim = ctx.add(defer(t_import.int_import("/fs/testData/ref_reshape/in/Const_1_0.idx", "ref_dim"))); + S_TENSOR ref_a = ctx.addCached(hold(t_import.float_import("/fs/testData/ref_reshape/in/Const_0.idx")), "ref_a"); + S_TENSOR ref_dim = ctx.addCached(hold(t_import.int_import("/fs/testData/ref_reshape/in/Const_1_0.idx")), "ref_dim"); //reference outputs - S_TENSOR out_ref_2 = ctx.add(defer(t_import.float_import("/fs/testData/ref_reshape/out/ref_reshape_0.idx", "out_ref_2"))); + S_TENSOR out_ref_2 = ctx.addCached(hold(t_import.float_import("/fs/testData/ref_reshape/out/ref_reshape_0.idx")), "out_ref_2"); //modify the checks below: - S_TENSOR out_2 = ctx.add(defer(new RamTensor(out_ref_2->getShape(), "out_2"))); + S_TENSOR out_2 = ctx.addCached(hold(new RamTensor(out_ref_2->getShape())), "out_2"); timer_start(); - ctx.push(defer(new ReshapeOp()), {"ref_a", "ref_dim"}, {"out_2"}); + ctx.push_static(hold(new ReshapeOp()), "ReshapeOp", {"ref_a", "ref_dim"}, {"out_2"}); ctx.eval(); timer_stop(); diff --git a/MathTests.hpp b/MathTests.hpp index d117f4a5..61c011e7 100644 --- a/MathTests.hpp +++ b/MathTests.hpp @@ -17,24 +17,24 @@ class MathOpsTest : public Test { //Note: raw pointers should be owned ONLY by the context. 
no copy of the raw pointer should exist elsewhere // reference inputs - ctx.add(defer(t_import.int_import("/fs/testData/rqRange/in/qMatMul_0.idx", "a"))); - ctx.add(defer(t_import.float_import("/fs/testData/rqRange/in/qMatMul_1.idx", "a_min"))); - ctx.add(defer(t_import.float_import("/fs/testData/rqRange/in/qMatMul_2.idx", "a_max"))); + ctx.addCached(hold(t_import.int_import("/fs/testData/rqRange/in/qMatMul_0.idx")), "a"); + ctx.addCached(hold(t_import.float_import("/fs/testData/rqRange/in/qMatMul_1.idx")), "a_min"); + ctx.addCached(hold(t_import.float_import("/fs/testData/rqRange/in/qMatMul_2.idx")), "a_max"); // reference output - ctx.add(defer(t_import.float_import("/fs/testData/rqRange/out/rqRange_0.idx", "ref_min"))); - ctx.add(defer(t_import.float_import("/fs/testData/rqRange/out/rqRange_1.idx", "ref_max"))); + ctx.addCached(hold(t_import.float_import("/fs/testData/rqRange/out/rqRange_0.idx")), "ref_min"); + ctx.addCached(hold(t_import.float_import("/fs/testData/rqRange/out/rqRange_1.idx")), "ref_max"); // Implementation goes here // modify the checks below: - ctx.add(defer(new RamTensor(ctx.get("ref_min")->getShape(), "out_min"))); - ctx.add(defer(new RamTensor(ctx.get("ref_max")->getShape(), "out_max"))); + ctx.addCached(hold(new RamTensor(ctx.get("ref_min")->getShape())), "out_min"); + ctx.addCached(hold(new RamTensor(ctx.get("ref_max")->getShape())), "out_max"); TNameList inputs = {"a", "a_min", "a_max"}; TNameList outputs = {"out_min", "out_max"}; timer_start(); - ctx.push(defer(new Requantization_RangeOp()), inputs, outputs); + ctx.push_static(hold(new Requantization_RangeOp()), "Requantization_RangeOp", inputs, outputs); ctx.eval(); timer_stop(); @@ -52,25 +52,25 @@ class MathOpsTest : public Test { ctx.gc(); // reference inputs - ctx.add(defer(t_import.int_import("/fs/testData/rQ/in/qMatMul_0.idx", "a"))); - ctx.add(defer(t_import.float_import("/fs/testData/rQ/in/qMatMul_1.idx", "a_min"))); - ctx.add(defer(t_import.float_import("/fs/testData/rQ/in/qMatMul_2.idx", "a_max"))); - ctx.add(defer(t_import.float_import("/fs/testData/rQ/in/rqRange_0.idx", "r_a_min"))); - ctx.add(defer(t_import.float_import("/fs/testData/rQ/in/rqRange_1.idx", "r_a_max"))); + ctx.addCached(hold(t_import.int_import("/fs/testData/rQ/in/qMatMul_0.idx")), "a"); + ctx.addCached(hold(t_import.float_import("/fs/testData/rQ/in/qMatMul_1.idx")), "a_min"); + ctx.addCached(hold(t_import.float_import("/fs/testData/rQ/in/qMatMul_2.idx")), "a_max"); + ctx.addCached(hold(t_import.float_import("/fs/testData/rQ/in/rqRange_0.idx")), "r_a_min"); + ctx.addCached(hold(t_import.float_import("/fs/testData/rQ/in/rqRange_1.idx")), "r_a_max"); // tf.quint8 //Note: //Instead of using ctx.get() to obtain a shared_ptr, you may also use the shared_ptr returned by ctx.add() // reference outputs - S_TENSOR ref_a_q = ctx.add(defer(t_import.ubyte_import("/fs/testData/rQ/out/rQ_0.idx", "ref_a_q"))); - S_TENSOR ref_a_min = ctx.add(defer(t_import.float_import("/fs/testData/rQ/out/rQ_1.idx", "ref_a_min"))); - S_TENSOR ref_a_max = ctx.add(defer(t_import.float_import("/fs/testData/rQ/out/rQ_2.idx", "ref_a_max"))); + S_TENSOR ref_a_q = ctx.addCached(hold(t_import.ubyte_import("/fs/testData/rQ/out/rQ_0.idx")), "ref_a_q"); + S_TENSOR ref_a_min = ctx.addCached(hold(t_import.float_import("/fs/testData/rQ/out/rQ_1.idx")), "ref_a_min"); + S_TENSOR ref_a_max = ctx.addCached(hold(t_import.float_import("/fs/testData/rQ/out/rQ_2.idx")), "ref_a_max"); // modify the checks below: - S_TENSOR a_q = ctx.add(defer(new RamTensor(ref_a_q->getShape(), "a_q"))); 
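The defer-to-hold rename in this hunk keeps the same trick: the constructor call is wrapped in a zero-argument lambda, so addCached() can skip the allocation entirely when the name is already tracked in rTable. A standalone sketch of the pattern, with a plain map standing in for rTable and all names illustrative:

#include <functional>
#include <string>
#include <unordered_map>

struct Obj { int payload; };

static std::unordered_map<std::string, Obj*> cache;

// Construct-on-miss: the held lambda only runs when the name is new.
Obj* add_cached(std::function<void*(void)> make, const std::string& name) {
  auto it = cache.find(name);
  if (it != cache.end()) return it->second;   // cached: lambda never runs
  Obj* obj = static_cast<Obj*>(make());       // miss: construct now
  cache[name] = obj;
  return obj;
}

#define hold(...) ([&]() { return (void*)(__VA_ARGS__); })
// usage: Obj* a = add_cached(hold(new Obj{42}), "a");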
- S_TENSOR a_min_q = ctx.add(defer(new RamTensor(ref_a_min->getShape(), "a_min_q"))); - S_TENSOR a_max_q = ctx.add(defer(new RamTensor(ref_a_max->getShape(), "a_max_q"))); + S_TENSOR a_q = ctx.addCached(hold(new RamTensor(ref_a_q->getShape())), "a_q"); + S_TENSOR a_min_q = ctx.addCached(hold(new RamTensor(ref_a_min->getShape())), "a_min_q"); + S_TENSOR a_max_q = ctx.addCached(hold(new RamTensor(ref_a_max->getShape())), "a_max_q"); TNameList inputs = {"a", "a_min", "a_max", "r_a_min", "r_a_max"}; @@ -78,7 +78,7 @@ class MathOpsTest : public Test { // Implementation goes here timer_start(); - ctx.push(defer(new RequantizeOp()), inputs, outputs); + ctx.push_static(hold(new RequantizeOp()), "RequantizeOp", inputs, outputs); ctx.eval(); timer_stop(); @@ -95,23 +95,23 @@ class MathOpsTest : public Test { ctx.gc(); // reference inputs - ctx.add(defer(t_import.int_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_0.idx", "a"))); - ctx.add(defer(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_1.idx", "a_min"))); - ctx.add(defer(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_2.idx", "a_max"))); - ctx.add(defer(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_0.idx", "r_a_min"))); - ctx.add(defer(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_1.idx", "r_a_max"))); + ctx.addCached(hold(t_import.int_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_0.idx")), "a"); + ctx.addCached(hold(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_1.idx")), "a_min"); + ctx.addCached(hold(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_2.idx")), "a_max"); + ctx.addCached(hold(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_0.idx")), "r_a_min"); + ctx.addCached(hold(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_1.idx")), "r_a_max"); // tf.quint8 // reference outputs - ctx.add(defer(t_import.ubyte_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_0.idx", "ref_a_q"))); - ctx.add(defer(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_1.idx", "ref_a_min"))); - ctx.add(defer(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_2.idx", "ref_a_max"))); + ctx.addCached(hold(t_import.ubyte_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_0.idx")), "ref_a_q"); + ctx.addCached(hold(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_1.idx")), "ref_a_min"); + ctx.addCached(hold(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_2.idx")), "ref_a_max"); // modify the checks below: - ctx.add(defer(new RamTensor(ctx.get("ref_a_q")->getShape(), "a_q"))); - ctx.add(defer(new RamTensor(ctx.get("ref_a_min")->getShape(), "a_min_q"))); - ctx.add(defer(new RamTensor(ctx.get("ref_a_max")->getShape(), "a_max_q"))); + 
ctx.addCached(hold(new RamTensor(ctx.get("ref_a_q")->getShape())), "a_q"); + ctx.addCached(hold(new RamTensor(ctx.get("ref_a_min")->getShape())), "a_min_q"); + ctx.addCached(hold(new RamTensor(ctx.get("ref_a_max")->getShape())), "a_max_q"); S_TENSOR ref_val = ctx.get("ref_a_q"); S_TENSOR ref_min = ctx.get("ref_a_min"); @@ -122,7 +122,7 @@ class MathOpsTest : public Test { // Implementation goes here timer_start(); - ctx.push(defer(new RequantizeOp()), {"a", "a_min", "a_max", "r_a_min", "r_a_max"}, {"a_q", "a_min_q", "a_max_q"}); + ctx.push_static(hold(new RequantizeOp()), "RequantizeOp", {"a", "a_min", "a_max", "r_a_min", "r_a_max"}, {"a_q", "a_min_q", "a_max_q"}); ctx.eval(); timer_stop(); @@ -158,28 +158,28 @@ class MathOpsTest : public Test { ctx.gc(); // reference inputs - ctx.add(defer(t_import.float_import("/fs/testData/ArgMax/in/ArgMax-input_0.idx", "ref_a"))); - ctx.add(defer(t_import.int_import("/fs/testData/ArgMax/in/ArgMax-dimension_0.idx", "ref_dim"))); + ctx.addCached(hold(t_import.float_import("/fs/testData/ArgMax/in/ArgMax-input_0.idx")), "ref_a"); + ctx.addCached(hold(t_import.int_import("/fs/testData/ArgMax/in/ArgMax-dimension_0.idx")), "ref_dim"); // reference outputs /// NT: FIXME: argmax outputs int64 tensor which isn't supported by /// int_import. - S_TENSOR ref_out = ctx.add(defer(t_import.float_import("/fs/testData/ArgMax/out/ArgMax_0.idx", "ref_out"))); + S_TENSOR ref_out = ctx.addCached(hold(t_import.float_import("/fs/testData/ArgMax/out/ArgMax_0.idx")), "ref_out"); // Implementation goes here // modify the checks below: - S_TENSOR out = ctx.add(defer(new RamTensor(ref_out->getShape(), "out"))); + S_TENSOR out = ctx.addCached(hold(new RamTensor(ref_out->getShape())), "out"); TNameList inputs = {"ref_a", "ref_dim"}; TNameList outputs = {"out"}; timer_start(); - ctx.push(defer(new ArgMaxOp()), inputs, outputs); + ctx.push_static(hold(new ArgMaxOp()), "ArgMaxOp", inputs, outputs); ctx.eval(); timer_stop(); - Tensor* out_float = TensorCast(out.get(), "out_float"); ///NT: /WIP how to handle the name? + Tensor* out_float = TensorCast(out.get()); ///NT: /WIP how to handle the name? 
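TensorCast above copies element by element into a tensor of the destination type, which is how the integer argmax output gets compared against the float reference. A minimal sketch of that kind of cast, with plain vectors standing in for the tensor read()/write() interface:

#include <vector>

// Element-wise cast between value types, mirroring what TensorCast does.
template <typename TIn, typename TOut>
std::vector<TOut> cast_elements(const std::vector<TIn>& in) {
  std::vector<TOut> out;
  out.reserve(in.size());
  for (const TIn& v : in) {
    out.push_back(static_cast<TOut>(v));  // per-element static_cast
  }
  return out;
}
// usage: std::vector<float> f = cast_elements<int, float>({1, 2, 3});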
double result = meanPercentErr(ref_out.get(), out_float); @@ -192,29 +192,29 @@ class MathOpsTest : public Test { ctx.gc(); - S_TENSOR test_input = ctx.add(defer(TensorConstant({10, 5}, 0.0f, "test_input"))); + S_TENSOR test_input = ctx.add(TensorConstant({10, 5}, 0.0f), "test_input"); *(test_input->write(25, 0)) = 1.0f; *(test_input->write(26, 0)) = 1.0f; *(test_input->write(7, 0)) = 1.0f; *(test_input->write(48, 0)) = 1.0f; *(test_input->write(14, 0)) = 1.0f; - S_TENSOR test_dim = ctx.add(defer(new RamTensor({1}, "test_dim"))); + S_TENSOR test_dim = ctx.add(new RamTensor({1}), "test_dim"); *(test_dim->write(0, 0)) = 0; - S_TENSOR test_out_ref = ctx.add(defer(new RamTensor({5}, "test_out_ref"))); + S_TENSOR test_out_ref = ctx.add(new RamTensor({5}), "test_out_ref"); *(test_out_ref->write(0, 0)) = 5.0f; *(test_out_ref->write(1, 0)) = 5.0f; *(test_out_ref->write(2, 0)) = 1.0f; *(test_out_ref->write(3, 0)) = 9.0f; *(test_out_ref->write(4, 0)) = 2.0f; - S_TENSOR test_out = ctx.add(defer(new RamTensor(test_out_ref->getShape(), "test_out"))); + S_TENSOR test_out = ctx.add(new RamTensor(test_out_ref->getShape()), "test_out"); TNameList inputs = {"test_input", "test_dim"}; TNameList outputs = {"test_out"}; timer_start(); - ctx.push(defer(new ArgMaxOp()), inputs, outputs); + ctx.push(new ArgMaxOp(), inputs, outputs); ctx.eval(); timer_stop(); @@ -227,20 +227,20 @@ class MathOpsTest : public Test { testStart("add"); // reference inputs - ctx.add(defer(t_import.float_import("/fs/testData/ref_add/in/Const_5_0.idx", "a"))); - ctx.add(defer(t_import.float_import("/fs/testData/ref_add/in/Const_6_0.idx", "b"))); + ctx.addCached(hold(t_import.float_import("/fs/testData/ref_add/in/Const_5_0.idx")), "a"); + ctx.addCached(hold(t_import.float_import("/fs/testData/ref_add/in/Const_6_0.idx")), "b"); // reference outputs - S_TENSOR ref_out = ctx.add(defer(t_import.float_import("/fs/testData/ref_add/out/ref_add_0.idx", "ref_out"))); + S_TENSOR ref_out = ctx.addCached(hold(t_import.float_import("/fs/testData/ref_add/out/ref_add_0.idx")), "ref_out"); // Implementation goes here // modify the checks below: - S_TENSOR out = ctx.add(defer(new RamTensor(ref_out->getShape(), "out"))); + S_TENSOR out = ctx.addCached(hold(new RamTensor(ref_out->getShape())), "out"); TNameList inputs = {"a", "b"}; TNameList outputs = {"out"}; timer_start(); - ctx.push(defer(new AddOp()), inputs, outputs); + ctx.push_static(hold(new AddOp()), "AddOp", inputs, outputs); ctx.eval(); timer_stop(); @@ -255,21 +255,21 @@ class MathOpsTest : public Test { ctx.gc(); // reference inputs - ctx.add(defer(t_import.float_import("/fs/testData/ref_min/in/Const_2_0.idx", "a"))); - ctx.add(defer(t_import.int_import("/fs/testData/ref_min/in/Const_3_0.idx", "dim"))); + ctx.addCached(hold(t_import.float_import("/fs/testData/ref_min/in/Const_2_0.idx")), "a"); + ctx.addCached(hold(t_import.int_import("/fs/testData/ref_min/in/Const_3_0.idx")), "dim"); // reference outputs - S_TENSOR ref_out = ctx.add(defer(t_import.float_import("/fs/testData/ref_min/out/ref_min_0.idx", "ref_out"))); + S_TENSOR ref_out = ctx.addCached(hold(t_import.float_import("/fs/testData/ref_min/out/ref_min_0.idx")), "ref_out"); // Implementation goes here // modify the checks below: - S_TENSOR out = ctx.add(defer(new RamTensor(ref_out->getShape(), "out"))); + S_TENSOR out = ctx.addCached(hold(new RamTensor(ref_out->getShape())), "out"); TNameList inputs = {"a", "dim"}; TNameList outputs = {"out"}; timer_start(); - ctx.push(defer(new MinOp()), inputs, outputs); + ctx.push_static(hold(new 
MinOp()), "MinOp", inputs, outputs); ctx.eval(); timer_stop(); @@ -284,20 +284,20 @@ class MathOpsTest : public Test { ctx.gc(); // reference inputs - ctx.add(defer(t_import.float_import("/fs/testData/ref_max/in/Const_2_0.idx", "a"))); - ctx.add(defer(t_import.int_import("/fs/testData/ref_max/in/Const_4_0.idx", "dim"))); + ctx.addCached(hold(t_import.float_import("/fs/testData/ref_max/in/Const_2_0.idx")), "a"); + ctx.addCached(hold(t_import.int_import("/fs/testData/ref_max/in/Const_4_0.idx")), "dim"); // reference outputs - S_TENSOR ref_out = ctx.add(defer(t_import.float_import("/fs/testData/ref_max/out/ref_max_0.idx", "ref_out"))); + S_TENSOR ref_out = ctx.addCached(hold(t_import.float_import("/fs/testData/ref_max/out/ref_max_0.idx")), "ref_out"); // Implementation goes here // modify the checks below: - S_TENSOR out = ctx.add(defer(new RamTensor(ref_out->getShape(), "out"))); + S_TENSOR out = ctx.addCached(hold(new RamTensor(ref_out->getShape())), "out"); TNameList inputs = {"a", "dim"}; TNameList outputs = {"out"}; timer_start(); - ctx.push(defer(new MaxOp()), inputs, outputs); + ctx.push_static(hold(new MaxOp()), "MaxOp", inputs, outputs); ctx.eval(); timer_stop(); diff --git a/MatrixTests.hpp b/MatrixTests.hpp index b42fa0c4..5e7c1166 100644 --- a/MatrixTests.hpp +++ b/MatrixTests.hpp @@ -17,24 +17,24 @@ class matrixOpsTest : public Test { ctx.gc(); //inputs - ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx", "a")); - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx", "a_min")); - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx", "a_max")); - ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx", "b")); - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx", "b_min")); - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx", "b_max")); + ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx"), "a"); + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx"), "a_min"); + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx"), "a_max"); + ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx"), "b"); + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx"), "b_min"); + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx"), "b_max"); // reference outputs - S_TENSOR c = ctx.add(t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx", "c")); - S_TENSOR c_min = ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx", "c_min")); - S_TENSOR c_max = ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx", "c_max")); + S_TENSOR c = ctx.add(t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx"), "c"); + S_TENSOR c_min = ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx"), "c_min"); + S_TENSOR c_max = ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx"), "c_max"); //we need default constructor here //so we can get ride of the shapes here - S_TENSOR out_c = ctx.add(new RamTensor(c->getShape(), "out_c")); - S_TENSOR out_min = ctx.add(new RamTensor(c_min->getShape(), "out_min")); - S_TENSOR out_max = ctx.add(new RamTensor(c_max->getShape(), "out_max")); + S_TENSOR out_c = ctx.add(new RamTensor(c->getShape()), "out_c"); + S_TENSOR out_min = ctx.add(new RamTensor(c_min->getShape()), "out_min"); + S_TENSOR out_max = ctx.add(new RamTensor(c_max->getShape()), "out_max"); //TList inputs = {a, a_min, a_max, b, b_min, b_max}; //TList outputs = {out_c, out_min, out_max}; diff --git 
a/NnTests.hpp b/NnTests.hpp index e77f603b..a111fac3 100644 --- a/NnTests.hpp +++ b/NnTests.hpp @@ -14,24 +14,24 @@ class NnOpsTest : public Test { testStart("quantized_relu"); // reference inputs S_TENSOR a = - ctx.add(t_import.ubyte_import("/fs/testData/ref_qRelu/in/QuantizeV2_0.idx", "a")); + ctx.add(t_import.ubyte_import("/fs/testData/ref_qRelu/in/QuantizeV2_0.idx"), "a"); S_TENSOR min = - ctx.add(t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_1.idx", "min")); + ctx.add(t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_1.idx"), "min"); S_TENSOR max = - ctx.add(t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_2.idx", "max")); + ctx.add(t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_2.idx"), "max"); // reference outputs S_TENSOR ref_out = - ctx.add(t_import.ubyte_import("/fs/testData/ref_qRelu/out/ref_qRelu_0.idx", "ref_out")); + ctx.add(t_import.ubyte_import("/fs/testData/ref_qRelu/out/ref_qRelu_0.idx"), "ref_out"); S_TENSOR ref_min = - ctx.add(t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_1.idx", "ref_min")); + ctx.add(t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_1.idx"), "ref_min"); S_TENSOR ref_max = - ctx.add(t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_2.idx", "ref_max")); + ctx.add(t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_2.idx"), "ref_max"); // modify the checks below: - S_TENSOR out = ctx.add(new RamTensor(ref_out->getShape(), "out")); - S_TENSOR out_min = ctx.add(new RamTensor(ref_min->getShape(), "out_min")); - S_TENSOR out_max = ctx.add(new RamTensor(ref_max->getShape(), "out_max")); + S_TENSOR out = ctx.add(new RamTensor(ref_out->getShape()), "out"); + S_TENSOR out_min = ctx.add(new RamTensor(ref_min->getShape()), "out_min"); + S_TENSOR out_max = ctx.add(new RamTensor(ref_max->getShape()), "out_max"); timer_start(); diff --git a/context.cpp b/context.cpp index 5a6973a1..fd407f21 100644 --- a/context.cpp +++ b/context.cpp @@ -1,14 +1,40 @@ #include "context.hpp" -S_TENSOR Context::add(std::function func, uint8_t init_count) { - Tensor* t = (Tensor*) func(); +S_TENSOR Context::add_static(std::function func, TName _name) { + return addCached(func, _name, 1, true); +} + +S_TENSOR Context::addCached(std::function func, TName _name, uint8_t init_count, bool _is_static) { + Tensor* t; + if(rTable.find(_name) == rTable.end()) { + t = (Tensor*) func(); + add(t, _name); + } + + Ref_Record record = rTable[_name]; + record.is_static = _is_static; + record.is_cacheable = true; + if(init_count > 0) { + record.count = init_count; + record.allow_incr = false; + } + if(record.count < 1 && record.is_static) { + record.count = 1; + } + rTable[_name] = record; + + return record.sptr; +} + +S_TENSOR Context::add(Tensor* t, TName _name, uint8_t init_count) { if(t == nullptr) { ERR_EXIT("null pointer tensor"); } - if(rTable.find(t->getName()) != rTable.end()) { + if(rTable.find(_name) != rTable.end()) { ///NT: TODO: check stateful here ERR_EXIT("tensor with name \"%s\" address already exist in rTable", t->getName().c_str()); } S_TENSOR _sptr(t); + t->setName(_name); Ref_Record record; @@ -19,7 +45,7 @@ S_TENSOR Context::add(std::function func, uint8_t init_count) { record.sptr = _sptr; - rTable[t->getName()] = record; + rTable[_name] = record; return _sptr; } @@ -29,9 +55,42 @@ S_TENSOR Context::get(TName const &t_name) { return rTable[t_name].sptr; } +Operator* Context::registerOpTable(std::function func, TName _name) { + Operator* op; + //empty static op tensor list + 
if(opTable.find(_name) == opTable.end()) { + op = (Operator*) func(); + op->setName(_name); + } else { + op = opTable[_name]; + } + + return op; +} + +void Context::push_static(std::function func, TName _name, TNameList &_inputs, TNameList &_outputs, bool is_static) { + push(registerOpTable(func, _name), _inputs, _outputs); +} +void Context::push_static(std::function func, TName _name, std::initializer_list _inputs, std::initializer_list _outputs, bool is_static) { + push(registerOpTable(func, _name), _inputs, _outputs); +} + +void Context::push(Operator* op, std::initializer_list _inputs, std::initializer_list _outputs) { + TNameList inputs; + TNameList outputs; + + for(auto i:_inputs) { + inputs.push_back(i); + } -void Context::push(std::function func, TNameList &in_names, TNameList &out_names) { - Operator* op = (Operator*) func(); + for(auto o:_outputs) { + outputs.push_back(o); + } + + push(op, inputs, outputs); +} + +void Context::push(Operator* op, TNameList &in_names, TNameList &out_names) { //error checking in the Op class S_TList _inputs; for(auto in:in_names) { @@ -54,21 +113,6 @@ void Context::push(std::function func, TNameList &in_names, TNameLi } -void Context::push(std::function func, std::initializer_list _inputs, std::initializer_list _outputs) { - TNameList inputs; - TNameList outputs; - - for(auto i:_inputs) { - inputs.push_back(i); - } - - for(auto o:_outputs) { - outputs.push_back(o); - } - - push(func, inputs, outputs); -} - void Context::incrTNameListRef(const TNameList &t_list) { for(auto t_name:t_list) { if(rTable.find(t_name) == rTable.end()) { @@ -76,7 +120,7 @@ void Context::incrTNameListRef(const TNameList &t_list) { } Ref_Record record = rTable[t_name]; - if(record.allow_incr) { + if(record.allow_incr && !record.is_static) { record.count++; rTable[t_name] = record; } @@ -120,7 +164,7 @@ uint8_t Context::dcrRef(TName t_name) { } Ref_Record record = rTable[t_name]; - if(record.count > 0) record.count -= 1; + if(record.count > 0 && !record.is_static) record.count -= 1; rTable[t_name] = record; return record.count; @@ -130,6 +174,14 @@ bool Context::isTracked(TName t_name) { return (rTable.find(t_name) != rTable.end()); } +void Context::cleanUpOp(Operator* op) { + if(opTable.find(op->getName()) == opTable.end()) { + delete op; + } else { + op->empty(); + } +} + int Context::eval(void) { //unref2nullTensors(); @@ -146,11 +198,8 @@ int Context::eval(void) { dcrListRef(op->getInputs()); - delete op; ///NT: TODO: replace this with a cleanupOp(op) method - ///context would require a new op record table - ///addStateful(Ops, name) - ///push(opName, ...) - ///Can you pass constructor as a reference? + + cleanUpOp(op); } diff --git a/context.hpp b/context.hpp index 959b5e71..01985e96 100644 --- a/context.hpp +++ b/context.hpp @@ -13,10 +13,14 @@ class Ref_Record { public: uint8_t count; bool allow_incr; + bool is_static; + bool is_cacheable; S_TENSOR sptr; Ref_Record() { count = 0; + is_static = false; + is_cacheable = true; allow_incr = true; sptr.reset(); } @@ -27,7 +31,8 @@ class Context : public uTensor { std::vector op_list; bool del_onsight; - std::unordered_map rTable; //all tensors alive //kill all unused if malloc failed? + std::unordered_map rTable; + std::unordered_map opTable; //all tensors alive //kill all unused if malloc failed? 
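push_static() above routes through registerOpTable(), and the commit message marks this internal gc work as wip: in the intended lifecycle the functor constructs the Operator only once per name, later pushes reuse the cached instance, and cleanUpOp() deletes only ops that were never registered. A standalone sketch of that lifecycle (names illustrative; unlike the wip code above, here the op is inserted into the table on first construction):

#include <functional>
#include <string>
#include <unordered_map>

struct Op { virtual ~Op() {} };

static std::unordered_map<std::string, Op*> op_table;

// Construct once per name; later lookups reuse the cached instance.
Op* register_op(std::function<void*(void)> make, const std::string& name) {
  auto it = op_table.find(name);
  if (it != op_table.end()) return it->second;
  Op* op = static_cast<Op*>(make());
  op_table[name] = op;
  return op;
}

// One-shot (unregistered) ops are freed after eval; cached ops persist.
void clean_up_op(Op* op, const std::string& name) {
  if (op_table.find(name) == op_table.end()) delete op;
}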
//uint32_t m_size; //remaining memory size //void registerTensor(Tensor* t); //void gc(void); //garbage collector, delete any tracked unreferenced tensor @@ -41,15 +46,20 @@ class Context : public uTensor { //uint16_t incrRef(std::shared_ptr sptr); uint8_t dcrRef(TName name); bool isTracked(TName name); + void cleanUpOp(Operator* op); + Operator* registerOpTable(std::function func, TName _name); //bool isTracked(Tensor* t); //uint16_t getRef(); public: -//S_TENSOR addStateful(std::function func); - S_TENSOR add(std::function func, uint8_t init_count = 0); + S_TENSOR add_static(std::function func, TName _name); + S_TENSOR addCached(std::function func, TName _name, uint8_t init_count = 0, bool _is_static = false); + S_TENSOR add(Tensor* t, TName _name, uint8_t init_count = 0); S_TENSOR get(TName const &t_name); - void push(std::function func, TNameList &_inputs, TNameList &_outputs); - void push(std::function func, std::initializer_list _inputs, std::initializer_list _outputs); + void push_static(std::function func, TName _name, TNameList &_inputs, TNameList &_outputs, bool is_static = false); + void push_static(std::function func, TName _name, std::initializer_list _inputs, std::initializer_list _outputs, bool is_static = false); + void push(Operator* op, TNameList &_inputs, TNameList &_outputs); + void push(Operator* op, std::initializer_list _inputs, std::initializer_list _outputs); uint32_t gc(void); int eval(void); @@ -59,6 +69,6 @@ class Context : public uTensor { }; -#define defer(...) ([&](){return (void*) (__VA_ARGS__);}) +#define hold(...) ([&](){return (void*) (__VA_ARGS__);}) #endif // UTENSOR_CTX_H diff --git a/context_test.hpp b/context_test.hpp index 319b7140..a2ef757c 100644 --- a/context_test.hpp +++ b/context_test.hpp @@ -20,9 +20,9 @@ class contextTest : public Test { private: void codeGenStatfulHelper(TName state) { - ctx.add(defer(TensorConstant({1}, 1, "incr_val"))); - ctx.add(defer(new RamTensor({1}, "out"))); //gc problem? - ctx.push(defer(new AddOp()), {"incr_val", state}, {"out"}); + ctx.add(TensorConstant({1}, 1), "incr_val"); + ctx.add(new RamTensor({1}), "out"); //gc problem? 
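This stateful helper leans on the is_static flag the patch introduces: incrTNameListRef() and dcrRef() both skip static records, so the "state" tensor keeps a pinned reference across gc() and eval() rounds. A minimal sketch of that counting rule, with the record layout simplified:

#include <cstdint>

struct RefRecord {
  uint8_t count = 0;
  bool allow_incr = true;
  bool is_static = false;
};

// Statics never accumulate references...
void incr_ref(RefRecord& r) {
  if (r.allow_incr && !r.is_static) r.count++;
}

// ...and never drop to zero, so gc (which frees at count == 0) skips them.
uint8_t decr_ref(RefRecord& r) {
  if (r.count > 0 && !r.is_static) r.count--;
  return r.count;
}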
+ ctx.push(new AddOp(), {"incr_val", state}, {"out"}); ctx.eval(); } @@ -35,9 +35,9 @@ class contextTest : public Test { timer_start(); //inputs - S_TENSOR a = ctx.add(defer(new RamTensor({1,1,1}, "a"))); - S_TENSOR b = ctx.add(defer(new RamTensor({1,1,1}, "b"))); - S_TENSOR c = ctx.add(defer(new RamTensor({1,1,1}, "c"))); + S_TENSOR a = ctx.add(new RamTensor({1,1,1}), "a"); + S_TENSOR b = ctx.add(new RamTensor({1,1,1}), "b"); + S_TENSOR c = ctx.add(new RamTensor({1,1,1}), "c"); //init values *(a->write(0, 0)) = 1; @@ -45,19 +45,19 @@ class contextTest : public Test { *(c->write(0, 0)) = 1; // reference outputs - S_TENSOR out = ctx.add(defer(new RamTensor({1,1,1}, "out"))); + S_TENSOR out = ctx.add(new RamTensor({1,1,1}), "out"); TNameList inputs0 = {"a", "b"}; TNameList outputs0 = {"c"}; //2 - ctx.push(defer(new AddOp()), inputs0, outputs0); + ctx.push(new AddOp(), inputs0, outputs0); TNameList inputs1 = {"c", "a"}; TNameList outputs1 = {"b"}; //3 - ctx.push(defer(new AddOp()), inputs1, outputs1); + ctx.push(new AddOp(), inputs1, outputs1); TNameList inputs2 = {"a", "b"}; TNameList outputs2 = {"out"}; //4 - ctx.push(defer(new AddOp()), inputs2, outputs2); + ctx.push(new AddOp(), inputs2, outputs2); ctx.eval(); timer_stop(); @@ -74,9 +74,9 @@ class contextTest : public Test { void codeGenTemplate(void) { testStart("codeGenTemplate"); ctx.gc(); - S_TENSOR state = ctx.add(defer(TensorConstant({1}, 0, "state")), 255); S_TENSOR out; for(auto i = 0; i < 5; i++) { + S_TENSOR state = ctx.add_static(hold(TensorConstant({1}, 0)), "state"); codeGenStatfulHelper("state"); out = ctx.get("out"); *(state->write(0, 0)) = *(out->read(0, 0)); diff --git a/deep_mnist_mlp.cpp b/deep_mnist_mlp.cpp index 3f94db2d..b24d2dab 100644 --- a/deep_mnist_mlp.cpp +++ b/deep_mnist_mlp.cpp @@ -1,191 +1,188 @@ -// #include "deep_mnist_mlp.hpp" +#include "deep_mnist_mlp.hpp" -// void tensorQuantize(Context& ctx, TName input, TName output, -// TName out_min, TName out_max) { +void tensorQuantize(Context& ctx, TName input, TName output, + TName out_min, TName out_max) { -// //reshape -// S_TENSOR reduce_dim = ctx.add(new RamTensor({1}, "reduce_dim")); -// S_TENSOR reshape_out = ctx.add(new RamTensor("reshape_out")); + //reshape + S_TENSOR reduce_dim = ctx.add(new RamTensor({1}), "reduce_dim"); + ctx.add(new RamTensor(), "reshape_out"); -// S_TENSOR reshape_shape = ctx.add(new RamTensor("reshape_shape")); + ctx.add(new RamTensor(), "reshape_shape"); -// *(reduce_dim->write(0, 0)) = 0; -// ctx.push(new ReshapeOp(), {input, "reshape_shape"}, {"reshape_out"}); + *(reduce_dim->write(0, 0)) = 0; + ctx.push(new ReshapeOp(), {input, "reshape_shape"}, {"reshape_out"}); -// //Min and Max of (reshaped) input -// S_TENSOR min_out = ctx.add(new RamTensor({1}, "min_out")); -// S_TENSOR max_out = ctx.add(new RamTensor({1}, "max_out")); -// ctx.push(new MinOp(), {"reshape_out", "reduce_dim"}, {"min_out"}); -// ctx.push(new MaxOp(), {"reshape_out", "reduce_dim"}, {"max_out"}); + //Min and Max of (reshaped) input + ctx.add(new RamTensor({1}), "min_out"); + ctx.add(new RamTensor({1}), "max_out"); + ctx.push(new MinOp(), {"reshape_out", "reduce_dim"}, {"min_out"}); + ctx.push(new MaxOp(), {"reshape_out", "reduce_dim"}, {"max_out"}); -// ctx.push(new QuantizeV2Op(), {"reshape_out", "min_out", "max_out"}, {output, out_min, out_max}); -// } + ctx.push(new QuantizeV2Op(), {"reshape_out", "min_out", "max_out"}, {output, out_min, out_max}); +} -// void ReluLayer(Context& ctx, TName x, TName x_min, TName x_max, -// TName w, TName w_min, TName w_max, 
TName b, -// TName z_output) { +void ReluLayer(Context& ctx, TName x, TName x_min, TName x_max, + TName w, TName w_min, TName w_max, TName b, + TName z_output) { -// //quantized matmul + //quantized matmul -// S_TENSOR out_c = ctx.add(new RamTensor("out_c")); + S_TENSOR out_c = ctx.add(new RamTensor(), "out_c"); -// S_TENSOR matmul_out_min = ctx.add(new RamTensor({1}, "matmul_out_min")); -// S_TENSOR matmul_out_max = ctx.add(new RamTensor({1}, "matmul_out_max")); + ctx.add(new RamTensor({1}), "matmul_out_min"); + ctx.add(new RamTensor({1}), "matmul_out_max"); -// ctx.push(new QntMatMulOp(), {x, x_min, x_max, w, w_min, w_max}, {"out_c", "matmul_out_min", "matmul_out_max"}); - -// //Requantization_Range -// S_TENSOR req_out_min = ctx.add(new RamTensor({1}, "req_out_min")); -// S_TENSOR req_out_max = ctx.add(new RamTensor({1}, "req_out_max")); -// ctx.push(new Requantization_RangeOp(), {"out_c", "matmul_out_min", "matmul_out_max"}, {"req_out_min", "req_out_max"}); - -// //Requantize -// S_TENSOR reqnt_out = ctx.add(new RamTensor("reqnt_out")); -// S_TENSOR reqnt_out_min = ctx.add(new RamTensor({1}, "reqnt_out_min")); -// S_TENSOR reqnt_out_max = ctx.add(new RamTensor({1}, "reqnt_out_max")); -// ctx.push(new RequantizeOp(), {"out_c", "matmul_out_min", "matmul_out_max", "req_out_min", "req_out_max"}, {"reqnt_out", "reqnt_out_min", "reqnt_out_max"}); - -// Shape out_shape = out_c->getShape(); -// //clean up - -// S_TENSOR deqnt_out = ctx.add(new RamTensor("deqnt_out")); -// ctx.push(new DequantizeOp(), {"reqnt_out", "reqnt_out_min", "reqnt_out_max"}, {"deqnt_out"}); + ctx.push(new QntMatMulOp(), {x, x_min, x_max, w, w_min, w_max}, {"out_c", "matmul_out_min", "matmul_out_max"}); + + //Requantization_Range + S_TENSOR req_out_min = ctx.add(new RamTensor({1}), "req_out_min"); + S_TENSOR req_out_max = ctx.add(new RamTensor({1}), "req_out_max"); + ctx.push(new Requantization_RangeOp(), {"out_c", "matmul_out_min", "matmul_out_max"}, {"req_out_min", "req_out_max"}); + + //Requantize + ctx.add(new RamTensor(), "reqnt_out"); + ctx.add(new RamTensor({1}), "reqnt_out_min"); + ctx.add(new RamTensor({1}), "reqnt_out_max"); + ctx.push(new RequantizeOp(), {"out_c", "matmul_out_min", "matmul_out_max", "req_out_min", "req_out_max"}, {"reqnt_out", "reqnt_out_min", "reqnt_out_max"}); + + Shape out_shape = out_c->getShape(); + //clean up -// ctx.push(new AddOp(), {"deqnt_out", b}, {z_output}); + S_TENSOR deqnt_out = ctx.add(new RamTensor(), "deqnt_out"); + ctx.push(new DequantizeOp(), {"reqnt_out", "reqnt_out_min", "reqnt_out_max"}, {"deqnt_out"}); -// } + ctx.push(new AddOp(), {"deqnt_out", b}, {z_output}); -// void PredLayer(Context &ctx, TName input, TName input_min, -// TName input_max, TName output, TName w, TName w_min, TName w_max, TName bias, TName dim) { - -// S_TENSOR out_mat_pred = ctx.add(new RamTensor("out_mat_pred")); -// S_TENSOR matmul_out_min_pred = ctx.add(new RamTensor({1}, "matmul_out_min_pred")); -// S_TENSOR matmul_out_max_pred = ctx.add(new RamTensor({1}, "matmul_out_max_pred")); - -// //MatMul -// ctx.push(new QntMatMulOp(), {input, input_min, input_max, w, w_min, w_max}, -// {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred"}); - -// //Requantization_Range -// S_TENSOR req_out_min = ctx.add(new RamTensor({1}, "req_out_min_pred")); -// S_TENSOR req_out_max = ctx.add(new RamTensor({1}, "req_out_max_pred")); -// ctx.push(new Requantization_RangeOp(), {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred"}, -// {"req_out_min_pred", "req_out_max_pred"}); - -// //Requantize -// 
S_TENSOR reqnt_out = ctx.add(new RamTensor("reqnt_out_pred"));
-// S_TENSOR reqnt_out_min = ctx.add(new RamTensor({1}, "reqnt_out_min_pred"));
-// S_TENSOR reqnt_out_max = ctx.add(new RamTensor({1}, "reqnt_out_max_pred"));
-// ctx.push(new RequantizeOp(), {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred", "req_out_min_pred", "req_out_max_pred"},
-// {"reqnt_out_pred", "reqnt_out_min_pred", "reqnt_out_max_pred"});
-
-// //dequantize
-// S_TENSOR deqnt_out = ctx.add(new RamTensor("deqnt_out_pred"));
-// ctx.push(new DequantizeOp(), {"reqnt_out_pred", "reqnt_out_min_pred", "reqnt_out_max_pred"}, {"deqnt_out_pred"});
-
-// //Add
-// S_TENSOR output_z = ctx.add(new RamTensor("output_z_pred"));
-// ctx.push(new AddOp(), {"deqnt_out_pred", bias}, {"output_z_pred"});
-
-// //ArgMax
-// ctx.push(new ArgMaxOp(), {"output_z_pred", dim}, {output});
-// }
-
-// int runMLP(string inputIdxFile) {
-// TensorIdxImporter t_import;
-// Context ctx;
-// S_TENSOR x_quantized = ctx.add(new RamTensor("x_quantized"));
-// S_TENSOR x_min = ctx.add(new RamTensor({1}, "x_min"));
-// S_TENSOR x_max = ctx.add(new RamTensor({1}, "x_max"));
-// S_TENSOR x = ctx.add(t_import.float_import(inputIdxFile, "x"));
-
-// tensorQuantize(ctx, "x", "x_quantized", "x_min", "x_max");
-// ctx.eval();
-
-// //relu layer first
-
-// S_TENSOR w = ctx.add(t_import.ubyte_import(
-// "/fs/testData/deep_mlp/import-Variable_quint8_const_0.idx", "w"));
-// S_TENSOR w_min =
-// ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_min_0.idx", "w_min"));
-// S_TENSOR w_max =
-// ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_max_0.idx", "w_max"));
-// S_TENSOR b =
-// ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_1_0.idx", "b"));
-// S_TENSOR relu_output = ctx.add(new RamTensor("relu_output"));
-// S_TENSOR relu_min = ctx.add(new RamTensor({1}, "relu_min"));
-// S_TENSOR relu_max = ctx.add(new RamTensor({1}, "relu_max"));
-// S_TENSOR z_output = ctx.add(new RamTensor("z_output"));
-
-// ReluLayer(ctx, "x_quantized", "x_min", "x_max", "w", "w_min", "w_max", "b", "z_output");
-
-// S_TENSOR z_qnt_output = ctx.add(new RamTensor("z_qnt_output"));
-// S_TENSOR z_min = ctx.add(new RamTensor({1}, "z_min"));
-// S_TENSOR z_max = ctx.add(new RamTensor({1}, "z_max"));
-// tensorQuantize(ctx, "z_output", "z_qnt_output", "z_min", "z_max");
-
-// ctx.push(new ReluOp(), {"z_qnt_output", "z_min", "z_max"}, {"relu_output", "relu_min", "relu_max"});
-
-// ctx.eval();
-
-// //relu layer 2
-// S_TENSOR w2 = ctx.add(t_import.ubyte_import(
-// "/fs/testData/deep_mlp/import-Variable_2_quint8_const_0.idx", "w2"));
-// S_TENSOR w_min2 = ctx.add(t_import.float_import(
-// "/fs/testData/deep_mlp/import-Variable_2_min_0.idx", "w_min2"));
-// S_TENSOR w_max2 = ctx.add(t_import.float_import(
-// "/fs/testData/deep_mlp/import-Variable_2_max_0.idx", "w_max2"));
-// S_TENSOR b2 = ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_3_0.idx", "b2"));
-// S_TENSOR relu_output2 = ctx.add(new RamTensor("relu_output2"));
-// S_TENSOR relu_min2 = ctx.add(new RamTensor({1}, "relu_min2"));
-// S_TENSOR relu_max2 = ctx.add(new RamTensor({1}, "relu_max2"));
-
-// S_TENSOR z_output2 = ctx.add(new RamTensor("z_output2"));
-// ReluLayer(ctx, "relu_output", "relu_min", "relu_max", "w2", "w_min2", "w_max2", "b2", "z_output2");
-
-
-// S_TENSOR z_qnt_output2 = ctx.add(new RamTensor("z_qnt_output2"));
-// S_TENSOR z_min2 = ctx.add(new RamTensor({1}, "z_min2"));
-// S_TENSOR z_max2 = ctx.add(new RamTensor({1}, "z_max2"));
-// tensorQuantize(ctx, "z_output2", "z_qnt_output2", "z_min2", "z_max2");
-
-// ctx.push(new ReluOp(), {"z_qnt_output2", "z_min2", "z_max2"}, {"relu_output2", "relu_min2", "relu_max2"});
-
-// ctx.eval();
-
-// S_TENSOR w3 = ctx.add(t_import.ubyte_import(
-// "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/"
-// "inputs/Variable_4_quint8_const_0.idx", "w3"));
-// S_TENSOR w2_min = ctx.add(t_import.float_import(
-// "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/"
-// "inputs/Variable_4_min_0.idx", "w2_min"));
-// S_TENSOR w2_max = ctx.add(t_import.float_import(
-// "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/"
-// "inputs/Variable_4_max_0.idx", "w2_max"));
-// S_TENSOR bias2 = ctx.add(t_import.float_import(
-// "/fs/testData/deep_mlp/runPredLayer/add_2/inputs/Variable_5_0.idx", "bias2"));
-// S_TENSOR dim = ctx.add(t_import.int_import(
-// "/fs/testData/deep_mlp/runPredLayer/y_pred/inputs/"
-// "y_pred-dimension_0.idx", "dim"));
-
-// S_TENSOR pred = ctx.add(new RamTensor("pred"));
-// PredLayer(ctx, "relu_output2", "relu_min2", "relu_max2", "pred", "w3", "w2_min", "w2_max", "bias2", "dim");
-// ctx.eval();
-
-
-// Tensor* ref_out = t_import.float_import(
-// "/fs/testData/deep_mlp/runPredLayer/y_pred/outputs/y_pred_0.idx", "ref_out");
-// Tensor* ref_pred = TensorCast(ref_out, "ref_pred");
-
-// double result = Test::meanPercentErr(ref_pred, pred.get());
+}
+
+void PredLayer(Context &ctx, TName input, TName input_min,
+ TName input_max, TName output, TName w, TName w_min, TName w_max, TName bias, TName dim) {
+
+ S_TENSOR out_mat_pred = ctx.add(new RamTensor(), "out_mat_pred");
+ S_TENSOR matmul_out_min_pred = ctx.add(new RamTensor({1}), "matmul_out_min_pred");
+ S_TENSOR matmul_out_max_pred = ctx.add(new RamTensor({1}), "matmul_out_max_pred");
+
+ //MatMul
+ ctx.push(new QntMatMulOp(), {input, input_min, input_max, w, w_min, w_max},
+ {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred"});
+
+ //Requantization_Range
+ ctx.add(new RamTensor({1}), "req_out_min_pred");
+ ctx.add(new RamTensor({1}), "req_out_max_pred");
+ ctx.push(new Requantization_RangeOp(), {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred"},
+ {"req_out_min_pred", "req_out_max_pred"});
+
+ //Requantize
+ S_TENSOR reqnt_out = ctx.add(new RamTensor(), "reqnt_out_pred");
+ S_TENSOR reqnt_out_min = ctx.add(new RamTensor({1}), "reqnt_out_min_pred");
+ S_TENSOR reqnt_out_max = ctx.add(new RamTensor({1}), "reqnt_out_max_pred");
+ ctx.push(new RequantizeOp(), {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred", "req_out_min_pred", "req_out_max_pred"},
+ {"reqnt_out_pred", "reqnt_out_min_pred", "reqnt_out_max_pred"});
+
+ //dequantize
+ ctx.add(new RamTensor(), "deqnt_out_pred");
+ ctx.push(new DequantizeOp(), {"reqnt_out_pred", "reqnt_out_min_pred", "reqnt_out_max_pred"}, {"deqnt_out_pred"});
+
+ //Add
+ ctx.add(new RamTensor(), "output_z_pred");
+ ctx.push(new AddOp(), {"deqnt_out_pred", bias}, {"output_z_pred"});
+
+ //ArgMax
+ ctx.push(new ArgMaxOp(), {"output_z_pred", dim}, {output});
+}
+
+int runMLP(string inputIdxFile) {
+ TensorIdxImporter t_import;
+ Context ctx;
+ ctx.add(new RamTensor(), "x_quantized");
+ ctx.add(new RamTensor({1}), "x_min");
+ ctx.add(new RamTensor({1}), "x_max");
+ ctx.add(t_import.float_import(inputIdxFile), "x");
+
+ tensorQuantize(ctx, "x", "x_quantized", "x_min", "x_max");
+ ctx.eval();
+
+ //relu layer first
+
+ ctx.add(t_import.ubyte_import(
+ "/fs/testData/deep_mlp/import-Variable_quint8_const_0.idx"), "w");
+ ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_min_0.idx"), "w_min");
+ ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_max_0.idx"), "w_max");
+ ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_1_0.idx"), "b");
+ ctx.add(new RamTensor(), "relu_output");
+ ctx.add(new RamTensor({1}), "relu_min");
+ ctx.add(new RamTensor({1}), "relu_max");
+ ctx.add(new RamTensor(), "z_output");
+
+ ReluLayer(ctx, "x_quantized", "x_min", "x_max", "w", "w_min", "w_max", "b", "z_output");
+
+ ctx.add(new RamTensor(), "z_qnt_output");
+ ctx.add(new RamTensor({1}), "z_min");
+ ctx.add(new RamTensor({1}), "z_max");
+ tensorQuantize(ctx, "z_output", "z_qnt_output", "z_min", "z_max");
+
+ ctx.push(new ReluOp(), {"z_qnt_output", "z_min", "z_max"}, {"relu_output", "relu_min", "relu_max"});
+
+ ctx.eval();
+
+ //relu layer 2
+ ctx.add(t_import.ubyte_import(
+ "/fs/testData/deep_mlp/import-Variable_2_quint8_const_0.idx"), "w2");
+ ctx.add(t_import.float_import(
+ "/fs/testData/deep_mlp/import-Variable_2_min_0.idx"), "w_min2");
+ ctx.add(t_import.float_import(
+ "/fs/testData/deep_mlp/import-Variable_2_max_0.idx"), "w_max2");
+ ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_3_0.idx"), "b2");
+ ctx.add(new RamTensor(), "relu_output2");
+ ctx.add(new RamTensor({1}), "relu_min2");
+ ctx.add(new RamTensor({1}), "relu_max2");
+
+ ctx.add(new RamTensor(), "z_output2");
+ ReluLayer(ctx, "relu_output", "relu_min", "relu_max", "w2", "w_min2", "w_max2", "b2", "z_output2");
+
+
+ ctx.add(new RamTensor(), "z_qnt_output2");
+ ctx.add(new RamTensor({1}), "z_min2");
+ ctx.add(new RamTensor({1}), "z_max2");
+ tensorQuantize(ctx, "z_output2", "z_qnt_output2", "z_min2", "z_max2");
+
+ ctx.push(new ReluOp(), {"z_qnt_output2", "z_min2", "z_max2"}, {"relu_output2", "relu_min2", "relu_max2"});
+
+ ctx.eval();
+
+ ctx.add(t_import.ubyte_import(
+ "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/"
+ "inputs/Variable_4_quint8_const_0.idx"), "w3");
+ ctx.add(t_import.float_import(
+ "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/"
+ "inputs/Variable_4_min_0.idx"), "w2_min");
+ ctx.add(t_import.float_import(
+ "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/"
+ "inputs/Variable_4_max_0.idx"), "w2_max");
+ ctx.add(t_import.float_import(
+ "/fs/testData/deep_mlp/runPredLayer/add_2/inputs/Variable_5_0.idx"), "bias2");
+ ctx.add(t_import.int_import(
+ "/fs/testData/deep_mlp/runPredLayer/y_pred/inputs/"
+ "y_pred-dimension_0.idx"), "dim");
+
+ S_TENSOR pred = ctx.add(new RamTensor(), "pred");
+ PredLayer(ctx, "relu_output2", "relu_min2", "relu_max2", "pred", "w3", "w2_min", "w2_max", "bias2", "dim");
+ ctx.eval();
+
+
+ Tensor* ref_out = t_import.float_import(
+ "/fs/testData/deep_mlp/runPredLayer/y_pred/outputs/y_pred_0.idx");
+ Tensor* ref_pred = TensorCast(ref_out);
+
+ double result = Test::meanPercentErr(ref_pred, pred.get());
-// if (result < 0.0001) {
-// printf("PASSED %.8f\r\n\r\n", result);
-// } else {
-// printf("FAILED %.8f\r\n\r\n", result);
-// }
-
-// return *(pred->read(0, 0));
-// // output layer
-// }
+ if (result < 0.0001) {
+ printf("PASSED %.8f\r\n\r\n", result);
+ } else {
+ printf("FAILED %.8f\r\n\r\n", result);
+ }
+
+ return *(pred->read(0, 0));
+ // output layer
+}
diff --git a/main.cpp b/main.cpp
index f14dbd98..3bf73a95 100644
--- a/main.cpp
+++ b/main.cpp
@@ -7,15 +7,13 @@
 #include "tensorIdxImporterTests.hpp"
 #include "context.hpp"
 #include "ArrayTests.hpp"
-// #include "MathTests.hpp"
-// #include "MatrixTests.hpp"
+#include "MatrixTests.hpp"
 #include "tensor_test.hpp"
-// #include "NnTests.hpp"
+#include "NnTests.hpp"
 // #include "mlp_test.hpp"
-// #include "deep_mnist_mlp.hpp"
+#include "deep_mnist_mlp.hpp"
 #include "context_test.hpp"
 #include "MathTests.hpp"
-// #include "MatrixTests.hpp"
 
 Serial pc(USBTX, USBRX, 115200);
 SDBlockDevice bd(MBED_CONF_APP_SD_MOSI, MBED_CONF_APP_SD_MISO,
@@ -26,11 +24,11 @@ int main(int argc, char** argv) {
 ON_ERR(bd.init(), "SDBlockDevice init ");
 ON_ERR(fs.mount(&bd), "Mounting the filesystem on \"/fs\". ");
 
- // printf("Deep MLP on Mbed (Trained with Tensorflow)\r\n\r\n");
- // printf("running deep-mlp...\r\n");
+ printf("Deep MLP on Mbed (Trained with Tensorflow)\r\n\r\n");
+ printf("running deep-mlp...\r\n");
 
- // int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx");
- // printf("prediction: %d\r\n\r\n\r\n\r\n", prediction);
+ int prediction = runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx");
+ printf("prediction: %d\r\n\r\n\r\n\r\n", prediction);
 
 printf("IDX import:\r\n");
 idxImporterTest idxTest;
@@ -68,17 +66,17 @@ int main(int argc, char** argv) {
 printf("Math result...\r\n");
 mathTests.printSummary();
 
- // printf("running matrix test:\r\n");
- // matrixOpsTest matrixTests;
- // matrixTests.runAll();
- // printf("running matrix result ...\r\n");
- // matrixTests.printSummary();
-
- // printf("NnOpS: \r\n");
- // NnOpsTest nnTest;
- // nnTest.runAll();
- // printf("Nn Ops result...\r\n");
- // nnTest.printSummary();
+ printf("running matrix test:\r\n");
+ matrixOpsTest matrixTests;
+ matrixTests.runAll();
+ printf("running matrix result ...\r\n");
+ matrixTests.printSummary();
+
+ printf("NnOpS: \r\n");
+ NnOpsTest nnTest;
+ nnTest.runAll();
+ printf("Nn Ops result...\r\n");
+ nnTest.printSummary();
 
 /* printf("mlp test: \r\n");
 mlpTest mlpt;
diff --git a/tensor.hpp b/tensor.hpp
index 76ee6040..56614114 100644
--- a/tensor.hpp
+++ b/tensor.hpp
@@ -29,7 +29,13 @@ class uTensor {
 virtual void inFocus(){};
 virtual void deFocus(){};
 virtual std::string getName() { return name; }
- virtual void setName(std::string _name) { name = _name; }
+ virtual void setName(std::string _name) {
+ if(name == "") {
+ name = _name;
+ } else {
+ ERR_EXIT("Tensor %s already has a name %s\r\n", _name.c_str(), name.c_str());
+ }
+ }
 
 virtual ~uTensor() = 0;
 
@@ -62,11 +68,11 @@ class Tensor : public uTensor {
 protected:
 std::shared_ptr s; // short for states
 public:
- Tensor(TName &_name) {
+ Tensor() {
 s = std::make_shared();
 s->total_size = 0;
 s->data = nullptr;
- setName(_name);
+ setName("");
 }
 
 // returns how far a given dimension is apart
@@ -155,9 +161,10 @@ template
 class RamTensor : public Tensor {
 // need deep copy
 public:
- RamTensor(TName _name) : Tensor(_name) {}
+ //RamTensor(TName _name) : Tensor(_name) {}
+ RamTensor() {};
 
- RamTensor(std::initializer_list l, TName _name) : Tensor(_name) {
+ RamTensor(std::initializer_list l) {
 std::vector v;
 for (auto i : l) {
 v.push_back(i);
@@ -166,7 +173,7 @@ class RamTensor : public Tensor {
 Tensor::init(v);
 }
 
- RamTensor(std::vector v, TName _name) : Tensor(_name) {
+ RamTensor(std::vector v) {
 Tensor::init(v);
 }
 
@@ -218,8 +225,8 @@ };
 
 template
-Tensor* TensorCast(Tensor* input, TName name) {
- Tensor* output = new RamTensor(input->getShape(), name);
+Tensor* TensorCast(Tensor* input) {
+ Tensor* output = new RamTensor(input->getShape());
 const Tin* inputPrt = input->read(0, 0);
 Tout* outputPrt = output->write(0, 0);
 
@@ -231,8 +238,8 @@ Tensor* TensorCast(Tensor* input, TName name) {
 }
 
 template
-Tensor* TensorConstant(std::vector shape, T c, TName const &name) {
- Tensor* output = new RamTensor(shape, name);
+Tensor* TensorConstant(std::vector shape, T c) {
+ Tensor* output = new RamTensor(shape);
 T* outPrt = output->write(0, 0);
 
 for (uint32_t i = 0; i < output->getSize(); i++) {
@@ -243,13 +250,13 @@ Tensor* TensorConstant(std::vector shape, T c, TName const &name) {
 }
 
 template
-Tensor* TensorConstant(std::initializer_list l, T c, TName const &name) {
+Tensor* TensorConstant(std::initializer_list l, T c) {
 std::vector v;
 for (auto i : l) {
 v.push_back(i);
 }
 
- return TensorConstant(v, c, name);
+ return TensorConstant(v, c);
 }
 
 //
diff --git a/tensorIdxImporter.hpp b/tensorIdxImporter.hpp
index a5f61ab7..3fcaf5cd 100644
--- a/tensorIdxImporter.hpp
+++ b/tensorIdxImporter.hpp
@@ -33,25 +33,25 @@ class TensorIdxImporter {
 HeaderMeta header;
 HeaderMeta parseHeader(void);
 template
- Tensor* loader(string& filename, IDX_DTYPE idx_type, string name);
+ Tensor* loader(string& filename, IDX_DTYPE idx_type);
 void open(string filename);
 // void open(FILE *fp);
 
 public:
- Tensor* ubyte_import(string filename, string name) {
- return loader(filename, IDX_DTYPE::idx_ubyte, name);
+ Tensor* ubyte_import(string filename) {
+ return loader(filename, IDX_DTYPE::idx_ubyte);
 }
- Tensor* byte_import(string filename, string name) {
- return loader(filename, IDX_DTYPE::idx_byte, name);
+ Tensor* byte_import(string filename) {
+ return loader(filename, IDX_DTYPE::idx_byte);
 }
- Tensor* short_import(string filename, string name) {
- return loader(filename, IDX_DTYPE::idx_short, name);
+ Tensor* short_import(string filename) {
+ return loader(filename, IDX_DTYPE::idx_short);
 }
- Tensor* int_import(string filename, string name) {
- return loader(filename, IDX_DTYPE::idx_int, name);
+ Tensor* int_import(string filename) {
+ return loader(filename, IDX_DTYPE::idx_int);
 }
- Tensor* float_import(string filename, string name) {
- return loader(filename, IDX_DTYPE::idx_float, name);
+ Tensor* float_import(string filename) {
+ return loader(filename, IDX_DTYPE::idx_float);
 }
 uint32_t getMagicNumber(unsigned char dtype, unsigned char dim);
 uint8_t getIdxDTypeSize(IDX_DTYPE dtype);
@@ -65,7 +65,7 @@ class TensorIdxImporter {
 
 template
-Tensor* TensorIdxImporter::loader(string& filename, IDX_DTYPE idx_type, string name) {
+Tensor* TensorIdxImporter::loader(string& filename, IDX_DTYPE idx_type) {
 fp = fopen(filename.c_str(), "r");
 
 DEBUG("Opening file %s ", filename.c_str());
@@ -79,7 +79,7 @@ Tensor* TensorIdxImporter::loader(string& filename, IDX_DTYPE idx_type, string n
 fseek(fp, header.dataPos, SEEK_SET);
 
 // need error handling
- Tensor* t = new RamTensor(header.dim, name); // tensor allocated
+ Tensor* t = new RamTensor(header.dim); // tensor allocated
 const uint8_t unit_size = t->unit_size();
 
 U* val = (U*)malloc(unit_size);
diff --git a/tensorIdxImporterTests.hpp b/tensorIdxImporterTests.hpp
index 833eb811..3fabce87 100644
--- a/tensorIdxImporterTests.hpp
+++ b/tensorIdxImporterTests.hpp
@@ -20,7 +20,7 @@ class idxImporterTest : public Test {
 TensorIdxImporter t_import;
 timer_start();
 Tensor* t =
- t_import.ubyte_import("/fs/testData/idxImport/uint8_4d_power2.idx", "uchar1");
+ t_import.ubyte_import("/fs/testData/idxImport/uint8_4d_power2.idx");
 timer_stop();
 double result = sum(t);
 passed(result == 4518);
@@ -32,7 +32,7 @@ class idxImporterTest : public Test {
 TensorIdxImporter t_import;
 timer_start();
 Tensor* t =
- t_import.short_import("/fs/testData/idxImport/int16_4d_power2.idx", "short1");
+ t_import.short_import("/fs/testData/idxImport/int16_4d_power2.idx");
 timer_stop();
 double result = sum(t);
 passed(result == 270250);
@@ -44,10 +44,10 @@ class idxImporterTest : public Test {
 TensorIdxImporter t_import;
 timer_start();
 Tensor* t =
- t_import.int_import("/fs/testData/idxImport/int32_4d_power2.idx", "int1");
+ t_import.int_import("/fs/testData/idxImport/int32_4d_power2.idx");
 timer_stop();
 double result = sum(t);
- passed(result == 5748992600);
+ passed(result == 7158278745);
 delete t;
 }
 
@@ -56,7 +56,7 @@ class idxImporterTest : public Test {
 TensorIdxImporter t_import;
 timer_start();
 Tensor* t =
- t_import.float_import("/fs/testData/idxImport/float_4d_power2.idx", "float1");
+ t_import.float_import("/fs/testData/idxImport/float_4d_power2.idx");
 timer_stop();
 double result = sum(t);
diff --git a/tensor_test.hpp b/tensor_test.hpp
index 49e0508f..509cb8d8 100644
--- a/tensor_test.hpp
+++ b/tensor_test.hpp
@@ -12,7 +12,7 @@ class tensorTest : public Test {
 public:
 void runResize() {
 testStart("tensortest");
- Tensor* a = new RamTensor({3, 2, 3}, "a");
+ Tensor* a = new RamTensor({3, 2, 3});
 std::vector v({1, 5, 8});
 a->resize(v);
 bool res = testsize(1 * 5 * 8, a->getSize());
@@ -36,7 +36,7 @@ class transTest : public Test {
 std::default_random_engine gen;
 vector tmp({2, 3, 4, 5});
 std::string a_s = "input" + std::to_string(i);
- S_TENSOR inputTensor = ctx.add(defer(new RamTensor(tmp, a_s)));
+ S_TENSOR inputTensor = ctx.add(new RamTensor(tmp), a_s);
 vector permute = {2, 3, 1, 0};
 vector g = inputTensor->getShape();
 std::shuffle(permute.begin(), permute.end(), gen);
@@ -44,7 +44,7 @@
 permuteIndexTransform trans(inputTensor->getShape(), permute);
 std::string a_o = "output" + std::to_string(i);
- S_TENSOR output = ctx.add(defer(new RamTensor(trans.getNewShape(), a_o)));
+ S_TENSOR output = ctx.add(new RamTensor(trans.getNewShape()), a_o);
 vector s = output->getShape();
 res = testshape(g, s, permute);
 if (!res) {
@@ -61,7 +61,7 @@ class transTest : public Test {
 vector output_1({2, 2, 3, 5, 6, 6, 4, 5, 7, 5, 1, 9, 1, 3, 2, 2, 5, 3, 3, 6, 3, 4, 9, 2});
- S_TENSOR inputTensor2 = ctx.add(defer(new RamTensor({2, 3, 4}, "inputTensor2")));
+ S_TENSOR inputTensor2 = ctx.add(new RamTensor({2, 3, 4}), "inputTensor2");
 vector permute = {0, 2, 1};
 permuteIndexTransform trans(inputTensor2->getShape(), permute);
@@ -87,7 +87,7 @@ class transTest : public Test {
 vector output_2({2, 1, 2, 3, 3, 2, 5, 2, 6, 5, 6, 3, 4, 3, 5, 6, 7, 3, 5, 4, 1, 9, 9, 2});
- S_TENSOR inputTensor3 = ctx.add(defer(new RamTensor({2, 4, 3}, "inputTensor3")));
+ S_TENSOR inputTensor3 = ctx.add(new RamTensor({2, 4, 3}), "inputTensor3");
 vector permute2 = {1, 2, 0};
 permuteIndexTransform trans2(inputTensor3->getShape(), permute2);
 testStart("test vec 2 for transform");
@@ -107,7 +107,7 @@ class transTest : public Test {
 vector output_3({8, 2, 8, 1, 0, 3, 4, 6, 2, 6, 0, 6, 3, 9, 2, 7, 0, 7, 0, 4, 8, 9, 0, 4, 3, 6, 8});
- S_TENSOR inputTensor4 = ctx.add(defer(new RamTensor({1, 3, 3, 3}, "inputTensor4")));
+ S_TENSOR inputTensor4 = ctx.add(new RamTensor({1, 3, 3, 3}), "inputTensor4");
 vector permute3 = {0, 3, 2, 1};
 permuteIndexTransform trans3(inputTensor4->getShape(), permute3);
 testStart("test vec 4d for transform");
diff --git a/uTensorBase.cpp b/uTensorBase.cpp
index 57b0ec91..18d583dc 100644
--- a/uTensorBase.cpp
+++ b/uTensorBase.cpp
@@ -12,3 +12,8 @@ void Operator::setOutputs(S_TList &_outputs) {
 outputs = _outputs;
 }
 
+
+void Operator::empty(void) {
+ inputs.empty();
+ outputs.empty();
+}
diff --git a/uTensorBase.hpp b/uTensorBase.hpp
index 7886705d..303269bc 100644
--- a/uTensorBase.hpp
+++ b/uTensorBase.hpp
@@ -24,6 +24,7 @@ class Operator : public uTensor {
 S_TList getOutputs(void) { return outputs;}
 uint8_t getNumInputs(void) { return n_inputs; }
 uint8_t getNumOutputs(void) { return n_outputs; }
+ void empty(void);
 
 Operator() {
 n_inputs = 0; //overridden by constructor

From ae3a7a46c37ad4aef040fe9b07ebfe3bdf65fde1 Mon Sep 17 00:00:00 2001
From: Neil Tan
Date: Sat, 2 Dec 2017 18:47:45 -0800
Subject: [PATCH 79/80] updated comment in context.cpp

---
 context.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/context.cpp b/context.cpp
index fd407f21..c6c7a484 100644
--- a/context.cpp
+++ b/context.cpp
@@ -29,7 +29,6 @@ S_TENSOR Context::addCached(std::function func, TName _name, uint8_
 S_TENSOR Context::add(Tensor* t, TName _name, uint8_t init_count) {
 if(t == nullptr) { ERR_EXIT("null pointer tensor"); }
 if(rTable.find(_name) != rTable.end()) {
- ///NT: TODO: check stateful here
 ERR_EXIT("tensor with name \"%s\" address already exist in rTable", t->getName().c_str());
 }
 
@@ -210,6 +209,7 @@ int Context::eval(void) {
 uint32_t Context::gc(void) {
 
 TNameList nlist;
+ ///NT: TODO: implement cache policy here
 
 for ( auto it : rTable) {
 Ref_Record r = it.second;

From 9c7fcb17593878155dedac1e7b7c43e01c3a3a87 Mon Sep 17 00:00:00 2001
From: dboyliao
Date: Sat, 9 Dec 2017 00:13:16 +0800
Subject: [PATCH 80/80] Supporting scalar tensor broadcasting

---
 MathOps.hpp | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/MathOps.hpp b/MathOps.hpp
index bea3b6d3..d23a088b 100644
--- a/MathOps.hpp
+++ b/MathOps.hpp
@@ -273,13 +273,20 @@ void Add(S_TENSOR input, S_TENSOR input2, S_TENSOR out) {
 const TIn* p_in2 = input2->read(0, 0);
 
 //auto shape
- out->resize(input->getShape());
+ // [FIXME] hacking for broadcasting scalar tensor
+ const uint32_t size1 = input->getSize();
+ const uint32_t size2 = input2->getSize();
+ if (size2 == 1) {
+ out->resize(input->getShape());
+ } else {
+ out->resize(input2->getShape());
+ }
 
 TOut* p_out = out->write(0, 0);
 
 const uint32_t size = out->getSize();
 for (uint32_t i = 0; i < size; i++) {
- p_out[i] = p_in[i] + p_in2[i];
+ p_out[i] = p_in[size1 > 1 ? i : 0] + p_in2[size2 > 1 ? i : 0];
 }
 }
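For context, a minimal, self-contained sketch of the scalar-broadcast indexing that [PATCH 80/80] adds to the Add kernel. This is an illustration, not uTensor code: plain std::vector<float> stands in for S_TENSOR, and add_broadcast is a hypothetical name.

#include <cstdint>
#include <cstdio>
#include <vector>

// Element-wise add with scalar broadcasting: a one-element operand is
// re-read at index 0 for every output element, mirroring the
// "size > 1 ? i : 0" indexing in the patched Add kernel.
std::vector<float> add_broadcast(const std::vector<float>& a,
                                 const std::vector<float>& b) {
  const uint32_t size1 = a.size();
  const uint32_t size2 = b.size();
  // The output takes the size of the non-scalar operand, matching the
  // resize() branch introduced by the patch.
  std::vector<float> out(size2 == 1 ? size1 : size2);
  for (uint32_t i = 0; i < out.size(); i++) {
    out[i] = a[size1 > 1 ? i : 0] + b[size2 > 1 ? i : 0];
  }
  return out;
}

int main() {
  std::vector<float> t = {1.0f, 2.0f, 3.0f};
  std::vector<float> s = {10.0f};  // scalar tensor
  for (float v : add_broadcast(t, s)) printf("%.1f ", v);  // 11.0 12.0 13.0
  printf("\r\n");
  return 0;
}

Note that, as in the patched kernel, two multi-element operands are still assumed to have identical shapes; only the one-element case is broadcast.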
