-
-
Notifications
You must be signed in to change notification settings - Fork 56.2k
[WIP] [GSOC] GGUF Importer #27177
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
nklskyoy
wants to merge
16
commits into
opencv:5.x
Choose a base branch
from
nklskyoy:llm-prototype
base: 5.x
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
[WIP] [GSOC] GGUF Importer #27177
Changes from all commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
132aec6
gguf importer: enable vanill attention parsing
nklskyoy f45c5f6
expose readNetFromGGUF fn
nklskyoy 07f6068
encapsulate buffer logic into separate struct
nklskyoy 493a367
fix attn_qkv weight matrix name
nklskyoy d96ae2c
add test fil
nklskyoy e887303
fix attention parsing
nklskyoy 3cb3c69
gguf importer call netimpl->prepareForInference();
nklskyoy d55d585
enable parsing of 1D tensors
nklskyoy 2c3560f
fix read2DMat
nklskyoy a8a10bc
ggufImporter: proper attention init
nklskyoy 87aecd0
getTensor fix exception on unsup. Mat shape
nklskyoy 0fcb34c
test against single-block pytorch attention
nklskyoy 184a3da
Test_GGUFImporter cleanup
nklskyoy 538c99b
major code refactor
nklskyoy e71705e
code refactor 2
nklskyoy f4ce156
test input naming
nklskyoy File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
#include "../precomp.hpp" | ||
#include "gguf_buffer.hpp" | ||
|
||
|
||
namespace cv { namespace dnn { | ||
CV__DNN_INLINE_NS_BEGIN | ||
|
||
// Loads the complete content of a GGUF file into `buf`.
//
// fileName: path of the file to read.
// Throws std::runtime_error when the file cannot be opened, its size cannot be
// determined, or its content cannot be read completely.
//
// NOTE(review): a reviewer asked for OpenCV error reporting (CV_Error, CV_Assert)
// to be used for the error handling below; std::runtime_error is kept for now so
// that the exception type seen by existing callers does not change.
GGUFBuffer::GGUFBuffer(const std::string & fileName){
    // Opening with std::ios::ate places the cursor at the end, so tellg() yields the size.
    std::ifstream file(fileName, std::ios::binary | std::ios::ate);
    if (!file.is_open()) {
        throw std::runtime_error("Could not open file: " + fileName);
    }

    // Get the size of the file and prepare a buffer
    const std::streamsize size = file.tellg();
    if (size < 0) {
        // tellg() reports failure as -1; resizing with that value would request a huge buffer.
        throw std::runtime_error("Could not determine size of file: " + fileName);
    }
    file.seekg(0, std::ios::beg);
    buf.resize(static_cast<size_t>(size));

    // Read the file content into the buffer
    if (size > 0 && !file.read(reinterpret_cast<char*>(buf.data()), size)) {
        throw std::runtime_error("Error reading file: " + fileName);
    }
}
|
||
// Reads one scalar (or string) metadata value of the given GGUF value type at
// the current offset and wraps it in a DictValue. The cursor is advanced by the
// helpers that perform the actual read.
//
// type: numeric value of a gguf_metadata_value_type enumerator.
// Throws std::runtime_error for arrays (they must be read element by element)
// and for unknown type ids.
DictValue GGUFBufferReader::readSingleValue(uint32_t type) {
    switch (type) {
        case GGUF_METADATA_VALUE_TYPE_UINT8:
            return DictValue(readSingleValue<uint8_t, int>());
        case GGUF_METADATA_VALUE_TYPE_INT8:
            return DictValue(readSingleValue<int8_t, int>());
        case GGUF_METADATA_VALUE_TYPE_UINT16:
            return DictValue(readSingleValue<uint16_t, int>());
        case GGUF_METADATA_VALUE_TYPE_INT16:
            return DictValue(readSingleValue<int16_t, int>());
        case GGUF_METADATA_VALUE_TYPE_UINT32:
            // Widen to 64 bits: values above INT_MAX do not fit into int.
            return DictValue(readSingleValue<uint32_t, int64>());
        case GGUF_METADATA_VALUE_TYPE_INT32:
            return DictValue(readSingleValue<int32_t, int>());
        case GGUF_METADATA_VALUE_TYPE_FLOAT32:
            return DictValue(readSingleValue<float, float>());
        case GGUF_METADATA_VALUE_TYPE_BOOL:
            return DictValue(readSingleValue<uint8_t, int>() != 0);
        case GGUF_METADATA_VALUE_TYPE_STRING:
            return DictValue(readString());
        case GGUF_METADATA_VALUE_TYPE_UINT64:
            // NOTE: values above the int64 maximum cannot be represented and wrap.
            return DictValue(readSingleValue<uint64_t, int64>());
        case GGUF_METADATA_VALUE_TYPE_INT64:
            return DictValue(readSingleValue<int64_t, int64>());
        case GGUF_METADATA_VALUE_TYPE_FLOAT64:
            // Must consume 8 bytes: reading a float here would misinterpret the
            // value and leave the cursor in the middle of it.
            return DictValue(readSingleValue<double, double>());
        case GGUF_METADATA_VALUE_TYPE_ARRAY:
            throw std::runtime_error("Tried to parse array as single value");
        default:
            throw std::runtime_error("Unsupported metadata type: " + std::to_string(type));
    }
}
|
||
// Reads `n` consecutive integer values of the given metadata type, starting at
// the current offset, and returns them as an integer-array DictValue.
DictValue GGUFBufferReader::readIntArray(int n, gguf_metadata_value_type type) {
    std::vector<int> values(n);
    for (int& element : values) {
        element = readSingleValueInt(type);
    }
    return DictValue::arrayInt(values.begin(), values.size());
}
|
||
// Reads `n` consecutive floating-point values of the given metadata type,
// starting at the current offset, and returns them as a real-array DictValue.
DictValue GGUFBufferReader::readRealArray(int n, gguf_metadata_value_type type) {
    std::vector<double> values(n);
    for (double& element : values) {
        element = readSingleValueReal(type);
    }
    return DictValue::arrayReal(values.begin(), values.size());
}
|
||
// Reads `n` consecutive strings, starting at the current offset, and returns
// them as a string-array DictValue.
DictValue GGUFBufferReader::readStringArray(int n) {
    std::vector<std::string> values(n);
    for (std::string& element : values) {
        element = readString();
    }
    return DictValue::arrayString(values.begin(), values.size());
}
|
||
// Reads one floating-point value of the given metadata type at the current
// offset. Only FLOAT32 and FLOAT64 are accepted; any other type raises
// std::runtime_error.
double GGUFBufferReader::readSingleValueReal(gguf_metadata_value_type type) {
    if (type == GGUF_METADATA_VALUE_TYPE_FLOAT32) {
        return readSingleValueReal<float>();
    }
    if (type == GGUF_METADATA_VALUE_TYPE_FLOAT64) {
        return readSingleValueReal<double>();
    }
    throw std::runtime_error("Unsupported metadata type: " + std::to_string(type));
}
|
||
// Reads one integer value of the given metadata type at the current offset and
// returns it widened to int64.
//
// The value is converted straight to int64 (not through an intermediate int),
// so UINT32 values above INT_MAX keep their magnitude. The 64-bit integer types
// are accepted as well.
// Throws std::runtime_error for non-integer metadata types.
int64 GGUFBufferReader::readSingleValueInt(gguf_metadata_value_type type) {
    switch (type) {
        case GGUF_METADATA_VALUE_TYPE_UINT8:
            return readSingleValue<uint8_t, int64>();
        case GGUF_METADATA_VALUE_TYPE_INT8:
            return readSingleValue<int8_t, int64>();
        case GGUF_METADATA_VALUE_TYPE_UINT16:
            return readSingleValue<uint16_t, int64>();
        case GGUF_METADATA_VALUE_TYPE_INT16:
            return readSingleValue<int16_t, int64>();
        case GGUF_METADATA_VALUE_TYPE_UINT32:
            return readSingleValue<uint32_t, int64>();
        case GGUF_METADATA_VALUE_TYPE_INT32:
            return readSingleValue<int32_t, int64>();
        case GGUF_METADATA_VALUE_TYPE_UINT64:
            // NOTE: values above the int64 maximum cannot be represented and wrap.
            return readSingleValue<uint64_t, int64>();
        case GGUF_METADATA_VALUE_TYPE_INT64:
            return readSingleValue<int64_t, int64>();
        default:
            throw std::runtime_error("Unsupported metadata type: " + std::to_string(type));
    }
}
|
||
std::string GGUFBufferReader::readString() { | ||
uint32_t str_len = readSingleValue<uint64_t, int>(); // 28 | ||
std::string str(reinterpret_cast<const char*>(buffer->buf.data() + current_offset), str_len); | ||
current_offset += str_len; | ||
return str; | ||
} | ||
|
||
// Copies a row-major rows x cols float32 tensor out of the buffer into a new
// CV_32F Mat. The tensor is expected at `current_offset + offset`; the cursor
// itself is not moved.
//
// type:   tensor element type; only GGML_TYPE_F32 is supported.
// rows:   number of matrix rows.
// cols:   number of matrix columns.
// offset: byte offset of the tensor relative to the current offset.
// Throws std::runtime_error for unsupported types, dimensions that do not fit
// into a Mat, or tensor data that would reach past the end of the buffer.
Mat GGUFBufferReader::read2DMat(ggml_type type, size_t rows, size_t cols, size_t offset) {
    if (type != GGML_TYPE_F32) {
        throw std::runtime_error("Unsupported tensor type: " + std::to_string(type));
    }

    // Mat dimensions are ints.
    const size_t maxDim = static_cast<size_t>(std::numeric_limits<int>::max());
    if (rows > maxDim || cols > maxDim) {
        throw std::runtime_error("Tensor dimensions are too large");
    }

    // Overflow-safe check that the whole tensor lies inside the buffer.
    const size_t bufSize = buffer->buf.size();
    if (current_offset > bufSize || offset > bufSize - current_offset) {
        throw std::runtime_error("Tensor offset is outside of the GGUF buffer");
    }
    const size_t availableElems = (bufSize - current_offset - offset) / sizeof(float);
    if (cols != 0 && rows > availableElems / cols) {
        throw std::runtime_error("Tensor data exceeds the size of the GGUF buffer");
    }

    const float* dataPtr = reinterpret_cast<const float*>(buffer->buf.data() + current_offset + offset);

    Mat mat((int)rows, (int)cols, CV_32F);
    for (size_t i = 0; i < rows; i++) {
        float* dstRow = mat.ptr<float>((int)i);
        const float* srcRow = dataPtr + i * cols; // row-major access
        for (size_t j = 0; j < cols; j++) {
            dstRow[j] = srcRow[j];
        }
    }
    return mat;
}
|
||
|
||
|
||
|
||
// Copies a float32 vector with `rows` elements out of the buffer into a new
// rows x 1 CV_32F Mat. The data is expected at `current_offset + offset`; the
// cursor itself is not moved.
//
// type:   tensor element type; only GGML_TYPE_F32 is supported.
// rows:   number of elements.
// offset: byte offset of the tensor relative to the current offset.
// Throws std::runtime_error for unsupported types, a length that does not fit
// into a Mat, or tensor data that would reach past the end of the buffer.
Mat GGUFBufferReader::read1DMat(ggml_type type, size_t rows, size_t offset) {
    if (type != GGML_TYPE_F32) {
        throw std::runtime_error("Unsupported tensor type: " + std::to_string(type));
    }

    // Mat dimensions are ints.
    if (rows > static_cast<size_t>(std::numeric_limits<int>::max())) {
        throw std::runtime_error("Tensor dimensions are too large");
    }

    // Overflow-safe check that the whole tensor lies inside the buffer.
    const size_t bufSize = buffer->buf.size();
    if (current_offset > bufSize || offset > bufSize - current_offset) {
        throw std::runtime_error("Tensor offset is outside of the GGUF buffer");
    }
    if (rows > (bufSize - current_offset - offset) / sizeof(float)) {
        throw std::runtime_error("Tensor data exceeds the size of the GGUF buffer");
    }

    const float* dataPtr = reinterpret_cast<const float*>(buffer->buf.data() + current_offset + offset);

    Mat mat((int)rows, 1, CV_32F);
    for (size_t row = 0; row < rows; row++) {
        mat.at<float>((int)row, 0) = dataPtr[row];
    }

    return mat;
}
|
||
CV__DNN_INLINE_NS_END | ||
}} | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
#include "../precomp.hpp"

#include <cstring>
#include <limits>
#include <stdexcept>
#include <type_traits>
|
||
#ifndef __OPENCV_GGUFBUFFER_HPP__ | ||
#define __OPENCV_GGUFBUFFER_HPP__ | ||
|
||
|
||
namespace cv { namespace dnn { | ||
CV__DNN_INLINE_NS_BEGIN | ||
|
||
// https://github.com/ggml-org/ggml/blob/master/docs/gguf.md | ||
// Tensor element types, identified by the numeric ids stored in the file.
// The ids 4 and 5 are intentionally not declared here.
enum ggml_type : uint32_t {
    // floating point
    GGML_TYPE_F32     = 0,
    GGML_TYPE_F16     = 1,

    // quantized
    GGML_TYPE_Q4_0    = 2,
    GGML_TYPE_Q4_1    = 3,
    GGML_TYPE_Q5_0    = 6,
    GGML_TYPE_Q5_1    = 7,
    GGML_TYPE_Q8_0    = 8,
    GGML_TYPE_Q8_1    = 9,
    GGML_TYPE_Q2_K    = 10,
    GGML_TYPE_Q3_K    = 11,
    GGML_TYPE_Q4_K    = 12,
    GGML_TYPE_Q5_K    = 13,
    GGML_TYPE_Q6_K    = 14,
    GGML_TYPE_Q8_K    = 15,
    GGML_TYPE_IQ2_XXS = 16,
    GGML_TYPE_IQ2_XS  = 17,
    GGML_TYPE_IQ3_XXS = 18,
    GGML_TYPE_IQ1_S   = 19,
    GGML_TYPE_IQ4_NL  = 20,
    GGML_TYPE_IQ3_S   = 21,
    GGML_TYPE_IQ2_S   = 22,
    GGML_TYPE_IQ4_XS  = 23,

    // plain integers and double precision
    GGML_TYPE_I8      = 24,
    GGML_TYPE_I16     = 25,
    GGML_TYPE_I32     = 26,
    GGML_TYPE_I64     = 27,
    GGML_TYPE_F64     = 28,

    GGML_TYPE_IQ1_M   = 29,

    // one past the last declared id
    GGML_TYPE_COUNT,
};
|
||
// https://github.com/ggml-org/ggml/blob/master/docs/gguf.md | ||
// Type ids of metadata values, as stored in the file in front of each value.
enum gguf_metadata_value_type : uint32_t {
    // 8/16/32-bit integers
    GGUF_METADATA_VALUE_TYPE_UINT8   = 0,
    GGUF_METADATA_VALUE_TYPE_INT8    = 1,
    GGUF_METADATA_VALUE_TYPE_UINT16  = 2,
    GGUF_METADATA_VALUE_TYPE_INT16   = 3,
    GGUF_METADATA_VALUE_TYPE_UINT32  = 4,
    GGUF_METADATA_VALUE_TYPE_INT32   = 5,

    GGUF_METADATA_VALUE_TYPE_FLOAT32 = 6,
    GGUF_METADATA_VALUE_TYPE_BOOL    = 7,
    GGUF_METADATA_VALUE_TYPE_STRING  = 8,
    GGUF_METADATA_VALUE_TYPE_ARRAY   = 9,

    // 64-bit scalars
    GGUF_METADATA_VALUE_TYPE_UINT64  = 10,
    GGUF_METADATA_VALUE_TYPE_INT64   = 11,
    GGUF_METADATA_VALUE_TYPE_FLOAT64 = 12,
};
|
||
// Owns the raw bytes of a GGUF file.
struct GGUFBuffer
{
    // Raw file content, byte for byte.
    std::vector<uint8_t> buf;

    // Fills `buf` from the file `fileName` (defined in the .cpp file).
    GGUFBuffer(const std::string & fileName);
};
|
||
struct GGUFBufferReader | ||
{ | ||
GGUFBufferReader(Ptr<const GGUFBuffer> buffer) : buffer(buffer), current_offset(0) {} | ||
// read single value | ||
template<typename T,typename R> R readSingleValue(); | ||
template<typename T>int readSingleValueInt(); | ||
int64 readSingleValueInt(gguf_metadata_value_type type); | ||
template<typename T>double readSingleValueReal(); | ||
double readSingleValueReal(gguf_metadata_value_type type); | ||
std::string readString(); | ||
DictValue readSingleValue(uint32_t type); | ||
|
||
// read array | ||
DictValue readIntArray(int n, gguf_metadata_value_type type); | ||
DictValue readRealArray(int n, gguf_metadata_value_type type); | ||
DictValue readStringArray(int n); | ||
|
||
// Mat | ||
Mat read2DMat(ggml_type type, size_t rows, size_t cols, size_t offset); | ||
Mat read1DMat(ggml_type type, size_t rows, size_t offset); | ||
Ptr<const GGUFBuffer> buffer; | ||
size_t current_offset; | ||
}; | ||
|
||
template<typename T,typename R> | ||
R GGUFBufferReader::readSingleValue() { | ||
T value = *reinterpret_cast<const T*>(buffer->buf.data() + current_offset); | ||
current_offset += sizeof(T); | ||
return static_cast<R>(value); | ||
} | ||
|
||
// Converts `value` from T to R after verifying that it lies within the finite
// range of R.
//
// The lower bound is numeric_limits<R>::lowest(). numeric_limits<R>::min()
// must not be used here: for floating-point types it is the smallest positive
// normalized value, which would reject zero and every negative number.
//
// Throws std::out_of_range when the value lies outside [lowest(), max()].
template <typename T, typename R>
R checkRange(T value) {
    using Common = typename std::common_type<T, R>::type;
    const Common widened = static_cast<Common>(value);
    if (widened < static_cast<Common>(std::numeric_limits<R>::lowest()) ||
        widened > static_cast<Common>(std::numeric_limits<R>::max())) {
        throw std::out_of_range("Value out of range");
    }
    return static_cast<R>(value);
}
|
||
// Parse single int value | ||
template<typename T> | ||
int GGUFBufferReader::readSingleValueInt() { | ||
T value = *reinterpret_cast<const T*>(buffer->buf.data() + current_offset); | ||
current_offset += sizeof(T); | ||
return value; | ||
} | ||
|
||
// Parse single float value | ||
template<typename T> | ||
double GGUFBufferReader::readSingleValueReal() { | ||
T value = *reinterpret_cast<const T*>(buffer->buf.data() + current_offset); | ||
current_offset += sizeof(T); | ||
return checkRange<T,float>(value) ; | ||
} | ||
|
||
CV__DNN_INLINE_NS_END | ||
}} | ||
|
||
#endif // __OPENCV_GGUFBUFFER_HPP__ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
|
||
#include "../precomp.hpp" | ||
#include "../net_impl.hpp" | ||
#include <opencv2/dnn/layer_reg.private.hpp> | ||
#include "gguf_importer.hpp" | ||
#include "gguf_parser.hpp" | ||
// #include <opencv2/core.hpp> | ||
|
||
|
||
#include <fstream> | ||
|
||
namespace cv { namespace dnn { | ||
CV__DNN_INLINE_NS_BEGIN | ||
|
||
// Parses the GGUF file and prepares the network skeleton: one "input" and one
// "output" argument plus the graph connecting them. The layers themselves are
// added later by constructNet().
//
// The input shape is taken from the first parsed block and the output shape
// from the last one; both arguments are currently fixed to CV_32F.
// Raises an OpenCV assertion error when the file contains no blocks.
GGUFImporter::GGUFImporter(const String& ggufFileName) {
    netimpl = net.getImpl();
    ggufFile = makePtr<GGUFParser>(ggufFileName);

    // The shapes below come from the first and the last block, so at least one must exist.
    CV_Assert(ggufFile->blocks.size() > 0);

    netInput = netimpl->newArg("input", DNN_ARG_INPUT, true);
    netOutput = netimpl->newArg("output", DNN_ARG_OUTPUT, true);

    // Both arguments exist at this point, so the stored entries can be edited in place.
    ArgData& inputData = netimpl->args.at(netInput.idx);
    inputData.shape = ggufFile->blocks[0].getInputShape();
    // @TODO make more flexible
    inputData.type = CV_32F;

    ArgData& outputData = netimpl->args.at(netOutput.idx);
    outputData.shape = ggufFile->blocks[ggufFile->blocks.size() - 1].getOutputShape();
    outputData.type = CV_32F;

    graph = netimpl->newGraph("VanillaAttention", {netInput}, true);
    graph->setOutputs({netOutput});
}
|
||
// Appends one layer per parsed block to the program, installs the program in
// the graph, prepares the network for inference and returns it.
Net GGUFImporter::constructNet() {
    std::vector<Arg> blockInputs = {netInput};
    std::vector<Arg> blockOutputs = {netOutput};

    for (auto& block : ggufFile->blocks){
        addBlock(block, blockInputs, blockOutputs);
        // Chain the blocks: the next block consumes what this one produced.
        // Without this every block would read the network input.
        blockInputs = blockOutputs;
    }

    graph->setProg(prog);
    netimpl->prepareForInference();

    return net;
}
|
||
// Creates the attention layer for one block and appends it to the program.
//
// block:        metadata of the block to add.
// blockInputs:  arguments the new layer reads from.
// blockOutputs: replaced with the argument the new layer writes to.
//
// The last block writes to the network argument named "output"; every other
// block writes to an argument requested with an empty name, whose shape is
// taken from the block and whose type is fixed to CV_32F.
// Raises an OpenCV error when the layer cannot be created.
void GGUFImporter::addBlock(BlockMetadata block, std::vector<Arg>& blockInputs, std::vector<Arg>& blockOutputs) {
    const bool is_final_block = block.blockn == ggufFile->blocks.size() - 1;

    // @TODO need to rework the naming system
    const std::string outArgName = is_final_block ? "output" : "";
    Arg out = netimpl->getArg(outArgName);
    if (!is_final_block) {
        ArgData outData = netimpl->args.at(out.idx);
        outData.shape = block.getOutputShape();
        // @TODO make more flexible
        outData.type = CV_32F;
        netimpl->args[out.idx] = outData;
    }

    // Attention layer
    LayerParams layerParams;
    block.getAttentionLayerParams(ggufFile->tensor_reader, layerParams);
    Ptr<Layer> layer = LayerFactory::createLayerInstance(layerParams.type, layerParams);
    if (!layer) {
        // The factory yields an empty pointer when no layer could be instantiated.
        CV_Error(Error::StsError, "Can't create layer " + layerParams.name + " with type " + layerParams.type);
    }
    layer->netimpl = netimpl;
    layer->inputs = blockInputs;
    layer->outputs = {out};
    prog.push_back(layer);

    blockOutputs = {out};
}
|
||
// Builds a Net from the GGUF file `ggufFileName` using a temporary importer.
Net readNetFromGGUF(const String& ggufFileName){
    return GGUFImporter(ggufFileName).constructNet();
}
|
||
CV__DNN_INLINE_NS_END | ||
|
||
}} |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please use CV_Error, CV_Assert, etc. See https://docs.opencv.org/5.x/db/de0/group__core__utils.html#ga5b48c333c777666e076bd7052799f891