Skip to content

[WIP] [GSOC] GGUF Importer #27177

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 16 commits into
base: 5.x
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions modules/dnn/include/opencv2/dnn/dnn.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1236,6 +1236,11 @@ CV__DNN_INLINE_NS_BEGIN
const uchar* bufferWeightsPtr, size_t bufferWeightsSize);




/** @brief Reads a network model stored in <a href="https://github.com/ggml-org/ggml/blob/master/docs/gguf.md">GGUF</a> format.
 *  @param ggufFile path to the .gguf file with the model weights and metadata.
 *  @returns Net object.
 */
CV_EXPORTS_W Net readNetFromGGUF(CV_WRAP_FILE_PATH const String &ggufFile);


/** @brief Reads a network model stored in <a href="https://onnx.ai/">ONNX</a> format.
* @param onnxFile path to the .onnx file with text description of the network architecture.
* @param engine select DNN engine to be used. With auto selection the new engine is used first and falls back to classic.
Expand Down
161 changes: 161 additions & 0 deletions modules/dnn/src/llm/gguf_buffer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#include "../precomp.hpp"
#include "gguf_buffer.hpp"


namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN

// Loads the whole GGUF file into memory; throws std::runtime_error on failure.
// The error messages include the file name so the caller can tell which file
// failed (the previous messages ended in a dangling ": ").
GGUFBuffer::GGUFBuffer(const std::string & fileName){
    // Open at the end (ios::ate) so tellg() immediately yields the file size.
    std::ifstream file(fileName, std::ios::binary | std::ios::ate);
    if (!file.is_open()) {
        throw std::runtime_error("Could not open file: " + fileName);
    }

    // Get the size of the file and prepare a buffer
    const std::streamsize size = file.tellg();
    file.seekg(0, std::ios::beg);
    buf.resize((size_t)size);

    // Read the file content into the buffer
    if (!file.read(reinterpret_cast<char*>(buf.data()), size)) {
        throw std::runtime_error("Error reading file: " + fileName);
    }
}

// Decodes one metadata value of the given GGUF type into a DictValue and
// advances current_offset by the on-disk size of that type.
// Fixes vs. previous revision:
//  - FLOAT64 read 4 bytes as float (<float, double>), yielding a garbage value
//    and desynchronizing the offset; it must read a full 8-byte double.
//  - INT64 (declared in gguf_metadata_value_type) was missing and fell into
//    the "unsupported" throw.
DictValue GGUFBufferReader::readSingleValue(uint32_t type) {
    switch (type) {
        case GGUF_METADATA_VALUE_TYPE_UINT8:
            return DictValue(readSingleValue<uint8_t, int>());
        case GGUF_METADATA_VALUE_TYPE_INT8:
            return DictValue(readSingleValue<int8_t, int>());
        case GGUF_METADATA_VALUE_TYPE_UINT16:
            return DictValue(readSingleValue<uint16_t, int>());
        case GGUF_METADATA_VALUE_TYPE_INT16:
            return DictValue(readSingleValue<int16_t, int>());
        case GGUF_METADATA_VALUE_TYPE_UINT32:
            return DictValue(readSingleValue<uint32_t, int>());
        case GGUF_METADATA_VALUE_TYPE_INT32:
            return DictValue(readSingleValue<int32_t, int>());
        case GGUF_METADATA_VALUE_TYPE_FLOAT32:
            return DictValue(readSingleValue<float, float>());
        case GGUF_METADATA_VALUE_TYPE_BOOL:
            // GGUF encodes bool as one byte; any non-zero value is true.
            return DictValue(readSingleValue<uint8_t, int>() != 0);
        case GGUF_METADATA_VALUE_TYPE_STRING:
            return DictValue(readString());
        case GGUF_METADATA_VALUE_TYPE_UINT64:
            return DictValue(readSingleValue<uint64_t, int64>());
        case GGUF_METADATA_VALUE_TYPE_INT64:
            return DictValue(readSingleValue<int64_t, int64>());
        case GGUF_METADATA_VALUE_TYPE_FLOAT64:
            return DictValue(readSingleValue<double, double>());
        case GGUF_METADATA_VALUE_TYPE_ARRAY:
            throw std::runtime_error("Tried to parse array as single value");
        default:
            throw std::runtime_error("Unsupported metadata type: " + std::to_string(type));
    }
}

// Decodes n consecutive integers of the given GGUF type into a DictValue array.
DictValue GGUFBufferReader::readIntArray(int n, gguf_metadata_value_type type) {
    std::vector<int> values;
    values.reserve(n);
    for (int idx = 0; idx < n; ++idx)
        values.push_back(readSingleValueInt(type));
    return DictValue::arrayInt(values.begin(), values.size());
}

// Decodes n consecutive floating-point values of the given GGUF type into a
// DictValue array.
DictValue GGUFBufferReader::readRealArray(int n, gguf_metadata_value_type type) {
    std::vector<double> values;
    values.reserve(n);
    for (int idx = 0; idx < n; ++idx)
        values.push_back(readSingleValueReal(type));
    return DictValue::arrayReal(values.begin(), values.size());
}

// Decodes n consecutive length-prefixed strings into a DictValue array.
DictValue GGUFBufferReader::readStringArray(int n) {
    std::vector<std::string> values;
    values.reserve(n);
    for (int idx = 0; idx < n; ++idx)
        values.push_back(readString());
    return DictValue::arrayString(values.begin(), values.size());
}

// Dispatches a typed floating-point read based on the GGUF metadata type tag.
double GGUFBufferReader::readSingleValueReal(gguf_metadata_value_type type) {
    if (type == GGUF_METADATA_VALUE_TYPE_FLOAT32)
        return readSingleValueReal<float>();
    if (type == GGUF_METADATA_VALUE_TYPE_FLOAT64)
        return readSingleValueReal<double>();
    throw std::runtime_error("Unsupported metadata type: " + std::to_string(type));
}

// Dispatches a typed integer read based on the GGUF metadata type tag.
// Unsigned and signed types up to 32 bits are supported.
int64 GGUFBufferReader::readSingleValueInt(gguf_metadata_value_type type) {
    if (type == GGUF_METADATA_VALUE_TYPE_UINT8)
        return readSingleValueInt<uint8_t>();
    if (type == GGUF_METADATA_VALUE_TYPE_INT8)
        return readSingleValueInt<int8_t>();
    if (type == GGUF_METADATA_VALUE_TYPE_UINT16)
        return readSingleValueInt<uint16_t>();
    if (type == GGUF_METADATA_VALUE_TYPE_INT16)
        return readSingleValueInt<int16_t>();
    if (type == GGUF_METADATA_VALUE_TYPE_UINT32)
        return readSingleValueInt<uint32_t>();
    if (type == GGUF_METADATA_VALUE_TYPE_INT32)
        return readSingleValueInt<int32_t>();
    throw std::runtime_error("Unsupported metadata type: " + std::to_string(type));
}

// Reads a GGUF string: a uint64 length prefix followed by that many bytes
// (no NUL terminator). The previous revision stored the length in a uint32_t
// via readSingleValue<uint64_t, int>, silently truncating large lengths.
std::string GGUFBufferReader::readString() {
    uint64_t str_len = readSingleValue<uint64_t, uint64_t>();
    std::string str(reinterpret_cast<const char*>(buffer->buf.data() + current_offset), (size_t)str_len);
    current_offset += (size_t)str_len;
    return str;
}

// Reads a rows x cols row-major F32 tensor starting at current_offset + offset
// into a CV_32F Mat. Only GGML_TYPE_F32 is supported for now.
// Removed the per-element debug printf (stdout spam on every tensor load) and
// the dead commented-out column-major loop left over from debugging.
Mat GGUFBufferReader::read2DMat(ggml_type type, size_t rows, size_t cols, size_t offset) {
    if (type != GGML_TYPE_F32) {
        throw std::runtime_error("Unsupported tensor type: " + std::to_string(type));
    }
    // NOTE(review): assumes the tensor data region is 4-byte aligned within the
    // file buffer — confirm against GGUF's alignment guarantees.
    const float* dataPtr = reinterpret_cast<const float*>(buffer->buf.data() + current_offset + offset);

    Mat mat((int)rows, (int)cols, CV_32F);
    for (size_t i = 0; i < rows; i++) {
        for (size_t j = 0; j < cols; j++) {
            mat.at<float>((int)i, (int)j) = dataPtr[i * cols + j]; // row-major access
        }
    }
    return mat;
}




// Reads a length-`rows` F32 vector starting at current_offset + offset into a
// rows x 1 CV_32F Mat. Only GGML_TYPE_F32 is supported for now.
// Removed the per-element debug printf left over from development.
Mat GGUFBufferReader::read1DMat(ggml_type type, size_t rows, size_t offset) {
    if (type != GGML_TYPE_F32) {
        throw std::runtime_error("Unsupported tensor type: " + std::to_string(type));
    }
    const float* dataPtr = reinterpret_cast<const float*>(buffer->buf.data() + current_offset + offset);

    Mat mat((int)rows, 1, CV_32F);
    for (size_t row = 0; row < rows; row++) {
        mat.at<float>((int)row, 0) = dataPtr[row];
    }

    return mat;
}

CV__DNN_INLINE_NS_END
}}

126 changes: 126 additions & 0 deletions modules/dnn/src/llm/gguf_buffer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#include "../precomp.hpp"

#include <cstring>  // std::memcpy (unaligned reads in GGUFBufferReader templates)

#ifndef __OPENCV_GGUFBUFFER_HPP__
#define __OPENCV_GGUFBUFFER_HPP__


namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN

// https://github.com/ggml-org/ggml/blob/master/docs/gguf.md
// Tensor element types defined by the GGUF on-disk format. The explicit
// numeric values are part of the file format and must not be changed.
// Only GGML_TYPE_F32 is currently accepted by the Mat readers in
// gguf_buffer.cpp; all other types throw "Unsupported tensor type".
enum ggml_type : uint32_t {
GGML_TYPE_F32 = 0,
GGML_TYPE_F16 = 1,
GGML_TYPE_Q4_0 = 2,
GGML_TYPE_Q4_1 = 3,
GGML_TYPE_Q5_0 = 6,
GGML_TYPE_Q5_1 = 7,
GGML_TYPE_Q8_0 = 8,
GGML_TYPE_Q8_1 = 9,
GGML_TYPE_Q2_K = 10,
GGML_TYPE_Q3_K = 11,
GGML_TYPE_Q4_K = 12,
GGML_TYPE_Q5_K = 13,
GGML_TYPE_Q6_K = 14,
GGML_TYPE_Q8_K = 15,
GGML_TYPE_IQ2_XXS = 16,
GGML_TYPE_IQ2_XS = 17,
GGML_TYPE_IQ3_XXS = 18,
GGML_TYPE_IQ1_S = 19,
GGML_TYPE_IQ4_NL = 20,
GGML_TYPE_IQ3_S = 21,
GGML_TYPE_IQ2_S = 22,
GGML_TYPE_IQ4_XS = 23,
GGML_TYPE_I8 = 24,
GGML_TYPE_I16 = 25,
GGML_TYPE_I32 = 26,
GGML_TYPE_I64 = 27,
GGML_TYPE_F64 = 28,
GGML_TYPE_IQ1_M = 29,
GGML_TYPE_COUNT,  // number of defined types; not a valid on-disk value
};

// https://github.com/ggml-org/ggml/blob/master/docs/gguf.md
// Type tags for GGUF metadata key/value entries. The explicit numeric values
// are part of the file format and must not be changed.
enum gguf_metadata_value_type : uint32_t {
GGUF_METADATA_VALUE_TYPE_UINT8 = 0,
GGUF_METADATA_VALUE_TYPE_INT8 = 1,
GGUF_METADATA_VALUE_TYPE_UINT16 = 2,
GGUF_METADATA_VALUE_TYPE_INT16 = 3,
GGUF_METADATA_VALUE_TYPE_UINT32 = 4,
GGUF_METADATA_VALUE_TYPE_INT32 = 5,
GGUF_METADATA_VALUE_TYPE_FLOAT32 = 6,
GGUF_METADATA_VALUE_TYPE_BOOL = 7,     // stored as one byte; non-zero == true
GGUF_METADATA_VALUE_TYPE_STRING = 8,   // uint64 length prefix + raw bytes
GGUF_METADATA_VALUE_TYPE_ARRAY = 9,    // element type tag + uint64 count + payload
GGUF_METADATA_VALUE_TYPE_UINT64 = 10,
GGUF_METADATA_VALUE_TYPE_INT64 = 11,
GGUF_METADATA_VALUE_TYPE_FLOAT64 = 12,
};

// Owns the raw bytes of a GGUF file loaded fully into memory.
struct GGUFBuffer
{
// Reads the whole file into `buf`; throws std::runtime_error on open/read failure.
GGUFBuffer(const std::string & fileName);
// Entire file content, byte-for-byte.
std::vector<uint8_t> buf;
};

// Sequential cursor over a GGUFBuffer. Every read* method consumes bytes at
// current_offset and advances it, except the Mat readers which address data at
// current_offset + offset without moving the cursor.
struct GGUFBufferReader
{
GGUFBufferReader(Ptr<const GGUFBuffer> buffer) : buffer(buffer), current_offset(0) {}
// read single value: T is the on-disk type, R the returned C++ type
template<typename T,typename R> R readSingleValue();
template<typename T>int readSingleValueInt();
int64 readSingleValueInt(gguf_metadata_value_type type);
template<typename T>double readSingleValueReal();
double readSingleValueReal(gguf_metadata_value_type type);
std::string readString();
DictValue readSingleValue(uint32_t type);

// read array of n elements into a DictValue
DictValue readIntArray(int n, gguf_metadata_value_type type);
DictValue readRealArray(int n, gguf_metadata_value_type type);
DictValue readStringArray(int n);

// Mat readers (GGML_TYPE_F32 only); `offset` is relative to current_offset
Mat read2DMat(ggml_type type, size_t rows, size_t cols, size_t offset);
Mat read1DMat(ggml_type type, size_t rows, size_t offset);
Ptr<const GGUFBuffer> buffer;       // shared, read-only file content
size_t current_offset;              // cursor into buffer->buf
};

// Reads one value of on-disk type T at current_offset, advances the cursor,
// and returns it converted to R. Uses memcpy instead of dereferencing a
// reinterpret_cast'ed pointer: the byte offset is arbitrary, so the address
// may be misaligned for T (undefined behavior on alignment-strict targets).
template<typename T,typename R>
R GGUFBufferReader::readSingleValue() {
    T value;
    std::memcpy(&value, buffer->buf.data() + current_offset, sizeof(T));
    current_offset += sizeof(T);
    return static_cast<R>(value);
}

template <typename T, typename R>
R checkRange(T value) {
using Common = typename std::common_type<T, R>::type;
if (static_cast<Common>(value) < static_cast<Common>(std::numeric_limits<R>::min()) ||
static_cast<Common>(value) > static_cast<Common>(std::numeric_limits<R>::max())) {
throw std::out_of_range("Value out of range");
}
return value;
}

// Parse single int value of on-disk type T at current_offset and advance the
// cursor. memcpy avoids an unaligned dereference (see readSingleValue).
// NOTE(review): the return narrows T to int — a uint32_t above INT_MAX would
// be misrepresented; confirm callers only use this for values that fit.
template<typename T>
int GGUFBufferReader::readSingleValueInt() {
    T value;
    std::memcpy(&value, buffer->buf.data() + current_offset, sizeof(T));
    current_offset += sizeof(T);
    return (int)value;
}

// Parse single floating-point value of on-disk type T at current_offset and
// advance the cursor. memcpy avoids an unaligned dereference.
// The previous revision returned checkRange<T,float>(value), which (a) forced
// FLOAT64 metadata through float, losing precision, and (b) range-checked a
// double result against float limits. Both T used here (float, double) widen
// losslessly to the double return type, so no check is needed.
template<typename T>
double GGUFBufferReader::readSingleValueReal() {
    T value;
    std::memcpy(&value, buffer->buf.data() + current_offset, sizeof(T));
    current_offset += sizeof(T);
    return static_cast<double>(value);
}

CV__DNN_INLINE_NS_END
}}

#endif // __OPENCV_GGUFBUFFER_HPP__
85 changes: 85 additions & 0 deletions modules/dnn/src/llm/gguf_importer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@

#include "../precomp.hpp"
#include "../net_impl.hpp"
#include <opencv2/dnn/layer_reg.private.hpp>
#include "gguf_importer.hpp"
#include "gguf_parser.hpp"
// #include <opencv2/core.hpp>


#include <fstream>

namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN

// Parses the GGUF file and sets up the Net skeleton: one input Arg shaped like
// the first block's input, one output Arg shaped like the last block's output,
// and an empty graph connecting them. constructNet() fills in the layers.
GGUFImporter::GGUFImporter(const String& ggufFileName) {
netimpl = net.getImpl();
ggufFile = makePtr<GGUFParser>(ggufFileName);

netInput = netimpl->newArg("input", DNN_ARG_INPUT, true);
netOutput = netimpl->newArg("output", DNN_ARG_OUTPUT, true );
// args.at(...) is copied here, mutated, then written back below.
ArgData inputdata = netimpl->args.at(netInput.idx);
inputdata.shape = ggufFile->blocks[0].getInputShape();
// @TODO make more flexible
inputdata.type = CV_32F;

ArgData outputdata = netimpl->args.at(netOutput.idx);
outputdata.shape = ggufFile->blocks[ggufFile->blocks.size() - 1].getOutputShape(); // output size;
outputdata.type = CV_32F;

netimpl->args[netInput.idx] = inputdata;
netimpl->args[netOutput.idx] = outputdata;

graph = netimpl->newGraph("VanillaAttention", {netInput}, true);
graph->setOutputs({netOutput});
}

// Translates every parsed GGUF block into layers (via addBlock), attaches the
// accumulated program to the graph and finalizes the Net for inference.
Net GGUFImporter::constructNet() {
std::vector<Arg> blockInputs = {netInput};
std::vector<Arg> blockOutputs = {netOutput};

for (auto& block : ggufFile->blocks){
addBlock(block, blockInputs, blockOutputs);
}

graph->setProg(prog);
netimpl->prepareForInference();

return net;
}

// Appends one transformer block to the program as a single attention layer.
// The final block writes to the net's "output" Arg; intermediate blocks get a
// freshly named temp Arg.
// NOTE(review): blockInputs is taken by reference but never advanced to the
// previous block's output, so every block appears to consume the original net
// input — confirm whether chaining is intended here (file is WIP).
// NOTE(review): block.blockn is compared against blocks.size() - 1 — check the
// signedness of blockn to avoid a signed/unsigned comparison warning.
void GGUFImporter::addBlock(BlockMetadata block, std::vector<Arg>& blockInputs, std::vector<Arg>& blockOutputs) {
bool is_final_block = block.blockn == ggufFile->blocks.size() - 1;
// ArgKind outputArgKind = is_final_block ? DNN_ARG_OUTPUT : DNN_ARG_TEMP;
// Add attention
LayerParams layerParams;
// @TODO need to rework the naming system
std::string outArgName = is_final_block ? "output" : "";
Arg out = netimpl->getArg(outArgName);
if(!is_final_block) {
// Copy the ArgData, set the block's output shape/type, write it back.
ArgData outData = netimpl->args.at(out.idx);
outData.shape = block.getOutputShape();
// @TODO make more flexible
outData.type = CV_32F;
netimpl->args[out.idx] = outData;
}

// Attention layer: params come from the block's tensors in the GGUF file.
block.getAttentionLayerParams(ggufFile->tensor_reader, layerParams);
Ptr<Layer> layer = LayerFactory::createLayerInstance(layerParams.type, layerParams);
layer->netimpl = netimpl;
layer->inputs = blockInputs;
layer->outputs = {out};
prog.push_back(layer);

blockOutputs = {out};
}

// Public entry point declared in dnn.hpp: parses a GGUF file and builds a Net.
Net readNetFromGGUF(const String& ggufFileName){
GGUFImporter importer(ggufFileName);
return importer.constructNet();
}

CV__DNN_INLINE_NS_END

}}
Loading
Loading
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy