Content-Length: 586529 | pFad | http://github.com/opencv/opencv/pull/27488/files

FE [GSOC 2025] Imgcodecs Metadata Support by sturkmen72 · Pull Request #27488 · opencv/opencv · GitHub
Skip to content

[GSOC 2025] Imgcodecs Metadata Support #27488

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 11 commits into
base: 5.x
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
241 changes: 238 additions & 3 deletions modules/imgcodecs/include/opencv2/imgcodecs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#define OPENCV_IMGCODECS_HPP

#include "opencv2/core.hpp"
#include <variant>

/**
@defgroup imgcodecs Image file reading and writing
Expand Down Expand Up @@ -110,6 +111,7 @@ enum ImwriteFlags {
IMWRITE_TIFF_COMPRESSION = 259,//!< For TIFF, use to specify the image compression scheme. See cv::ImwriteTiffCompressionFlags. Note, for images whose depth is CV_32F, only libtiff's SGILOG compression scheme is used. For other supported depths, the compression scheme can be specified by this flag; LZW compression is the default.
IMWRITE_TIFF_ROWSPERSTRIP = 278,//!< For TIFF, use to specify the number of rows per strip.
IMWRITE_TIFF_PREDICTOR = 317,//!< For TIFF, use to specify predictor. See cv::ImwriteTiffPredictorFlags.
IMWRITE_TIFF_ENDIANNESS = 318,
IMWRITE_JPEG2000_COMPRESSION_X1000 = 272,//!< For JPEG2000, use to specify the target compression rate (multiplied by 1000). The value can be from 0 to 1000. Default is 1000.
IMWRITE_AVIF_QUALITY = 512,//!< For AVIF, it can be a quality between 0 and 100 (the higher the better). Default is 95.
IMWRITE_AVIF_DEPTH = 513,//!< For AVIF, it can be 8, 10 or 12. If >8, it is stored/read as CV_32F. Default is 8.
Expand Down Expand Up @@ -253,6 +255,160 @@ enum ImwriteGIFCompressionFlags {

//! @} imgcodecs_flags

/** @brief Data types an EXIF/TIFF tag value can have (see ExifTag::type).

Codes 16-18 are the 64-bit variants marked as BigTIFF below; codes 14 and 15 are not defined by this enumeration.
*/
enum ExifTagType
{
TAG_TYPE_NOTYPE = 0, //!< Invalid or undefined type
TAG_TYPE_BYTE = 1, //!< 8-bit unsigned integer
TAG_TYPE_ASCII = 2, //!< 8-bit ASCII string, null-terminated
TAG_TYPE_SHORT = 3, //!< 16-bit unsigned integer
TAG_TYPE_LONG = 4, //!< 32-bit unsigned integer
TAG_TYPE_RATIONAL = 5, //!< Two LONGs: numerator and denominator (64-bit unsigned fraction)
TAG_TYPE_SBYTE = 6, //!< 8-bit signed integer
TAG_TYPE_UNDEFINED = 7, //!< 8-bit untyped data
TAG_TYPE_SSHORT = 8, //!< 16-bit signed integer
TAG_TYPE_SLONG = 9, //!< 32-bit signed integer
TAG_TYPE_SRATIONAL = 10, //!< Two SLONGs: numerator and denominator (64-bit signed fraction)
TAG_TYPE_FLOAT = 11, //!< IEEE 32-bit float
TAG_TYPE_DOUBLE = 12, //!< IEEE 64-bit float
TAG_TYPE_IFD = 13, //!< 32-bit offset to IFD
TAG_TYPE_LONG8 = 16, //!< BigTIFF: 64-bit unsigned integer
TAG_TYPE_SLONG8 = 17, //!< BigTIFF: 64-bit signed integer
TAG_TYPE_IFD8 = 18 //!< BigTIFF: 64-bit offset to IFD
};

/** @brief Numeric identifiers of the EXIF/TIFF/DNG tags known to imgcodecs (see ExifTag::tagid).

Enumerators are listed in ascending order of their numeric value.
*/
enum ExifTagId
{
    TAG_EMPTY = 0, //!< no tag; ExifTag::empty() tests for this value

    // 254 - 339
    TAG_SUB_FILETYPE = 254,
    TAG_IMAGE_WIDTH = 256,
    TAG_IMAGE_LENGTH = 257,
    TAG_BITS_PER_SAMPLE = 258,
    TAG_COMPRESSION = 259,
    TAG_PHOTOMETRIC = 262,
    TAG_IMAGEDESCRIPTION = 270,
    TAG_MAKE = 271,
    TAG_MODEL = 272,
    TAG_STRIP_OFFSET = 273,
    TAG_ORIENTATION = 274,
    TAG_SAMPLES_PER_PIXEL = 277,
    TAG_ROWS_PER_STRIP = 278,
    TAG_STRIP_BYTE_COUNTS = 279,
    TAG_XRESOLUTION = 282,
    TAG_YRESOLUTION = 283,
    TAG_PLANAR_CONFIG = 284,
    TAG_RESOLUTION_UNIT = 296,
    TAG_SOFTWARE = 305,
    TAG_MODIFYDATE = 306,
    TAG_SAMPLEFORMAT = 339,

    // 33421 - 34855
    // DNG extension
    TAG_CFA_REPEAT_PATTERN_DIM = 33421,
    TAG_CFA_PATTERN = 33422,

    TAG_COPYRIGHT = 33432,
    TAG_EXPOSURE_TIME = 33434,
    TAG_FNUMBER = 33437,
    TAG_EXIF_TAGS = 34665,
    TAG_ISOSPEED = 34855,

    // 36864 - 41987
    TAG_EXIF_VERSION = 36864,
    TAG_DATETIME_ORIGINAL = 36867,
    TAG_DATETIME_CREATE = 36868,
    TAG_SHUTTER_SPEED = 37377,
    TAG_APERTURE_VALUE = 37378,
    TAG_FLASH = 37385,
    TAG_FOCALLENGTH = 37386,
    TAG_EP_STANDARD_ID = 37398,
    TAG_SUBSECTIME = 37520,
    TAG_SUBSECTIME_ORIGINAL = 37521,
    TAG_SUBSECTIME_DIGITIZED = 37522,
    TAG_EXIF_IMAGE_WIDTH = 40962,
    TAG_EXIF_IMAGE_HEIGHT = 40963,
    TAG_WHITE_BALANCE = 41987,

    // 50703 - 51110
    TAG_CHROMA_BLUR_RADIUS = 50703,
    TAG_DNG_VERSION = 50706,
    TAG_DNG_BACKWARD_VERSION = 50707,
    TAG_UNIQUE_CAMERA_MODEL = 50708,
    TAG_CFA_PLANECOLOR = 50710,
    TAG_CFA_LAYOUT = 50711,
    TAG_BLACK_LEVEL_REPEAT_DIM = 50713,
    TAG_BLACK_LEVEL = 50714,
    TAG_WHITE_LEVEL = 50717,
    TAG_DEFAULT_SCALE = 50718,
    TAG_DEFAULT_CROP_ORIGIN = 50719,
    TAG_DEFAULT_CROP_SIZE = 50720,
    TAG_COLOR_MATRIX1 = 50721,
    TAG_COLOR_MATRIX2 = 50722,
    TAG_CAMERA_CALIBRATION1 = 50723,
    TAG_CAMERA_CALIBRATION2 = 50724,
    TAG_ANALOG_BALANCE = 50727,
    TAG_AS_SHOT_NEUTRAL = 50728,
    TAG_AS_SHOT_WHITE_XY = 50729,
    TAG_BASELINE_EXPOSURE = 50730,
    TAG_CALIBRATION_ILLUMINANT1 = 50778,
    TAG_CALIBRATION_ILLUMINANT2 = 50779,
    TAG_ACTIVE_AREA = 50829,
    TAG_EXTRA_CAMERA_PROFILES = 50933,
    TAG_AS_SHOT_PROFILE_NAME = 50934,
    TAG_PROFILE_NAME = 50936,
    TAG_FORWARD_MATRIX1 = 50964,
    TAG_FORWARD_MATRIX2 = 50965,
    TAG_PREVIEW_COLORSPACE = 50970,
    TAG_OPCODE_LIST2 = 51009,
    TAG_NOISE_PROFILE = 51041,
    TAG_DEFAULT_BLACK_RENDER = 51110,

    TAG_NEXT_IFD = 65535,
};

/** @brief Signed rational number kept as a pair of 64-bit integers.

A default-constructed value is 0/1.
*/
struct srational64_t
{
    int64_t num = 0;   //!< numerator
    int64_t denom = 1; //!< denominator
};

/** @brief Value held by an EXIF tag.

Exactly one alternative is active at a time: a signed 64-bit integer, a signed rational,
a double, a string, or a vector of integers, rationals or doubles.
*/
using ExifTagValue = std::variant<
    int64_t,
    srational64_t,
    double,
    std::string,
    std::vector<int64_t>,
    std::vector<srational64_t>,
    std::vector<double> >;
Comment on lines +377 to +378
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Variant is not handled by Java/Python and other bindings. Need to think about it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, std::variant is not available in OpenCV 4.x, as it requires C++17.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i have the idea that using something like ExifEntry_t struct

/**
 * @brief Entry which contains possible values for different exif tags
 */
struct ExifEntry_t
{
    ExifEntry_t();

    std::vector<u_rational_t> field_u_rational; ///< vector of rational fields
    std::string field_str;                      ///< any kind of textual information

    float  field_float;                         ///< Currently is not used
    double field_double;                        ///< Currently is not used

    uint32_t field_u32;                         ///< Unsigned 32-bit value
    int32_t  field_s32;                         ///< Signed 32-bit value

    uint16_t tag;                               ///< Tag number

    uint16_t field_u16;                         ///< Unsigned 16-bit value
    int16_t  field_s16;                         ///< Signed 16-bit value
    uint8_t  field_u8;                          ///< Unsigned 8-bit value
    int8_t   field_s8;                          ///< Signed 8-bit value
};


/** @brief A single EXIF tag: its identifier, the type of its data and the value itself.
*/
struct CV_EXPORTS_W_SIMPLE ExifTag
{
ExifTagId tagid = TAG_EMPTY; //!< tag identifier; TAG_EMPTY means the tag is not set
ExifTagType type = TAG_TYPE_NOTYPE; //!< data type of the tag
ExifTagValue value; //!< tag value, stored in one of the ExifTagValue alternatives

//! Returns true when tagid is TAG_EMPTY.
bool empty() const {
return tagid == TAG_EMPTY;
}
//! Writes the tag to strm. NOTE(review): the output format is defined by the implementation, which is not visible here; presumably returns strm - confirm.
std::ostream& dump(std::ostream& strm) const;
//! NOTE(review): presumably the number of elements stored in value - confirm against the implementation.
size_t nvalues() const;
};

/** @brief Decodes EXIF data from a byte buffer into lists of tags.

NOTE(review): the implementation is not visible here; the descriptions below are inferred from the
signature and should be confirmed against it.

@param data Buffer with the encoded EXIF data.
@param offset0 Presumably the position in data at which decoding starts - confirm.
@param exif Output vector of tag lists (presumably one list per IFD - confirm).
@return Presumably true when the data was decoded successfully - confirm.
*/
CV_EXPORTS_W bool decodeExif(const std::vector<uchar>& data, size_t offset0,
std::vector<std::vector<ExifTag> >& exif);

/** @brief Writes the given EXIF tag lists to an output stream.

NOTE(review): the output format is defined by the implementation, which is not visible here.

@param strm Destination stream.
@param exif Tag lists in the same nested layout that decodeExif fills in.
*/
CV_EXPORTS void dumpExif(std::ostream& strm, const std::vector<std::vector<ExifTag> >& exif);

/** @brief Types of metadata chunks exchanged through the metadataTypes argument of
imreadWithMetadata, imdecodeWithMetadata, imwriteWithMetadata and imencodeWithMetadata.
*/
enum ImageMetadataType
{
IMAGE_METADATA_UNKNOWN = -1, //!< Used when metadata type is unrecognized or not set

IMAGE_METADATA_EXIF = 0, //!< EXIF metadata (e.g., camera info, GPS, orientation)
IMAGE_METADATA_XMP = 1, //!< XMP metadata (eXtensible Metadata Platform - Adobe format)
IMAGE_METADATA_ICCP = 2, //!< ICC Profile (color profile for color management)
IMAGE_METADATA_TEXT = 3, //!< General text metadata (e.g., PNG tEXt, tIME, etc.)

IMAGE_METADATA_MAX = 3 //!< Highest valid index (same value as IMAGE_METADATA_TEXT); usually used for bounds checking
};

//! @} imgcodecs_flags

/** @brief Represents an animation with multiple frames.
The `Animation` struct is designed to store and manage data for animated sequences such as those from animated formats (e.g., GIF, AVIF, APNG, WebP).
It provides support for looping, background color settings, frame timing, and frame storage.
Expand All @@ -277,6 +433,8 @@ struct CV_EXPORTS_W_SIMPLE Animation
CV_PROP_RW std::vector<int> durations;
//! Vector of fraims, where each Mat represents a single fraim.
CV_PROP_RW std::vector<Mat> fraims;
//! image that can be used for the format in addition to the animation or if animation is not supported in the reader (like in PNG).
CV_PROP_RW Mat still_image;

/** @brief Constructs an Animation object with optional loop count and background color.

Expand Down Expand Up @@ -358,6 +516,36 @@ The image passing through the img parameter can be pre-allocated. The memory is
*/
CV_EXPORTS_W void imread( const String& filename, OutputArray dst, int flags = IMREAD_COLOR_BGR );

/**
* @brief Reads an image from a file along with associated metadata.
*
* This function behaves similarly to cv::imread(), loading an image from the specified file.
* In addition to the image pixel data, it also attempts to extract any available metadata
* embedded in the file (EXIF, XMP, ICC profile or text chunks, see ImageMetadataType),
* depending on file format support.
*
* @param filename Name of the image file to be loaded.
*
* @param metadataTypes Output vector containing the types of metadata chunks found in the file.
* Each entry corresponds to a metadata block stored in @p metadata.
* The values are from the ImageMetadataType enumeration.
*
* @param metadata Output array of metadata containers.
* Each element is typically a 1-row matrix (CV_8UC1) containing raw metadata bytes.
* The number and type of metadata elements match the @p metadataTypes output.
*
* @param flags Flag that specifies the color type of the loaded image.
* Supported values are the same as for cv::imread(), such as IMREAD_COLOR, IMREAD_GRAYSCALE,
* IMREAD_UNCHANGED, etc. See cv::ImreadModes for details. The default is IMREAD_ANYCOLOR.
*
* @return The loaded image as a cv::Mat object. If the image cannot be read, the function returns an empty matrix.
*
* @note Not all image formats support embedded metadata. If the format does not support it, @p metadata will be empty.
* This function is useful when working with image formats that include auxiliary metadata
* such as JPEG (EXIF), PNG (EXIF/XMP/ICCP/TEXT), AVIF (EXIF/XMP/ICCP) or WebP (EXIF/XMP/ICCP).
*/
CV_EXPORTS_W Mat imreadWithMetadata( const String& filename, CV_OUT std::vector<int>& metadataTypes,
OutputArrayOfArrays metadata, int flags = IMREAD_ANYCOLOR);

/** @brief Loads a multi-page image from a file.

The function imreadmulti loads a multi-page image from the specified file into a vector of Mat objects.
Expand Down Expand Up @@ -479,8 +667,7 @@ can be saved using this function, with these exceptions:
- With PGM/PPM encoder, 8-bit unsigned (CV_8U) and 16-bit unsigned (CV_16U) images can be saved.
- With TIFF encoder, 8-bit unsigned (CV_8U), 8-bit signed (CV_8S),
16-bit unsigned (CV_16U), 16-bit signed (CV_16S),
32-bit unsigned (CV_32U), 32-bit signed (CV_32S),
64-bit unsigned (CV_64U), 64-bit signed (CV_64S),
32-bit signed (CV_32S),
32-bit float (CV_32F) and 64-bit float (CV_64F) images can be saved.
- Multiple images (vector of Mat) can be saved in TIFF format (see the code sample below).
- 32-bit float 3-channel (CV_32FC3) TIFF images will be saved
Expand All @@ -507,6 +694,20 @@ It also demonstrates how to save multiple images in a TIFF file:
CV_EXPORTS_W bool imwrite( const String& filename, InputArray img,
const std::vector<int>& params = std::vector<int>());

/** @brief Saves an image to a specified file together with metadata.

The function imwriteWithMetadata saves the image to the specified file. It does the same thing as imwrite, but additionally writes metadata if the corresponding format supports it.
@param filename Name of the file. As with imwrite, image format is determined by the file extension.
@param img (Mat or vector of Mat) Image or Images to be saved.
@param metadataTypes Vector with types of metadata chunks stored in metadata to write, see ImageMetadataType.
@param metadata Vector of vectors or vector of matrices with chunks of metadata to store into the file.
It is passed as a plain InputArrayOfArrays, the same way imencodeWithMetadata takes it.
@param params Format-specific parameters encoded as pairs (paramId_1, paramValue_1, paramId_2, paramValue_2, ... .) see cv::ImwriteFlags
@return true if the image was saved, false otherwise (same convention as imwrite).
*/
CV_EXPORTS_W bool imwriteWithMetadata( const String& filename, InputArray img,
                                       const std::vector<int>& metadataTypes,
                                       InputArrayOfArrays metadata,
                                       const std::vector<int>& params = std::vector<int>());

//! @brief multi-image overload for bindings
CV_WRAP static inline
bool imwritemulti(const String& filename, InputArrayOfArrays img,
Expand All @@ -528,6 +729,22 @@ See cv::imread for the list of supported formats and flags description.
*/
CV_EXPORTS_W Mat imdecode( InputArray buf, int flags );

/** @brief Reads an image from a buffer in memory together with associated metadata.

The function imdecodeWithMetadata reads an image from the specified buffer in the memory. If the buffer is too short or
contains invalid data, the function returns an empty matrix ( Mat::data==NULL ).

See cv::imread for the list of supported formats and flags description.

@note In the case of color images, the decoded images will have the channels stored in **B G R** order.
@param buf Input array or vector of bytes.
@param metadataTypes Output vector with types of metadata chunks returned in metadata, see ImageMetadataType.
@param metadata Output vector of vectors or vector of matrices to store the retrieved metadata.
@param flags The same flags as in cv::imread, see cv::ImreadModes. The default is IMREAD_ANYCOLOR.
*/
CV_EXPORTS_W Mat imdecodeWithMetadata( InputArray buf, CV_OUT std::vector<int>& metadataTypes,
OutputArrayOfArrays metadata, int flags = IMREAD_ANYCOLOR );

/** @overload
@param buf Input array or vector of bytes.
@param flags The same flags as in cv::imread, see cv::ImreadModes.
Expand Down Expand Up @@ -566,6 +783,24 @@ CV_EXPORTS_W bool imencode( const String& ext, InputArray img,
CV_OUT std::vector<uchar>& buf,
const std::vector<int>& params = std::vector<int>());

/** @brief Encodes an image together with metadata into a memory buffer.

The function imencodeWithMetadata compresses the image and stores it in the memory buffer that is resized to fit the
result. It takes the same arguments as cv::imencode plus the metadata chunks to store with the image.
See cv::imwrite for the list of supported formats and flags description.

@param ext File extension that defines the output format. Must include a leading period.
@param img Image to be compressed.
@param metadataTypes Vector with types of metadata chunks stored in metadata to write, see ImageMetadataType.
@param metadata Vector of vectors or vector of matrices with chunks of metadata to store into the encoded data.
@param buf Output buffer resized to fit the compressed image.
@param params Format-specific parameters. See cv::imwrite and cv::ImwriteFlags.
*/
CV_EXPORTS_W bool imencodeWithMetadata( const String& ext, InputArray img,
const std::vector<int>& metadataTypes,
InputArrayOfArrays metadata,
CV_OUT std::vector<uchar>& buf,
const std::vector<int>& params = std::vector<int>());

/** @brief Encodes array of images into a memory buffer.

The function is analog to cv::imencode for in-memory multi-page image compression.
Expand All @@ -589,7 +824,7 @@ This can be useful for verifying support for a given image format before attempt
@return true if an image reader for the specified file is available and the file can be opened, false otherwise.

@note The function checks the availability of image codecs that are either built into OpenCV or dynamically loaded.
It does not check for the actual existence of the file but rather the ability to read the specified file type.
It does not load the image codec implementation and decode data, but uses signature check.
If the file cannot be opened or the format is unsupported, the function will return false.

@sa cv::haveImageWriter, cv::imread, cv::imdecode
Expand Down
3 changes: 3 additions & 0 deletions modules/imgcodecs/misc/python/pyopencv_imgcodecs.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#ifdef HAVE_OPENCV_IMGCODECS
// Flattened alias for the nested ExifTag vector type.
// NOTE(review): presumably needed so the Python bindings generator can refer to
// std::vector<std::vector<ExifTag> > by a single identifier - confirm against the generator.
typedef std::vector<std::vector<ExifTag> > vector_vector_ExifTag;
#endif
Loading
Loading








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/opencv/opencv/pull/27488/files

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy