From 87f410b7f63b9f1e0564f5b74a7923b76620340a Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Fri, 14 Mar 2025 15:25:46 -0400 Subject: [PATCH 01/54] ENH: Allocate lock only once in StringDType quicksort --- .../_core/src/multiarray/stringdtype/dtype.c | 27 +++++++++++++++++++ .../_core/src/multiarray/stringdtype/dtype.h | 9 +++++++ numpy/_core/src/npysort/quicksort.cpp | 16 ++++++++--- 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.c b/numpy/_core/src/multiarray/stringdtype/dtype.c index a06e7a1ed1b6..14b15589a39a 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.c +++ b/numpy/_core/src/multiarray/stringdtype/dtype.c @@ -459,6 +459,33 @@ compare(void *a, void *b, void *arr) return ret; } +int +_compare_no_mutex(const void *a, const void *b, void *arr) +{ + PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)PyArray_DESCR(arr); + return _compare((void*) a, (void*) b, descr, descr); +} + +void +_init_sort_cmp(PyArray_Descr *descr, PyArray_CompareFunc **out_cmp) +{ + if (descr->type_num == NPY_VSTRING) { + NpyString_acquire_allocator((PyArray_StringDTypeObject *)descr); + *out_cmp = _compare_no_mutex; + } + else { + *out_cmp = PyDataType_GetArrFuncs(descr)->compare; + } +} + +void +_end_sort_cmp(PyArray_Descr *descr) +{ + if (descr->type_num == NPY_VSTRING) { + NpyString_release_allocator(((PyArray_StringDTypeObject *)descr)->allocator); + } +} + int _compare(void *a, void *b, PyArray_StringDTypeObject *descr_a, PyArray_StringDTypeObject *descr_b) diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.h b/numpy/_core/src/multiarray/stringdtype/dtype.h index 9baad65d5c88..b2e42bfb8832 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.h +++ b/numpy/_core/src/multiarray/stringdtype/dtype.h @@ -19,12 +19,21 @@ new_stringdtype_instance(PyObject *na_object, int coerce); NPY_NO_EXPORT int init_string_dtype(void); +NPY_NO_EXPORT int +_compare_no_mutex(const void *a, const void *b, 
void *arr); + // Assumes that the caller has already acquired the allocator locks for both // descriptors NPY_NO_EXPORT int _compare(void *a, void *b, PyArray_StringDTypeObject *descr_a, PyArray_StringDTypeObject *descr_b); +NPY_NO_EXPORT void +_init_sort_cmp(PyArray_Descr *descr, PyArray_CompareFunc **out_cmp); + +NPY_NO_EXPORT void +_end_sort_cmp(PyArray_Descr *descr); + NPY_NO_EXPORT int init_string_na_object(PyObject *mod); diff --git a/numpy/_core/src/npysort/quicksort.cpp b/numpy/_core/src/npysort/quicksort.cpp index ddf4fce0c28b..728b82201621 100644 --- a/numpy/_core/src/npysort/quicksort.cpp +++ b/numpy/_core/src/npysort/quicksort.cpp @@ -44,7 +44,7 @@ * the below code implements this converted to an iteration and as an * additional minor optimization skips the recursion depth checking on the * smaller partition as it is always less than half of the remaining data and - * will thus terminate fast enough + * will thus terminate fast enough` */ #define NPY_NO_DEPRECATED_API NPY_API_VERSION @@ -56,6 +56,7 @@ #include "numpy_tag.h" #include "x86_simd_qsort.hpp" #include "highway_qsort.hpp" +#include "stringdtype/dtype.h" #include #include @@ -510,7 +511,7 @@ npy_quicksort(void *start, npy_intp num, void *varr) { PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + PyArray_CompareFunc *cmp; char *vp; char *pl = (char *)start; char *pr = pl + (num - 1) * elsize; @@ -521,6 +522,8 @@ npy_quicksort(void *start, npy_intp num, void *varr) int *psdepth = depth; int cdepth = npy_get_msb(num) * 2; + _init_sort_cmp(PyArray_DESCR(arr), &cmp); + /* Items that have zero size don't make sense to sort */ if (elsize == 0) { return 0; @@ -606,6 +609,9 @@ npy_quicksort(void *start, npy_intp num, void *varr) } free(vp); + + _end_sort_cmp(PyArray_DESCR(arr)); + return 0; } @@ -615,7 +621,7 @@ npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) char *v 
= (char *)vv; PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + PyArray_CompareFunc *cmp; char *vp; npy_intp *pl = tosort; npy_intp *pr = tosort + num - 1; @@ -626,6 +632,8 @@ npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) int *psdepth = depth; int cdepth = npy_get_msb(num) * 2; + _init_sort_cmp(PyArray_DESCR(arr), &cmp); + /* Items that have zero size don't make sense to sort */ if (elsize == 0) { return 0; @@ -700,6 +708,8 @@ npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) cdepth = *(--psdepth); } + _end_sort_cmp(PyArray_DESCR(arr)); + return 0; } From ecd3ee1e85a66fbb2b79f4962e3f118048cd7d82 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Wed, 26 Mar 2025 13:53:20 -0400 Subject: [PATCH 02/54] ENH: Add dtype slots for sorting and begin integration --- numpy/_core/include/numpy/dtype_api.h | 14 +++- numpy/_core/src/multiarray/dtypemeta.c | 8 +++ numpy/_core/src/multiarray/dtypemeta.h | 35 +++++++++- numpy/_core/src/multiarray/item_selection.c | 16 +++-- .../_core/src/multiarray/stringdtype/dtype.c | 68 +++++++++++-------- .../_core/src/multiarray/stringdtype/dtype.h | 9 --- numpy/_core/src/npysort/quicksort.cpp | 13 +--- 7 files changed, 108 insertions(+), 55 deletions(-) diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index b37c9fbb6821..1a0afa4681fe 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -364,9 +364,12 @@ typedef int (PyArrayMethod_PromoterFunction)(PyObject *ufunc, #define NPY_DT_ensure_canonical 6 #define NPY_DT_setitem 7 #define NPY_DT_getitem 8 -#define NPY_DT_get_clear_loop 9 -#define NPY_DT_get_fill_zero_loop 10 -#define NPY_DT_finalize_descr 11 +#define NPY_DT_get_sort_function 9 +#define NPY_DT_get_argsort_function 10 +#define NPY_DT_sort_compare 11 +#define NPY_DT_get_clear_loop 12 +#define 
NPY_DT_get_fill_zero_loop 13 +#define NPY_DT_finalize_descr 14 // These PyArray_ArrFunc slots will be deprecated and replaced eventually // getitem and setitem can be defined as a performance optimization; @@ -477,4 +480,9 @@ typedef PyArray_Descr *(PyArrayDTypeMeta_FinalizeDescriptor)(PyArray_Descr *dtyp typedef int(PyArrayDTypeMeta_SetItem)(PyArray_Descr *, PyObject *, char *); typedef PyObject *(PyArrayDTypeMeta_GetItem)(PyArray_Descr *, char *); +typedef int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, + npy_intp, PyArray_SortFunc **); +typedef int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, + npy_intp, PyArray_ArgSortFunc **); + #endif /* NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_ */ diff --git a/numpy/_core/src/multiarray/dtypemeta.c b/numpy/_core/src/multiarray/dtypemeta.c index 0b1b0fb39192..abb56006b880 100644 --- a/numpy/_core/src/multiarray/dtypemeta.c +++ b/numpy/_core/src/multiarray/dtypemeta.c @@ -192,6 +192,9 @@ dtypemeta_initialize_struct_from_spec( NPY_DT_SLOTS(DType)->common_instance = NULL; NPY_DT_SLOTS(DType)->setitem = NULL; NPY_DT_SLOTS(DType)->getitem = NULL; + NPY_DT_SLOTS(DType)->get_sort_function = NULL; + NPY_DT_SLOTS(DType)->get_argsort_function = NULL; + NPY_DT_SLOTS(DType)->sort_compare = NULL; NPY_DT_SLOTS(DType)->get_clear_loop = NULL; NPY_DT_SLOTS(DType)->get_fill_zero_loop = NULL; NPY_DT_SLOTS(DType)->finalize_descr = NULL; @@ -1230,6 +1233,11 @@ dtypemeta_wrap_legacy_descriptor( dtype_class->flags |= NPY_DT_NUMERIC; } + /* If sorting compare not defined, set to arrfunc default */ + if (dt_slots->sort_compare == NULL) { + dt_slots->sort_compare = arr_funcs->compare; + } + if (_PyArray_MapPyTypeToDType(dtype_class, descr->typeobj, PyTypeNum_ISUSERDEF(dtype_class->type_num)) < 0) { Py_DECREF(dtype_class); diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index a8b78e3f7518..819a4a0d9aaf 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ 
b/numpy/_core/src/multiarray/dtypemeta.h @@ -31,6 +31,10 @@ typedef struct { */ PyArrayDTypeMeta_SetItem *setitem; PyArrayDTypeMeta_GetItem *getitem; + + PyArrayDTypeMeta_GetSortFunction *get_sort_function; + PyArrayDTypeMeta_GetArgSortFunction *get_argsort_function; + PyArray_CompareFunc *sort_compare; /* * Either NULL or fetches a clearing function. Clearing means deallocating * any referenced data and setting it to a safe state. For Python objects @@ -89,7 +93,7 @@ typedef struct { // This must be updated if new slots before within_dtype_castingimpl // are added -#define NPY_NUM_DTYPE_SLOTS 11 +#define NPY_NUM_DTYPE_SLOTS 14 #define NPY_NUM_DTYPE_PYARRAY_ARRFUNCS_SLOTS 22 #define NPY_DT_MAX_ARRFUNCS_SLOT \ NPY_NUM_DTYPE_PYARRAY_ARRFUNCS_SLOTS + _NPY_DT_ARRFUNCS_OFFSET @@ -291,6 +295,35 @@ PyArray_SETITEM(PyArrayObject *arr, char *itemptr, PyObject *v) Py_XSETREF(descr, _new_); \ } while(0) +static inline int +PyArray_GetSortFunction(PyArray_Descr *descr, NPY_SORTKIND which, + PyArray_SortFunc **out_sort) +{ + if (NPY_DT_SLOTS(NPY_DTYPE(descr))->get_sort_function == NULL) { + return -1; + } + + NPY_DT_SLOTS(NPY_DTYPE(descr))->get_sort_function(descr, which, out_sort); + return 0; +} + +static inline int +PyArray_GetArgSortFunction(PyArray_Descr *descr, NPY_SORTKIND which, + PyArray_ArgSortFunc **out_argsort) +{ + if (NPY_DT_SLOTS(NPY_DTYPE(descr))->get_argsort_function == NULL) { + return -1; + } + + NPY_DT_SLOTS(NPY_DTYPE(descr))->get_argsort_function(descr, which, out_argsort); + return 0; +} + +static inline PyArray_CompareFunc * +PyArray_SortCompareFunction(PyArray_Descr *descr) +{ + return NPY_DT_SLOTS(NPY_DTYPE(descr))->sort_compare; +} // Get the pointer to the PyArray_DTypeMeta for the type associated with the typenum. 
static inline PyArray_DTypeMeta * diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index d2db10633810..28ba79b01ff8 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -1570,10 +1570,14 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) return -1; } - sort = PyDataType_GetArrFuncs(PyArray_DESCR(op))->sort[which]; + PyArray_GetSortFunction(PyArray_DESCR(op), which, &sort); if (sort == NULL) { - if (PyDataType_GetArrFuncs(PyArray_DESCR(op))->compare) { + sort = PyDataType_GetArrFuncs(PyArray_DESCR(op))->sort[which]; + } + + if (sort == NULL) { + if (PyArray_SortCompareFunction(PyArray_DESCR(op)) != NULL) { switch (which) { default: case NPY_QUICKSORT: @@ -1721,10 +1725,14 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) PyArray_ArgSortFunc *argsort = NULL; PyObject *ret; - argsort = PyDataType_GetArrFuncs(PyArray_DESCR(op))->argsort[which]; + PyArray_GetArgSortFunction(PyArray_DESCR(op), which, &argsort); if (argsort == NULL) { - if (PyDataType_GetArrFuncs(PyArray_DESCR(op))->compare) { + argsort = PyDataType_GetArrFuncs(PyArray_DESCR(op))->argsort[which]; + } + + if (argsort == NULL) { + if (PyArray_SortCompareFunction(PyArray_DESCR(op)) != NULL) { switch (which) { default: case NPY_QUICKSORT: diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.c b/numpy/_core/src/multiarray/stringdtype/dtype.c index 14b15589a39a..bfa854de2fdf 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.c +++ b/numpy/_core/src/multiarray/stringdtype/dtype.c @@ -9,6 +9,7 @@ #include "numpy/arrayobject.h" #include "numpy/ndarraytypes.h" #include "numpy/npy_math.h" +#include "npy_sort.h" #include "static_string.h" #include "dtypemeta.h" @@ -459,33 +460,6 @@ compare(void *a, void *b, void *arr) return ret; } -int -_compare_no_mutex(const void *a, const void *b, void *arr) -{ - PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject 
*)PyArray_DESCR(arr); - return _compare((void*) a, (void*) b, descr, descr); -} - -void -_init_sort_cmp(PyArray_Descr *descr, PyArray_CompareFunc **out_cmp) -{ - if (descr->type_num == NPY_VSTRING) { - NpyString_acquire_allocator((PyArray_StringDTypeObject *)descr); - *out_cmp = _compare_no_mutex; - } - else { - *out_cmp = PyDataType_GetArrFuncs(descr)->compare; - } -} - -void -_end_sort_cmp(PyArray_Descr *descr) -{ - if (descr->type_num == NPY_VSTRING) { - NpyString_release_allocator(((PyArray_StringDTypeObject *)descr)->allocator); - } -} - int _compare(void *a, void *b, PyArray_StringDTypeObject *descr_a, PyArray_StringDTypeObject *descr_b) @@ -543,6 +517,45 @@ _compare(void *a, void *b, PyArray_StringDTypeObject *descr_a, return NpyString_cmp(&s_a, &s_b); } +static int +stringdtype_sort_compare(void *a, void *b, void *arr) { + PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)PyArray_DESCR(arr); + return _compare(a, b, descr, descr); +} + +int +_stringdtype_sort(void *start, npy_intp num, void *varr, PyArray_SortFunc *sort) { + PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)PyArray_DESCR(varr); + + NpyString_acquire_allocator(descr); + int result = sort(start, num, varr); + NpyString_release_allocator(descr->allocator); + + return result; +} + +int +_stringdtype_quicksort(void *start, npy_intp num, void *varr) { + return _stringdtype_sort(start, num, varr, &npy_quicksort); +} + +int +stringdtype_get_sort_function( + PyArray_Descr *descr, NPY_SORTKIND sort_kind, PyArray_SortFunc **out_sort) { + + switch (sort_kind) { + case NPY_QUICKSORT: + *out_sort = &_stringdtype_quicksort; + return 0; + default: + PyErr_Format(PyExc_ValueError, + "Sort kind %d not supported for string dtype", sort_kind); + return -1; + } + + return 0; +} + // PyArray_ArgFunc // The max element is the one with the highest unicode code point. 
int @@ -683,6 +696,7 @@ static PyType_Slot PyArray_StringDType_Slots[] = { &string_discover_descriptor_from_pyobject}, {NPY_DT_setitem, &stringdtype_setitem}, {NPY_DT_getitem, &stringdtype_getitem}, + {NPY_DT_sort_compare, &stringdtype_sort_compare}, {NPY_DT_ensure_canonical, &stringdtype_ensure_canonical}, {NPY_DT_PyArray_ArrFuncs_nonzero, &nonzero}, {NPY_DT_PyArray_ArrFuncs_compare, &compare}, diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.h b/numpy/_core/src/multiarray/stringdtype/dtype.h index b2e42bfb8832..9baad65d5c88 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.h +++ b/numpy/_core/src/multiarray/stringdtype/dtype.h @@ -19,21 +19,12 @@ new_stringdtype_instance(PyObject *na_object, int coerce); NPY_NO_EXPORT int init_string_dtype(void); -NPY_NO_EXPORT int -_compare_no_mutex(const void *a, const void *b, void *arr); - // Assumes that the caller has already acquired the allocator locks for both // descriptors NPY_NO_EXPORT int _compare(void *a, void *b, PyArray_StringDTypeObject *descr_a, PyArray_StringDTypeObject *descr_b); -NPY_NO_EXPORT void -_init_sort_cmp(PyArray_Descr *descr, PyArray_CompareFunc **out_cmp); - -NPY_NO_EXPORT void -_end_sort_cmp(PyArray_Descr *descr); - NPY_NO_EXPORT int init_string_na_object(PyObject *mod); diff --git a/numpy/_core/src/npysort/quicksort.cpp b/numpy/_core/src/npysort/quicksort.cpp index 728b82201621..737c9821a249 100644 --- a/numpy/_core/src/npysort/quicksort.cpp +++ b/numpy/_core/src/npysort/quicksort.cpp @@ -56,7 +56,6 @@ #include "numpy_tag.h" #include "x86_simd_qsort.hpp" #include "highway_qsort.hpp" -#include "stringdtype/dtype.h" #include #include @@ -511,7 +510,7 @@ npy_quicksort(void *start, npy_intp num, void *varr) { PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp; + PyArray_CompareFunc *cmp = PyArray_SortCompareFunction(PyArray_DESCR(arr)); char *vp; char *pl = (char *)start; char *pr = pl + (num - 1) * elsize; @@ -522,8 +521,6 
@@ npy_quicksort(void *start, npy_intp num, void *varr) int *psdepth = depth; int cdepth = npy_get_msb(num) * 2; - _init_sort_cmp(PyArray_DESCR(arr), &cmp); - /* Items that have zero size don't make sense to sort */ if (elsize == 0) { return 0; @@ -610,8 +607,6 @@ npy_quicksort(void *start, npy_intp num, void *varr) free(vp); - _end_sort_cmp(PyArray_DESCR(arr)); - return 0; } @@ -621,7 +616,7 @@ npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) char *v = (char *)vv; PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp; + PyArray_CompareFunc *cmp = PyArray_SortCompareFunction(PyArray_DESCR(arr)); char *vp; npy_intp *pl = tosort; npy_intp *pr = tosort + num - 1; @@ -632,8 +627,6 @@ npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) int *psdepth = depth; int cdepth = npy_get_msb(num) * 2; - _init_sort_cmp(PyArray_DESCR(arr), &cmp); - /* Items that have zero size don't make sense to sort */ if (elsize == 0) { return 0; @@ -708,8 +701,6 @@ npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) cdepth = *(--psdepth); } - _end_sort_cmp(PyArray_DESCR(arr)); - return 0; } From 7e0bf44e2f2c825b7bb308452d7ed7e8f19080fa Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Wed, 26 Mar 2025 19:20:09 -0400 Subject: [PATCH 03/54] MAINT: Rename sort compare slot access function --- numpy/_core/src/multiarray/dtypemeta.h | 2 +- numpy/_core/src/multiarray/item_selection.c | 4 ++-- numpy/_core/src/npysort/quicksort.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index 819a4a0d9aaf..728d1829ab0b 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ b/numpy/_core/src/multiarray/dtypemeta.h @@ -320,7 +320,7 @@ PyArray_GetArgSortFunction(PyArray_Descr *descr, NPY_SORTKIND which, } static inline PyArray_CompareFunc * -PyArray_SortCompareFunction(PyArray_Descr *descr) 
+PyArray_SortCompare(PyArray_Descr *descr) { return NPY_DT_SLOTS(NPY_DTYPE(descr))->sort_compare; } diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index 28ba79b01ff8..4bee2207a862 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -1577,7 +1577,7 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) } if (sort == NULL) { - if (PyArray_SortCompareFunction(PyArray_DESCR(op)) != NULL) { + if (PyArray_SortCompare(PyArray_DESCR(op)) != NULL) { switch (which) { default: case NPY_QUICKSORT: @@ -1732,7 +1732,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) } if (argsort == NULL) { - if (PyArray_SortCompareFunction(PyArray_DESCR(op)) != NULL) { + if (PyArray_SortCompare(PyArray_DESCR(op)) != NULL) { switch (which) { default: case NPY_QUICKSORT: diff --git a/numpy/_core/src/npysort/quicksort.cpp b/numpy/_core/src/npysort/quicksort.cpp index 737c9821a249..6ddd2a5b1453 100644 --- a/numpy/_core/src/npysort/quicksort.cpp +++ b/numpy/_core/src/npysort/quicksort.cpp @@ -510,7 +510,7 @@ npy_quicksort(void *start, npy_intp num, void *varr) { PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_SortCompareFunction(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); char *vp; char *pl = (char *)start; char *pr = pl + (num - 1) * elsize; @@ -616,7 +616,7 @@ npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) char *v = (char *)vv; PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_SortCompareFunction(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); char *vp; npy_intp *pl = tosort; npy_intp *pr = tosort + num - 1; From 45f5008aff81f1fe31853fa98a999e3f781c8ecc Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: 
Wed, 26 Mar 2025 19:43:21 -0400 Subject: [PATCH 04/54] ENH: Add dtype slot sorting functionality support to all sort kinds --- .../_core/src/multiarray/stringdtype/dtype.c | 70 +++++++++++++++++-- numpy/_core/src/npysort/heapsort.cpp | 4 +- numpy/_core/src/npysort/mergesort.cpp | 4 +- numpy/_core/src/npysort/timsort.cpp | 4 +- 4 files changed, 72 insertions(+), 10 deletions(-) diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.c b/numpy/_core/src/multiarray/stringdtype/dtype.c index bfa854de2fdf..00aa79b73e62 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.c +++ b/numpy/_core/src/multiarray/stringdtype/dtype.c @@ -539,18 +539,78 @@ _stringdtype_quicksort(void *start, npy_intp num, void *varr) { return _stringdtype_sort(start, num, varr, &npy_quicksort); } +int +_stringdtype_heapsort(void *start, npy_intp num, void *varr) { + return _stringdtype_sort(start, num, varr, &npy_heapsort); +} + +int +_stringdtype_timsort(void *start, npy_intp num, void *varr) { + return _stringdtype_sort(start, num, varr, &npy_timsort); +} + int stringdtype_get_sort_function( PyArray_Descr *descr, NPY_SORTKIND sort_kind, PyArray_SortFunc **out_sort) { switch (sort_kind) { + default: case NPY_QUICKSORT: *out_sort = &_stringdtype_quicksort; - return 0; + break; + case NPY_HEAPSORT: + *out_sort = &_stringdtype_heapsort; + break; + case NPY_STABLESORT: + *out_sort = &_stringdtype_timsort; + break; + } + + return 0; +} + +int +_stringdtype_argsort(void *vv, npy_intp *tosort, npy_intp num, void *varr, + PyArray_ArgSortFunc *argsort) { + PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)PyArray_DESCR(varr); + + NpyString_acquire_allocator(descr); + int result = argsort(vv, tosort, num, varr); + NpyString_release_allocator(descr->allocator); + + return result; +} + +int +_stringdtype_aquicksort(void *vv, npy_intp *tosort, npy_intp n, void *varr) { + return _stringdtype_argsort(vv, tosort, n, varr, &npy_aquicksort); +} + +int +_stringdtype_aheapsort(void *vv, 
npy_intp *tosort, npy_intp n, void *varr) { + return _stringdtype_argsort(vv, tosort, n, varr, &npy_aheapsort); +} + +int +_stringdtype_atimsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) { + return _stringdtype_argsort(vv, tosort, n, varr, &npy_atimsort); +} + +int +stringdtype_get_argsort_function( + PyArray_Descr *descr, NPY_SORTKIND sort_kind, PyArray_ArgSortFunc **out_argsort) { + + switch (sort_kind) { default: - PyErr_Format(PyExc_ValueError, - "Sort kind %d not supported for string dtype", sort_kind); - return -1; + case NPY_QUICKSORT: + *out_argsort = &npy_aquicksort; + break; + case NPY_HEAPSORT: + *out_argsort = &npy_aheapsort; + break; + case NPY_STABLESORT: + *out_argsort = &npy_atimsort; + break; } return 0; @@ -697,6 +757,8 @@ static PyType_Slot PyArray_StringDType_Slots[] = { {NPY_DT_setitem, &stringdtype_setitem}, {NPY_DT_getitem, &stringdtype_getitem}, {NPY_DT_sort_compare, &stringdtype_sort_compare}, + {NPY_DT_get_sort_function, &stringdtype_get_sort_function}, + {NPY_DT_get_argsort_function, &stringdtype_get_argsort_function}, {NPY_DT_ensure_canonical, &stringdtype_ensure_canonical}, {NPY_DT_PyArray_ArrFuncs_nonzero, &nonzero}, {NPY_DT_PyArray_ArrFuncs_compare, &compare}, diff --git a/numpy/_core/src/npysort/heapsort.cpp b/numpy/_core/src/npysort/heapsort.cpp index 492cd47262d8..9fb8b262df04 100644 --- a/numpy/_core/src/npysort/heapsort.cpp +++ b/numpy/_core/src/npysort/heapsort.cpp @@ -54,7 +54,7 @@ npy_heapsort(void *start, npy_intp num, void *varr) { PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); if (elsize == 0) { return 0; /* no need for sorting elements of no size */ } @@ -116,7 +116,7 @@ npy_aheapsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) char *v = (char *)vv; PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = 
PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); npy_intp *a, i, j, l, tmp; /* The array needs to be offset by one for heapsort indexing */ diff --git a/numpy/_core/src/npysort/mergesort.cpp b/numpy/_core/src/npysort/mergesort.cpp index 2fac0ccfafcd..63f645917f7b 100644 --- a/numpy/_core/src/npysort/mergesort.cpp +++ b/numpy/_core/src/npysort/mergesort.cpp @@ -385,7 +385,7 @@ npy_mergesort(void *start, npy_intp num, void *varr) { PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); char *pl = (char *)start; char *pr = pl + num * elsize; char *pw; @@ -461,7 +461,7 @@ npy_amergesort(void *v, npy_intp *tosort, npy_intp num, void *varr) { PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); npy_intp *pl, *pr, *pw; /* Items that have zero size don't make sense to sort */ diff --git a/numpy/_core/src/npysort/timsort.cpp b/numpy/_core/src/npysort/timsort.cpp index 0f0f5721e7cf..0c00bddd59bf 100644 --- a/numpy/_core/src/npysort/timsort.cpp +++ b/numpy/_core/src/npysort/timsort.cpp @@ -2250,7 +2250,7 @@ npy_timsort(void *start, npy_intp num, void *varr) { PyArrayObject *arr = reinterpret_cast(varr); size_t len = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); int ret; npy_intp l, n, stack_ptr, minrun; run stack[TIMSORT_STACK_SIZE]; @@ -2686,7 +2686,7 @@ npy_atimsort(void *start, npy_intp *tosort, npy_intp num, void *varr) { PyArrayObject *arr = 
reinterpret_cast(varr); size_t len = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); int ret; npy_intp l, n, stack_ptr, minrun; run stack[TIMSORT_STACK_SIZE]; From d235dc916eb32789c37538da68ed9e16c73e71b6 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Wed, 26 Mar 2025 19:53:28 -0400 Subject: [PATCH 05/54] ENH: Add descending flag to internal sorting functions --- numpy/_core/include/numpy/dtype_api.h | 4 ++-- numpy/_core/src/multiarray/dtypemeta.h | 14 ++++++++------ numpy/_core/src/multiarray/item_selection.c | 4 ++-- numpy/_core/src/multiarray/stringdtype/dtype.c | 8 ++++---- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index 1a0afa4681fe..0fff4a4db7c9 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -481,8 +481,8 @@ typedef int(PyArrayDTypeMeta_SetItem)(PyArray_Descr *, PyObject *, char *); typedef PyObject *(PyArrayDTypeMeta_GetItem)(PyArray_Descr *, char *); typedef int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, - npy_intp, PyArray_SortFunc **); + npy_intp, int, PyArray_SortFunc **); typedef int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, - npy_intp, PyArray_ArgSortFunc **); + npy_intp, int, PyArray_ArgSortFunc **); #endif /* NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_ */ diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index 728d1829ab0b..4378f96bd8c8 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ b/numpy/_core/src/multiarray/dtypemeta.h @@ -296,26 +296,28 @@ PyArray_SETITEM(PyArrayObject *arr, char *itemptr, PyObject *v) } while(0) static inline int -PyArray_GetSortFunction(PyArray_Descr *descr, NPY_SORTKIND which, - PyArray_SortFunc **out_sort) +PyArray_GetSortFunction(PyArray_Descr *descr, + NPY_SORTKIND which, int 
descending, PyArray_SortFunc **out_sort) { if (NPY_DT_SLOTS(NPY_DTYPE(descr))->get_sort_function == NULL) { return -1; } - NPY_DT_SLOTS(NPY_DTYPE(descr))->get_sort_function(descr, which, out_sort); + NPY_DT_SLOTS(NPY_DTYPE(descr))->get_sort_function( + descr, which, descending, out_sort); return 0; } static inline int -PyArray_GetArgSortFunction(PyArray_Descr *descr, NPY_SORTKIND which, - PyArray_ArgSortFunc **out_argsort) +PyArray_GetArgSortFunction(PyArray_Descr *descr, + NPY_SORTKIND which, int descending, PyArray_ArgSortFunc **out_argsort) { if (NPY_DT_SLOTS(NPY_DTYPE(descr))->get_argsort_function == NULL) { return -1; } - NPY_DT_SLOTS(NPY_DTYPE(descr))->get_argsort_function(descr, which, out_argsort); + NPY_DT_SLOTS(NPY_DTYPE(descr))->get_argsort_function( + descr, which, descending, out_argsort); return 0; } diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index 4bee2207a862..deb34af5d2a2 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -1570,7 +1570,7 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) return -1; } - PyArray_GetSortFunction(PyArray_DESCR(op), which, &sort); + PyArray_GetSortFunction(PyArray_DESCR(op), which, 0, &sort); if (sort == NULL) { sort = PyDataType_GetArrFuncs(PyArray_DESCR(op))->sort[which]; @@ -1725,7 +1725,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) PyArray_ArgSortFunc *argsort = NULL; PyObject *ret; - PyArray_GetArgSortFunction(PyArray_DESCR(op), which, &argsort); + PyArray_GetArgSortFunction(PyArray_DESCR(op), which, 0, &argsort); if (argsort == NULL) { argsort = PyDataType_GetArrFuncs(PyArray_DESCR(op))->argsort[which]; diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.c b/numpy/_core/src/multiarray/stringdtype/dtype.c index 00aa79b73e62..43d87f56b784 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.c +++ b/numpy/_core/src/multiarray/stringdtype/dtype.c @@ 
-550,8 +550,8 @@ _stringdtype_timsort(void *start, npy_intp num, void *varr) { } int -stringdtype_get_sort_function( - PyArray_Descr *descr, NPY_SORTKIND sort_kind, PyArray_SortFunc **out_sort) { +stringdtype_get_sort_function(PyArray_Descr *descr, + NPY_SORTKIND sort_kind, int descending, PyArray_SortFunc **out_sort) { switch (sort_kind) { default: @@ -597,8 +597,8 @@ _stringdtype_atimsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) { } int -stringdtype_get_argsort_function( - PyArray_Descr *descr, NPY_SORTKIND sort_kind, PyArray_ArgSortFunc **out_argsort) { +stringdtype_get_argsort_function(PyArray_Descr *descr, + NPY_SORTKIND sort_kind, int descending, PyArray_ArgSortFunc **out_argsort) { switch (sort_kind) { default: From 6a65c07627cb2a3b54b2170393f40f624cb963f8 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Thu, 27 Mar 2025 16:29:32 -0400 Subject: [PATCH 06/54] MAINT: Improve get dtype sort compare function name --- numpy/_core/src/multiarray/dtypemeta.h | 2 +- numpy/_core/src/multiarray/item_selection.c | 4 ++-- numpy/_core/src/npysort/heapsort.cpp | 4 ++-- numpy/_core/src/npysort/mergesort.cpp | 4 ++-- numpy/_core/src/npysort/quicksort.cpp | 4 ++-- numpy/_core/src/npysort/timsort.cpp | 4 ++-- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index 4378f96bd8c8..8c7df53e3b8e 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ b/numpy/_core/src/multiarray/dtypemeta.h @@ -322,7 +322,7 @@ PyArray_GetArgSortFunction(PyArray_Descr *descr, } static inline PyArray_CompareFunc * -PyArray_SortCompare(PyArray_Descr *descr) +PyArray_GetSortCompareFunction(PyArray_Descr *descr) { return NPY_DT_SLOTS(NPY_DTYPE(descr))->sort_compare; } diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index deb34af5d2a2..fab6c10e3db5 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ 
b/numpy/_core/src/multiarray/item_selection.c @@ -1577,7 +1577,7 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) } if (sort == NULL) { - if (PyArray_SortCompare(PyArray_DESCR(op)) != NULL) { + if (PyArray_GetSortCompareFunction(PyArray_DESCR(op)) != NULL) { switch (which) { default: case NPY_QUICKSORT: @@ -1732,7 +1732,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) } if (argsort == NULL) { - if (PyArray_SortCompare(PyArray_DESCR(op)) != NULL) { + if (PyArray_GetSortCompareFunction(PyArray_DESCR(op)) != NULL) { switch (which) { default: case NPY_QUICKSORT: diff --git a/numpy/_core/src/npysort/heapsort.cpp b/numpy/_core/src/npysort/heapsort.cpp index 9fb8b262df04..d9a1379eebf8 100644 --- a/numpy/_core/src/npysort/heapsort.cpp +++ b/numpy/_core/src/npysort/heapsort.cpp @@ -54,7 +54,7 @@ npy_heapsort(void *start, npy_intp num, void *varr) { PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); if (elsize == 0) { return 0; /* no need for sorting elements of no size */ } @@ -116,7 +116,7 @@ npy_aheapsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) char *v = (char *)vv; PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); npy_intp *a, i, j, l, tmp; /* The array needs to be offset by one for heapsort indexing */ diff --git a/numpy/_core/src/npysort/mergesort.cpp b/numpy/_core/src/npysort/mergesort.cpp index 63f645917f7b..2d0ae02dce5a 100644 --- a/numpy/_core/src/npysort/mergesort.cpp +++ b/numpy/_core/src/npysort/mergesort.cpp @@ -385,7 +385,7 @@ npy_mergesort(void *start, npy_intp num, void *varr) { PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = 
PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); char *pl = (char *)start; char *pr = pl + num * elsize; char *pw; @@ -461,7 +461,7 @@ npy_amergesort(void *v, npy_intp *tosort, npy_intp num, void *varr) { PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); npy_intp *pl, *pr, *pw; /* Items that have zero size don't make sense to sort */ diff --git a/numpy/_core/src/npysort/quicksort.cpp b/numpy/_core/src/npysort/quicksort.cpp index 6ddd2a5b1453..5eec89ed9fa6 100644 --- a/numpy/_core/src/npysort/quicksort.cpp +++ b/numpy/_core/src/npysort/quicksort.cpp @@ -510,7 +510,7 @@ npy_quicksort(void *start, npy_intp num, void *varr) { PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); char *vp; char *pl = (char *)start; char *pr = pl + (num - 1) * elsize; @@ -616,7 +616,7 @@ npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) char *v = (char *)vv; PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); char *vp; npy_intp *pl = tosort; npy_intp *pr = tosort + num - 1; diff --git a/numpy/_core/src/npysort/timsort.cpp b/numpy/_core/src/npysort/timsort.cpp index 0c00bddd59bf..9e7fed5d74cf 100644 --- a/numpy/_core/src/npysort/timsort.cpp +++ b/numpy/_core/src/npysort/timsort.cpp @@ -2250,7 +2250,7 @@ npy_timsort(void *start, npy_intp num, void *varr) { PyArrayObject *arr = reinterpret_cast(varr); size_t 
len = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); int ret; npy_intp l, n, stack_ptr, minrun; run stack[TIMSORT_STACK_SIZE]; @@ -2686,7 +2686,7 @@ npy_atimsort(void *start, npy_intp *tosort, npy_intp num, void *varr) { PyArrayObject *arr = reinterpret_cast(varr); size_t len = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_SortCompare(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); int ret; npy_intp l, n, stack_ptr, minrun; run stack[TIMSORT_STACK_SIZE]; From 379022f71206c9903339f57a7179a6ec86d800a9 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Thu, 27 Mar 2025 16:30:14 -0400 Subject: [PATCH 07/54] MAINT: Fix doc typo --- numpy/_core/src/npysort/quicksort.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/_core/src/npysort/quicksort.cpp b/numpy/_core/src/npysort/quicksort.cpp index 5eec89ed9fa6..4fd3a02be27a 100644 --- a/numpy/_core/src/npysort/quicksort.cpp +++ b/numpy/_core/src/npysort/quicksort.cpp @@ -44,7 +44,7 @@ * the below code implements this converted to an iteration and as an * additional minor optimization skips the recursion depth checking on the * smaller partition as it is always less than half of the remaining data and - * will thus terminate fast enough` + * will thus terminate fast enough */ #define NPY_NO_DEPRECATED_API NPY_API_VERSION From 36fa05f3e19a612b731671ab3e82a01504e3f62d Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Thu, 27 Mar 2025 16:35:35 -0400 Subject: [PATCH 08/54] MAINT: Error out when non-legacy dtype has no sort_compare function --- numpy/_core/src/multiarray/dtypemeta.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/numpy/_core/src/multiarray/dtypemeta.c b/numpy/_core/src/multiarray/dtypemeta.c index abb56006b880..65ad65ef89cf 100644 --- a/numpy/_core/src/multiarray/dtypemeta.c +++ 
b/numpy/_core/src/multiarray/dtypemeta.c @@ -1235,6 +1235,13 @@ dtypemeta_wrap_legacy_descriptor( /* If sorting compare not defined, set to arrfunc default */ if (dt_slots->sort_compare == NULL) { + if (!NPY_DT_is_legacy(dtype_class)) { + PyErr_SetString(PyExc_RuntimeError, + "DType has no sort_compare function."); + Py_DECREF(dtype_class); + return -1; + } + dt_slots->sort_compare = arr_funcs->compare; } From a7c679265c07ab837c473a4813e3b769568b9df7 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Fri, 28 Mar 2025 00:17:21 -0400 Subject: [PATCH 09/54] DOC: Add release notes for new dtype sorting API --- doc/release/upcoming_changes/28516.c_api.rst | 1 + doc/release/upcoming_changes/28516.new_feature.rst | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 doc/release/upcoming_changes/28516.c_api.rst create mode 100644 doc/release/upcoming_changes/28516.new_feature.rst diff --git a/doc/release/upcoming_changes/28516.c_api.rst b/doc/release/upcoming_changes/28516.c_api.rst new file mode 100644 index 000000000000..0a5616ed28aa --- /dev/null +++ b/doc/release/upcoming_changes/28516.c_api.rst @@ -0,0 +1 @@ +* `PyArray_GetSortFunction` and `PyArray_GetArgSortFunction` have been added to the C-API. These functions return the sorting functions if provided for a given dtype in new slots. \ No newline at end of file diff --git a/doc/release/upcoming_changes/28516.new_feature.rst b/doc/release/upcoming_changes/28516.new_feature.rst new file mode 100644 index 000000000000..1de6ce4602cf --- /dev/null +++ b/doc/release/upcoming_changes/28516.new_feature.rst @@ -0,0 +1,7 @@ +New sorting function slots `NPY_DT_get_sort_function`, `NPY_DT_get_argsort_function` for dtype API +--------------------------------------------------------------------------------------------------- + +User-defined dtypes can now provide specific sorting functions for use with NumPy's sort methods. 
+The new slot functions ``NPY_DT_get_sort_function`` and ``NPY_DT_get_argsort_function`` should +return function pointers that implement the sorting functionality for the dtype while considering +sort-kind and order. \ No newline at end of file From 84e7421b3edc7b78284a173257f48906baca2fc6 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Fri, 28 Mar 2025 01:04:41 -0400 Subject: [PATCH 10/54] DOC: Add doc for sort compare slot in release notes --- doc/release/upcoming_changes/28516.c_api.rst | 2 +- doc/release/upcoming_changes/28516.new_feature.rst | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/release/upcoming_changes/28516.c_api.rst b/doc/release/upcoming_changes/28516.c_api.rst index 0a5616ed28aa..95f677e4897c 100644 --- a/doc/release/upcoming_changes/28516.c_api.rst +++ b/doc/release/upcoming_changes/28516.c_api.rst @@ -1 +1 @@ -* `PyArray_GetSortFunction` and `PyArray_GetArgSortFunction` have been added to the C-API. These functions return the sorting functions if provided for a given dtype in new slots. \ No newline at end of file +* `PyArray_GetSortFunction`, `PyArray_GetArgSortFunction`, and `PyArray_GetSortCompareFunction` have been added to the C-API. These functions return the sorting, argsorting, and sort comparison functions if provided for a given dtype in new slots. \ No newline at end of file diff --git a/doc/release/upcoming_changes/28516.new_feature.rst b/doc/release/upcoming_changes/28516.new_feature.rst index 1de6ce4602cf..b5dc12f05aad 100644 --- a/doc/release/upcoming_changes/28516.new_feature.rst +++ b/doc/release/upcoming_changes/28516.new_feature.rst @@ -2,6 +2,9 @@ New sorting function slots `NPY_DT_get_sort_function`, `NPY_DT_get_argsort_funct --------------------------------------------------------------------------------------------------- User-defined dtypes can now provide specific sorting functions for use with NumPy's sort methods. 
-The new slot functions ``NPY_DT_get_sort_function`` and ``NPY_DT_get_argsort_function`` should -return function pointers that implement the sorting functionality for the dtype while considering -sort-kind and order. \ No newline at end of file +The new slots `NPY_DT_get_sort_function` and `NPY_DT_get_argsort_function` should be functions that +return function pointers implementing sorting functionality for the dtype, while considering the +sort-kind and order. + +Additionally, the new `NPY_DT_sort_compare` slot can be used to provide a comparison function for +sorting, which will replace the default comparison function for the dtype in sorting functions. \ No newline at end of file From 5caea7f3cfb2e0d0b882c4952bcf59c9b108024c Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sat, 29 Mar 2025 20:24:14 -0400 Subject: [PATCH 11/54] DOC: Add note for potential deprecation of sort arrfuncs in release note --- doc/release/upcoming_changes/28516.new_feature.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/release/upcoming_changes/28516.new_feature.rst b/doc/release/upcoming_changes/28516.new_feature.rst index b5dc12f05aad..4117a9eed988 100644 --- a/doc/release/upcoming_changes/28516.new_feature.rst +++ b/doc/release/upcoming_changes/28516.new_feature.rst @@ -4,7 +4,8 @@ New sorting function slots `NPY_DT_get_sort_function`, `NPY_DT_get_argsort_funct User-defined dtypes can now provide specific sorting functions for use with NumPy's sort methods. The new slots `NPY_DT_get_sort_function` and `NPY_DT_get_argsort_function` should be functions that return function pointers implementing sorting functionality for the dtype, while considering the -sort-kind and order. +sort-kind and order. The old arrfunc slots ``NPY_DT_PyArray_ArrFuncs_sort`` and +``NPY_DT_PyArray_ArrFuncs_argsort`` may be deprecated in the future. 
Additionally, the new `NPY_DT_sort_compare` slot can be used to provide a comparison function for sorting, which will replace the default comparison function for the dtype in sorting functions. \ No newline at end of file From f7007250fccb01a365ce724e63241794c402f655 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sat, 29 Mar 2025 20:29:56 -0400 Subject: [PATCH 12/54] MAINT: Reorder dtype slots to prevent changing existing slot numbers --- numpy/_core/include/numpy/dtype_api.h | 12 ++++++------ numpy/_core/src/multiarray/dtypemeta.h | 10 +++++++--- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index 0fff4a4db7c9..d317cd4ab217 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -364,12 +364,12 @@ typedef int (PyArrayMethod_PromoterFunction)(PyObject *ufunc, #define NPY_DT_ensure_canonical 6 #define NPY_DT_setitem 7 #define NPY_DT_getitem 8 -#define NPY_DT_get_sort_function 9 -#define NPY_DT_get_argsort_function 10 -#define NPY_DT_sort_compare 11 -#define NPY_DT_get_clear_loop 12 -#define NPY_DT_get_fill_zero_loop 13 -#define NPY_DT_finalize_descr 14 +#define NPY_DT_get_clear_loop 9 +#define NPY_DT_get_fill_zero_loop 10 +#define NPY_DT_finalize_descr 11 +#define NPY_DT_get_sort_function 12 +#define NPY_DT_get_argsort_function 13 +#define NPY_DT_sort_compare 14 // These PyArray_ArrFunc slots will be deprecated and replaced eventually // getitem and setitem can be defined as a performance optimization; diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index 8c7df53e3b8e..d2807a5fd179 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ b/numpy/_core/src/multiarray/dtypemeta.h @@ -32,9 +32,6 @@ typedef struct { PyArrayDTypeMeta_SetItem *setitem; PyArrayDTypeMeta_GetItem *getitem; - PyArrayDTypeMeta_GetSortFunction *get_sort_function; - PyArrayDTypeMeta_GetArgSortFunction 
*get_argsort_function; - PyArray_CompareFunc *sort_compare; /* * Either NULL or fetches a clearing function. Clearing means deallocating * any referenced data and setting it to a safe state. For Python objects @@ -48,6 +45,7 @@ typedef struct { * Python objects. */ PyArrayMethod_GetTraverseLoop *get_clear_loop; + /* Either NULL or a function that sets a function pointer to a traversal loop that fills an array with zero values appropriate for the dtype. If @@ -71,6 +69,12 @@ typedef struct { * parameters, if any, as the operand dtype. */ PyArrayDTypeMeta_FinalizeDescriptor *finalize_descr; + + /* DType sorting methods. */ + PyArrayDTypeMeta_GetSortFunction *get_sort_function; + PyArrayDTypeMeta_GetArgSortFunction *get_argsort_function; + PyArray_CompareFunc *sort_compare; + /* * The casting implementation (ArrayMethod) to convert between two * instances of this DType, stored explicitly for fast access: From e7cf5c21f1b0929edef9b8d90f5eb84476711d7f Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sat, 29 Mar 2025 20:32:11 -0400 Subject: [PATCH 13/54] BUG: Error on missing `sort_compare` slot only when dtype is privately defined --- numpy/_core/src/multiarray/dtypemeta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/_core/src/multiarray/dtypemeta.c b/numpy/_core/src/multiarray/dtypemeta.c index 65ad65ef89cf..c0af64c9702b 100644 --- a/numpy/_core/src/multiarray/dtypemeta.c +++ b/numpy/_core/src/multiarray/dtypemeta.c @@ -1235,7 +1235,7 @@ dtypemeta_wrap_legacy_descriptor( /* If sorting compare not defined, set to arrfunc default */ if (dt_slots->sort_compare == NULL) { - if (!NPY_DT_is_legacy(dtype_class)) { + if (!NPY_DT_is_legacy(dtype_class) && !NPY_DT_is_user_defined(dtype_class)) { PyErr_SetString(PyExc_RuntimeError, "DType has no sort_compare function."); Py_DECREF(dtype_class); From 21fb7e7fb900e56a36494bb242e5292ae3f020da Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Tue, 1 Apr 2025 04:25:38 -0400 Subject: [PATCH 14/54] DOC: Add 
C-API documentation for new sorting slots --- doc/source/reference/c-api/array.rst | 36 +++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index 02db78ebb2b1..4b5627cfe7db 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -3513,13 +3513,27 @@ member of ``PyArrayDTypeMeta_Spec`` struct. .. c:macro:: NPY_DT_finalize_descr -.. c:type:: PyArray_Descr *(PyArrayDTypeMeta_FinalizeDescriptor)( \ - PyArray_Descr *dtype) +.. c:macro:: NPY_DT_get_sort_function + +.. c:type:: int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, + npy_intp sort_kind, int descending, PyArray_SortFunc **out_sort); + + If defined, sets a custom sorting function for the DType for each of + the sort kinds numpy implements. Returns 0 on success. + +.. c:macro:: NPY_DT_get_argsort_function + +.. c:type:: int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, + npy_intp sort_kind, int descending, PyArray_ArgSortFunc **out_argsort); + + If defined, sets a custom argsorting function for the DType for each of + the sort kinds numpy implements. Returns 0 on success. + +.. c:macro:: NPY_DT_sort_compare - If defined, a function that is called to "finalize" a descriptor - instance after an array is created. One use of this function is to - force newly created arrays to have a newly created descriptor - instance, no matter what input descriptor is provided by a user. + If defined, sets a custom comparison function for the DType for use in + sorting, which will replace `NPY_DT_PyArray_ArrFuncs_compare`. Implements + ``PyArray_CompareFunc``. PyArray_ArrFuncs slots ^^^^^^^^^^^^^^^^^^^^^^ @@ -3547,6 +3561,8 @@ DType API slots but for now we have exposed the legacy .. c:macro:: NPY_DT_PyArray_ArrFuncs_compare Computes a comparison for `numpy.sort`, implements ``PyArray_CompareFunc``. + If `NPY_DT_sort_compare` is defined, it will be used instead. 
This slot may + be deprecated in the future. .. c:macro:: NPY_DT_PyArray_ArrFuncs_argmax @@ -3590,13 +3606,17 @@ DType API slots but for now we have exposed the legacy An array of PyArray_SortFunc of length ``NPY_NSORTS``. If set, allows defining custom sorting implementations for each of the sorting - algorithms numpy implements. + algorithms numpy implements. If `NPY_DT_get_sort_function` is + defined, it will be used instead. This slot may be deprecated in the + future. .. c:macro:: NPY_DT_PyArray_ArrFuncs_argsort An array of PyArray_ArgSortFunc of length ``NPY_NSORTS``. If set, allows defining custom argsorting implementations for each of the - sorting algorithms numpy implements. + sorting algorithms numpy implements. If `NPY_DT_get_argsort_function` + is defined, it will be used instead. This slot may be deprecated in + the future. Macros and Static Inline Functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 9244ea3fd2ce2f30b51aae1f180a666688ff6c33 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Fri, 4 Apr 2025 20:20:11 -0400 Subject: [PATCH 15/54] ENH: Replace array object with context and auxdata in sortfunc signatures --- numpy/_core/include/numpy/dtype_api.h | 12 ++- numpy/_core/include/numpy/ndarraytypes.h | 8 +- numpy/_core/src/multiarray/dtypemeta.c | 7 +- numpy/_core/src/multiarray/dtypemeta.h | 13 ++- numpy/_core/src/multiarray/item_selection.c | 67 ++++++++----- .../_core/src/multiarray/stringdtype/dtype.c | 95 ------------------- numpy/_core/src/npysort/heapsort.cpp | 4 +- numpy/_core/src/npysort/mergesort.cpp | 4 +- numpy/_core/src/npysort/quicksort.cpp | 4 +- numpy/_core/src/npysort/timsort.cpp | 4 +- 10 files changed, 83 insertions(+), 135 deletions(-) diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index d317cd4ab217..c222b84cb327 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -369,7 +369,8 @@ typedef int (PyArrayMethod_PromoterFunction)(PyObject 
*ufunc, #define NPY_DT_finalize_descr 11 #define NPY_DT_get_sort_function 12 #define NPY_DT_get_argsort_function 13 -#define NPY_DT_sort_compare 14 +#define NPY_DT_compare 14 +#define NPY_DT_sort_compare 15 // These PyArray_ArrFunc slots will be deprecated and replaced eventually // getitem and setitem can be defined as a performance optimization; @@ -480,6 +481,15 @@ typedef PyArray_Descr *(PyArrayDTypeMeta_FinalizeDescriptor)(PyArray_Descr *dtyp typedef int(PyArrayDTypeMeta_SetItem)(PyArray_Descr *, PyObject *, char *); typedef PyObject *(PyArrayDTypeMeta_GetItem)(PyArray_Descr *, char *); +typedef int (PyArray_CompareFuncWithDescr)(const void *, const void *, + PyArray_Descr *); +typedef int (PyArray_SortFunc)(void *, npy_intp, + PyArrayMethod_Context *, NpyAuxData *, + NpyAuxData **); +typedef int (PyArray_ArgSortFunc)(void *, npy_intp *, npy_intp, + PyArrayMethod_Context *, NpyAuxData *, + NpyAuxData **); + typedef int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, npy_intp, int, PyArray_SortFunc **); typedef int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, diff --git a/numpy/_core/include/numpy/ndarraytypes.h b/numpy/_core/include/numpy/ndarraytypes.h index baa42406ac88..ad28dd953549 100644 --- a/numpy/_core/include/numpy/ndarraytypes.h +++ b/numpy/_core/include/numpy/ndarraytypes.h @@ -422,8 +422,8 @@ typedef int (PyArray_FromStrFunc)(char *s, void *dptr, char **endptr, typedef int (PyArray_FillFunc)(void *, npy_intp, void *); -typedef int (PyArray_SortFunc)(void *, npy_intp, void *); -typedef int (PyArray_ArgSortFunc)(void *, npy_intp *, npy_intp, void *); +typedef int (PyArray_SortFuncWithArray)(void *, npy_intp, void *); +typedef int (PyArray_ArgSortFuncWithArray)(void *, npy_intp *, npy_intp, void *); typedef int (PyArray_FillWithScalarFunc)(void *, npy_intp, void *, void *); @@ -514,8 +514,8 @@ typedef struct { * Sorting functions * Can be NULL */ - PyArray_SortFunc *sort[NPY_NSORTS]; - PyArray_ArgSortFunc *argsort[NPY_NSORTS]; + 
PyArray_SortFuncWithArray *sort[NPY_NSORTS]; + PyArray_ArgSortFuncWithArray *argsort[NPY_NSORTS]; /* * Dictionary of additional casting functions diff --git a/numpy/_core/src/multiarray/dtypemeta.c b/numpy/_core/src/multiarray/dtypemeta.c index c0af64c9702b..17bcde03fa68 100644 --- a/numpy/_core/src/multiarray/dtypemeta.c +++ b/numpy/_core/src/multiarray/dtypemeta.c @@ -194,6 +194,7 @@ dtypemeta_initialize_struct_from_spec( NPY_DT_SLOTS(DType)->getitem = NULL; NPY_DT_SLOTS(DType)->get_sort_function = NULL; NPY_DT_SLOTS(DType)->get_argsort_function = NULL; + NPY_DT_SLOTS(DType)->compare = NULL; NPY_DT_SLOTS(DType)->sort_compare = NULL; NPY_DT_SLOTS(DType)->get_clear_loop = NULL; NPY_DT_SLOTS(DType)->get_fill_zero_loop = NULL; @@ -1233,7 +1234,6 @@ dtypemeta_wrap_legacy_descriptor( dtype_class->flags |= NPY_DT_NUMERIC; } - /* If sorting compare not defined, set to arrfunc default */ if (dt_slots->sort_compare == NULL) { if (!NPY_DT_is_legacy(dtype_class) && !NPY_DT_is_user_defined(dtype_class)) { PyErr_SetString(PyExc_RuntimeError, @@ -1241,8 +1241,11 @@ dtypemeta_wrap_legacy_descriptor( Py_DECREF(dtype_class); return -1; } + } - dt_slots->sort_compare = arr_funcs->compare; + /* Auto-fill compare slot with sort-compare as default */ + if (dt_slots->compare == NULL && dt_slots->sort_compare != NULL) { + dt_slots->compare = dt_slots->sort_compare; } if (_PyArray_MapPyTypeToDType(dtype_class, descr->typeobj, diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index d2807a5fd179..ca4e7886a96b 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ b/numpy/_core/src/multiarray/dtypemeta.h @@ -73,7 +73,8 @@ typedef struct { /* DType sorting methods. 
*/ PyArrayDTypeMeta_GetSortFunction *get_sort_function; PyArrayDTypeMeta_GetArgSortFunction *get_argsort_function; - PyArray_CompareFunc *sort_compare; + PyArray_CompareFuncWithDescr *compare; + PyArray_CompareFuncWithDescr *sort_compare; /* * The casting implementation (ArrayMethod) to convert between two @@ -97,7 +98,7 @@ typedef struct { // This must be updated if new slots before within_dtype_castingimpl // are added -#define NPY_NUM_DTYPE_SLOTS 14 +#define NPY_NUM_DTYPE_SLOTS 15 #define NPY_NUM_DTYPE_PYARRAY_ARRFUNCS_SLOTS 22 #define NPY_DT_MAX_ARRFUNCS_SLOT \ NPY_NUM_DTYPE_PYARRAY_ARRFUNCS_SLOTS + _NPY_DT_ARRFUNCS_OFFSET @@ -325,7 +326,13 @@ PyArray_GetArgSortFunction(PyArray_Descr *descr, return 0; } -static inline PyArray_CompareFunc * +static inline PyArray_CompareFuncWithDescr * +PyArray_GetCompareFunction(PyArray_Descr *descr) +{ + return NPY_DT_SLOTS(NPY_DTYPE(descr))->compare; +} + +static inline PyArray_CompareFuncWithDescr * PyArray_GetSortCompareFunction(PyArray_Descr *descr) { return NPY_DT_SLOTS(NPY_DTYPE(descr))->sort_compare; diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index fab6c10e3db5..863ae2bfe219 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -1192,6 +1192,7 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, */ static int _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort, + PyArray_SortFuncWithArray *sort_with_array, PyArray_PartitionFunc *part, npy_intp const *kth, npy_intp nkth) { npy_intp N = PyArray_DIM(op, axis); @@ -1215,6 +1216,10 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort, NPY_cast_info to_cast_info = {.func = NULL}; NPY_cast_info from_cast_info = {.func = NULL}; + PyArrayMethod_Context context = { + .descriptors = &descr + }; + NPY_BEGIN_THREADS_DEF; /* Check if there is any sorting to do */ @@ -1293,7 +1298,12 @@ _new_sortlike(PyArrayObject *op, int 
axis, PyArray_SortFunc *sort, */ if (part == NULL) { - ret = sort(bufptr, N, op); + if (sort != NULL) { + ret = sort(bufptr, N, &context, NULL, NULL); + } + else { + ret = sort_with_array(bufptr, N, op); + } if (needs_api && PyErr_Occurred()) { ret = -1; } @@ -1359,6 +1369,7 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort, static PyObject* _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort, + PyArray_ArgSortFuncWithArray *argsort_with_array, PyArray_ArgPartitionFunc *argpart, npy_intp const *kth, npy_intp nkth) { @@ -1388,6 +1399,10 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort, NPY_ARRAYMETHOD_FLAGS transfer_flags; NPY_cast_info cast_info = {.func = NULL}; + PyArrayMethod_Context context = { + .descriptors = &descr + }; + NPY_BEGIN_THREADS_DEF; PyObject *mem_handler = PyDataMem_GetHandler(); @@ -1483,8 +1498,13 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort, } if (argpart == NULL) { - ret = argsort(valptr, idxptr, N, op); - /* Object comparisons may raise an exception */ + if (argsort != NULL) { + ret = argsort(valptr, idxptr, N, &context, NULL, NULL); + } + else { + ret = argsort_with_array(valptr, idxptr, N, op); + } + /* Object comparisons may raise an exception in Python 3 */ if (needs_api && PyErr_Occurred()) { ret = -1; } @@ -1555,6 +1575,8 @@ NPY_NO_EXPORT int PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) { PyArray_SortFunc *sort = NULL; + PyArray_SortFuncWithArray *sort_with_array = NULL; + int n = PyArray_NDIM(op); if (check_and_adjust_axis(&axis, n) < 0) { @@ -1573,21 +1595,21 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) PyArray_GetSortFunction(PyArray_DESCR(op), which, 0, &sort); if (sort == NULL) { - sort = PyDataType_GetArrFuncs(PyArray_DESCR(op))->sort[which]; + sort_with_array = PyDataType_GetArrFuncs(PyArray_DESCR(op))->sort[which]; } - if (sort == NULL) { - if 
(PyArray_GetSortCompareFunction(PyArray_DESCR(op)) != NULL) { + if (sort_with_array == NULL) { + if (PyDataType_GetArrFuncs(PyArray_DESCR(op))->compare != NULL) { switch (which) { default: case NPY_QUICKSORT: - sort = npy_quicksort; + sort_with_array = npy_quicksort; break; case NPY_HEAPSORT: - sort = npy_heapsort; + sort_with_array = npy_heapsort; break; case NPY_STABLESORT: - sort = npy_timsort; + sort_with_array = npy_timsort; break; } } @@ -1598,7 +1620,7 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) } } - return _new_sortlike(op, axis, sort, NULL, NULL, 0); + return _new_sortlike(op, axis, sort, sort_with_array, NULL, NULL, 0); } @@ -1671,7 +1693,7 @@ PyArray_Partition(PyArrayObject *op, PyArrayObject * ktharray, int axis, { PyArrayObject *kthrvl; PyArray_PartitionFunc *part; - PyArray_SortFunc *sort; + PyArray_SortFuncWithArray *sort; int n = PyArray_NDIM(op); int ret; @@ -1706,7 +1728,7 @@ PyArray_Partition(PyArrayObject *op, PyArrayObject * ktharray, int axis, return -1; } - ret = _new_sortlike(op, axis, sort, part, + ret = _new_sortlike(op, axis, NULL, sort, part, PyArray_DATA(kthrvl), PyArray_SIZE(kthrvl)); Py_DECREF(kthrvl); @@ -1723,26 +1745,27 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) { PyArrayObject *op2; PyArray_ArgSortFunc *argsort = NULL; + PyArray_ArgSortFuncWithArray *argsort_with_array = NULL; PyObject *ret; PyArray_GetArgSortFunction(PyArray_DESCR(op), which, 0, &argsort); if (argsort == NULL) { - argsort = PyDataType_GetArrFuncs(PyArray_DESCR(op))->argsort[which]; + argsort_with_array = PyDataType_GetArrFuncs(PyArray_DESCR(op))->argsort[which]; } - if (argsort == NULL) { - if (PyArray_GetSortCompareFunction(PyArray_DESCR(op)) != NULL) { + if (argsort_with_array == NULL) { + if (PyDataType_GetArrFuncs(PyArray_DESCR(op))->compare != NULL) { switch (which) { default: case NPY_QUICKSORT: - argsort = npy_aquicksort; + argsort_with_array = npy_aquicksort; break; case NPY_HEAPSORT: - argsort = 
npy_aheapsort; + argsort_with_array = npy_aheapsort; break; case NPY_STABLESORT: - argsort = npy_atimsort; + argsort_with_array = npy_atimsort; break; } } @@ -1758,7 +1781,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) return NULL; } - ret = _new_argsortlike(op2, axis, argsort, NULL, NULL, 0); + ret = _new_argsortlike(op2, axis, argsort, argsort_with_array, NULL, NULL, 0); Py_DECREF(op2); return ret; @@ -1774,7 +1797,7 @@ PyArray_ArgPartition(PyArrayObject *op, PyArrayObject *ktharray, int axis, { PyArrayObject *op2, *kthrvl; PyArray_ArgPartitionFunc *argpart; - PyArray_ArgSortFunc *argsort; + PyArray_ArgSortFuncWithArray *argsort; PyObject *ret; /* @@ -1812,7 +1835,7 @@ PyArray_ArgPartition(PyArrayObject *op, PyArrayObject *ktharray, int axis, return NULL; } - ret = _new_argsortlike(op2, axis, argsort, argpart, + ret = _new_argsortlike(op2, axis, NULL, argsort, argpart, PyArray_DATA(kthrvl), PyArray_SIZE(kthrvl)); Py_DECREF(kthrvl); @@ -1844,7 +1867,7 @@ PyArray_LexSort(PyObject *sort_keys, int axis) int elsize; int maxelsize; int object = 0; - PyArray_ArgSortFunc *argsort; + PyArray_ArgSortFuncWithArray *argsort; NPY_BEGIN_THREADS_DEF; if (!PySequence_Check(sort_keys) diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.c b/numpy/_core/src/multiarray/stringdtype/dtype.c index 43d87f56b784..d5fe246453f8 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.c +++ b/numpy/_core/src/multiarray/stringdtype/dtype.c @@ -523,99 +523,6 @@ stringdtype_sort_compare(void *a, void *b, void *arr) { return _compare(a, b, descr, descr); } -int -_stringdtype_sort(void *start, npy_intp num, void *varr, PyArray_SortFunc *sort) { - PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)PyArray_DESCR(varr); - - NpyString_acquire_allocator(descr); - int result = sort(start, num, varr); - NpyString_release_allocator(descr->allocator); - - return result; -} - -int -_stringdtype_quicksort(void *start, npy_intp num, void *varr) { - return 
_stringdtype_sort(start, num, varr, &npy_quicksort); -} - -int -_stringdtype_heapsort(void *start, npy_intp num, void *varr) { - return _stringdtype_sort(start, num, varr, &npy_heapsort); -} - -int -_stringdtype_timsort(void *start, npy_intp num, void *varr) { - return _stringdtype_sort(start, num, varr, &npy_timsort); -} - -int -stringdtype_get_sort_function(PyArray_Descr *descr, - NPY_SORTKIND sort_kind, int descending, PyArray_SortFunc **out_sort) { - - switch (sort_kind) { - default: - case NPY_QUICKSORT: - *out_sort = &_stringdtype_quicksort; - break; - case NPY_HEAPSORT: - *out_sort = &_stringdtype_heapsort; - break; - case NPY_STABLESORT: - *out_sort = &_stringdtype_timsort; - break; - } - - return 0; -} - -int -_stringdtype_argsort(void *vv, npy_intp *tosort, npy_intp num, void *varr, - PyArray_ArgSortFunc *argsort) { - PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)PyArray_DESCR(varr); - - NpyString_acquire_allocator(descr); - int result = argsort(vv, tosort, num, varr); - NpyString_release_allocator(descr->allocator); - - return result; -} - -int -_stringdtype_aquicksort(void *vv, npy_intp *tosort, npy_intp n, void *varr) { - return _stringdtype_argsort(vv, tosort, n, varr, &npy_aquicksort); -} - -int -_stringdtype_aheapsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) { - return _stringdtype_argsort(vv, tosort, n, varr, &npy_aheapsort); -} - -int -_stringdtype_atimsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) { - return _stringdtype_argsort(vv, tosort, n, varr, &npy_atimsort); -} - -int -stringdtype_get_argsort_function(PyArray_Descr *descr, - NPY_SORTKIND sort_kind, int descending, PyArray_ArgSortFunc **out_argsort) { - - switch (sort_kind) { - default: - case NPY_QUICKSORT: - *out_argsort = &npy_aquicksort; - break; - case NPY_HEAPSORT: - *out_argsort = &npy_aheapsort; - break; - case NPY_STABLESORT: - *out_argsort = &npy_atimsort; - break; - } - - return 0; -} - // PyArray_ArgFunc // The max element is the one with 
the highest unicode code point. int @@ -757,8 +664,6 @@ static PyType_Slot PyArray_StringDType_Slots[] = { {NPY_DT_setitem, &stringdtype_setitem}, {NPY_DT_getitem, &stringdtype_getitem}, {NPY_DT_sort_compare, &stringdtype_sort_compare}, - {NPY_DT_get_sort_function, &stringdtype_get_sort_function}, - {NPY_DT_get_argsort_function, &stringdtype_get_argsort_function}, {NPY_DT_ensure_canonical, &stringdtype_ensure_canonical}, {NPY_DT_PyArray_ArrFuncs_nonzero, &nonzero}, {NPY_DT_PyArray_ArrFuncs_compare, &compare}, diff --git a/numpy/_core/src/npysort/heapsort.cpp b/numpy/_core/src/npysort/heapsort.cpp index d9a1379eebf8..ab95bdb619c4 100644 --- a/numpy/_core/src/npysort/heapsort.cpp +++ b/numpy/_core/src/npysort/heapsort.cpp @@ -54,7 +54,7 @@ npy_heapsort(void *start, npy_intp num, void *varr) { PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; if (elsize == 0) { return 0; /* no need for sorting elements of no size */ } @@ -116,7 +116,7 @@ npy_aheapsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) char *v = (char *)vv; PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = _PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; npy_intp *a, i, j, l, tmp; /* The array needs to be offset by one for heapsort indexing */ diff --git a/numpy/_core/src/npysort/mergesort.cpp b/numpy/_core/src/npysort/mergesort.cpp index 2d0ae02dce5a..a68c1340bba5 100644 --- a/numpy/_core/src/npysort/mergesort.cpp +++ b/numpy/_core/src/npysort/mergesort.cpp @@ -385,7 +385,7 @@ npy_mergesort(void *start, npy_intp num, void *varr) { PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = 
PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; char *pl = (char *)start; char *pr = pl + num * elsize; char *pw; @@ -461,7 +461,7 @@ npy_amergesort(void *v, npy_intp *tosort, npy_intp num, void *varr) { PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = _PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; npy_intp *pl, *pr, *pw; /* Items that have zero size don't make sense to sort */ diff --git a/numpy/_core/src/npysort/quicksort.cpp b/numpy/_core/src/npysort/quicksort.cpp index 4fd3a02be27a..ad1c0745551e 100644 --- a/numpy/_core/src/npysort/quicksort.cpp +++ b/numpy/_core/src/npysort/quicksort.cpp @@ -510,7 +510,7 @@ npy_quicksort(void *start, npy_intp num, void *varr) { PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; char *vp; char *pl = (char *)start; char *pr = pl + (num - 1) * elsize; @@ -616,7 +616,7 @@ npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) char *v = (char *)vv; PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = _PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; char *vp; npy_intp *pl = tosort; npy_intp *pr = tosort + num - 1; diff --git a/numpy/_core/src/npysort/timsort.cpp b/numpy/_core/src/npysort/timsort.cpp index 9e7fed5d74cf..196aabee84a0 100644 --- a/numpy/_core/src/npysort/timsort.cpp +++ b/numpy/_core/src/npysort/timsort.cpp @@ -2250,7 +2250,7 @@ npy_timsort(void *start, npy_intp num, void *varr) { PyArrayObject *arr = reinterpret_cast(varr); size_t 
len = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; int ret; npy_intp l, n, stack_ptr, minrun; run stack[TIMSORT_STACK_SIZE]; @@ -2686,7 +2686,7 @@ npy_atimsort(void *start, npy_intp *tosort, npy_intp num, void *varr) { PyArrayObject *arr = reinterpret_cast(varr); size_t len = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyArray_GetSortCompareFunction(PyArray_DESCR(arr)); + PyArray_CompareFunc *cmp = _PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; int ret; npy_intp l, n, stack_ptr, minrun; run stack[TIMSORT_STACK_SIZE]; From 9f09b1378074e8581d430a9cda4d668f947f596e Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Fri, 4 Apr 2025 20:42:16 -0400 Subject: [PATCH 16/54] BUG: Fix unnecessarily private function call due to underscore typo --- numpy/_core/src/npysort/heapsort.cpp | 2 +- numpy/_core/src/npysort/mergesort.cpp | 2 +- numpy/_core/src/npysort/quicksort.cpp | 2 +- numpy/_core/src/npysort/timsort.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/numpy/_core/src/npysort/heapsort.cpp b/numpy/_core/src/npysort/heapsort.cpp index ab95bdb619c4..492cd47262d8 100644 --- a/numpy/_core/src/npysort/heapsort.cpp +++ b/numpy/_core/src/npysort/heapsort.cpp @@ -116,7 +116,7 @@ npy_aheapsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) char *v = (char *)vv; PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = _PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; npy_intp *a, i, j, l, tmp; /* The array needs to be offset by one for heapsort indexing */ diff --git a/numpy/_core/src/npysort/mergesort.cpp b/numpy/_core/src/npysort/mergesort.cpp index a68c1340bba5..2fac0ccfafcd 100644 --- a/numpy/_core/src/npysort/mergesort.cpp +++ 
b/numpy/_core/src/npysort/mergesort.cpp @@ -461,7 +461,7 @@ npy_amergesort(void *v, npy_intp *tosort, npy_intp num, void *varr) { PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = _PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; npy_intp *pl, *pr, *pw; /* Items that have zero size don't make sense to sort */ diff --git a/numpy/_core/src/npysort/quicksort.cpp b/numpy/_core/src/npysort/quicksort.cpp index ad1c0745551e..1161b729d63a 100644 --- a/numpy/_core/src/npysort/quicksort.cpp +++ b/numpy/_core/src/npysort/quicksort.cpp @@ -616,7 +616,7 @@ npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) char *v = (char *)vv; PyArrayObject *arr = (PyArrayObject *)varr; npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = _PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; char *vp; npy_intp *pl = tosort; npy_intp *pr = tosort + num - 1; diff --git a/numpy/_core/src/npysort/timsort.cpp b/numpy/_core/src/npysort/timsort.cpp index 196aabee84a0..0f0f5721e7cf 100644 --- a/numpy/_core/src/npysort/timsort.cpp +++ b/numpy/_core/src/npysort/timsort.cpp @@ -2686,7 +2686,7 @@ npy_atimsort(void *start, npy_intp *tosort, npy_intp num, void *varr) { PyArrayObject *arr = reinterpret_cast(varr); size_t len = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = _PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; int ret; npy_intp l, n, stack_ptr, minrun; run stack[TIMSORT_STACK_SIZE]; From 7aeba26676f3d96ea7fca6cdd41af7b73fa9b54f Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Fri, 4 Apr 2025 20:44:28 -0400 Subject: [PATCH 17/54] MAINT: Fix whitespace typos --- numpy/_core/src/multiarray/dtypemeta.h | 2 -- numpy/_core/src/npysort/quicksort.cpp | 
2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index ca4e7886a96b..492296b91a7e 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ b/numpy/_core/src/multiarray/dtypemeta.h @@ -31,7 +31,6 @@ typedef struct { */ PyArrayDTypeMeta_SetItem *setitem; PyArrayDTypeMeta_GetItem *getitem; - /* * Either NULL or fetches a clearing function. Clearing means deallocating * any referenced data and setting it to a safe state. For Python objects @@ -45,7 +44,6 @@ typedef struct { * Python objects. */ PyArrayMethod_GetTraverseLoop *get_clear_loop; - /* Either NULL or a function that sets a function pointer to a traversal loop that fills an array with zero values appropriate for the dtype. If diff --git a/numpy/_core/src/npysort/quicksort.cpp b/numpy/_core/src/npysort/quicksort.cpp index 1161b729d63a..50b48c374d84 100644 --- a/numpy/_core/src/npysort/quicksort.cpp +++ b/numpy/_core/src/npysort/quicksort.cpp @@ -606,7 +606,7 @@ npy_quicksort(void *start, npy_intp num, void *varr) } free(vp); - + return 0; } From c7481b806029d8bcea53dd776d62a832c4febc73 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Fri, 11 Apr 2025 16:27:07 -0400 Subject: [PATCH 18/54] ENH: Allow flexible sorting compare for arr or descr in npy_sort functions --- numpy/_core/src/npysort/heapsort.cpp | 16 +++++--- numpy/_core/src/npysort/npysort_common.h | 20 ++++++++++ numpy/_core/src/npysort/quicksort.cpp | 16 +++++--- numpy/_core/src/npysort/timsort.cpp | 48 +++++++++++++----------- 4 files changed, 66 insertions(+), 34 deletions(-) diff --git a/numpy/_core/src/npysort/heapsort.cpp b/numpy/_core/src/npysort/heapsort.cpp index 492cd47262d8..5ee7c1542581 100644 --- a/numpy/_core/src/npysort/heapsort.cpp +++ b/numpy/_core/src/npysort/heapsort.cpp @@ -52,9 +52,11 @@ NPY_NO_EXPORT int npy_heapsort(void *start, npy_intp num, void *varr) { - PyArrayObject *arr = (PyArrayObject *)varr; - npy_intp elsize = 
PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + void *arr; + npy_intp elsize; + PyArray_CompareFunc *cmp; + fill_sort_data_from_arr_or_descr(varr, &arr, &elsize, &cmp); + if (elsize == 0) { return 0; /* no need for sorting elements of no size */ } @@ -114,9 +116,11 @@ NPY_NO_EXPORT int npy_aheapsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) { char *v = (char *)vv; - PyArrayObject *arr = (PyArrayObject *)varr; - npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + void *arr; + npy_intp elsize; + PyArray_CompareFunc *cmp; + fill_sort_data_from_arr_or_descr(varr, &arr, &elsize, &cmp); + npy_intp *a, i, j, l, tmp; /* The array needs to be offset by one for heapsort indexing */ diff --git a/numpy/_core/src/npysort/npysort_common.h b/numpy/_core/src/npysort/npysort_common.h index 0680ae52afe3..6f4a8d743177 100644 --- a/numpy/_core/src/npysort/npysort_common.h +++ b/numpy/_core/src/npysort/npysort_common.h @@ -10,6 +10,26 @@ extern "C" { #endif +static inline void +fill_sort_data_from_arr_or_descr(void *arr_or_descr, void **out_arr_or_descr, + npy_intp *elsize, PyArray_CompareFunc **out_cmp) +{ + if (PyArray_Check(arr_or_descr)) { + PyArrayObject *arr = (PyArrayObject *)arr_or_descr; + *out_arr_or_descr = arr; + *elsize = PyArray_ITEMSIZE(arr); + *out_cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + } + else { + PyArray_Descr *descr = (PyArray_Descr *)arr_or_descr; + *out_arr_or_descr = descr; + *elsize = PyDataType_ELSIZE(descr); + *out_cmp = (PyArray_CompareFunc *)PyArray_GetSortCompareFunction(descr); + } +} + + + /* ***************************************************************************** ** SWAP MACROS ** diff --git a/numpy/_core/src/npysort/quicksort.cpp b/numpy/_core/src/npysort/quicksort.cpp index 50b48c374d84..3ddda8ad0e0b 100644 --- a/numpy/_core/src/npysort/quicksort.cpp +++ 
b/numpy/_core/src/npysort/quicksort.cpp @@ -508,9 +508,11 @@ string_aquicksort_(type *vv, npy_intp *tosort, npy_intp num, void *varr) NPY_NO_EXPORT int npy_quicksort(void *start, npy_intp num, void *varr) { - PyArrayObject *arr = (PyArrayObject *)varr; - npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + void *arr; + npy_intp elsize; + PyArray_CompareFunc *cmp; + fill_sort_data_from_arr_or_descr(varr, &arr, &elsize, &cmp); + char *vp; char *pl = (char *)start; char *pr = pl + (num - 1) * elsize; @@ -614,9 +616,11 @@ NPY_NO_EXPORT int npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) { char *v = (char *)vv; - PyArrayObject *arr = (PyArrayObject *)varr; - npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + void *arr; + npy_intp elsize; + PyArray_CompareFunc *cmp; + fill_sort_data_from_arr_or_descr(varr, &arr, &elsize, &cmp); + char *vp; npy_intp *pl = tosort; npy_intp *pr = tosort + num - 1; diff --git a/numpy/_core/src/npysort/timsort.cpp b/numpy/_core/src/npysort/timsort.cpp index 0f0f5721e7cf..62b87d00dcf0 100644 --- a/numpy/_core/src/npysort/timsort.cpp +++ b/numpy/_core/src/npysort/timsort.cpp @@ -1878,7 +1878,7 @@ resize_buffer_char(buffer_char *buffer, npy_intp new_size) static npy_intp npy_count_run(char *arr, npy_intp l, npy_intp num, npy_intp minrun, char *vp, - size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) + size_t len, PyArray_CompareFunc *cmp, void *py_arr) { npy_intp sz; char *pl, *pi, *pj, *pr; @@ -1939,7 +1939,7 @@ npy_count_run(char *arr, npy_intp l, npy_intp num, npy_intp minrun, char *vp, static npy_intp npy_gallop_right(const char *arr, const npy_intp size, const char *key, - size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) + size_t len, PyArray_CompareFunc *cmp, void *py_arr) { npy_intp last_ofs, ofs, m; @@ -1984,7 +1984,7 @@ npy_gallop_right(const char 
*arr, const npy_intp size, const char *key, static npy_intp npy_gallop_left(const char *arr, const npy_intp size, const char *key, - size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) + size_t len, PyArray_CompareFunc *cmp, void *py_arr) { npy_intp last_ofs, ofs, l, m, r; @@ -2031,7 +2031,7 @@ npy_gallop_left(const char *arr, const npy_intp size, const char *key, static void npy_merge_left(char *p1, npy_intp l1, char *p2, npy_intp l2, char *p3, - size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) + size_t len, PyArray_CompareFunc *cmp, void *py_arr) { char *end = p2 + l2 * len; memcpy(p3, p1, sizeof(char) * l1 * len); @@ -2060,7 +2060,7 @@ npy_merge_left(char *p1, npy_intp l1, char *p2, npy_intp l2, char *p3, static void npy_merge_right(char *p1, npy_intp l1, char *p2, npy_intp l2, char *p3, - size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr) + size_t len, PyArray_CompareFunc *cmp, void *py_arr) { npy_intp ofs; char *start = p1 - len; @@ -2095,7 +2095,7 @@ npy_merge_right(char *p1, npy_intp l1, char *p2, npy_intp l2, char *p3, static int npy_merge_at(char *arr, const run *stack, const npy_intp at, buffer_char *buffer, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) + void *py_arr) { int ret; npy_intp s1, l1, s2, l2, k; @@ -2145,7 +2145,7 @@ npy_merge_at(char *arr, const run *stack, const npy_intp at, static int npy_try_collapse(char *arr, run *stack, npy_intp *stack_ptr, buffer_char *buffer, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) + void *py_arr) { int ret; npy_intp A, B, C, top; @@ -2205,7 +2205,7 @@ npy_try_collapse(char *arr, run *stack, npy_intp *stack_ptr, static int npy_force_collapse(char *arr, run *stack, npy_intp *stack_ptr, buffer_char *buffer, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) + void *py_arr) { int ret; npy_intp top = *stack_ptr; @@ -2248,9 +2248,11 @@ npy_force_collapse(char *arr, run *stack, npy_intp *stack_ptr, NPY_NO_EXPORT int npy_timsort(void *start, 
npy_intp num, void *varr) { - PyArrayObject *arr = reinterpret_cast(varr); - size_t len = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + void *arr; + npy_intp len; + PyArray_CompareFunc *cmp; + fill_sort_data_from_arr_or_descr(varr, &arr, &len, &cmp); + int ret; npy_intp l, n, stack_ptr, minrun; run stack[TIMSORT_STACK_SIZE]; @@ -2313,7 +2315,7 @@ npy_timsort(void *start, npy_intp num, void *varr) static npy_intp npy_acount_run(char *arr, npy_intp *tosort, npy_intp l, npy_intp num, npy_intp minrun, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) + void *py_arr) { npy_intp sz; npy_intp vi; @@ -2379,7 +2381,7 @@ npy_acount_run(char *arr, npy_intp *tosort, npy_intp l, npy_intp num, static npy_intp npy_agallop_left(const char *arr, const npy_intp *tosort, const npy_intp size, const char *key, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) + void *py_arr) { npy_intp last_ofs, ofs, l, m, r; @@ -2428,7 +2430,7 @@ npy_agallop_left(const char *arr, const npy_intp *tosort, const npy_intp size, static npy_intp npy_agallop_right(const char *arr, const npy_intp *tosort, const npy_intp size, const char *key, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) + void *py_arr) { npy_intp last_ofs, ofs, m; @@ -2474,7 +2476,7 @@ npy_agallop_right(const char *arr, const npy_intp *tosort, const npy_intp size, static void npy_amerge_left(char *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, npy_intp *p3, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) + PyArray_CompareFunc *cmp, void *py_arr) { npy_intp *end = p2 + l2; memcpy(p3, p1, sizeof(npy_intp) * l1); @@ -2498,7 +2500,7 @@ npy_amerge_left(char *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, static void npy_amerge_right(char *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, npy_intp l2, npy_intp *p3, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) + PyArray_CompareFunc *cmp, void *py_arr) { 
npy_intp ofs; npy_intp *start = p1 - 1; @@ -2527,7 +2529,7 @@ npy_amerge_right(char *arr, npy_intp *p1, npy_intp l1, npy_intp *p2, static int npy_amerge_at(char *arr, npy_intp *tosort, const run *stack, const npy_intp at, buffer_intp *buffer, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) + void *py_arr) { int ret; npy_intp s1, l1, s2, l2, k; @@ -2577,7 +2579,7 @@ npy_amerge_at(char *arr, npy_intp *tosort, const run *stack, const npy_intp at, static int npy_atry_collapse(char *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, buffer_intp *buffer, size_t len, PyArray_CompareFunc *cmp, - PyArrayObject *py_arr) + void *py_arr) { int ret; npy_intp A, B, C, top; @@ -2638,7 +2640,7 @@ npy_atry_collapse(char *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, static int npy_aforce_collapse(char *arr, npy_intp *tosort, run *stack, npy_intp *stack_ptr, buffer_intp *buffer, size_t len, - PyArray_CompareFunc *cmp, PyArrayObject *py_arr) + PyArray_CompareFunc *cmp, void *py_arr) { int ret; npy_intp top = *stack_ptr; @@ -2684,9 +2686,11 @@ npy_aforce_collapse(char *arr, npy_intp *tosort, run *stack, NPY_NO_EXPORT int npy_atimsort(void *start, npy_intp *tosort, npy_intp num, void *varr) { - PyArrayObject *arr = reinterpret_cast(varr); - size_t len = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + void *arr; + npy_intp len; + PyArray_CompareFunc *cmp; + fill_sort_data_from_arr_or_descr(varr, &arr, &len, &cmp); + int ret; npy_intp l, n, stack_ptr, minrun; run stack[TIMSORT_STACK_SIZE]; From aa3415a4a484110ddd38fcaa25686292a1acd727 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Fri, 11 Apr 2025 17:17:02 -0400 Subject: [PATCH 19/54] ENH: Add new sort func implementations and use in stringdtype --- numpy/_core/src/common/npy_sort.h.src | 17 +++ numpy/_core/src/multiarray/item_selection.c | 34 +++++ .../_core/src/multiarray/stringdtype/dtype.c | 118 +++++++++++++++++- 
numpy/_core/src/npysort/heapsort.cpp | 20 ++- numpy/_core/src/npysort/mergesort.cpp | 18 +++ numpy/_core/src/npysort/npysort_common.h | 29 ++++- numpy/_core/src/npysort/quicksort.cpp | 18 +++ numpy/_core/src/npysort/timsort.cpp | 18 +++ 8 files changed, 266 insertions(+), 6 deletions(-) diff --git a/numpy/_core/src/common/npy_sort.h.src b/numpy/_core/src/common/npy_sort.h.src index d6e4357225a8..1c93ee566eba 100644 --- a/numpy/_core/src/common/npy_sort.h.src +++ b/numpy/_core/src/common/npy_sort.h.src @@ -5,6 +5,7 @@ #include #include #include +#include #define NPY_ENOMEM 1 #define NPY_ECOMP 2 @@ -97,6 +98,22 @@ NPY_NO_EXPORT int atimsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void * ***************************************************************************** */ +NPY_NO_EXPORT int npy_quicksort_with_context(void *vec, npy_intp cnt, + PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); +NPY_NO_EXPORT int npy_heapsort_with_context(void *vec, npy_intp cnt, + PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); +NPY_NO_EXPORT int npy_mergesort_with_context(void *vec, npy_intp cnt, + PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); +NPY_NO_EXPORT int npy_timsort_with_context(void *vec, npy_intp cnt, + PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); +NPY_NO_EXPORT int npy_aquicksort_with_context(void *vec, npy_intp *ind, npy_intp cnt, + PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); +NPY_NO_EXPORT int npy_aheapsort_with_context(void *vec, npy_intp *ind, npy_intp cnt, + PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); +NPY_NO_EXPORT int npy_amergesort_with_context(void *vec, npy_intp *ind, npy_intp cnt, + PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); +NPY_NO_EXPORT int npy_atimsort_with_context(void *vec, npy_intp *ind, npy_intp cnt, + 
PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); NPY_NO_EXPORT int npy_quicksort(void *vec, npy_intp cnt, void *arr); NPY_NO_EXPORT int npy_heapsort(void *vec, npy_intp cnt, void *arr); diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index 863ae2bfe219..64f3b95f39a3 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -1599,6 +1599,23 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) } if (sort_with_array == NULL) { + if (PyArray_GetSortCompareFunction(PyArray_DESCR(op)) != NULL) { + switch (which) { + default: + case NPY_QUICKSORT: + sort = npy_quicksort_with_context; + break; + case NPY_HEAPSORT: + sort = npy_heapsort_with_context; + break; + case NPY_STABLESORT: + sort = npy_timsort_with_context; + break; + } + } + } + + if (sort == NULL) { if (PyDataType_GetArrFuncs(PyArray_DESCR(op))->compare != NULL) { switch (which) { default: @@ -1755,6 +1772,23 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) } if (argsort_with_array == NULL) { + if (PyArray_GetSortCompareFunction(PyArray_DESCR(op)) != NULL) { + switch (which) { + default: + case NPY_QUICKSORT: + argsort = npy_aquicksort_with_context; + break; + case NPY_HEAPSORT: + argsort = npy_aheapsort_with_context; + break; + case NPY_STABLESORT: + argsort = npy_atimsort_with_context; + break; + } + } + } + + if (argsort == NULL) { if (PyDataType_GetArrFuncs(PyArray_DESCR(op))->compare != NULL) { switch (which) { default: diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.c b/numpy/_core/src/multiarray/stringdtype/dtype.c index d5fe246453f8..1007afdc858e 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.c +++ b/numpy/_core/src/multiarray/stringdtype/dtype.c @@ -518,9 +518,121 @@ _compare(void *a, void *b, PyArray_StringDTypeObject *descr_a, } static int -stringdtype_sort_compare(void *a, void *b, void *arr) { - 
PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)PyArray_DESCR(arr); - return _compare(a, b, descr, descr); +stringdtype_sort_compare(void *a, void *b, PyArray_Descr *descr) { + PyArray_StringDTypeObject *string_descr = (PyArray_StringDTypeObject *)descr; + return _compare(a, b, string_descr, string_descr); +} + +int +_stringdtype_sort(void *start, npy_intp num, PyArrayMethod_Context *context, + NpyAuxData *auxdata, NpyAuxData **out_auxdata, + PyArray_SortFunc *sort) { + PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)context->descriptors[0]; + + NpyString_acquire_allocator(descr); + int result = sort(start, num, context, auxdata, out_auxdata); + NpyString_release_allocator(descr->allocator); + + return result; +} + +int +_stringdtype_quicksort(void *start, npy_intp num, PyArrayMethod_Context *context, + NpyAuxData *auxdata, NpyAuxData **out_auxdata) { + return _stringdtype_sort(start, num, context, auxdata, out_auxdata, + &npy_quicksort_with_context); +} + +int +_stringdtype_heapsort(void *start, npy_intp num, PyArrayMethod_Context *context, + NpyAuxData *auxdata, NpyAuxData **out_auxdata) { + return _stringdtype_sort(start, num, context, auxdata, out_auxdata, + &npy_heapsort_with_context); +} + +int +_stringdtype_timsort(void *start, npy_intp num, PyArrayMethod_Context *context, + NpyAuxData *auxdata, NpyAuxData **out_auxdata) { + return _stringdtype_sort(start, num, context, auxdata, out_auxdata, + &npy_timsort_with_context); +} + +int +stringdtype_get_sort_function(PyArray_Descr *descr, + NPY_SORTKIND sort_kind, int descending, PyArray_SortFunc **out_sort) { + + switch (sort_kind) { + default: + case NPY_QUICKSORT: + *out_sort = &_stringdtype_quicksort; + break; + case NPY_HEAPSORT: + *out_sort = &_stringdtype_heapsort; + break; + case NPY_STABLESORT: + *out_sort = &_stringdtype_timsort; + break; + } + + return 0; +} + +int +_stringdtype_argsort(void *vv, npy_intp *tosort, npy_intp num, + PyArrayMethod_Context *context, NpyAuxData 
*auxdata, + NpyAuxData **out_auxdata, + PyArray_ArgSortFunc *argsort) { + PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)context->descriptors[0]; + + NpyString_acquire_allocator(descr); + int result = argsort(vv, tosort, num, context, auxdata, out_auxdata); + NpyString_release_allocator(descr->allocator); + + return result; +} + +int +_stringdtype_aquicksort(void *vv, npy_intp *tosort, npy_intp n, + PyArrayMethod_Context *context, NpyAuxData *auxdata, + NpyAuxData **out_auxdata) { + return _stringdtype_argsort(vv, tosort, n, context, auxdata, out_auxdata, + &npy_aquicksort_with_context); +} + +int +_stringdtype_aheapsort(void *vv, npy_intp *tosort, npy_intp n, + PyArrayMethod_Context *context, NpyAuxData *auxdata, + NpyAuxData **out_auxdata) { + return _stringdtype_argsort(vv, tosort, n, context, auxdata, out_auxdata, + &npy_aheapsort_with_context); +} + +int +_stringdtype_atimsort(void *vv, npy_intp *tosort, npy_intp n, + PyArrayMethod_Context *context, NpyAuxData *auxdata, + NpyAuxData **out_auxdata) { + return _stringdtype_argsort(vv, tosort, n, context, auxdata, out_auxdata, + &npy_atimsort_with_context); +} + +int +stringdtype_get_argsort_function(PyArray_Descr *descr, + NPY_SORTKIND sort_kind, int descending, PyArray_ArgSortFunc **out_argsort) { + + switch (sort_kind) { + default: + case NPY_QUICKSORT: + *out_argsort = &_stringdtype_aquicksort; + break; + case NPY_HEAPSORT: + *out_argsort = &_stringdtype_aheapsort; + break; + case NPY_STABLESORT: + *out_argsort = &_stringdtype_atimsort; + break; + } + + return 0; } // PyArray_ArgFunc diff --git a/numpy/_core/src/npysort/heapsort.cpp b/numpy/_core/src/npysort/heapsort.cpp index 5ee7c1542581..75d2979cf63b 100644 --- a/numpy/_core/src/npysort/heapsort.cpp +++ b/numpy/_core/src/npysort/heapsort.cpp @@ -49,6 +49,24 @@ ***************************************************************************** */ +NPY_NO_EXPORT int +npy_heapsort_with_context(void *start, npy_intp num, + PyArrayMethod_Context 
*context, NpyAuxData *auxdata, + NpyAuxData **out_auxdata) +{ + return handle_npysort_with_context(start, num, context, auxdata, + out_auxdata, &npy_heapsort); +} + +NPY_NO_EXPORT int +npy_aheapsort_with_context(void *vv, npy_intp *tosort, npy_intp num, + PyArrayMethod_Context *context, NpyAuxData *auxdata, + NpyAuxData **out_auxdata) +{ + return handle_npyasort_with_context(vv, tosort, num, context, auxdata, + out_auxdata, &npy_aheapsort); +} + NPY_NO_EXPORT int npy_heapsort(void *start, npy_intp num, void *varr) { @@ -56,7 +74,7 @@ npy_heapsort(void *start, npy_intp num, void *varr) npy_intp elsize; PyArray_CompareFunc *cmp; fill_sort_data_from_arr_or_descr(varr, &arr, &elsize, &cmp); - + if (elsize == 0) { return 0; /* no need for sorting elements of no size */ } diff --git a/numpy/_core/src/npysort/mergesort.cpp b/numpy/_core/src/npysort/mergesort.cpp index 2fac0ccfafcd..50ad4c17466b 100644 --- a/numpy/_core/src/npysort/mergesort.cpp +++ b/numpy/_core/src/npysort/mergesort.cpp @@ -335,6 +335,24 @@ string_amergesort_(type *v, npy_intp *tosort, npy_intp num, void *varr) ***************************************************************************** */ +NPY_NO_EXPORT int +npy_mergesort_with_context(void *start, npy_intp num, + PyArrayMethod_Context *context, NpyAuxData *auxdata, + NpyAuxData **out_auxdata) +{ + return handle_npysort_with_context(start, num, context, auxdata, + out_auxdata, &npy_mergesort); +} + +NPY_NO_EXPORT int +npy_amergesort_with_context(void *vv, npy_intp *tosort, npy_intp num, + PyArrayMethod_Context *context, NpyAuxData *auxdata, + NpyAuxData **out_auxdata) +{ + return handle_npyasort_with_context(vv, tosort, num, context, auxdata, + out_auxdata, &npy_amergesort); +} + static void npy_mergesort0(char *pl, char *pr, char *pw, char *vp, npy_intp elsize, PyArray_CompareFunc *cmp, PyArrayObject *arr) diff --git a/numpy/_core/src/npysort/npysort_common.h b/numpy/_core/src/npysort/npysort_common.h index 6f4a8d743177..2e78557f0ede 100644 --- 
a/numpy/_core/src/npysort/npysort_common.h +++ b/numpy/_core/src/npysort/npysort_common.h @@ -10,6 +10,33 @@ extern "C" { #endif + +/* + ***************************************************************************** + ** NEW SORTFUNC HANDLERS ** + ***************************************************************************** + */ + +static inline int +handle_npysort_with_context(void *start, npy_intp num, + PyArrayMethod_Context *context, NpyAuxData *auxdata, + NpyAuxData **out_auxdata, + PyArray_SortFuncWithArray *sort) +{ + PyArray_Descr *descr = context->descriptors[0]; + return sort(start, num, descr); +} + +static inline int +handle_npyasort_with_context(void *vv, npy_intp *tosort, npy_intp num, + PyArrayMethod_Context *context, NpyAuxData *auxdata, + NpyAuxData **out_auxdata, + PyArray_ArgSortFuncWithArray *asort) +{ + PyArray_Descr *descr = context->descriptors[0]; + return asort(vv, tosort, num, descr); +} + static inline void fill_sort_data_from_arr_or_descr(void *arr_or_descr, void **out_arr_or_descr, npy_intp *elsize, PyArray_CompareFunc **out_cmp) @@ -28,8 +55,6 @@ fill_sort_data_from_arr_or_descr(void *arr_or_descr, void **out_arr_or_descr, } } - - /* ***************************************************************************** ** SWAP MACROS ** diff --git a/numpy/_core/src/npysort/quicksort.cpp b/numpy/_core/src/npysort/quicksort.cpp index 3ddda8ad0e0b..c4b6e4adad55 100644 --- a/numpy/_core/src/npysort/quicksort.cpp +++ b/numpy/_core/src/npysort/quicksort.cpp @@ -505,6 +505,24 @@ string_aquicksort_(type *vv, npy_intp *tosort, npy_intp num, void *varr) ***************************************************************************** */ +NPY_NO_EXPORT int +npy_quicksort_with_context(void *start, npy_intp num, + PyArrayMethod_Context *context, NpyAuxData *auxdata, + NpyAuxData **out_auxdata) +{ + return handle_npysort_with_context(start, num, context, auxdata, + out_auxdata, &npy_quicksort); +} + +NPY_NO_EXPORT int +npy_aquicksort_with_context(void *vv, 
npy_intp *tosort, npy_intp num, + PyArrayMethod_Context *context, NpyAuxData *auxdata, + NpyAuxData **out_auxdata) +{ + return handle_npyasort_with_context(vv, tosort, num, context, auxdata, + out_auxdata, &npy_aquicksort); +} + NPY_NO_EXPORT int npy_quicksort(void *start, npy_intp num, void *varr) { diff --git a/numpy/_core/src/npysort/timsort.cpp b/numpy/_core/src/npysort/timsort.cpp index 62b87d00dcf0..10408c8b0b86 100644 --- a/numpy/_core/src/npysort/timsort.cpp +++ b/numpy/_core/src/npysort/timsort.cpp @@ -1851,6 +1851,24 @@ string_atimsort_(void *start, npy_intp *tosort, npy_intp num, void *varr) ***************************************************************************** */ +NPY_NO_EXPORT int +npy_timsort_with_context(void *start, npy_intp num, + PyArrayMethod_Context *context, NpyAuxData *auxdata, + NpyAuxData **out_auxdata) +{ + return handle_npysort_with_context(start, num, context, auxdata, + out_auxdata, &npy_timsort); +} + +NPY_NO_EXPORT int +npy_atimsort_with_context(void *vv, npy_intp *tosort, npy_intp num, + PyArrayMethod_Context *context, NpyAuxData *auxdata, + NpyAuxData **out_auxdata) +{ + return handle_npyasort_with_context(vv, tosort, num, context, auxdata, + out_auxdata, &npy_atimsort); +} + typedef struct { char *pw; npy_intp size; From e725ed5241a09f03189cd3b2dc927e193f340c59 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Fri, 11 Apr 2025 18:31:31 -0400 Subject: [PATCH 20/54] DOC: Fix missing newline in ctype doc --- doc/source/reference/c-api/array.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index 4b5627cfe7db..ca124533e1d3 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -3515,7 +3515,7 @@ member of ``PyArrayDTypeMeta_Spec`` struct. .. c:macro:: NPY_DT_get_sort_function -.. c:type:: int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, +.. 
c:type:: int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, \ npy_intp sort_kind, int descending, PyArray_SortFunc **out_sort); If defined, sets a custom sorting function for the DType for each of @@ -3523,7 +3523,7 @@ member of ``PyArrayDTypeMeta_Spec`` struct. .. c:macro:: NPY_DT_get_argsort_function -.. c:type:: int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, +.. c:type:: int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, \ npy_intp sort_kind, int descending, PyArray_ArgSortFunc **out_argsort); If defined, sets a custom argsorting function for the DType for each of From 6d0ba214ee5368d82af8768f9c226d0200a498ba Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Fri, 11 Apr 2025 22:12:58 -0400 Subject: [PATCH 21/54] DOC: Add sortfunc typedef docs --- doc/source/reference/c-api/array.rst | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index ca124533e1d3..3f6a3bc52cc3 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -1873,6 +1873,29 @@ described below. pointer. Currently this is used for zero-filling and clearing arrays storing embedded references. +.. c:type:: int (PyArray_SortFunc)( \ + void *start, npy_intp num, PyArrayMethod_Context *context, + NpyAuxData *auxdata, NpyAuxData **out_auxdata) + + A function to sort a buffer of data. The *start* is a pointer to the + beginning of the buffer containing *num* elements. A function of this + type is returned by the `get_sort_function` function in the DType + slots, where *context* is passed in containing the descriptor for the + array. Returns 0 on success, -1 on failure. + +.. c:type:: int (PyArray_ArgSortFunc)( \ + void *start, npy_intp *tosort, npy_intp num, \ + PyArrayMethod_Context *context, NpyAuxData *auxdata, \ + NpyAuxData **out_auxdata) + + A function to arg-sort a buffer of data. 
The *start* is a pointer to the + beginning of the buffer containing *num* elements. The *tosort* is a + pointer to an array of indices that will be filled in with the + indices of the sorted elements. A function of this type is returned by + the `get_argsort_function` function in the DType slots, where + *context* is passed in containing the descriptor for the array. + Returns 0 on success, -1 on failure. + API Functions and Typedefs ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -3562,7 +3585,7 @@ DType API slots but for now we have exposed the legacy Computes a comparison for `numpy.sort`, implements ``PyArray_CompareFunc``. If `NPY_DT_sort_compare` is defined, it will be used instead. This slot may - be deprecated in the future. + be deprecated in the future. .. c:macro:: NPY_DT_PyArray_ArrFuncs_argmax From 23204c5c47852f1aa2ef7dd72c9be1487adcb6b6 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Fri, 11 Apr 2025 23:14:57 -0400 Subject: [PATCH 22/54] DOC: Fix missing newline in ctype doc --- doc/source/reference/c-api/array.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index 3f6a3bc52cc3..1a7f5df01672 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -1874,7 +1874,7 @@ described below. embedded references. .. c:type:: int (PyArray_SortFunc)( \ - void *start, npy_intp num, PyArrayMethod_Context *context, + void *start, npy_intp num, PyArrayMethod_Context *context, \ NpyAuxData *auxdata, NpyAuxData **out_auxdata) A function to sort a buffer of data. 
The *start* is a pointer to the From ede3462ebcf96f9a6bb55b1c15e79f193ad28634 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sat, 12 Apr 2025 15:45:18 -0400 Subject: [PATCH 23/54] ENH: Define SortCompareFunc type --- numpy/_core/include/numpy/dtype_api.h | 2 ++ numpy/_core/src/multiarray/dtypemeta.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index c222b84cb327..e124bbc7ba44 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -483,6 +483,8 @@ typedef PyObject *(PyArrayDTypeMeta_GetItem)(PyArray_Descr *, char *); typedef int (PyArray_CompareFuncWithDescr)(const void *, const void *, PyArray_Descr *); +typedef int (PyArray_SortCompareFunc)(const void *, const void *, + PyArray_Descr *); typedef int (PyArray_SortFunc)(void *, npy_intp, PyArrayMethod_Context *, NpyAuxData *, NpyAuxData **); diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index 492296b91a7e..c791a45d17a4 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ b/numpy/_core/src/multiarray/dtypemeta.h @@ -72,7 +72,7 @@ typedef struct { PyArrayDTypeMeta_GetSortFunction *get_sort_function; PyArrayDTypeMeta_GetArgSortFunction *get_argsort_function; PyArray_CompareFuncWithDescr *compare; - PyArray_CompareFuncWithDescr *sort_compare; + PyArray_SortCompareFunc *sort_compare; /* * The casting implementation (ArrayMethod) to convert between two @@ -330,7 +330,7 @@ PyArray_GetCompareFunction(PyArray_Descr *descr) return NPY_DT_SLOTS(NPY_DTYPE(descr))->compare; } -static inline PyArray_CompareFuncWithDescr * +static inline PyArray_SortCompareFunc * PyArray_GetSortCompareFunction(PyArray_Descr *descr) { return NPY_DT_SLOTS(NPY_DTYPE(descr))->sort_compare; From 889ee1161354b0eaac62b95fed375b91c70b6d55 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Tue, 13 May 2025 17:59:37 -0400 Subject: [PATCH 24/54] Update dtype 
sorting signatures: move context, move out auxdata to get_loop --- doc/source/reference/c-api/array.rst | 15 ++--- numpy/_core/include/numpy/dtype_api.h | 16 ++--- numpy/_core/src/common/npy_sort.h.src | 32 +++++----- numpy/_core/src/multiarray/dtypemeta.h | 10 +-- numpy/_core/src/multiarray/item_selection.c | 24 ++++--- .../_core/src/multiarray/stringdtype/dtype.c | 63 +++++++++---------- numpy/_core/src/npysort/heapsort.cpp | 18 +++--- numpy/_core/src/npysort/mergesort.cpp | 16 +++-- numpy/_core/src/npysort/npysort_common.h | 12 ++-- numpy/_core/src/npysort/quicksort.cpp | 18 +++--- numpy/_core/src/npysort/timsort.cpp | 18 +++--- 11 files changed, 116 insertions(+), 126 deletions(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index 1a7f5df01672..539e6026423f 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -1874,8 +1874,8 @@ described below. embedded references. .. c:type:: int (PyArray_SortFunc)( \ - void *start, npy_intp num, PyArrayMethod_Context *context, \ - NpyAuxData *auxdata, NpyAuxData **out_auxdata) + PyArrayMethod_Context *data, void *start, \ + npy_intp num, NpyAuxData *auxdata) A function to sort a buffer of data. The *start* is a pointer to the beginning of the buffer containing *num* elements. A function of this @@ -1884,9 +1884,8 @@ described below. array. Returns 0 on success, -1 on failure. .. c:type:: int (PyArray_ArgSortFunc)( \ - void *start, npy_intp *tosort, npy_intp num, \ - PyArrayMethod_Context *context, NpyAuxData *auxdata, \ - NpyAuxData **out_auxdata) + PyArrayMethod_Context *data, void *start, \ + npy_intp *tosort, npy_intp num, NpyAuxData *auxdata) A function to arg-sort a buffer of data. The *start* is a pointer to the beginning of the buffer containing *num* elements. The *tosort* is a @@ -3539,7 +3538,8 @@ member of ``PyArrayDTypeMeta_Spec`` struct. .. c:macro:: NPY_DT_get_sort_function .. 
c:type:: int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, \ - npy_intp sort_kind, int descending, PyArray_SortFunc **out_sort); + npy_intp sort_kind, int descending, PyArray_SortFunc **out_sort, + NpyAuxData **out_auxdata) If defined, sets a custom sorting function for the DType for each of the sort kinds numpy implements. Returns 0 on success. @@ -3547,7 +3547,8 @@ member of ``PyArrayDTypeMeta_Spec`` struct. .. c:macro:: NPY_DT_get_argsort_function .. c:type:: int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, \ - npy_intp sort_kind, int descending, PyArray_ArgSortFunc **out_argsort); + npy_intp sort_kind, int descending, PyArray_ArgSortFunc **out_argsort, + NpyAuxData **out_auxdata) If defined, sets a custom argsorting function for the DType for each of the sort kinds numpy implements. Returns 0 on success. diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index e124bbc7ba44..5371427ef936 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -485,16 +485,16 @@ typedef int (PyArray_CompareFuncWithDescr)(const void *, const void *, PyArray_Descr *); typedef int (PyArray_SortCompareFunc)(const void *, const void *, PyArray_Descr *); -typedef int (PyArray_SortFunc)(void *, npy_intp, - PyArrayMethod_Context *, NpyAuxData *, - NpyAuxData **); -typedef int (PyArray_ArgSortFunc)(void *, npy_intp *, npy_intp, - PyArrayMethod_Context *, NpyAuxData *, - NpyAuxData **); +typedef int (PyArray_SortFunc)(PyArrayMethod_Context *, + void *, npy_intp, + NpyAuxData *); +typedef int (PyArray_ArgSortFunc)(PyArrayMethod_Context *, + void *, npy_intp *, npy_intp, + NpyAuxData *); typedef int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, - npy_intp, int, PyArray_SortFunc **); + npy_intp, int, PyArray_SortFunc **, NpyAuxData **); typedef int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, - npy_intp, int, PyArray_ArgSortFunc **); + npy_intp, int, PyArray_ArgSortFunc **, 
NpyAuxData **); #endif /* NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_ */ diff --git a/numpy/_core/src/common/npy_sort.h.src b/numpy/_core/src/common/npy_sort.h.src index 1c93ee566eba..52801726d1e6 100644 --- a/numpy/_core/src/common/npy_sort.h.src +++ b/numpy/_core/src/common/npy_sort.h.src @@ -98,22 +98,22 @@ NPY_NO_EXPORT int atimsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void * ***************************************************************************** */ -NPY_NO_EXPORT int npy_quicksort_with_context(void *vec, npy_intp cnt, - PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); -NPY_NO_EXPORT int npy_heapsort_with_context(void *vec, npy_intp cnt, - PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); -NPY_NO_EXPORT int npy_mergesort_with_context(void *vec, npy_intp cnt, - PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); -NPY_NO_EXPORT int npy_timsort_with_context(void *vec, npy_intp cnt, - PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); -NPY_NO_EXPORT int npy_aquicksort_with_context(void *vec, npy_intp *ind, npy_intp cnt, - PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); -NPY_NO_EXPORT int npy_aheapsort_with_context(void *vec, npy_intp *ind, npy_intp cnt, - PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); -NPY_NO_EXPORT int npy_amergesort_with_context(void *vec, npy_intp *ind, npy_intp cnt, - PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); -NPY_NO_EXPORT int npy_atimsort_with_context(void *vec, npy_intp *ind, npy_intp cnt, - PyArrayMethod_Context *context, NpyAuxData *auxdata, NpyAuxData **auxdata_out); +NPY_NO_EXPORT int npy_quicksort_with_context(PyArrayMethod_Context *context, void *vec, + npy_intp cnt, NpyAuxData *auxdata); +NPY_NO_EXPORT int npy_heapsort_with_context(PyArrayMethod_Context *context, void *vec, + npy_intp cnt, NpyAuxData 
*auxdata); +NPY_NO_EXPORT int npy_mergesort_with_context(PyArrayMethod_Context *context, void *vec, + npy_intp cnt, NpyAuxData *auxdata); +NPY_NO_EXPORT int npy_timsort_with_context(PyArrayMethod_Context *context, void *vec, + npy_intp cnt, NpyAuxData *auxdata); +NPY_NO_EXPORT int npy_aquicksort_with_context(PyArrayMethod_Context *context, void *vec, + npy_intp *ind, npy_intp cnt, NpyAuxData *auxdata); +NPY_NO_EXPORT int npy_aheapsort_with_context(PyArrayMethod_Context *context, void *vec, + npy_intp *ind, npy_intp cnt, NpyAuxData *auxdata); +NPY_NO_EXPORT int npy_amergesort_with_context(PyArrayMethod_Context *context, void *vec, + npy_intp *ind, npy_intp cnt, NpyAuxData *auxdata); +NPY_NO_EXPORT int npy_atimsort_with_context(PyArrayMethod_Context *context, void *vec, + npy_intp *ind, npy_intp cnt, NpyAuxData *auxdata); NPY_NO_EXPORT int npy_quicksort(void *vec, npy_intp cnt, void *arr); NPY_NO_EXPORT int npy_heapsort(void *vec, npy_intp cnt, void *arr); diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index c791a45d17a4..cbddc82a0315 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ b/numpy/_core/src/multiarray/dtypemeta.h @@ -300,27 +300,29 @@ PyArray_SETITEM(PyArrayObject *arr, char *itemptr, PyObject *v) static inline int PyArray_GetSortFunction(PyArray_Descr *descr, - NPY_SORTKIND which, int descending, PyArray_SortFunc **out_sort) + NPY_SORTKIND which, int descending, PyArray_SortFunc **out_sort, + NpyAuxData **out_auxdata) { if (NPY_DT_SLOTS(NPY_DTYPE(descr))->get_sort_function == NULL) { return -1; } NPY_DT_SLOTS(NPY_DTYPE(descr))->get_sort_function( - descr, which, descending, out_sort); + descr, which, descending, out_sort, out_auxdata); return 0; } static inline int PyArray_GetArgSortFunction(PyArray_Descr *descr, - NPY_SORTKIND which, int descending, PyArray_ArgSortFunc **out_argsort) + NPY_SORTKIND which, int descending, PyArray_ArgSortFunc **out_argsort, + NpyAuxData **out_auxdata) { if 
(NPY_DT_SLOTS(NPY_DTYPE(descr))->get_argsort_function == NULL) { return -1; } NPY_DT_SLOTS(NPY_DTYPE(descr))->get_argsort_function( - descr, which, descending, out_argsort); + descr, which, descending, out_argsort, out_auxdata); return 0; } diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index 64f3b95f39a3..3107e56ad982 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -1192,7 +1192,7 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, */ static int _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort, - PyArray_SortFuncWithArray *sort_with_array, + PyArray_SortFuncWithArray *sort_with_array, NpyAuxData *auxdata, PyArray_PartitionFunc *part, npy_intp const *kth, npy_intp nkth) { npy_intp N = PyArray_DIM(op, axis); @@ -1299,7 +1299,7 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort, if (part == NULL) { if (sort != NULL) { - ret = sort(bufptr, N, &context, NULL, NULL); + ret = sort(&context, bufptr, N, auxdata); } else { ret = sort_with_array(bufptr, N, op); @@ -1370,7 +1370,7 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort, static PyObject* _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort, PyArray_ArgSortFuncWithArray *argsort_with_array, - PyArray_ArgPartitionFunc *argpart, + NpyAuxData *auxdata, PyArray_ArgPartitionFunc *argpart, npy_intp const *kth, npy_intp nkth) { npy_intp N = PyArray_DIM(op, axis); @@ -1499,7 +1499,7 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort, if (argpart == NULL) { if (argsort != NULL) { - ret = argsort(valptr, idxptr, N, &context, NULL, NULL); + ret = argsort(&context, valptr, idxptr, N, auxdata); } else { ret = argsort_with_array(valptr, idxptr, N, op); @@ -1577,6 +1577,8 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) PyArray_SortFunc *sort = NULL; PyArray_SortFuncWithArray 
*sort_with_array = NULL; + NpyAuxData *auxdata = NULL; + int n = PyArray_NDIM(op); if (check_and_adjust_axis(&axis, n) < 0) { @@ -1592,7 +1594,7 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) return -1; } - PyArray_GetSortFunction(PyArray_DESCR(op), which, 0, &sort); + PyArray_GetSortFunction(PyArray_DESCR(op), which, 0, &sort, &auxdata); if (sort == NULL) { sort_with_array = PyDataType_GetArrFuncs(PyArray_DESCR(op))->sort[which]; @@ -1637,7 +1639,7 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) } } - return _new_sortlike(op, axis, sort, sort_with_array, NULL, NULL, 0); + return _new_sortlike(op, axis, sort, sort_with_array, auxdata, NULL, NULL, 0); } @@ -1745,7 +1747,7 @@ PyArray_Partition(PyArrayObject *op, PyArrayObject * ktharray, int axis, return -1; } - ret = _new_sortlike(op, axis, NULL, sort, part, + ret = _new_sortlike(op, axis, NULL, sort, NULL, part, PyArray_DATA(kthrvl), PyArray_SIZE(kthrvl)); Py_DECREF(kthrvl); @@ -1765,7 +1767,9 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) PyArray_ArgSortFuncWithArray *argsort_with_array = NULL; PyObject *ret; - PyArray_GetArgSortFunction(PyArray_DESCR(op), which, 0, &argsort); + NpyAuxData *auxdata = NULL; + + PyArray_GetArgSortFunction(PyArray_DESCR(op), which, 0, &argsort, &auxdata); if (argsort == NULL) { argsort_with_array = PyDataType_GetArrFuncs(PyArray_DESCR(op))->argsort[which]; @@ -1815,7 +1819,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) return NULL; } - ret = _new_argsortlike(op2, axis, argsort, argsort_with_array, NULL, NULL, 0); + ret = _new_argsortlike(op2, axis, argsort, argsort_with_array, auxdata, NULL, NULL, 0); Py_DECREF(op2); return ret; @@ -1869,7 +1873,7 @@ PyArray_ArgPartition(PyArrayObject *op, PyArrayObject *ktharray, int axis, return NULL; } - ret = _new_argsortlike(op2, axis, NULL, argsort, argpart, + ret = _new_argsortlike(op2, axis, NULL, argsort, NULL, argpart, PyArray_DATA(kthrvl), PyArray_SIZE(kthrvl)); 
Py_DECREF(kthrvl); diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.c b/numpy/_core/src/multiarray/stringdtype/dtype.c index 1007afdc858e..c311c5c51e03 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.c +++ b/numpy/_core/src/multiarray/stringdtype/dtype.c @@ -524,42 +524,42 @@ stringdtype_sort_compare(void *a, void *b, PyArray_Descr *descr) { } int -_stringdtype_sort(void *start, npy_intp num, PyArrayMethod_Context *context, - NpyAuxData *auxdata, NpyAuxData **out_auxdata, - PyArray_SortFunc *sort) { +_stringdtype_sort(PyArrayMethod_Context *context, void *start, npy_intp num, + NpyAuxData *auxdata, PyArray_SortFunc *sort) { PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)context->descriptors[0]; NpyString_acquire_allocator(descr); - int result = sort(start, num, context, auxdata, out_auxdata); + int result = sort(context, start, num, auxdata); NpyString_release_allocator(descr->allocator); return result; } int -_stringdtype_quicksort(void *start, npy_intp num, PyArrayMethod_Context *context, - NpyAuxData *auxdata, NpyAuxData **out_auxdata) { - return _stringdtype_sort(start, num, context, auxdata, out_auxdata, +_stringdtype_quicksort(PyArrayMethod_Context *context, void *start, npy_intp num, + NpyAuxData *auxdata) { + return _stringdtype_sort(context, start, num, auxdata, &npy_quicksort_with_context); } int -_stringdtype_heapsort(void *start, npy_intp num, PyArrayMethod_Context *context, - NpyAuxData *auxdata, NpyAuxData **out_auxdata) { - return _stringdtype_sort(start, num, context, auxdata, out_auxdata, +_stringdtype_heapsort(PyArrayMethod_Context *context, void *start, npy_intp num, + NpyAuxData *auxdata) { + return _stringdtype_sort(context, start, num, auxdata, &npy_heapsort_with_context); } int -_stringdtype_timsort(void *start, npy_intp num, PyArrayMethod_Context *context, - NpyAuxData *auxdata, NpyAuxData **out_auxdata) { - return _stringdtype_sort(start, num, context, auxdata, out_auxdata, 
+_stringdtype_timsort(PyArrayMethod_Context *context, void *start, npy_intp num, + NpyAuxData *auxdata) { + return _stringdtype_sort(context, start, num, auxdata, &npy_timsort_with_context); } int stringdtype_get_sort_function(PyArray_Descr *descr, - NPY_SORTKIND sort_kind, int descending, PyArray_SortFunc **out_sort) { + NPY_SORTKIND sort_kind, int descending, PyArray_SortFunc **out_sort, + NpyAuxData **NPY_UNUSED(out_auxdata)) { switch (sort_kind) { default: @@ -578,41 +578,36 @@ stringdtype_get_sort_function(PyArray_Descr *descr, } int -_stringdtype_argsort(void *vv, npy_intp *tosort, npy_intp num, - PyArrayMethod_Context *context, NpyAuxData *auxdata, - NpyAuxData **out_auxdata, - PyArray_ArgSortFunc *argsort) { +_stringdtype_argsort(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, + npy_intp num, NpyAuxData *auxdata, PyArray_ArgSortFunc *argsort) { PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)context->descriptors[0]; NpyString_acquire_allocator(descr); - int result = argsort(vv, tosort, num, context, auxdata, out_auxdata); + int result = argsort(context, vv, tosort, num, auxdata); NpyString_release_allocator(descr->allocator); return result; } int -_stringdtype_aquicksort(void *vv, npy_intp *tosort, npy_intp n, - PyArrayMethod_Context *context, NpyAuxData *auxdata, - NpyAuxData **out_auxdata) { - return _stringdtype_argsort(vv, tosort, n, context, auxdata, out_auxdata, - &npy_aquicksort_with_context); +_stringdtype_aquicksort(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, + npy_intp n, NpyAuxData *auxdata) { + return _stringdtype_argsort(context, vv, tosort, n, auxdata, + &npy_aquicksort_with_context); } int -_stringdtype_aheapsort(void *vv, npy_intp *tosort, npy_intp n, - PyArrayMethod_Context *context, NpyAuxData *auxdata, - NpyAuxData **out_auxdata) { - return _stringdtype_argsort(vv, tosort, n, context, auxdata, out_auxdata, - &npy_aheapsort_with_context); +_stringdtype_aheapsort(PyArrayMethod_Context *context, 
void *vv, npy_intp *tosort, + npy_intp n, NpyAuxData *auxdata) { + return _stringdtype_argsort(context, vv, tosort, n, auxdata, + &npy_aheapsort_with_context); } int -_stringdtype_atimsort(void *vv, npy_intp *tosort, npy_intp n, - PyArrayMethod_Context *context, NpyAuxData *auxdata, - NpyAuxData **out_auxdata) { - return _stringdtype_argsort(vv, tosort, n, context, auxdata, out_auxdata, - &npy_atimsort_with_context); +_stringdtype_atimsort(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, + npy_intp n, NpyAuxData *auxdata) { + return _stringdtype_argsort(context, vv, tosort, n, auxdata, + &npy_atimsort_with_context); } int diff --git a/numpy/_core/src/npysort/heapsort.cpp b/numpy/_core/src/npysort/heapsort.cpp index 75d2979cf63b..6ff40499aecc 100644 --- a/numpy/_core/src/npysort/heapsort.cpp +++ b/numpy/_core/src/npysort/heapsort.cpp @@ -50,21 +50,19 @@ */ NPY_NO_EXPORT int -npy_heapsort_with_context(void *start, npy_intp num, - PyArrayMethod_Context *context, NpyAuxData *auxdata, - NpyAuxData **out_auxdata) +npy_heapsort_with_context(PyArrayMethod_Context *context, void *start, npy_intp num, + NpyAuxData *auxdata) { - return handle_npysort_with_context(start, num, context, auxdata, - out_auxdata, &npy_heapsort); + return handle_npysort_with_context(context, start, num, auxdata, + &npy_heapsort); } NPY_NO_EXPORT int -npy_aheapsort_with_context(void *vv, npy_intp *tosort, npy_intp num, - PyArrayMethod_Context *context, NpyAuxData *auxdata, - NpyAuxData **out_auxdata) +npy_aheapsort_with_context(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, + npy_intp num, NpyAuxData *auxdata) { - return handle_npyasort_with_context(vv, tosort, num, context, auxdata, - out_auxdata, &npy_aheapsort); + return handle_npyasort_with_context(context, vv, tosort, num, auxdata, + &npy_aheapsort); } NPY_NO_EXPORT int diff --git a/numpy/_core/src/npysort/mergesort.cpp b/numpy/_core/src/npysort/mergesort.cpp index 50ad4c17466b..1c40c16e3f87 100644 --- 
a/numpy/_core/src/npysort/mergesort.cpp +++ b/numpy/_core/src/npysort/mergesort.cpp @@ -337,20 +337,18 @@ string_amergesort_(type *v, npy_intp *tosort, npy_intp num, void *varr) NPY_NO_EXPORT int npy_mergesort_with_context(void *start, npy_intp num, - PyArrayMethod_Context *context, NpyAuxData *auxdata, - NpyAuxData **out_auxdata) + PyArrayMethod_Context *context, NpyAuxData *auxdata) { - return handle_npysort_with_context(start, num, context, auxdata, - out_auxdata, &npy_mergesort); + return handle_npysort_with_context(context, start, num, auxdata, + &npy_mergesort); } NPY_NO_EXPORT int -npy_amergesort_with_context(void *vv, npy_intp *tosort, npy_intp num, - PyArrayMethod_Context *context, NpyAuxData *auxdata, - NpyAuxData **out_auxdata) +npy_amergesort_with_context(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, + npy_intp num, NpyAuxData *auxdata) { - return handle_npyasort_with_context(vv, tosort, num, context, auxdata, - out_auxdata, &npy_amergesort); + return handle_npyasort_with_context(context, vv, tosort, num, auxdata, + &npy_amergesort); } static void diff --git a/numpy/_core/src/npysort/npysort_common.h b/numpy/_core/src/npysort/npysort_common.h index 2e78557f0ede..27584a312a2b 100644 --- a/numpy/_core/src/npysort/npysort_common.h +++ b/numpy/_core/src/npysort/npysort_common.h @@ -18,20 +18,16 @@ extern "C" { */ static inline int -handle_npysort_with_context(void *start, npy_intp num, - PyArrayMethod_Context *context, NpyAuxData *auxdata, - NpyAuxData **out_auxdata, - PyArray_SortFuncWithArray *sort) +handle_npysort_with_context(PyArrayMethod_Context *context, void *start, npy_intp num, + NpyAuxData *auxdata, PyArray_SortFuncWithArray *sort) { PyArray_Descr *descr = context->descriptors[0]; return sort(start, num, descr); } static inline int -handle_npyasort_with_context(void *vv, npy_intp *tosort, npy_intp num, - PyArrayMethod_Context *context, NpyAuxData *auxdata, - NpyAuxData **out_auxdata, - PyArray_ArgSortFuncWithArray *asort) 
+handle_npyasort_with_context(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, + npy_intp num, NpyAuxData *auxdata, PyArray_ArgSortFuncWithArray *asort) { PyArray_Descr *descr = context->descriptors[0]; return asort(vv, tosort, num, descr); diff --git a/numpy/_core/src/npysort/quicksort.cpp b/numpy/_core/src/npysort/quicksort.cpp index c4b6e4adad55..d6354ec6bd7d 100644 --- a/numpy/_core/src/npysort/quicksort.cpp +++ b/numpy/_core/src/npysort/quicksort.cpp @@ -506,21 +506,19 @@ string_aquicksort_(type *vv, npy_intp *tosort, npy_intp num, void *varr) */ NPY_NO_EXPORT int -npy_quicksort_with_context(void *start, npy_intp num, - PyArrayMethod_Context *context, NpyAuxData *auxdata, - NpyAuxData **out_auxdata) +npy_quicksort_with_context(PyArrayMethod_Context *context, void *start, npy_intp num, + NpyAuxData *auxdata) { - return handle_npysort_with_context(start, num, context, auxdata, - out_auxdata, &npy_quicksort); + return handle_npysort_with_context(context, start, num, auxdata, + &npy_quicksort); } NPY_NO_EXPORT int -npy_aquicksort_with_context(void *vv, npy_intp *tosort, npy_intp num, - PyArrayMethod_Context *context, NpyAuxData *auxdata, - NpyAuxData **out_auxdata) +npy_aquicksort_with_context(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, + npy_intp num, NpyAuxData *auxdata) { - return handle_npyasort_with_context(vv, tosort, num, context, auxdata, - out_auxdata, &npy_aquicksort); + return handle_npyasort_with_context(context, vv, tosort, num, auxdata, + &npy_aquicksort); } NPY_NO_EXPORT int diff --git a/numpy/_core/src/npysort/timsort.cpp b/numpy/_core/src/npysort/timsort.cpp index 10408c8b0b86..47b1ba94e7c9 100644 --- a/numpy/_core/src/npysort/timsort.cpp +++ b/numpy/_core/src/npysort/timsort.cpp @@ -1852,21 +1852,19 @@ string_atimsort_(void *start, npy_intp *tosort, npy_intp num, void *varr) */ NPY_NO_EXPORT int -npy_timsort_with_context(void *start, npy_intp num, - PyArrayMethod_Context *context, NpyAuxData *auxdata, - NpyAuxData 
**out_auxdata) +npy_timsort_with_context(PyArrayMethod_Context *context, void *start, npy_intp num, + NpyAuxData *auxdata) { - return handle_npysort_with_context(start, num, context, auxdata, - out_auxdata, &npy_timsort); + return handle_npysort_with_context(context, start, num, auxdata, + &npy_timsort); } NPY_NO_EXPORT int -npy_atimsort_with_context(void *vv, npy_intp *tosort, npy_intp num, - PyArrayMethod_Context *context, NpyAuxData *auxdata, - NpyAuxData **out_auxdata) +npy_atimsort_with_context(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, + npy_intp num, NpyAuxData *auxdata) { - return handle_npyasort_with_context(vv, tosort, num, context, auxdata, - out_auxdata, &npy_atimsort); + return handle_npyasort_with_context(context, vv, tosort, num, auxdata, + &npy_atimsort); } typedef struct { From beba24247fd6885ff55afa1fccc8b0d134363bf8 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Tue, 13 May 2025 18:04:14 -0400 Subject: [PATCH 25/54] MAINT: Check error in Get(Arg)SortFunc using return value --- numpy/_core/src/multiarray/item_selection.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index 3107e56ad982..bec12ee77df5 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -1594,9 +1594,7 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) return -1; } - PyArray_GetSortFunction(PyArray_DESCR(op), which, 0, &sort, &auxdata); - - if (sort == NULL) { + if (PyArray_GetSortFunction(PyArray_DESCR(op), which, 0, &sort, &auxdata) < 0) { sort_with_array = PyDataType_GetArrFuncs(PyArray_DESCR(op))->sort[which]; } @@ -1769,9 +1767,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) NpyAuxData *auxdata = NULL; - PyArray_GetArgSortFunction(PyArray_DESCR(op), which, 0, &argsort, &auxdata); - - if (argsort == NULL) { + if 
(PyArray_GetArgSortFunction(PyArray_DESCR(op), which, 0, &argsort, &auxdata) < 0) { argsort_with_array = PyDataType_GetArrFuncs(PyArray_DESCR(op))->argsort[which]; } From 61e6f16ca3e767855aff605daad54d0c4c548e6a Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Wed, 14 May 2025 00:02:04 -0400 Subject: [PATCH 26/54] DOC: Add missing newlines to c-types in array.rst --- doc/source/reference/c-api/array.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index 539e6026423f..afb0fcb56bc7 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -3538,7 +3538,7 @@ member of ``PyArrayDTypeMeta_Spec`` struct. .. c:macro:: NPY_DT_get_sort_function .. c:type:: int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, \ - npy_intp sort_kind, int descending, PyArray_SortFunc **out_sort, + npy_intp sort_kind, int descending, PyArray_SortFunc **out_sort, \ NpyAuxData **out_auxdata) If defined, sets a custom sorting function for the DType for each of @@ -3547,7 +3547,7 @@ member of ``PyArrayDTypeMeta_Spec`` struct. .. c:macro:: NPY_DT_get_argsort_function .. 
c:type:: int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, \ - npy_intp sort_kind, int descending, PyArray_ArgSortFunc **out_argsort, + npy_intp sort_kind, int descending, PyArray_ArgSortFunc **out_argsort, \ NpyAuxData **out_auxdata) If defined, sets a custom argsorting function for the DType for each of From 434005ef3ae3daa42fdab936aa4921e992159914 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sat, 24 May 2025 00:12:03 -0400 Subject: [PATCH 27/54] MAINT: Rename new sort funcs and restore older names for existing public API --- doc/source/reference/c-api/array.rst | 12 +++++----- .../reference/c-api/types-and-structures.rst | 4 ++-- numpy/_core/include/numpy/dtype_api.h | 8 +++---- numpy/_core/include/numpy/ndarraytypes.h | 8 +++---- numpy/_core/src/multiarray/dtypemeta.h | 4 ++-- numpy/_core/src/multiarray/item_selection.c | 22 +++++++++---------- .../_core/src/multiarray/stringdtype/dtype.c | 8 +++---- numpy/_core/src/npysort/npysort_common.h | 4 ++-- 8 files changed, 35 insertions(+), 35 deletions(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index afb0fcb56bc7..945150d6a26b 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -1873,7 +1873,7 @@ described below. pointer. Currently this is used for zero-filling and clearing arrays storing embedded references. -.. c:type:: int (PyArray_SortFunc)( \ +.. c:type:: int (PyArray_SortFuncWithContext)( \ PyArrayMethod_Context *data, void *start, \ npy_intp num, NpyAuxData *auxdata) @@ -1883,7 +1883,7 @@ described below. slots, where *context* is passed in containing the descriptor for the array. Returns 0 on success, -1 on failure. -.. c:type:: int (PyArray_ArgSortFunc)( \ +.. c:type:: int (PyArray_ArgSortFuncWithContext)( \ PyArrayMethod_Context *data, void *start, \ npy_intp *tosort, npy_intp num, NpyAuxData *auxdata) @@ -3538,7 +3538,7 @@ member of ``PyArrayDTypeMeta_Spec`` struct. .. 
c:macro:: NPY_DT_get_sort_function .. c:type:: int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, \ - npy_intp sort_kind, int descending, PyArray_SortFunc **out_sort, \ + npy_intp sort_kind, int descending, PyArray_SortFuncWithContext **out_sort, \ NpyAuxData **out_auxdata) If defined, sets a custom sorting function for the DType for each of @@ -3547,7 +3547,7 @@ member of ``PyArrayDTypeMeta_Spec`` struct. .. c:macro:: NPY_DT_get_argsort_function .. c:type:: int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, \ - npy_intp sort_kind, int descending, PyArray_ArgSortFunc **out_argsort, \ + npy_intp sort_kind, int descending, PyArray_ArgSortFuncWithContext **out_argsort, \ NpyAuxData **out_auxdata) If defined, sets a custom argsorting function for the DType for each of @@ -3628,7 +3628,7 @@ DType API slots but for now we have exposed the legacy .. c:macro:: NPY_DT_PyArray_ArrFuncs_sort - An array of PyArray_SortFunc of length ``NPY_NSORTS``. If set, allows + An array of PyArray_SortFuncWithContext of length ``NPY_NSORTS``. If set, allows defining custom sorting implementations for each of the sorting algorithms numpy implements. If `NPY_DT_get_sort_function` is defined, it will be used instead. This slot may be deprecated in the @@ -3636,7 +3636,7 @@ DType API slots but for now we have exposed the legacy .. c:macro:: NPY_DT_PyArray_ArrFuncs_argsort - An array of PyArray_ArgSortFunc of length ``NPY_NSORTS``. If set, + An array of PyArray_ArgSortFuncWithContext of length ``NPY_NSORTS``. If set, allows defining custom argsorting implementations for each of the sorting algorithms numpy implements. If `NPY_DT_get_argsort_function` is defined, it will be used instead. 
This slot may be deprecated in diff --git a/doc/source/reference/c-api/types-and-structures.rst b/doc/source/reference/c-api/types-and-structures.rst index 3f16b5f4dbc4..5561210657c8 100644 --- a/doc/source/reference/c-api/types-and-structures.rst +++ b/doc/source/reference/c-api/types-and-structures.rst @@ -494,8 +494,8 @@ PyArray_ArrFuncs PyArray_NonzeroFunc *nonzero; PyArray_FillFunc *fill; PyArray_FillWithScalarFunc *fillwithscalar; - PyArray_SortFunc *sort[NPY_NSORTS]; - PyArray_ArgSortFunc *argsort[NPY_NSORTS]; + PyArray_SortFuncWithContext *sort[NPY_NSORTS]; + PyArray_ArgSortFuncWithContext *argsort[NPY_NSORTS]; PyObject *castdict; PyArray_ScalarKindFunc *scalarkind; int **cancastscalarkindto; diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index 5371427ef936..acb23b7f7bb9 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -485,16 +485,16 @@ typedef int (PyArray_CompareFuncWithDescr)(const void *, const void *, PyArray_Descr *); typedef int (PyArray_SortCompareFunc)(const void *, const void *, PyArray_Descr *); -typedef int (PyArray_SortFunc)(PyArrayMethod_Context *, +typedef int (PyArray_SortFuncWithContext)(PyArrayMethod_Context *, void *, npy_intp, NpyAuxData *); -typedef int (PyArray_ArgSortFunc)(PyArrayMethod_Context *, +typedef int (PyArray_ArgSortFuncWithContext)(PyArrayMethod_Context *, void *, npy_intp *, npy_intp, NpyAuxData *); typedef int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, - npy_intp, int, PyArray_SortFunc **, NpyAuxData **); + npy_intp, int, PyArray_SortFuncWithContext **, NpyAuxData **); typedef int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, - npy_intp, int, PyArray_ArgSortFunc **, NpyAuxData **); + npy_intp, int, PyArray_ArgSortFuncWithContext **, NpyAuxData **); #endif /* NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_ */ diff --git a/numpy/_core/include/numpy/ndarraytypes.h b/numpy/_core/include/numpy/ndarraytypes.h index 
ad28dd953549..baa42406ac88 100644 --- a/numpy/_core/include/numpy/ndarraytypes.h +++ b/numpy/_core/include/numpy/ndarraytypes.h @@ -422,8 +422,8 @@ typedef int (PyArray_FromStrFunc)(char *s, void *dptr, char **endptr, typedef int (PyArray_FillFunc)(void *, npy_intp, void *); -typedef int (PyArray_SortFuncWithArray)(void *, npy_intp, void *); -typedef int (PyArray_ArgSortFuncWithArray)(void *, npy_intp *, npy_intp, void *); +typedef int (PyArray_SortFunc)(void *, npy_intp, void *); +typedef int (PyArray_ArgSortFunc)(void *, npy_intp *, npy_intp, void *); typedef int (PyArray_FillWithScalarFunc)(void *, npy_intp, void *, void *); @@ -514,8 +514,8 @@ typedef struct { * Sorting functions * Can be NULL */ - PyArray_SortFuncWithArray *sort[NPY_NSORTS]; - PyArray_ArgSortFuncWithArray *argsort[NPY_NSORTS]; + PyArray_SortFunc *sort[NPY_NSORTS]; + PyArray_ArgSortFunc *argsort[NPY_NSORTS]; /* * Dictionary of additional casting functions diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index cbddc82a0315..91869731afad 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ b/numpy/_core/src/multiarray/dtypemeta.h @@ -300,7 +300,7 @@ PyArray_SETITEM(PyArrayObject *arr, char *itemptr, PyObject *v) static inline int PyArray_GetSortFunction(PyArray_Descr *descr, - NPY_SORTKIND which, int descending, PyArray_SortFunc **out_sort, + NPY_SORTKIND which, int descending, PyArray_SortFuncWithContext **out_sort, NpyAuxData **out_auxdata) { if (NPY_DT_SLOTS(NPY_DTYPE(descr))->get_sort_function == NULL) { @@ -314,7 +314,7 @@ PyArray_GetSortFunction(PyArray_Descr *descr, static inline int PyArray_GetArgSortFunction(PyArray_Descr *descr, - NPY_SORTKIND which, int descending, PyArray_ArgSortFunc **out_argsort, + NPY_SORTKIND which, int descending, PyArray_ArgSortFuncWithContext **out_argsort, NpyAuxData **out_auxdata) { if (NPY_DT_SLOTS(NPY_DTYPE(descr))->get_argsort_function == NULL) { diff --git a/numpy/_core/src/multiarray/item_selection.c 
b/numpy/_core/src/multiarray/item_selection.c index bec12ee77df5..683dd0d55570 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -1191,8 +1191,8 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, * over all but the desired sorting axis. */ static int -_new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort, - PyArray_SortFuncWithArray *sort_with_array, NpyAuxData *auxdata, +_new_sortlike(PyArrayObject *op, int axis, PyArray_SortFuncWithContext *sort, + PyArray_SortFunc *sort_with_array, NpyAuxData *auxdata, PyArray_PartitionFunc *part, npy_intp const *kth, npy_intp nkth) { npy_intp N = PyArray_DIM(op, axis); @@ -1368,8 +1368,8 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort, } static PyObject* -_new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort, - PyArray_ArgSortFuncWithArray *argsort_with_array, +_new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFuncWithContext *argsort, + PyArray_ArgSortFunc *argsort_with_array, NpyAuxData *auxdata, PyArray_ArgPartitionFunc *argpart, npy_intp const *kth, npy_intp nkth) { @@ -1574,8 +1574,8 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort, NPY_NO_EXPORT int PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) { - PyArray_SortFunc *sort = NULL; - PyArray_SortFuncWithArray *sort_with_array = NULL; + PyArray_SortFuncWithContext *sort = NULL; + PyArray_SortFunc *sort_with_array = NULL; NpyAuxData *auxdata = NULL; @@ -1710,7 +1710,7 @@ PyArray_Partition(PyArrayObject *op, PyArrayObject * ktharray, int axis, { PyArrayObject *kthrvl; PyArray_PartitionFunc *part; - PyArray_SortFuncWithArray *sort; + PyArray_SortFunc *sort; int n = PyArray_NDIM(op); int ret; @@ -1761,8 +1761,8 @@ NPY_NO_EXPORT PyObject * PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) { PyArrayObject *op2; - PyArray_ArgSortFunc *argsort = NULL; - PyArray_ArgSortFuncWithArray 
*argsort_with_array = NULL; + PyArray_ArgSortFuncWithContext *argsort = NULL; + PyArray_ArgSortFunc *argsort_with_array = NULL; PyObject *ret; NpyAuxData *auxdata = NULL; @@ -1831,7 +1831,7 @@ PyArray_ArgPartition(PyArrayObject *op, PyArrayObject *ktharray, int axis, { PyArrayObject *op2, *kthrvl; PyArray_ArgPartitionFunc *argpart; - PyArray_ArgSortFuncWithArray *argsort; + PyArray_ArgSortFunc *argsort; PyObject *ret; /* @@ -1901,7 +1901,7 @@ PyArray_LexSort(PyObject *sort_keys, int axis) int elsize; int maxelsize; int object = 0; - PyArray_ArgSortFuncWithArray *argsort; + PyArray_ArgSortFunc *argsort; NPY_BEGIN_THREADS_DEF; if (!PySequence_Check(sort_keys) diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.c b/numpy/_core/src/multiarray/stringdtype/dtype.c index c311c5c51e03..c9ee00f0b465 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.c +++ b/numpy/_core/src/multiarray/stringdtype/dtype.c @@ -525,7 +525,7 @@ stringdtype_sort_compare(void *a, void *b, PyArray_Descr *descr) { int _stringdtype_sort(PyArrayMethod_Context *context, void *start, npy_intp num, - NpyAuxData *auxdata, PyArray_SortFunc *sort) { + NpyAuxData *auxdata, PyArray_SortFuncWithContext *sort) { PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)context->descriptors[0]; NpyString_acquire_allocator(descr); @@ -558,7 +558,7 @@ _stringdtype_timsort(PyArrayMethod_Context *context, void *start, npy_intp num, int stringdtype_get_sort_function(PyArray_Descr *descr, - NPY_SORTKIND sort_kind, int descending, PyArray_SortFunc **out_sort, + NPY_SORTKIND sort_kind, int descending, PyArray_SortFuncWithContext **out_sort, NpyAuxData **NPY_UNUSED(out_auxdata)) { switch (sort_kind) { @@ -579,7 +579,7 @@ stringdtype_get_sort_function(PyArray_Descr *descr, int _stringdtype_argsort(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, - npy_intp num, NpyAuxData *auxdata, PyArray_ArgSortFunc *argsort) { + npy_intp num, NpyAuxData *auxdata, PyArray_ArgSortFuncWithContext *argsort) { 
PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)context->descriptors[0]; NpyString_acquire_allocator(descr); @@ -612,7 +612,7 @@ _stringdtype_atimsort(PyArrayMethod_Context *context, void *vv, npy_intp *tosort int stringdtype_get_argsort_function(PyArray_Descr *descr, - NPY_SORTKIND sort_kind, int descending, PyArray_ArgSortFunc **out_argsort) { + NPY_SORTKIND sort_kind, int descending, PyArray_ArgSortFuncWithContext **out_argsort) { switch (sort_kind) { default: diff --git a/numpy/_core/src/npysort/npysort_common.h b/numpy/_core/src/npysort/npysort_common.h index 27584a312a2b..cfaefa3c449d 100644 --- a/numpy/_core/src/npysort/npysort_common.h +++ b/numpy/_core/src/npysort/npysort_common.h @@ -19,7 +19,7 @@ extern "C" { static inline int handle_npysort_with_context(PyArrayMethod_Context *context, void *start, npy_intp num, - NpyAuxData *auxdata, PyArray_SortFuncWithArray *sort) + NpyAuxData *auxdata, PyArray_SortFunc *sort) { PyArray_Descr *descr = context->descriptors[0]; return sort(start, num, descr); @@ -27,7 +27,7 @@ handle_npysort_with_context(PyArrayMethod_Context *context, void *start, npy_int static inline int handle_npyasort_with_context(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, - npy_intp num, NpyAuxData *auxdata, PyArray_ArgSortFuncWithArray *asort) + npy_intp num, NpyAuxData *auxdata, PyArray_ArgSortFunc *asort) { PyArray_Descr *descr = context->descriptors[0]; return asort(vv, tosort, num, descr); From 3c168e45f2311b12b61bf4c58d83bf8919c0feee Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sat, 24 May 2025 00:12:47 -0400 Subject: [PATCH 28/54] MAINT: Rename start pointer in new sort func documentation to data --- doc/source/reference/c-api/array.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index 945150d6a26b..52e174a78504 100644 --- a/doc/source/reference/c-api/array.rst +++ 
b/doc/source/reference/c-api/array.rst @@ -1874,20 +1874,20 @@ described below. embedded references. .. c:type:: int (PyArray_SortFuncWithContext)( \ - PyArrayMethod_Context *data, void *start, \ + PyArrayMethod_Context *context, void *data, \ npy_intp num, NpyAuxData *auxdata) - A function to sort a buffer of data. The *start* is a pointer to the + A function to sort a buffer of data. The *data* is a pointer to the beginning of the buffer containing *num* elements. A function of this type is returned by the `get_sort_function` function in the DType slots, where *context* is passed in containing the descriptor for the array. Returns 0 on success, -1 on failure. .. c:type:: int (PyArray_ArgSortFuncWithContext)( \ - PyArrayMethod_Context *data, void *start, \ + PyArrayMethod_Context *context, void *data, \ npy_intp *tosort, npy_intp num, NpyAuxData *auxdata) - - A function to arg-sort a buffer of data. The *start* is a pointer to the + + A function to arg-sort a buffer of data. The *data* is a pointer to the beginning of the buffer containing *num* elements. The *tosort* is a pointer to an array of indices that will be filled in with the indices of the sorted elements. 
A function of this type is returned by From d9d987242a4ec63bba841f093f21c46b1e44d09e Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Wed, 28 May 2025 06:32:30 -0400 Subject: [PATCH 29/54] ENH: Add flags to new get_(arg)sort_function --- doc/source/reference/c-api/array.rst | 4 ++-- numpy/_core/include/numpy/dtype_api.h | 6 ++++-- numpy/_core/src/multiarray/dtypemeta.h | 8 ++++---- numpy/_core/src/multiarray/item_selection.c | 6 ++++-- numpy/_core/src/multiarray/stringdtype/dtype.c | 8 +++++--- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index 52e174a78504..9f09ec545652 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -3539,7 +3539,7 @@ member of ``PyArrayDTypeMeta_Spec`` struct. .. c:type:: int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, \ npy_intp sort_kind, int descending, PyArray_SortFuncWithContext **out_sort, \ - NpyAuxData **out_auxdata) + NpyAuxData **out_auxdata, NPY_ARRAYMETHOD_FLAGS *out_flags) If defined, sets a custom sorting function for the DType for each of the sort kinds numpy implements. Returns 0 on success. @@ -3548,7 +3548,7 @@ member of ``PyArrayDTypeMeta_Spec`` struct. .. c:type:: int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, \ npy_intp sort_kind, int descending, PyArray_ArgSortFuncWithContext **out_argsort, \ - NpyAuxData **out_auxdata) + NpyAuxData **out_auxdata, NPY_ARRAYMETHOD_FLAGS *out_flags) If defined, sets a custom argsorting function for the DType for each of the sort kinds numpy implements. Returns 0 on success. 
diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index acb23b7f7bb9..78477c635798 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -493,8 +493,10 @@ typedef int (PyArray_ArgSortFuncWithContext)(PyArrayMethod_Context *, NpyAuxData *); typedef int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, - npy_intp, int, PyArray_SortFuncWithContext **, NpyAuxData **); + npy_intp, int, PyArray_SortFuncWithContext **, NpyAuxData **, + NPY_ARRAYMETHOD_FLAGS *out_flags); typedef int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, - npy_intp, int, PyArray_ArgSortFuncWithContext **, NpyAuxData **); + npy_intp, int, PyArray_ArgSortFuncWithContext **, NpyAuxData **, + NPY_ARRAYMETHOD_FLAGS *out_flags); #endif /* NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_ */ diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index 91869731afad..c57e324f3c81 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ b/numpy/_core/src/multiarray/dtypemeta.h @@ -301,28 +301,28 @@ PyArray_SETITEM(PyArrayObject *arr, char *itemptr, PyObject *v) static inline int PyArray_GetSortFunction(PyArray_Descr *descr, NPY_SORTKIND which, int descending, PyArray_SortFuncWithContext **out_sort, - NpyAuxData **out_auxdata) + NpyAuxData **out_auxdata, NPY_ARRAYMETHOD_FLAGS *out_flags) { if (NPY_DT_SLOTS(NPY_DTYPE(descr))->get_sort_function == NULL) { return -1; } NPY_DT_SLOTS(NPY_DTYPE(descr))->get_sort_function( - descr, which, descending, out_sort, out_auxdata); + descr, which, descending, out_sort, out_auxdata, out_flags); return 0; } static inline int PyArray_GetArgSortFunction(PyArray_Descr *descr, NPY_SORTKIND which, int descending, PyArray_ArgSortFuncWithContext **out_argsort, - NpyAuxData **out_auxdata) + NpyAuxData **out_auxdata, NPY_ARRAYMETHOD_FLAGS *out_flags) { if (NPY_DT_SLOTS(NPY_DTYPE(descr))->get_argsort_function == NULL) { return -1; } 
NPY_DT_SLOTS(NPY_DTYPE(descr))->get_argsort_function( - descr, which, descending, out_argsort, out_auxdata); + descr, which, descending, out_argsort, out_auxdata, out_flags); return 0; } diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index 683dd0d55570..e2492025ba50 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -1578,6 +1578,7 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) PyArray_SortFunc *sort_with_array = NULL; NpyAuxData *auxdata = NULL; + NPY_ARRAYMETHOD_FLAGS flags = 0; int n = PyArray_NDIM(op); @@ -1594,7 +1595,7 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) return -1; } - if (PyArray_GetSortFunction(PyArray_DESCR(op), which, 0, &sort, &auxdata) < 0) { + if (PyArray_GetSortFunction(PyArray_DESCR(op), which, 0, &sort, &auxdata, &flags) < 0) { sort_with_array = PyDataType_GetArrFuncs(PyArray_DESCR(op))->sort[which]; } @@ -1766,8 +1767,9 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) PyObject *ret; NpyAuxData *auxdata = NULL; + NPY_ARRAYMETHOD_FLAGS flags = 0; - if (PyArray_GetArgSortFunction(PyArray_DESCR(op), which, 0, &argsort, &auxdata) < 0) { + if (PyArray_GetArgSortFunction(PyArray_DESCR(op), which, 0, &argsort, &auxdata, &flags) < 0) { argsort_with_array = PyDataType_GetArrFuncs(PyArray_DESCR(op))->argsort[which]; } diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.c b/numpy/_core/src/multiarray/stringdtype/dtype.c index c9ee00f0b465..5c52564de10f 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.c +++ b/numpy/_core/src/multiarray/stringdtype/dtype.c @@ -559,7 +559,7 @@ _stringdtype_timsort(PyArrayMethod_Context *context, void *start, npy_intp num, int stringdtype_get_sort_function(PyArray_Descr *descr, NPY_SORTKIND sort_kind, int descending, PyArray_SortFuncWithContext **out_sort, - NpyAuxData **NPY_UNUSED(out_auxdata)) { + NpyAuxData **NPY_UNUSED(out_auxdata), 
NPY_ARRAYMETHOD_FLAGS *out_flags) { switch (sort_kind) { default: @@ -573,7 +573,7 @@ stringdtype_get_sort_function(PyArray_Descr *descr, *out_sort = &_stringdtype_timsort; break; } - + *out_flags = NPY_METH_REQUIRES_PYAPI; return 0; } @@ -612,7 +612,8 @@ _stringdtype_atimsort(PyArrayMethod_Context *context, void *vv, npy_intp *tosort int stringdtype_get_argsort_function(PyArray_Descr *descr, - NPY_SORTKIND sort_kind, int descending, PyArray_ArgSortFuncWithContext **out_argsort) { + NPY_SORTKIND sort_kind, int descending, PyArray_ArgSortFuncWithContext **out_argsort, + NpyAuxData **NPY_UNUSED(out_auxdata), NPY_ARRAYMETHOD_FLAGS *out_flags) { switch (sort_kind) { default: @@ -626,6 +627,7 @@ stringdtype_get_argsort_function(PyArray_Descr *descr, *out_argsort = &_stringdtype_atimsort; break; } + *out_flags = NPY_METH_REQUIRES_PYAPI; return 0; } From a4484751d9c888f64fd95db82450fa205bddc448 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Wed, 28 May 2025 06:33:38 -0400 Subject: [PATCH 30/54] DOC: Mention new sort func buffers to be contiguous --- doc/source/reference/c-api/array.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index 9f09ec545652..9f2b513366fc 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -1878,8 +1878,8 @@ described below. npy_intp num, NpyAuxData *auxdata) A function to sort a buffer of data. The *data* is a pointer to the - beginning of the buffer containing *num* elements. A function of this - type is returned by the `get_sort_function` function in the DType + beginning of the contiguous buffer containing *num* elements. A function + of this type is returned by the `get_sort_function` function in the DType slots, where *context* is passed in containing the descriptor for the array. Returns 0 on success, -1 on failure. 
From ab8f3948871d0496c1881a72c18c70c989f56d4f Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sat, 31 May 2025 21:14:33 -0400 Subject: [PATCH 31/54] ENH: Elaborate new sort context, add reverse sorting and NaN position with unordered returns, and restructure legacy wrappers --- doc/source/reference/c-api/array.rst | 4 +- numpy/_core/include/numpy/dtype_api.h | 39 ++++++++-- numpy/_core/src/common/npy_sort.h.src | 25 ++++-- numpy/_core/src/multiarray/dtypemeta.h | 4 +- numpy/_core/src/multiarray/item_selection.c | 18 +++-- .../_core/src/multiarray/stringdtype/dtype.c | 38 +++++---- numpy/_core/src/npysort/heapsort.cpp | 28 ++++--- numpy/_core/src/npysort/mergesort.cpp | 46 +++++++---- numpy/_core/src/npysort/npysort_common.h | 78 +++++++++++++------ numpy/_core/src/npysort/quicksort.cpp | 28 ++++--- numpy/_core/src/npysort/timsort.cpp | 30 ++++--- 11 files changed, 231 insertions(+), 107 deletions(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index 9f2b513366fc..ffdf4474a11f 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -1874,7 +1874,7 @@ described below. embedded references. .. c:type:: int (PyArray_SortFuncWithContext)( \ - PyArrayMethod_Context *context, void *data, \ + PyArrayMethod_SortContext *context, void *data, \ npy_intp num, NpyAuxData *auxdata) A function to sort a buffer of data. The *data* is a pointer to the @@ -1884,7 +1884,7 @@ described below. array. Returns 0 on success, -1 on failure. .. c:type:: int (PyArray_ArgSortFuncWithContext)( \ - PyArrayMethod_Context *context, void *data, \ + PyArrayMethod_SortContext *context, void *data, \ npy_intp *tosort, npy_intp num, NpyAuxData *auxdata) A function to arg-sort a buffer of data.
The *data* is a pointer to the diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index 78477c635798..b41b5b8afe48 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -481,14 +481,39 @@ typedef PyArray_Descr *(PyArrayDTypeMeta_FinalizeDescriptor)(PyArray_Descr *dtyp typedef int(PyArrayDTypeMeta_SetItem)(PyArray_Descr *, PyObject *, char *); typedef PyObject *(PyArrayDTypeMeta_GetItem)(PyArray_Descr *, char *); -typedef int (PyArray_CompareFuncWithDescr)(const void *, const void *, - PyArray_Descr *); -typedef int (PyArray_SortCompareFunc)(const void *, const void *, - PyArray_Descr *); -typedef int (PyArray_SortFuncWithContext)(PyArrayMethod_Context *, - void *, npy_intp, +typedef enum { + NPY_LESS = -1, + NPY_EQUAL = 0, + NPY_GREATER = 1, + NPY_UNORDERED_LEFT = 2, + NPY_UNORDERED_RIGHT = 3, + NPY_UNORDERED_BOTH = 4, +} NPY_COMPARE_RESULT; + +typedef struct PyArrayMethod_SortContext_tag PyArrayMethod_SortContext; + +typedef NPY_COMPARE_RESULT (PyArray_CompareFuncWithContext)( + const void *a, const void *b, PyArrayMethod_SortContext *context); +typedef NPY_COMPARE_RESULT (PyArray_SortCompareFunc)( + const void *a, const void *b, PyArrayMethod_SortContext *context); + +typedef enum { + NPY_SORT_NAN_FIRST = 0, + NPY_SORT_NAN_LAST = 1, +} NPY_SORT_NAN_POSITION; + +struct PyArrayMethod_SortContext_tag { + PyArray_Descr *descriptor; + PyArrayObject *array; /* NULL if using new-style context */ + PyArray_SortCompareFunc *compare; + int reversed; + NPY_SORT_NAN_POSITION nan_position; +}; + +typedef int (PyArray_SortFuncWithContext)(PyArrayMethod_SortContext *, + void *, npy_intp, NpyAuxData *); -typedef int (PyArray_ArgSortFuncWithContext)(PyArrayMethod_Context *, +typedef int (PyArray_ArgSortFuncWithContext)(PyArrayMethod_SortContext *, void *, npy_intp *, npy_intp, NpyAuxData *); diff --git a/numpy/_core/src/common/npy_sort.h.src b/numpy/_core/src/common/npy_sort.h.src index 
52801726d1e6..d14f247c00ce 100644 --- a/numpy/_core/src/common/npy_sort.h.src +++ b/numpy/_core/src/common/npy_sort.h.src @@ -98,21 +98,21 @@ NPY_NO_EXPORT int atimsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void * ***************************************************************************** */ -NPY_NO_EXPORT int npy_quicksort_with_context(PyArrayMethod_Context *context, void *vec, +NPY_NO_EXPORT int npy_quicksort_with_context(PyArrayMethod_SortContext *context, void *vec, npy_intp cnt, NpyAuxData *auxdata); -NPY_NO_EXPORT int npy_heapsort_with_context(PyArrayMethod_Context *context, void *vec, +NPY_NO_EXPORT int npy_heapsort_with_context(PyArrayMethod_SortContext *context, void *vec, npy_intp cnt, NpyAuxData *auxdata); -NPY_NO_EXPORT int npy_mergesort_with_context(PyArrayMethod_Context *context, void *vec, +NPY_NO_EXPORT int npy_mergesort_with_context(PyArrayMethod_SortContext *context, void *vec, npy_intp cnt, NpyAuxData *auxdata); -NPY_NO_EXPORT int npy_timsort_with_context(PyArrayMethod_Context *context, void *vec, +NPY_NO_EXPORT int npy_timsort_with_context(PyArrayMethod_SortContext *context, void *vec, npy_intp cnt, NpyAuxData *auxdata); -NPY_NO_EXPORT int npy_aquicksort_with_context(PyArrayMethod_Context *context, void *vec, +NPY_NO_EXPORT int npy_aquicksort_with_context(PyArrayMethod_SortContext *context, void *vec, npy_intp *ind, npy_intp cnt, NpyAuxData *auxdata); -NPY_NO_EXPORT int npy_aheapsort_with_context(PyArrayMethod_Context *context, void *vec, +NPY_NO_EXPORT int npy_aheapsort_with_context(PyArrayMethod_SortContext *context, void *vec, npy_intp *ind, npy_intp cnt, NpyAuxData *auxdata); -NPY_NO_EXPORT int npy_amergesort_with_context(PyArrayMethod_Context *context, void *vec, +NPY_NO_EXPORT int npy_amergesort_with_context(PyArrayMethod_SortContext *context, void *vec, npy_intp *ind, npy_intp cnt, NpyAuxData *auxdata); -NPY_NO_EXPORT int npy_atimsort_with_context(PyArrayMethod_Context *context, void *vec, +NPY_NO_EXPORT int 
npy_atimsort_with_context(PyArrayMethod_SortContext *context, void *vec, npy_intp *ind, npy_intp cnt, NpyAuxData *auxdata); NPY_NO_EXPORT int npy_quicksort(void *vec, npy_intp cnt, void *arr); @@ -124,6 +124,15 @@ NPY_NO_EXPORT int npy_aheapsort(void *vec, npy_intp *ind, npy_intp cnt, void *ar NPY_NO_EXPORT int npy_amergesort(void *vec, npy_intp *ind, npy_intp cnt, void *arr); NPY_NO_EXPORT int npy_atimsort(void *vec, npy_intp *ind, npy_intp cnt, void *arr); +NPY_NO_EXPORT int npy_quicksort_impl(void *vec, npy_intp cnt, void *arr, PyArrayMethod_SortContext *context); +NPY_NO_EXPORT int npy_heapsort_impl(void *vec, npy_intp cnt, void *arr, PyArrayMethod_SortContext *context); +NPY_NO_EXPORT int npy_mergesort_impl(void *vec, npy_intp cnt, void *arr, PyArrayMethod_SortContext *context); +NPY_NO_EXPORT int npy_timsort_impl(void *vec, npy_intp cnt, void *arr, PyArrayMethod_SortContext *context); +NPY_NO_EXPORT int npy_aquicksort_impl(void *vec, npy_intp *ind, npy_intp cnt, void *arr, PyArrayMethod_SortContext *context); +NPY_NO_EXPORT int npy_aheapsort_impl(void *vec, npy_intp *ind, npy_intp cnt, void *arr, PyArrayMethod_SortContext *context); +NPY_NO_EXPORT int npy_amergesort_impl(void *vec, npy_intp *ind, npy_intp cnt, void *arr, PyArrayMethod_SortContext *context); +NPY_NO_EXPORT int npy_atimsort_impl(void *vec, npy_intp *ind, npy_intp cnt, void *arr, PyArrayMethod_SortContext *context); + #ifdef __cplusplus } #endif diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index c57e324f3c81..72fc8c2bd9b8 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ b/numpy/_core/src/multiarray/dtypemeta.h @@ -71,7 +71,7 @@ typedef struct { /* DType sorting methods. 
*/ PyArrayDTypeMeta_GetSortFunction *get_sort_function; PyArrayDTypeMeta_GetArgSortFunction *get_argsort_function; - PyArray_CompareFuncWithDescr *compare; + PyArray_CompareFuncWithContext *compare; PyArray_SortCompareFunc *sort_compare; /* @@ -326,7 +326,7 @@ PyArray_GetArgSortFunction(PyArray_Descr *descr, return 0; } -static inline PyArray_CompareFuncWithDescr * +static inline PyArray_CompareFuncWithContext * PyArray_GetCompareFunction(PyArray_Descr *descr) { return NPY_DT_SLOTS(NPY_DTYPE(descr))->compare; diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index e2492025ba50..7697245f36c8 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -1216,9 +1216,7 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFuncWithContext *sort, NPY_cast_info to_cast_info = {.func = NULL}; NPY_cast_info from_cast_info = {.func = NULL}; - PyArrayMethod_Context context = { - .descriptors = &descr - }; + PyArrayMethod_SortContext context = {}; NPY_BEGIN_THREADS_DEF; @@ -1299,6 +1297,11 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFuncWithContext *sort, if (part == NULL) { if (sort != NULL) { + context.compare = PyArray_GetSortCompareFunction(descr); + context.descriptor = descr; + context.reversed = 0; + context.nan_position = NPY_SORT_NAN_FIRST; + ret = sort(&context, bufptr, N, auxdata); } else { @@ -1399,9 +1402,7 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFuncWithContext *ar NPY_ARRAYMETHOD_FLAGS transfer_flags; NPY_cast_info cast_info = {.func = NULL}; - PyArrayMethod_Context context = { - .descriptors = &descr - }; + PyArrayMethod_SortContext context = {}; NPY_BEGIN_THREADS_DEF; @@ -1499,6 +1500,11 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFuncWithContext *ar if (argpart == NULL) { if (argsort != NULL) { + context.compare = PyArray_GetSortCompareFunction(descr); + context.descriptor = descr; + 
context.reversed = 0; + context.nan_position = NPY_SORT_NAN_FIRST; + ret = argsort(&context, valptr, idxptr, N, auxdata); } else { diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.c b/numpy/_core/src/multiarray/stringdtype/dtype.c index 5c52564de10f..5d540e8fd6fb 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.c +++ b/numpy/_core/src/multiarray/stringdtype/dtype.c @@ -517,16 +517,26 @@ _compare(void *a, void *b, PyArray_StringDTypeObject *descr_a, return NpyString_cmp(&s_a, &s_b); } -static int -stringdtype_sort_compare(void *a, void *b, PyArray_Descr *descr) { - PyArray_StringDTypeObject *string_descr = (PyArray_StringDTypeObject *)descr; - return _compare(a, b, string_descr, string_descr); +static NPY_COMPARE_RESULT +stringdtype_sort_compare(void *a, void *b, PyArrayMethod_SortContext *context) { + PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)context->descriptor; + int dist = _compare(a, b, descr, descr); + + if (dist < 0) { + return NPY_LESS; + } + else if (dist > 0) { + return NPY_GREATER; + } + else { + return NPY_EQUAL; + } } int -_stringdtype_sort(PyArrayMethod_Context *context, void *start, npy_intp num, +_stringdtype_sort(PyArrayMethod_SortContext *context, void *start, npy_intp num, NpyAuxData *auxdata, PyArray_SortFuncWithContext *sort) { - PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)context->descriptors[0]; + PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)context->descriptor; NpyString_acquire_allocator(descr); int result = sort(context, start, num, auxdata); @@ -536,21 +546,21 @@ _stringdtype_sort(PyArrayMethod_Context *context, void *start, npy_intp num, } int -_stringdtype_quicksort(PyArrayMethod_Context *context, void *start, npy_intp num, +_stringdtype_quicksort(PyArrayMethod_SortContext *context, void *start, npy_intp num, NpyAuxData *auxdata) { return _stringdtype_sort(context, start, num, auxdata, &npy_quicksort_with_context); } int 
-_stringdtype_heapsort(PyArrayMethod_Context *context, void *start, npy_intp num, +_stringdtype_heapsort(PyArrayMethod_SortContext *context, void *start, npy_intp num, NpyAuxData *auxdata) { return _stringdtype_sort(context, start, num, auxdata, &npy_heapsort_with_context); } int -_stringdtype_timsort(PyArrayMethod_Context *context, void *start, npy_intp num, +_stringdtype_timsort(PyArrayMethod_SortContext *context, void *start, npy_intp num, NpyAuxData *auxdata) { return _stringdtype_sort(context, start, num, auxdata, &npy_timsort_with_context); @@ -578,9 +588,9 @@ stringdtype_get_sort_function(PyArray_Descr *descr, } int -_stringdtype_argsort(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, +_stringdtype_argsort(PyArrayMethod_SortContext *context, void *vv, npy_intp *tosort, npy_intp num, NpyAuxData *auxdata, PyArray_ArgSortFuncWithContext *argsort) { - PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)context->descriptors[0]; + PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)context->descriptor; NpyString_acquire_allocator(descr); int result = argsort(context, vv, tosort, num, auxdata); @@ -590,21 +600,21 @@ _stringdtype_argsort(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, } int -_stringdtype_aquicksort(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, +_stringdtype_aquicksort(PyArrayMethod_SortContext *context, void *vv, npy_intp *tosort, npy_intp n, NpyAuxData *auxdata) { return _stringdtype_argsort(context, vv, tosort, n, auxdata, &npy_aquicksort_with_context); } int -_stringdtype_aheapsort(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, +_stringdtype_aheapsort(PyArrayMethod_SortContext *context, void *vv, npy_intp *tosort, npy_intp n, NpyAuxData *auxdata) { return _stringdtype_argsort(context, vv, tosort, n, auxdata, &npy_aheapsort_with_context); } int -_stringdtype_atimsort(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, +_stringdtype_atimsort(PyArrayMethod_SortContext 
*context, void *vv, npy_intp *tosort, npy_intp n, NpyAuxData *auxdata) { return _stringdtype_argsort(context, vv, tosort, n, auxdata, &npy_atimsort_with_context); diff --git a/numpy/_core/src/npysort/heapsort.cpp b/numpy/_core/src/npysort/heapsort.cpp index 6ff40499aecc..75edd4ae3b0a 100644 --- a/numpy/_core/src/npysort/heapsort.cpp +++ b/numpy/_core/src/npysort/heapsort.cpp @@ -50,28 +50,38 @@ */ NPY_NO_EXPORT int -npy_heapsort_with_context(PyArrayMethod_Context *context, void *start, npy_intp num, +npy_heapsort_with_context(PyArrayMethod_SortContext *context, void *start, npy_intp num, NpyAuxData *auxdata) { - return handle_npysort_with_context(context, start, num, auxdata, - &npy_heapsort); + return npy_heapsort_impl(start, num, NULL, context); } NPY_NO_EXPORT int -npy_aheapsort_with_context(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, +npy_aheapsort_with_context(PyArrayMethod_SortContext *context, void *vv, npy_intp *tosort, npy_intp num, NpyAuxData *auxdata) { - return handle_npyasort_with_context(context, vv, tosort, num, auxdata, - &npy_aheapsort); + return npy_aheapsort_impl(vv, tosort, num, NULL, context); } NPY_NO_EXPORT int npy_heapsort(void *start, npy_intp num, void *varr) +{ + return npy_heapsort_impl(start, num, varr, NULL); +} + +NPY_NO_EXPORT int +npy_aheapsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) +{ + return npy_aheapsort_impl(vv, tosort, n, varr, NULL); +} + +NPY_NO_EXPORT int +npy_heapsort_impl(void *start, npy_intp num, void *varr, PyArrayMethod_SortContext *context) { void *arr; npy_intp elsize; PyArray_CompareFunc *cmp; - fill_sort_data_from_arr_or_descr(varr, &arr, &elsize, &cmp); + fill_sort_data_from_arr_or_context(varr, context, &arr, &elsize, &cmp); if (elsize == 0) { return 0; /* no need for sorting elements of no size */ @@ -129,13 +139,13 @@ npy_heapsort(void *start, npy_intp num, void *varr) } NPY_NO_EXPORT int -npy_aheapsort(void *vv, npy_intp *tosort, npy_intp n, void *varr) +npy_aheapsort_impl(void 
*vv, npy_intp *tosort, npy_intp n, void *varr, PyArrayMethod_SortContext *context) { char *v = (char *)vv; void *arr; npy_intp elsize; PyArray_CompareFunc *cmp; - fill_sort_data_from_arr_or_descr(varr, &arr, &elsize, &cmp); + fill_sort_data_from_arr_or_context(varr, context, &arr, &elsize, &cmp); npy_intp *a, i, j, l, tmp; diff --git a/numpy/_core/src/npysort/mergesort.cpp b/numpy/_core/src/npysort/mergesort.cpp index 1c40c16e3f87..bbda4abd586d 100644 --- a/numpy/_core/src/npysort/mergesort.cpp +++ b/numpy/_core/src/npysort/mergesort.cpp @@ -336,24 +336,34 @@ string_amergesort_(type *v, npy_intp *tosort, npy_intp num, void *varr) */ NPY_NO_EXPORT int -npy_mergesort_with_context(void *start, npy_intp num, - PyArrayMethod_Context *context, NpyAuxData *auxdata) +npy_mergesort_with_context(PyArrayMethod_SortContext *context, void *start, npy_intp num, + NpyAuxData *auxdata) { - return handle_npysort_with_context(context, start, num, auxdata, - &npy_mergesort); + return npy_mergesort_impl(start, num, NULL, context); } NPY_NO_EXPORT int -npy_amergesort_with_context(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, +npy_amergesort_with_context(PyArrayMethod_SortContext *context, void *vv, npy_intp *tosort, npy_intp num, NpyAuxData *auxdata) { - return handle_npyasort_with_context(context, vv, tosort, num, auxdata, - &npy_amergesort); + return npy_amergesort_impl(vv, tosort, num, NULL, context); +} + +NPY_NO_EXPORT int +npy_mergesort(void *start, npy_intp num, void *varr) +{ + return npy_mergesort_impl(start, num, varr, NULL); +} + +NPY_NO_EXPORT int +npy_amergesort(void *vv, npy_intp *tosort, npy_intp num, void *varr) +{ + return npy_amergesort_impl(vv, tosort, num, varr, NULL); } static void npy_mergesort0(char *pl, char *pr, char *pw, char *vp, npy_intp elsize, - PyArray_CompareFunc *cmp, PyArrayObject *arr) + PyArray_CompareFunc *cmp, void *arr) { char *pi, *pj, *pk, *pm; @@ -397,11 +407,12 @@ npy_mergesort0(char *pl, char *pr, char *pw, char *vp, npy_intp 
elsize, } NPY_NO_EXPORT int -npy_mergesort(void *start, npy_intp num, void *varr) +npy_mergesort_impl(void *start, npy_intp num, void *varr, PyArrayMethod_SortContext *context) { - PyArrayObject *arr = (PyArrayObject *)varr; - npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + void *arr; + npy_intp elsize; + PyArray_CompareFunc *cmp; + fill_sort_data_from_arr_or_context(varr, context, &arr, &elsize, &cmp); char *pl = (char *)start; char *pr = pl + num * elsize; char *pw; @@ -429,7 +440,7 @@ npy_mergesort(void *start, npy_intp num, void *varr) static void npy_amergesort0(npy_intp *pl, npy_intp *pr, char *v, npy_intp *pw, - npy_intp elsize, PyArray_CompareFunc *cmp, PyArrayObject *arr) + npy_intp elsize, PyArray_CompareFunc *cmp, void *arr) { char *vp; npy_intp vi, *pi, *pj, *pk, *pm; @@ -473,11 +484,12 @@ npy_amergesort0(npy_intp *pl, npy_intp *pr, char *v, npy_intp *pw, } NPY_NO_EXPORT int -npy_amergesort(void *v, npy_intp *tosort, npy_intp num, void *varr) +npy_amergesort_impl(void *v, npy_intp *tosort, npy_intp num, void *varr, PyArrayMethod_SortContext *context) { - PyArrayObject *arr = (PyArrayObject *)varr; - npy_intp elsize = PyArray_ITEMSIZE(arr); - PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; + void *arr; + npy_intp elsize; + PyArray_CompareFunc *cmp; + fill_sort_data_from_arr_or_context(varr, context, &arr, &elsize, &cmp); npy_intp *pl, *pr, *pw; /* Items that have zero size don't make sense to sort */ diff --git a/numpy/_core/src/npysort/npysort_common.h b/numpy/_core/src/npysort/npysort_common.h index cfaefa3c449d..9679591771dc 100644 --- a/numpy/_core/src/npysort/npysort_common.h +++ b/numpy/_core/src/npysort/npysort_common.h @@ -18,36 +18,68 @@ extern "C" { */ static inline int -handle_npysort_with_context(PyArrayMethod_Context *context, void *start, npy_intp num, - NpyAuxData *auxdata, PyArray_SortFunc *sort) +compare_from_context(const void 
*a, const void *b, void *context) { - PyArray_Descr *descr = context->descriptors[0]; - return sort(start, num, descr); -} + PyArrayMethod_SortContext *sort_context = (PyArrayMethod_SortContext *)context; + PyArray_SortCompareFunc *cmp = sort_context->compare; + int nan_position = sort_context->nan_position; -static inline int -handle_npyasort_with_context(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, - npy_intp num, NpyAuxData *auxdata, PyArray_ArgSortFunc *asort) -{ - PyArray_Descr *descr = context->descriptors[0]; - return asort(vv, tosort, num, descr); + NPY_COMPARE_RESULT result = cmp(a, b, sort_context); + + if (result == NPY_LESS) { + return -1; + } + else if (result == NPY_GREATER) { + return 1; + } + else if (result == NPY_EQUAL) { + return 0; + } + else { + if (nan_position == NPY_SORT_NAN_FIRST) { + if (result == NPY_UNORDERED_LEFT) { + return -1; + } + else if (result == NPY_UNORDERED_RIGHT) { + return 1; + } + else if (result == NPY_UNORDERED_BOTH) { + return 0; + } + } + else if (nan_position == NPY_SORT_NAN_LAST) { + if (result == NPY_UNORDERED_LEFT) { + return 1; + } + else if (result == NPY_UNORDERED_RIGHT) { + return -1; + } + else if (result == NPY_UNORDERED_BOTH) { + return 0; + } + } + } + + /* This should never happen, but just in case */ + PyErr_SetString(PyExc_RuntimeError, "Unexpected comparison result in sort function"); + return -1; /* Indicate an error */ } static inline void -fill_sort_data_from_arr_or_descr(void *arr_or_descr, void **out_arr_or_descr, - npy_intp *elsize, PyArray_CompareFunc **out_cmp) -{ - if (PyArray_Check(arr_or_descr)) { - PyArrayObject *arr = (PyArrayObject *)arr_or_descr; - *out_arr_or_descr = arr; - *elsize = PyArray_ITEMSIZE(arr); - *out_cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; +fill_sort_data_from_arr_or_context(void *array, PyArrayMethod_SortContext *context, + void **out_arr_or_context, npy_intp *elsize, + PyArray_CompareFunc **out_cmp) +{ + if (context != NULL) { + 
*out_arr_or_context = (void *)context; + *elsize = PyDataType_ELSIZE(context->descriptor); + *out_cmp = &compare_from_context; } else { - PyArray_Descr *descr = (PyArray_Descr *)arr_or_descr; - *out_arr_or_descr = descr; - *elsize = PyDataType_ELSIZE(descr); - *out_cmp = (PyArray_CompareFunc *)PyArray_GetSortCompareFunction(descr); + PyArrayObject *arr = (PyArrayObject *)array; + *out_arr_or_context = (void *)arr; + *elsize = PyArray_ITEMSIZE(arr); + *out_cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; } } diff --git a/numpy/_core/src/npysort/quicksort.cpp b/numpy/_core/src/npysort/quicksort.cpp index d6354ec6bd7d..259b36583189 100644 --- a/numpy/_core/src/npysort/quicksort.cpp +++ b/numpy/_core/src/npysort/quicksort.cpp @@ -506,28 +506,38 @@ string_aquicksort_(type *vv, npy_intp *tosort, npy_intp num, void *varr) */ NPY_NO_EXPORT int -npy_quicksort_with_context(PyArrayMethod_Context *context, void *start, npy_intp num, +npy_quicksort_with_context(PyArrayMethod_SortContext *context, void *start, npy_intp num, NpyAuxData *auxdata) { - return handle_npysort_with_context(context, start, num, auxdata, - &npy_quicksort); + return npy_quicksort_impl(start, num, NULL, context); } NPY_NO_EXPORT int -npy_aquicksort_with_context(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, +npy_aquicksort_with_context(PyArrayMethod_SortContext *context, void *vv, npy_intp *tosort, npy_intp num, NpyAuxData *auxdata) { - return handle_npyasort_with_context(context, vv, tosort, num, auxdata, - &npy_aquicksort); + return npy_aquicksort_impl(vv, tosort, num, NULL, context); } NPY_NO_EXPORT int npy_quicksort(void *start, npy_intp num, void *varr) +{ + return npy_quicksort_impl(start, num, varr, NULL); +} + +NPY_NO_EXPORT int +npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) +{ + return npy_aquicksort_impl(vv, tosort, num, varr, NULL); +} + +NPY_NO_EXPORT int +npy_quicksort_impl(void *start, npy_intp num, void *varr, PyArrayMethod_SortContext *context) 
{ void *arr; npy_intp elsize; PyArray_CompareFunc *cmp; - fill_sort_data_from_arr_or_descr(varr, &arr, &elsize, &cmp); + fill_sort_data_from_arr_or_context(varr, context, &arr, &elsize, &cmp); char *vp; char *pl = (char *)start; @@ -629,13 +639,13 @@ npy_quicksort(void *start, npy_intp num, void *varr) } NPY_NO_EXPORT int -npy_aquicksort(void *vv, npy_intp *tosort, npy_intp num, void *varr) +npy_aquicksort_impl(void *vv, npy_intp *tosort, npy_intp num, void *varr, PyArrayMethod_SortContext *context) { char *v = (char *)vv; void *arr; npy_intp elsize; PyArray_CompareFunc *cmp; - fill_sort_data_from_arr_or_descr(varr, &arr, &elsize, &cmp); + fill_sort_data_from_arr_or_context(varr, context, &arr, &elsize, &cmp); char *vp; npy_intp *pl = tosort; diff --git a/numpy/_core/src/npysort/timsort.cpp b/numpy/_core/src/npysort/timsort.cpp index 47b1ba94e7c9..4b1da96e2146 100644 --- a/numpy/_core/src/npysort/timsort.cpp +++ b/numpy/_core/src/npysort/timsort.cpp @@ -1852,19 +1852,29 @@ string_atimsort_(void *start, npy_intp *tosort, npy_intp num, void *varr) */ NPY_NO_EXPORT int -npy_timsort_with_context(PyArrayMethod_Context *context, void *start, npy_intp num, +npy_timsort_with_context(PyArrayMethod_SortContext *context, void *start, npy_intp num, NpyAuxData *auxdata) { - return handle_npysort_with_context(context, start, num, auxdata, - &npy_timsort); + return npy_timsort_impl(start, num, NULL, context); } NPY_NO_EXPORT int -npy_atimsort_with_context(PyArrayMethod_Context *context, void *vv, npy_intp *tosort, +npy_atimsort_with_context(PyArrayMethod_SortContext *context, void *vv, npy_intp *tosort, npy_intp num, NpyAuxData *auxdata) { - return handle_npyasort_with_context(context, vv, tosort, num, auxdata, - &npy_atimsort); + return npy_atimsort_impl(vv, tosort, num, NULL, context); +} + +NPY_NO_EXPORT int +npy_timsort(void *start, npy_intp num, void *varr) +{ + return npy_timsort_impl(start, num, varr, NULL); +} + +NPY_NO_EXPORT int +npy_atimsort(void *start, npy_intp 
*tosort, npy_intp num, void *varr) +{ + return npy_atimsort_impl(start, tosort, num, varr, NULL); } typedef struct { @@ -2262,12 +2272,12 @@ npy_force_collapse(char *arr, run *stack, npy_intp *stack_ptr, } NPY_NO_EXPORT int -npy_timsort(void *start, npy_intp num, void *varr) +npy_timsort_impl(void *start, npy_intp num, void *varr, PyArrayMethod_SortContext *context) { void *arr; npy_intp len; PyArray_CompareFunc *cmp; - fill_sort_data_from_arr_or_descr(varr, &arr, &len, &cmp); + fill_sort_data_from_arr_or_context(varr, context, &arr, &len, &cmp); int ret; npy_intp l, n, stack_ptr, minrun; @@ -2700,12 +2710,12 @@ npy_aforce_collapse(char *arr, npy_intp *tosort, run *stack, } NPY_NO_EXPORT int -npy_atimsort(void *start, npy_intp *tosort, npy_intp num, void *varr) +npy_atimsort_impl(void *start, npy_intp *tosort, npy_intp num, void *varr, PyArrayMethod_SortContext *context) { void *arr; npy_intp len; PyArray_CompareFunc *cmp; - fill_sort_data_from_arr_or_descr(varr, &arr, &len, &cmp); + fill_sort_data_from_arr_or_context(varr, context, &arr, &len, &cmp); int ret; npy_intp l, n, stack_ptr, minrun; From cd38b152ffa92b01fb192211d463c709f1f6ada0 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sat, 31 May 2025 22:23:31 -0400 Subject: [PATCH 32/54] MAINT: Remove array from new sort context --- numpy/_core/include/numpy/dtype_api.h | 1 - 1 file changed, 1 deletion(-) diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index b41b5b8afe48..a22bca0d57a4 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -504,7 +504,6 @@ typedef enum { struct PyArrayMethod_SortContext_tag { PyArray_Descr *descriptor; - PyArrayObject *array; /* NULL if using new-style context */ PyArray_SortCompareFunc *compare; int reversed; NPY_SORT_NAN_POSITION nan_position; From dcc465ae92da6988c3469522b317247e6d639747 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sat, 31 May 2025 22:28:34 -0400 Subject: [PATCH 33/54] 
DOC: Add `PyArrayMethod_SortContext` to docs --- .../reference/c-api/types-and-structures.rst | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/doc/source/reference/c-api/types-and-structures.rst b/doc/source/reference/c-api/types-and-structures.rst index 5561210657c8..325477f0e90b 100644 --- a/doc/source/reference/c-api/types-and-structures.rst +++ b/doc/source/reference/c-api/types-and-structures.rst @@ -792,6 +792,59 @@ PyArrayMethod_Context and PyArrayMethod_Spec An array of slots for the method. Slot IDs must be one of the values below. +.. _arraymethod-sort-context: + +PyArrayMethod_SortContext +------------------------- + +.. c:enum:: NPY_SORT_NAN_POSITION + + An enum used to indicate the position of NaN values in sorting. + + .. code-block:: c + + typedef enum { + NPY_SORT_NAN_FIRST, + NPY_SORT_NAN_LAST, + } NPY_SORT_NAN_POSITION + + .. c:member:: NPY_SORT_NAN_FIRST + + Indicates that NaN values should be sorted first. + + .. c:member:: NPY_SORT_NAN_LAST + + Indicates that NaN values should be sorted last. + +.. c:type:: PyArrayMethod_SortContext + + A struct used to provide context for sorting methods. + + .. code-block:: c + + typedef struct { + PyArray_Descr *descriptor; + PyArray_SortCompareFunc *compare; + int reversed; + NPY_SORT_NAN_POSITION nan_position; + } PyArrayMethod_SortContext + + .. c:member:: PyArray_Descr *descriptor + + The descriptor for the data type being sorted. + + .. c:member:: PyArray_SortCompareFunc *compare + + A pointer to the comparison function used for sorting. + + .. c:member:: int reversed + + A flag indicating whether the sort is reversed. + + .. c:member:: NPY_SORT_NAN_POSITION nan_position + + The position of NaN values in the sort order. + .. 
_dtypemeta: PyArray_DTypeMeta and PyArrayDTypeMeta_Spec From 30b6c9ce83481d818cbbfe666c4d5682a4679941 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sun, 1 Jun 2025 00:00:44 -0400 Subject: [PATCH 34/54] DOC: Move sort nan position enum to array.rst --- doc/source/reference/c-api/array.rst | 11 +++++++++++ .../reference/c-api/types-and-structures.rst | 19 ------------------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index ffdf4474a11f..ce9cb4257869 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -4384,6 +4384,17 @@ Enumerated Types :c:data:`NPY_STABLESORT` are aliased to each other and may refer to one of several stable sorting algorithms depending on the data type. +.. c:enum:: NPY_SORT_NAN_POSITION + + An enum used to indicate the position of NaN values in sorting. + + .. c:enumerator:: NPY_SORT_NAN_FIRST + + Indicates that NaN values should be sorted first. + + .. c:enumerator:: NPY_SORT_NAN_LAST + + Indicates that NaN values should be sorted last. .. c:enum:: NPY_SCALARKIND diff --git a/doc/source/reference/c-api/types-and-structures.rst b/doc/source/reference/c-api/types-and-structures.rst index 325477f0e90b..9d8672d4f79c 100644 --- a/doc/source/reference/c-api/types-and-structures.rst +++ b/doc/source/reference/c-api/types-and-structures.rst @@ -797,25 +797,6 @@ PyArrayMethod_Context and PyArrayMethod_Spec PyArrayMethod_SortContext ------------------------- -.. c:enum:: NPY_SORT_NAN_POSITION - - An enum used to indicate the position of NaN values in sorting. - - .. code-block:: c - - typedef enum { - NPY_SORT_NAN_FIRST, - NPY_SORT_NAN_LAST, - } NPY_SORT_NAN_POSITION - - .. c:member:: NPY_SORT_NAN_FIRST - - Indicates that NaN values should be sorted first. - - .. c:member:: NPY_SORT_NAN_LAST - - Indicates that NaN values should be sorted last. - .. 
c:type:: PyArrayMethod_SortContext A struct used to provide context for sorting methods. From 60d7f80f5744662402a0bdc21d574ed5f13c81c5 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sun, 1 Jun 2025 00:03:03 -0400 Subject: [PATCH 35/54] DOC: Add accidentally removed documentation for finalize_descr slot --- doc/source/reference/c-api/array.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index ce9cb4257869..c772a1071c2b 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -3535,6 +3535,14 @@ member of ``PyArrayDTypeMeta_Spec`` struct. .. c:macro:: NPY_DT_finalize_descr +.. c:type:: PyArray_Descr *(PyArrayDTypeMeta_FinalizeDescriptor)( \ + PyArray_Descr *dtype) + + If defined, a function that is called to "finalize" a descriptor + instance after an array is created. One use of this function is to + force newly created arrays to have a newly created descriptor + instance, no matter what input descriptor is provided by a user. + .. c:macro:: NPY_DT_get_sort_function .. 
c:type:: int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, \ From 38e29e0797115810eadf0281a785302f3ee0688e Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sun, 1 Jun 2025 14:40:52 -0400 Subject: [PATCH 36/54] BUG: Fix context initiation in _new_(arg)sortlike --- numpy/_core/src/multiarray/item_selection.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index 7697245f36c8..a7f182e30a9f 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -1216,7 +1216,11 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFuncWithContext *sort, NPY_cast_info to_cast_info = {.func = NULL}; NPY_cast_info from_cast_info = {.func = NULL}; - PyArrayMethod_SortContext context = {}; + PyArrayMethod_SortContext context = { + .descriptor = descr, + .reversed = 0, + .nan_position = NPY_SORT_NAN_FIRST + }; NPY_BEGIN_THREADS_DEF; @@ -1298,9 +1302,6 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFuncWithContext *sort, if (part == NULL) { if (sort != NULL) { context.compare = PyArray_GetSortCompareFunction(descr); - context.descriptor = descr; - context.reversed = 0; - context.nan_position = NPY_SORT_NAN_FIRST; ret = sort(&context, bufptr, N, auxdata); } @@ -1402,7 +1403,11 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFuncWithContext *ar NPY_ARRAYMETHOD_FLAGS transfer_flags; NPY_cast_info cast_info = {.func = NULL}; - PyArrayMethod_SortContext context = {}; + PyArrayMethod_SortContext context = { + .descriptor = descr, + .reversed = 0, + .nan_position = NPY_SORT_NAN_FIRST, + }; NPY_BEGIN_THREADS_DEF; @@ -1501,9 +1506,6 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFuncWithContext *ar if (argpart == NULL) { if (argsort != NULL) { context.compare = PyArray_GetSortCompareFunction(descr); - context.descriptor = descr; - context.reversed = 0; - 
context.nan_position = NPY_SORT_NAN_FIRST; ret = argsort(&context, valptr, idxptr, N, auxdata); } From bbbd27477e4cdfc1fa1e35ebc7fad1c30556f5c2 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sun, 1 Jun 2025 14:42:56 -0400 Subject: [PATCH 37/54] ENH: Add descending flag to SortContext --- numpy/_core/include/numpy/dtype_api.h | 1 + numpy/_core/src/multiarray/item_selection.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index a22bca0d57a4..d2deef60c3f9 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -505,6 +505,7 @@ typedef enum { struct PyArrayMethod_SortContext_tag { PyArray_Descr *descriptor; PyArray_SortCompareFunc *compare; + int descending; int reversed; NPY_SORT_NAN_POSITION nan_position; }; diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index a7f182e30a9f..589a426833c5 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -1219,6 +1219,7 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFuncWithContext *sort, PyArrayMethod_SortContext context = { .descriptor = descr, .reversed = 0, + .descending = 0, .nan_position = NPY_SORT_NAN_FIRST }; @@ -1406,6 +1407,7 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFuncWithContext *ar PyArrayMethod_SortContext context = { .descriptor = descr, .reversed = 0, + .descending = 0, .nan_position = NPY_SORT_NAN_FIRST, }; From 178445daa1abb1ac85cf1a524b7cb0d1b3ce957d Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Tue, 1 Jul 2025 09:46:40 -0400 Subject: [PATCH 38/54] MAINT: Pass descr in sort compare func and remove compare slot --- numpy/_core/include/numpy/dtype_api.h | 7 ++----- numpy/_core/src/multiarray/dtypemeta.c | 10 ++-------- numpy/_core/src/multiarray/dtypemeta.h | 9 +-------- numpy/_core/src/multiarray/stringdtype/dtype.c | 8 +++++--- 
numpy/_core/src/npysort/npysort_common.h | 2 +- 5 files changed, 11 insertions(+), 25 deletions(-) diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index d2deef60c3f9..3afcf591b8df 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -369,8 +369,7 @@ typedef int (PyArrayMethod_PromoterFunction)(PyObject *ufunc, #define NPY_DT_finalize_descr 11 #define NPY_DT_get_sort_function 12 #define NPY_DT_get_argsort_function 13 -#define NPY_DT_compare 14 -#define NPY_DT_sort_compare 15 +#define NPY_DT_sort_compare 14 // These PyArray_ArrFunc slots will be deprecated and replaced eventually // getitem and setitem can be defined as a performance optimization; @@ -492,10 +491,8 @@ typedef enum { typedef struct PyArrayMethod_SortContext_tag PyArrayMethod_SortContext; -typedef NPY_COMPARE_RESULT (PyArray_CompareFuncWithContext)( - const void *a, const void *b, PyArrayMethod_SortContext *context); typedef NPY_COMPARE_RESULT (PyArray_SortCompareFunc)( - const void *a, const void *b, PyArrayMethod_SortContext *context); + const void *a, const void *b, PyArray_Descr *descr); typedef enum { NPY_SORT_NAN_FIRST = 0, diff --git a/numpy/_core/src/multiarray/dtypemeta.c b/numpy/_core/src/multiarray/dtypemeta.c index 17bcde03fa68..0f01ad8c8222 100644 --- a/numpy/_core/src/multiarray/dtypemeta.c +++ b/numpy/_core/src/multiarray/dtypemeta.c @@ -192,12 +192,11 @@ dtypemeta_initialize_struct_from_spec( NPY_DT_SLOTS(DType)->common_instance = NULL; NPY_DT_SLOTS(DType)->setitem = NULL; NPY_DT_SLOTS(DType)->getitem = NULL; + NPY_DT_SLOTS(DType)->get_clear_loop = NULL; + NPY_DT_SLOTS(DType)->get_fill_zero_loop = NULL; NPY_DT_SLOTS(DType)->get_sort_function = NULL; NPY_DT_SLOTS(DType)->get_argsort_function = NULL; - NPY_DT_SLOTS(DType)->compare = NULL; NPY_DT_SLOTS(DType)->sort_compare = NULL; - NPY_DT_SLOTS(DType)->get_clear_loop = NULL; - NPY_DT_SLOTS(DType)->get_fill_zero_loop = NULL; 
NPY_DT_SLOTS(DType)->finalize_descr = NULL; NPY_DT_SLOTS(DType)->f = default_funcs; @@ -1243,11 +1242,6 @@ dtypemeta_wrap_legacy_descriptor( } } - /* Auto-fill compare slot with sort-compare as default */ - if (dt_slots->compare == NULL && dt_slots->sort_compare != NULL) { - dt_slots->compare = dt_slots->sort_compare; - } - if (_PyArray_MapPyTypeToDType(dtype_class, descr->typeobj, PyTypeNum_ISUSERDEF(dtype_class->type_num)) < 0) { Py_DECREF(dtype_class); diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index 72fc8c2bd9b8..20d6292cb288 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ b/numpy/_core/src/multiarray/dtypemeta.h @@ -71,7 +71,6 @@ typedef struct { /* DType sorting methods. */ PyArrayDTypeMeta_GetSortFunction *get_sort_function; PyArrayDTypeMeta_GetArgSortFunction *get_argsort_function; - PyArray_CompareFuncWithContext *compare; PyArray_SortCompareFunc *sort_compare; /* @@ -96,7 +95,7 @@ typedef struct { // This must be updated if new slots before within_dtype_castingimpl // are added -#define NPY_NUM_DTYPE_SLOTS 15 +#define NPY_NUM_DTYPE_SLOTS 14 #define NPY_NUM_DTYPE_PYARRAY_ARRFUNCS_SLOTS 22 #define NPY_DT_MAX_ARRFUNCS_SLOT \ NPY_NUM_DTYPE_PYARRAY_ARRFUNCS_SLOTS + _NPY_DT_ARRFUNCS_OFFSET @@ -326,12 +325,6 @@ PyArray_GetArgSortFunction(PyArray_Descr *descr, return 0; } -static inline PyArray_CompareFuncWithContext * -PyArray_GetCompareFunction(PyArray_Descr *descr) -{ - return NPY_DT_SLOTS(NPY_DTYPE(descr))->compare; -} - static inline PyArray_SortCompareFunc * PyArray_GetSortCompareFunction(PyArray_Descr *descr) { diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.c b/numpy/_core/src/multiarray/stringdtype/dtype.c index 5d540e8fd6fb..672d030978ac 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.c +++ b/numpy/_core/src/multiarray/stringdtype/dtype.c @@ -518,9 +518,9 @@ _compare(void *a, void *b, PyArray_StringDTypeObject *descr_a, } static NPY_COMPARE_RESULT 
-stringdtype_sort_compare(void *a, void *b, PyArrayMethod_SortContext *context) { - PyArray_StringDTypeObject *descr = (PyArray_StringDTypeObject *)context->descriptor; - int dist = _compare(a, b, descr, descr); +stringdtype_sort_compare(void *a, void *b, PyArray_Descr *descr) { + PyArray_StringDTypeObject *string_descr = (PyArray_StringDTypeObject *)descr; + int dist = _compare(a, b, string_descr, string_descr); if (dist < 0) { return NPY_LESS; @@ -783,6 +783,8 @@ static PyType_Slot PyArray_StringDType_Slots[] = { {NPY_DT_setitem, &stringdtype_setitem}, {NPY_DT_getitem, &stringdtype_getitem}, {NPY_DT_sort_compare, &stringdtype_sort_compare}, + {NPY_DT_get_sort_function, &stringdtype_get_sort_function}, + {NPY_DT_get_argsort_function, &stringdtype_get_argsort_function}, {NPY_DT_ensure_canonical, &stringdtype_ensure_canonical}, {NPY_DT_PyArray_ArrFuncs_nonzero, &nonzero}, {NPY_DT_PyArray_ArrFuncs_compare, &compare}, diff --git a/numpy/_core/src/npysort/npysort_common.h b/numpy/_core/src/npysort/npysort_common.h index 9679591771dc..84045b25add1 100644 --- a/numpy/_core/src/npysort/npysort_common.h +++ b/numpy/_core/src/npysort/npysort_common.h @@ -24,7 +24,7 @@ compare_from_context(const void *a, const void *b, void *context) PyArray_SortCompareFunc *cmp = sort_context->compare; int nan_position = sort_context->nan_position; - NPY_COMPARE_RESULT result = cmp(a, b, sort_context); + NPY_COMPARE_RESULT result = cmp(a, b, sort_context->descriptor); if (result == NPY_LESS) { return -1; From 29cd764b5663f98eaf73c7fab5ca68c3bda61670 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Tue, 1 Jul 2025 09:48:38 -0400 Subject: [PATCH 39/54] MAINT: Remove name for only one parameter in typedef --- numpy/_core/include/numpy/dtype_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index 3afcf591b8df..7f2e0c8da1a3 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ 
b/numpy/_core/include/numpy/dtype_api.h @@ -516,9 +516,9 @@ typedef int (PyArray_ArgSortFuncWithContext)(PyArrayMethod_SortContext *, typedef int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, npy_intp, int, PyArray_SortFuncWithContext **, NpyAuxData **, - NPY_ARRAYMETHOD_FLAGS *out_flags); + NPY_ARRAYMETHOD_FLAGS *); typedef int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, npy_intp, int, PyArray_ArgSortFuncWithContext **, NpyAuxData **, - NPY_ARRAYMETHOD_FLAGS *out_flags); + NPY_ARRAYMETHOD_FLAGS *); #endif /* NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_ */ From 852783f9a45ba34742621245fa0a42fbfdbfc569 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Tue, 1 Jul 2025 09:59:48 -0400 Subject: [PATCH 40/54] ENH: Add NumPy 2.4 API version and use for new sorting slots --- numpy/_core/include/numpy/dtype_api.h | 3 +++ numpy/_core/include/numpy/numpyconfig.h | 1 + numpy/_core/src/multiarray/dtypemeta.h | 4 ++++ numpy/_core/src/multiarray/stringdtype/dtype.c | 4 ++++ 4 files changed, 12 insertions(+) diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index 7f2e0c8da1a3..234dc07b2715 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -367,9 +367,12 @@ typedef int (PyArrayMethod_PromoterFunction)(PyObject *ufunc, #define NPY_DT_get_clear_loop 9 #define NPY_DT_get_fill_zero_loop 10 #define NPY_DT_finalize_descr 11 + +#if NPY_API_VERSION >= NPY_2_4_API_VERSION #define NPY_DT_get_sort_function 12 #define NPY_DT_get_argsort_function 13 #define NPY_DT_sort_compare 14 +#endif // These PyArray_ArrFunc slots will be deprecated and replaced eventually // getitem and setitem can be defined as a performance optimization; diff --git a/numpy/_core/include/numpy/numpyconfig.h b/numpy/_core/include/numpy/numpyconfig.h index 52d7e2b5d7d7..c110baf9d379 100644 --- a/numpy/_core/include/numpy/numpyconfig.h +++ b/numpy/_core/include/numpy/numpyconfig.h @@ -84,6 +84,7 @@ #define 
NPY_2_1_API_VERSION 0x00000013 #define NPY_2_2_API_VERSION 0x00000013 #define NPY_2_3_API_VERSION 0x00000014 +#define NPY_2_4_API_VERSION 0x00000015 /* diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index 20d6292cb288..c43b86168f17 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ b/numpy/_core/src/multiarray/dtypemeta.h @@ -95,7 +95,11 @@ typedef struct { // This must be updated if new slots before within_dtype_castingimpl // are added +#if NPY_API_VERSION >= NPY_2_4_API_VERSION #define NPY_NUM_DTYPE_SLOTS 14 +#else +#define NPY_NUM_DTYPE_SLOTS 11 +#endif #define NPY_NUM_DTYPE_PYARRAY_ARRFUNCS_SLOTS 22 #define NPY_DT_MAX_ARRFUNCS_SLOT \ NPY_NUM_DTYPE_PYARRAY_ARRFUNCS_SLOTS + _NPY_DT_ARRFUNCS_OFFSET diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.c b/numpy/_core/src/multiarray/stringdtype/dtype.c index 672d030978ac..630cdbbef574 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.c +++ b/numpy/_core/src/multiarray/stringdtype/dtype.c @@ -517,6 +517,7 @@ _compare(void *a, void *b, PyArray_StringDTypeObject *descr_a, return NpyString_cmp(&s_a, &s_b); } +#if NPY_API_VERSION >= NPY_2_4_API_VERSION static NPY_COMPARE_RESULT stringdtype_sort_compare(void *a, void *b, PyArray_Descr *descr) { PyArray_StringDTypeObject *string_descr = (PyArray_StringDTypeObject *)descr; @@ -641,6 +642,7 @@ stringdtype_get_argsort_function(PyArray_Descr *descr, return 0; } +#endif // NPY_API_VERSION >= NPY_2_4_API_VERSION // PyArray_ArgFunc // The max element is the one with the highest unicode code point. 
@@ -782,9 +784,11 @@ static PyType_Slot PyArray_StringDType_Slots[] = { &string_discover_descriptor_from_pyobject}, {NPY_DT_setitem, &stringdtype_setitem}, {NPY_DT_getitem, &stringdtype_getitem}, +#if NPY_API_VERSION >= NPY_2_4_API_VERSION {NPY_DT_sort_compare, &stringdtype_sort_compare}, {NPY_DT_get_sort_function, &stringdtype_get_sort_function}, {NPY_DT_get_argsort_function, &stringdtype_get_argsort_function}, +#endif {NPY_DT_ensure_canonical, &stringdtype_ensure_canonical}, {NPY_DT_PyArray_ArrFuncs_nonzero, &nonzero}, {NPY_DT_PyArray_ArrFuncs_compare, &compare}, From 5aaa15fa17e3e0bcfa8468af4aa0a45c4e0e7f5e Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Tue, 1 Jul 2025 10:05:46 -0400 Subject: [PATCH 41/54] ENH: Add error result to `NPY_COMPARE_RESULT` enum --- numpy/_core/include/numpy/dtype_api.h | 1 + 1 file changed, 1 insertion(+) diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index 234dc07b2715..b2de3d495954 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -490,6 +490,7 @@ typedef enum { NPY_UNORDERED_LEFT = 2, NPY_UNORDERED_RIGHT = 3, NPY_UNORDERED_BOTH = 4, + NPY_COMPARE_ERROR = 5, } NPY_COMPARE_RESULT; typedef struct PyArrayMethod_SortContext_tag PyArrayMethod_SortContext; From 28e8c3a77e149225487f89e64dde48989d1b8b83 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Wed, 2 Jul 2025 21:00:30 -0400 Subject: [PATCH 42/54] DOC: Fix incorrectly changed arrfunc docs for sort functions --- doc/source/reference/c-api/array.rst | 4 ++-- doc/source/reference/c-api/types-and-structures.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index c772a1071c2b..fbb700ed156c 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -3636,7 +3636,7 @@ DType API slots but for now we have exposed the legacy .. 
c:macro:: NPY_DT_PyArray_ArrFuncs_sort - An array of PyArray_SortFuncWithContext of length ``NPY_NSORTS``. If set, allows + An array of PyArray_SortFunc of length ``NPY_NSORTS``. If set, allows defining custom sorting implementations for each of the sorting algorithms numpy implements. If `NPY_DT_get_sort_function` is defined, it will be used instead. This slot may be deprecated in the @@ -3644,7 +3644,7 @@ DType API slots but for now we have exposed the legacy .. c:macro:: NPY_DT_PyArray_ArrFuncs_argsort - An array of PyArray_ArgSortFuncWithContext of length ``NPY_NSORTS``. If set, + An array of PyArray_ArgSortFunc of length ``NPY_NSORTS``. If set, allows defining custom argsorting implementations for each of the sorting algorithms numpy implements. If `NPY_DT_get_argsort_function` is defined, it will be used instead. This slot may be deprecated in diff --git a/doc/source/reference/c-api/types-and-structures.rst b/doc/source/reference/c-api/types-and-structures.rst index 9d8672d4f79c..f1985cda80be 100644 --- a/doc/source/reference/c-api/types-and-structures.rst +++ b/doc/source/reference/c-api/types-and-structures.rst @@ -494,8 +494,8 @@ PyArray_ArrFuncs PyArray_NonzeroFunc *nonzero; PyArray_FillFunc *fill; PyArray_FillWithScalarFunc *fillwithscalar; - PyArray_SortFuncWithContext *sort[NPY_NSORTS]; - PyArray_ArgSortFuncWithContext *argsort[NPY_NSORTS]; + PyArray_SortFunc *sort[NPY_NSORTS]; + PyArray_ArgSortFunc *argsort[NPY_NSORTS]; PyObject *castdict; PyArray_ScalarKindFunc *scalarkind; int **cancastscalarkindto; From 6e65f8db9cd117d1bd62df6acf54606f48b6f857 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Wed, 2 Jul 2025 21:07:38 -0400 Subject: [PATCH 43/54] MAINT: Clearer names for NPY_SORT_NAN_POSITION members --- doc/source/reference/c-api/array.rst | 8 ++++---- numpy/_core/include/numpy/dtype_api.h | 4 ++-- numpy/_core/src/multiarray/item_selection.c | 4 ++-- numpy/_core/src/npysort/npysort_common.h | 4 ++-- 4 files changed, 10 insertions(+), 10 
deletions(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index fbb700ed156c..b90350d0daa4 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -4396,13 +4396,13 @@ Enumerated Types An enum used to indicate the position of NaN values in sorting. - .. c:enumerator:: NPY_SORT_NAN_FIRST + .. c:enumerator:: NPY_SORT_NAN_TO_START - Indicates that NaN values should be sorted first. + Indicates that NaN values should be sorted to the start. - .. c:enumerator:: NPY_SORT_NAN_LAST + .. c:enumerator:: NPY_SORT_NAN_TO_END - Indicates that NaN values should be sorted last. + Indicates that NaN values should be sorted to the end. .. c:enum:: NPY_SCALARKIND diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index b2de3d495954..f360b02285a5 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -499,8 +499,8 @@ typedef NPY_COMPARE_RESULT (PyArray_SortCompareFunc)( const void *a, const void *b, PyArray_Descr *descr); typedef enum { - NPY_SORT_NAN_FIRST = 0, - NPY_SORT_NAN_LAST = 1, + NPY_SORT_NAN_TO_START = 0, + NPY_SORT_NAN_TO_END = 1, } NPY_SORT_NAN_POSITION; struct PyArrayMethod_SortContext_tag { diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index 589a426833c5..48df80cfc9df 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -1220,7 +1220,7 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFuncWithContext *sort, .descriptor = descr, .reversed = 0, .descending = 0, - .nan_position = NPY_SORT_NAN_FIRST + .nan_position = NPY_SORT_NAN_TO_END, }; NPY_BEGIN_THREADS_DEF; @@ -1408,7 +1408,7 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFuncWithContext *ar .descriptor = descr, .reversed = 0, .descending = 0, - .nan_position = NPY_SORT_NAN_FIRST, + .nan_position = 
NPY_SORT_NAN_TO_END, }; NPY_BEGIN_THREADS_DEF; diff --git a/numpy/_core/src/npysort/npysort_common.h b/numpy/_core/src/npysort/npysort_common.h index 84045b25add1..1b86cc2a4b3c 100644 --- a/numpy/_core/src/npysort/npysort_common.h +++ b/numpy/_core/src/npysort/npysort_common.h @@ -36,7 +36,7 @@ compare_from_context(const void *a, const void *b, void *context) return 0; } else { - if (nan_position == NPY_SORT_NAN_FIRST) { + if (nan_position == NPY_SORT_NAN_TO_END) { if (result == NPY_UNORDERED_LEFT) { return -1; } @@ -47,7 +47,7 @@ compare_from_context(const void *a, const void *b, void *context) return 0; } } - else if (nan_position == NPY_SORT_NAN_LAST) { + else if (nan_position == NPY_SORT_NAN_TO_START) { if (result == NPY_UNORDERED_LEFT) { return 1; } From 73bb5f889188d5219fcca784ad63ffedeb7f1362 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Wed, 2 Jul 2025 21:07:48 -0400 Subject: [PATCH 44/54] DOC: Remove unimplemented release note --- doc/release/upcoming_changes/28516.c_api.rst | 1 - 1 file changed, 1 deletion(-) delete mode 100644 doc/release/upcoming_changes/28516.c_api.rst diff --git a/doc/release/upcoming_changes/28516.c_api.rst b/doc/release/upcoming_changes/28516.c_api.rst deleted file mode 100644 index 95f677e4897c..000000000000 --- a/doc/release/upcoming_changes/28516.c_api.rst +++ /dev/null @@ -1 +0,0 @@ -* `PyArray_GetSortFunction`, `PyArray_GetArgSortFunction`, and `PyArray_GetSortCompareFunction` have been added to the C-API. These functions return the sorting, argsorting, and sort comparison functions if provided for a given dtype in new slots. 
\ No newline at end of file From 5eca653797b19da4ec93a2357ee40271ef1eb362 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Wed, 2 Jul 2025 21:13:01 -0400 Subject: [PATCH 45/54] DOC: Document new-style sorts are always descending and nan to end --- doc/source/reference/c-api/array.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index b90350d0daa4..cb779bfcba31 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -3550,7 +3550,9 @@ member of ``PyArrayDTypeMeta_Spec`` struct. NpyAuxData **out_auxdata, NPY_ARRAYMETHOD_FLAGS *out_flags) If defined, sets a custom sorting function for the DType for each of - the sort kinds numpy implements. Returns 0 on success. + the sort kinds numpy implements. Currently, sorts are always descending + and always use nulls to the end, but this must be checked in the + implementation. Returns 0 on success. .. c:macro:: NPY_DT_get_argsort_function @@ -3559,7 +3561,9 @@ member of ``PyArrayDTypeMeta_Spec`` struct. NpyAuxData **out_auxdata, NPY_ARRAYMETHOD_FLAGS *out_flags) If defined, sets a custom argsorting function for the DType for each of - the sort kinds numpy implements. Returns 0 on success. + the sort kinds numpy implements. Currently, sorts are always descending + and always use nulls to the end, but this must be checked in the + implementation. Returns 0 on success. .. 
c:macro:: NPY_DT_sort_compare From 9d3ae70a61ae7c88abe5eed8d2abd9d69aabf128 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Wed, 2 Jul 2025 21:22:29 -0400 Subject: [PATCH 46/54] MAINT: Remove descending from get_sort_function and rename in sort context --- doc/source/reference/c-api/array.rst | 8 ++++---- doc/source/reference/c-api/types-and-structures.rst | 6 +++--- numpy/_core/include/numpy/dtype_api.h | 7 +++---- numpy/_core/src/multiarray/dtypemeta.h | 8 ++++---- numpy/_core/src/multiarray/item_selection.c | 10 ++++------ numpy/_core/src/multiarray/stringdtype/dtype.c | 4 ++-- 6 files changed, 20 insertions(+), 23 deletions(-) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index cb779bfcba31..05412ae18270 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -3546,23 +3546,23 @@ member of ``PyArrayDTypeMeta_Spec`` struct. .. c:macro:: NPY_DT_get_sort_function .. c:type:: int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, \ - npy_intp sort_kind, int descending, PyArray_SortFuncWithContext **out_sort, \ + npy_intp sort_kind, PyArray_SortFuncWithContext **out_sort, \ NpyAuxData **out_auxdata, NPY_ARRAYMETHOD_FLAGS *out_flags) If defined, sets a custom sorting function for the DType for each of the sort kinds numpy implements. Currently, sorts are always descending - and always use nulls to the end, but this must be checked in the + and always use nulls to the end, and this must be checked in the implementation. Returns 0 on success. .. c:macro:: NPY_DT_get_argsort_function .. c:type:: int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, \ - npy_intp sort_kind, int descending, PyArray_ArgSortFuncWithContext **out_argsort, \ + npy_intp sort_kind, PyArray_ArgSortFuncWithContext **out_argsort, \ NpyAuxData **out_auxdata, NPY_ARRAYMETHOD_FLAGS *out_flags) If defined, sets a custom argsorting function for the DType for each of the sort kinds numpy implements. 
Currently, sorts are always descending - and always use nulls to the end, but this must be checked in the + and always use nulls to the end, and this must be checked in the implementation. Returns 0 on success. .. c:macro:: NPY_DT_sort_compare diff --git a/doc/source/reference/c-api/types-and-structures.rst b/doc/source/reference/c-api/types-and-structures.rst index f1985cda80be..6aa9bb04884b 100644 --- a/doc/source/reference/c-api/types-and-structures.rst +++ b/doc/source/reference/c-api/types-and-structures.rst @@ -806,7 +806,7 @@ PyArrayMethod_SortContext typedef struct { PyArray_Descr *descriptor; PyArray_SortCompareFunc *compare; - int reversed; + npy_bool descending; NPY_SORT_NAN_POSITION nan_position; } PyArrayMethod_SortContext @@ -818,9 +818,9 @@ PyArrayMethod_SortContext A pointer to the comparison function used for sorting. - .. c:member:: int reversed + .. c:member:: npy_bool descending - A flag indicating whether the sort is reversed. + A flag indicating whether the sort is descending. .. 
c:member:: NPY_SORT_NAN_POSITION nan_position diff --git a/numpy/_core/include/numpy/dtype_api.h b/numpy/_core/include/numpy/dtype_api.h index f360b02285a5..d9ecc7321289 100644 --- a/numpy/_core/include/numpy/dtype_api.h +++ b/numpy/_core/include/numpy/dtype_api.h @@ -506,8 +506,7 @@ typedef enum { struct PyArrayMethod_SortContext_tag { PyArray_Descr *descriptor; PyArray_SortCompareFunc *compare; - int descending; - int reversed; + npy_bool descending; NPY_SORT_NAN_POSITION nan_position; }; @@ -519,10 +518,10 @@ typedef int (PyArray_ArgSortFuncWithContext)(PyArrayMethod_SortContext *, NpyAuxData *); typedef int *(PyArrayDTypeMeta_GetSortFunction)(PyArray_Descr *, - npy_intp, int, PyArray_SortFuncWithContext **, NpyAuxData **, + npy_intp, PyArray_SortFuncWithContext **, NpyAuxData **, NPY_ARRAYMETHOD_FLAGS *); typedef int *(PyArrayDTypeMeta_GetArgSortFunction)(PyArray_Descr *, - npy_intp, int, PyArray_ArgSortFuncWithContext **, NpyAuxData **, + npy_intp, PyArray_ArgSortFuncWithContext **, NpyAuxData **, NPY_ARRAYMETHOD_FLAGS *); #endif /* NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_ */ diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index c43b86168f17..aded16d8e4a3 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ b/numpy/_core/src/multiarray/dtypemeta.h @@ -303,7 +303,7 @@ PyArray_SETITEM(PyArrayObject *arr, char *itemptr, PyObject *v) static inline int PyArray_GetSortFunction(PyArray_Descr *descr, - NPY_SORTKIND which, int descending, PyArray_SortFuncWithContext **out_sort, + NPY_SORTKIND which, PyArray_SortFuncWithContext **out_sort, NpyAuxData **out_auxdata, NPY_ARRAYMETHOD_FLAGS *out_flags) { if (NPY_DT_SLOTS(NPY_DTYPE(descr))->get_sort_function == NULL) { @@ -311,13 +311,13 @@ PyArray_GetSortFunction(PyArray_Descr *descr, } NPY_DT_SLOTS(NPY_DTYPE(descr))->get_sort_function( - descr, which, descending, out_sort, out_auxdata, out_flags); + descr, which, out_sort, out_auxdata, out_flags); return 0; } static inline int 
PyArray_GetArgSortFunction(PyArray_Descr *descr, - NPY_SORTKIND which, int descending, PyArray_ArgSortFuncWithContext **out_argsort, + NPY_SORTKIND which, PyArray_ArgSortFuncWithContext **out_argsort, NpyAuxData **out_auxdata, NPY_ARRAYMETHOD_FLAGS *out_flags) { if (NPY_DT_SLOTS(NPY_DTYPE(descr))->get_argsort_function == NULL) { @@ -325,7 +325,7 @@ PyArray_GetArgSortFunction(PyArray_Descr *descr, } NPY_DT_SLOTS(NPY_DTYPE(descr))->get_argsort_function( - descr, which, descending, out_argsort, out_auxdata, out_flags); + descr, which, out_argsort, out_auxdata, out_flags); return 0; } diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c index 48df80cfc9df..71539ec82bb6 100644 --- a/numpy/_core/src/multiarray/item_selection.c +++ b/numpy/_core/src/multiarray/item_selection.c @@ -1218,8 +1218,7 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFuncWithContext *sort, PyArrayMethod_SortContext context = { .descriptor = descr, - .reversed = 0, - .descending = 0, + .descending = NPY_FALSE, .nan_position = NPY_SORT_NAN_TO_END, }; @@ -1406,8 +1405,7 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFuncWithContext *ar PyArrayMethod_SortContext context = { .descriptor = descr, - .reversed = 0, - .descending = 0, + .descending = NPY_FALSE, .nan_position = NPY_SORT_NAN_TO_END, }; @@ -1605,7 +1603,7 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which) return -1; } - if (PyArray_GetSortFunction(PyArray_DESCR(op), which, 0, &sort, &auxdata, &flags) < 0) { + if (PyArray_GetSortFunction(PyArray_DESCR(op), which, &sort, &auxdata, &flags) < 0) { sort_with_array = PyDataType_GetArrFuncs(PyArray_DESCR(op))->sort[which]; } @@ -1779,7 +1777,7 @@ PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which) NpyAuxData *auxdata = NULL; NPY_ARRAYMETHOD_FLAGS flags = 0; - if (PyArray_GetArgSortFunction(PyArray_DESCR(op), which, 0, &argsort, &auxdata, &flags) < 0) { + if 
(PyArray_GetArgSortFunction(PyArray_DESCR(op), which, &argsort, &auxdata, &flags) < 0) { argsort_with_array = PyDataType_GetArrFuncs(PyArray_DESCR(op))->argsort[which]; } diff --git a/numpy/_core/src/multiarray/stringdtype/dtype.c b/numpy/_core/src/multiarray/stringdtype/dtype.c index 630cdbbef574..cc57df4cbd97 100644 --- a/numpy/_core/src/multiarray/stringdtype/dtype.c +++ b/numpy/_core/src/multiarray/stringdtype/dtype.c @@ -569,7 +569,7 @@ _stringdtype_timsort(PyArrayMethod_SortContext *context, void *start, npy_intp n int stringdtype_get_sort_function(PyArray_Descr *descr, - NPY_SORTKIND sort_kind, int descending, PyArray_SortFuncWithContext **out_sort, + NPY_SORTKIND sort_kind, PyArray_SortFuncWithContext **out_sort, NpyAuxData **NPY_UNUSED(out_auxdata), NPY_ARRAYMETHOD_FLAGS *out_flags) { switch (sort_kind) { @@ -623,7 +623,7 @@ _stringdtype_atimsort(PyArrayMethod_SortContext *context, void *vv, npy_intp *to int stringdtype_get_argsort_function(PyArray_Descr *descr, - NPY_SORTKIND sort_kind, int descending, PyArray_ArgSortFuncWithContext **out_argsort, + NPY_SORTKIND sort_kind, PyArray_ArgSortFuncWithContext **out_argsort, NpyAuxData **NPY_UNUSED(out_auxdata), NPY_ARRAYMETHOD_FLAGS *out_flags) { switch (sort_kind) { From f46a49817b6052a1071fe77b83ad7b056e61d6fb Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Wed, 2 Jul 2025 21:24:05 -0400 Subject: [PATCH 47/54] DOC: Add version added directives to new sorting slots --- doc/source/reference/c-api/array.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index 05412ae18270..0107b81a5190 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -3549,6 +3549,8 @@ member of ``PyArrayDTypeMeta_Spec`` struct. npy_intp sort_kind, PyArray_SortFuncWithContext **out_sort, \ NpyAuxData **out_auxdata, NPY_ARRAYMETHOD_FLAGS *out_flags) + .. 
versionadded:: 2.4 + If defined, sets a custom sorting function for the DType for each of the sort kinds numpy implements. Currently, sorts are always descending and always use nulls to the end, and this must be checked in the @@ -3560,6 +3562,8 @@ member of ``PyArrayDTypeMeta_Spec`` struct. npy_intp sort_kind, PyArray_ArgSortFuncWithContext **out_argsort, \ NpyAuxData **out_auxdata, NPY_ARRAYMETHOD_FLAGS *out_flags) + .. versionadded:: 2.4 + If defined, sets a custom argsorting function for the DType for each of the sort kinds numpy implements. Currently, sorts are always descending and always use nulls to the end, and this must be checked in the @@ -3567,6 +3571,8 @@ member of ``PyArrayDTypeMeta_Spec`` struct. .. c:macro:: NPY_DT_sort_compare + .. versionadded:: 2.4 + If defined, sets a custom comparison function for the DType for use in sorting, which will replace `NPY_DT_PyArray_ArrFuncs_compare`. Implements ``PyArray_CompareFunc``. From b4715620da88e3e3c7f0dc52631b66f6876f3bb3 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Wed, 2 Jul 2025 21:29:22 -0400 Subject: [PATCH 48/54] BUG: Check return value of slots in GetSortFunction and GetArgSortFunction --- numpy/_core/src/multiarray/dtypemeta.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/numpy/_core/src/multiarray/dtypemeta.h b/numpy/_core/src/multiarray/dtypemeta.h index aded16d8e4a3..ecc87bb92124 100644 --- a/numpy/_core/src/multiarray/dtypemeta.h +++ b/numpy/_core/src/multiarray/dtypemeta.h @@ -310,8 +310,10 @@ PyArray_GetSortFunction(PyArray_Descr *descr, return -1; } - NPY_DT_SLOTS(NPY_DTYPE(descr))->get_sort_function( - descr, which, out_sort, out_auxdata, out_flags); + if (NPY_DT_SLOTS(NPY_DTYPE(descr))->get_sort_function( + descr, which, out_sort, out_auxdata, out_flags) == NULL) { + return -1; + } return 0; } @@ -324,8 +326,10 @@ PyArray_GetArgSortFunction(PyArray_Descr *descr, return -1; } - NPY_DT_SLOTS(NPY_DTYPE(descr))->get_argsort_function( - descr, which, 
out_argsort, out_auxdata, out_flags); + if (NPY_DT_SLOTS(NPY_DTYPE(descr))->get_argsort_function( + descr, which, out_argsort, out_auxdata, out_flags) == NULL) { + return -1; + } return 0; } From d022a2da34b4b9c135e777216ccfac395c35a292 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Wed, 2 Jul 2025 21:33:28 -0400 Subject: [PATCH 49/54] DOC: Clarify that comparison function pointer can be NULL in sorting context --- doc/source/reference/c-api/types-and-structures.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/reference/c-api/types-and-structures.rst b/doc/source/reference/c-api/types-and-structures.rst index 6aa9bb04884b..1b24fe854943 100644 --- a/doc/source/reference/c-api/types-and-structures.rst +++ b/doc/source/reference/c-api/types-and-structures.rst @@ -816,7 +816,8 @@ PyArrayMethod_SortContext .. c:member:: PyArray_SortCompareFunc *compare - A pointer to the comparison function used for sorting. + A pointer to the comparison function used for sorting. This function + can be NULL if the sort is not based on a comparison function. .. 
c:member:: npy_bool descending From b1222b20a0f6d9fc556438a02d30da2ad4fcabc0 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Wed, 2 Jul 2025 21:36:19 -0400 Subject: [PATCH 50/54] BUG: Use NPY_MIN_INT return value in compare_from_context to indicate error --- numpy/_core/src/npysort/npysort_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy/_core/src/npysort/npysort_common.h b/numpy/_core/src/npysort/npysort_common.h index 1b86cc2a4b3c..ceff81f8b842 100644 --- a/numpy/_core/src/npysort/npysort_common.h +++ b/numpy/_core/src/npysort/npysort_common.h @@ -62,7 +62,7 @@ compare_from_context(const void *a, const void *b, void *context) /* This should never happen, but just in case */ PyErr_SetString(PyExc_RuntimeError, "Unexpected comparison result in sort function"); - return -1; /* Indicate an error */ + return NPY_MIN_INT; /* Indicate an error */ } static inline void From 4d7d592a2b2da54984da997d45a481b7c7392fb3 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sat, 5 Jul 2025 16:45:02 -0400 Subject: [PATCH 51/54] DOC: Add release note for new sort compare c-api change --- doc/release/upcoming_changes/28516.c_api.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 doc/release/upcoming_changes/28516.c_api.rst diff --git a/doc/release/upcoming_changes/28516.c_api.rst b/doc/release/upcoming_changes/28516.c_api.rst new file mode 100644 index 000000000000..fc2fccc2d7a5 --- /dev/null +++ b/doc/release/upcoming_changes/28516.c_api.rst @@ -0,0 +1,17 @@ +New comparison and null handling enums for sorting in dtype API +--------------------------------------------------------------- + +Using the new `NPY_DT_sort_compare` slot, user-defined dtypes can +now specify how to compare elements during sorting operations. +The sort compare function should return a member of the +`NPY_COMPARE_RESULT` enum to indicate the result of the comparison, +including support for unordered comparisons. 
+ +The sorting context of type `PyArrayMethod_SortContext` is passed +to the `NPY_DT_get_sort_function` and `NPY_DT_get_argsort_function` +functions and contains a boolean `descending` flag and a +`nan_position` of type `NPY_NAN_POSITION`, which can be used to +control the behavior of sorting with respect to NaN values. +Currently, sorts are always ascending and nulls are always sorted last, +but this must be checked in the context passed to the sort function +to allow for future features. \ No newline at end of file From b7345276148e695e63bda06458c535c8e8a58cc6 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sat, 5 Jul 2025 16:49:08 -0400 Subject: [PATCH 52/54] DOC: Add `PyArray_SortCompareFunc` to array docs --- doc/source/reference/c-api/array.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index 0107b81a5190..7a75650f1b7e 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -1895,6 +1895,14 @@ described below. *context* is passed in containing the descriptor for the array. Returns 0 on success, -1 on failure. +.. c:type:: NPY_COMPARE_RESULT (PyArray_SortCompareFunc) ( \ + const void *a, const void *b, PyArray_Descr *descr) + + A function to compare two elements of an array for sorting. The *a* and *b* + pointers point to the elements to compare, and *descr* is the descriptor for + the array. Returns a value of type :c:type:`NPY_COMPARE_RESULT` indicating + the result of the comparison, including whether each element is unordered. 
+ API Functions and Typedefs ~~~~~~~~~~~~~~~~~~~~~~~~~~ From 7d9ac658b354d4a023cc258917e19164c6886ce6 Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Sat, 5 Jul 2025 16:54:21 -0400 Subject: [PATCH 53/54] DOC: Update sorting documentation with NPY_COMPARE_RESULT enum --- doc/release/upcoming_changes/28516.c_api.rst | 2 +- doc/source/reference/c-api/array.rst | 23 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/doc/release/upcoming_changes/28516.c_api.rst b/doc/release/upcoming_changes/28516.c_api.rst index fc2fccc2d7a5..ec4cf0aa2d6b 100644 --- a/doc/release/upcoming_changes/28516.c_api.rst +++ b/doc/release/upcoming_changes/28516.c_api.rst @@ -10,7 +10,7 @@ including support for unordered comparisons. The sorting context of type `PyArrayMethod_SortContext` is passed to the `NPY_DT_get_sort_function` and `NPY_DT_get_argsort_function` functions and contains a boolean `descending` flag and a -`nan_position` of type `NPY_NAN_POSITION`, which can be used to +`nan_position` of type `NPY_SORT_NAN_POSITION`, which can be used to control the behavior of sorting with respect to NaN values. Currently, sorts are always ascending and nulls are always sorted last, but this must be checked in the context passed to the sort function diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst index 7a75650f1b7e..aedade624f04 100644 --- a/doc/source/reference/c-api/array.rst +++ b/doc/source/reference/c-api/array.rst @@ -4422,6 +4422,29 @@ Enumerated Types Indicates that NaN values should be sorted to the end. +.. c:enum:: NPY_COMPARE_RESULT + + An enum used to indicate the result of a comparison operation. + The unordered comparisons are used to indicate that the + comparison is not well-defined for one or both of the operands, + such as when comparing NaN values. + + .. c:enumerator:: NPY_LESS + + .. c:enumerator:: NPY_EQUAL + + .. c:enumerator:: NPY_GREATER + + .. c:enumerator:: NPY_UNORDERED_LEFT + + .. 
c:enumerator:: NPY_UNORDERED_RIGHT + + .. c:enumerator:: NPY_UNORDERED_BOTH + + .. c:enumerator:: NPY_COMPARE_ERROR + + Indicates that an error occurred during the comparison operation. + .. c:enum:: NPY_SCALARKIND A special variable type indicating the number of "kinds" of From c21a16417cc32ef57e3c7494b9768a6ad0d9ad2b Mon Sep 17 00:00:00 2001 From: Maanas Arora Date: Mon, 14 Jul 2025 15:56:08 -0400 Subject: [PATCH 54/54] ENH: Add descending sorts to new `compare_from_context` --- numpy/_core/src/npysort/npysort_common.h | 33 +++++++++++++++++++----- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/numpy/_core/src/npysort/npysort_common.h b/numpy/_core/src/npysort/npysort_common.h index ceff81f8b842..16e889e7d310 100644 --- a/numpy/_core/src/npysort/npysort_common.h +++ b/numpy/_core/src/npysort/npysort_common.h @@ -18,14 +18,8 @@ extern "C" { */ static inline int -compare_from_context(const void *a, const void *b, void *context) +compare_result_to_int(NPY_COMPARE_RESULT result, NPY_SORT_NAN_POSITION nan_position) { - PyArrayMethod_SortContext *sort_context = (PyArrayMethod_SortContext *)context; - PyArray_SortCompareFunc *cmp = sort_context->compare; - int nan_position = sort_context->nan_position; - - NPY_COMPARE_RESULT result = cmp(a, b, sort_context->descriptor); - if (result == NPY_LESS) { return -1; } @@ -65,6 +59,31 @@ compare_from_context(const void *a, const void *b, void *context) return NPY_MIN_INT; /* Indicate an error */ } +static inline int +compare_from_context(const void *a, const void *b, void *context) +{ + PyArrayMethod_SortContext *sort_context = (PyArrayMethod_SortContext *)context; + PyArray_SortCompareFunc *cmp = sort_context->compare; + + int descending = sort_context->descending; + NPY_SORT_NAN_POSITION nan_position = sort_context->nan_position; + + NPY_COMPARE_RESULT result = cmp(a, b, sort_context->descriptor); + + if (result == NPY_COMPARE_ERROR) { + PyErr_SetString(PyExc_RuntimeError, "Unexpected comparison result in sort 
function"); + return NPY_MIN_INT; /* Indicate an error */ + } + + int cmp_result = compare_result_to_int(result, nan_position); + + if (descending) { + cmp_result = -cmp_result; + } + + return cmp_result; +} + static inline void fill_sort_data_from_arr_or_context(void *array, PyArrayMethod_SortContext *context, void **out_arr_or_context, npy_intp *elsize, pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy