Skip to content

Commit f9932f5

Browse files
dg-pb, kumaraditya303, serhiy-storchaka
authored
gh-119109: improve functools.partial vectorcall with keywords (#124584)
Co-authored-by: Kumar Aditya <kumaraditya@python.org> Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 6ea4258 commit f9932f5

File tree

2 files changed

+140
-53
lines changed

2 files changed

+140
-53
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
:func:`functools.partial` calls are now faster when keyword arguments are used.

Modules/_functoolsmodule.c

Lines changed: 139 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -367,19 +367,6 @@ partial_descr_get(PyObject *self, PyObject *obj, PyObject *type)
367367
return PyMethod_New(self, obj);
368368
}
369369

370-
/* Merging keyword arguments using the vectorcall convention is messy, so
371-
* if we would need to do that, we stop using vectorcall and fall back
372-
* to using partial_call() instead. */
373-
Py_NO_INLINE static PyObject *
374-
partial_vectorcall_fallback(PyThreadState *tstate, partialobject *pto,
375-
PyObject *const *args, size_t nargsf,
376-
PyObject *kwnames)
377-
{
378-
pto->vectorcall = NULL;
379-
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
380-
return _PyObject_MakeTpCall(tstate, (PyObject *)pto, args, nargs, kwnames);
381-
}
382-
383370
static PyObject *
384371
partial_vectorcall(PyObject *self, PyObject *const *args,
385372
size_t nargsf, PyObject *kwnames)
@@ -388,10 +375,7 @@ partial_vectorcall(PyObject *self, PyObject *const *args,
388375
PyThreadState *tstate = _PyThreadState_GET();
389376
Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
390377

391-
/* pto->kw is mutable, so need to check every time */
392-
if (PyDict_GET_SIZE(pto->kw)) {
393-
return partial_vectorcall_fallback(tstate, pto, args, nargsf, kwnames);
394-
}
378+
/* Placeholder check */
395379
Py_ssize_t pto_phcount = pto->phcount;
396380
if (nargs < pto_phcount) {
397381
PyErr_Format(PyExc_TypeError,
@@ -400,50 +384,143 @@ partial_vectorcall(PyObject *self, PyObject *const *args,
400384
return NULL;
401385
}
402386

403-
Py_ssize_t nargskw = nargs;
404-
if (kwnames != NULL) {
405-
nargskw += PyTuple_GET_SIZE(kwnames);
406-
}
407-
408387
PyObject **pto_args = _PyTuple_ITEMS(pto->args);
409388
Py_ssize_t pto_nargs = PyTuple_GET_SIZE(pto->args);
389+
Py_ssize_t pto_nkwds = PyDict_GET_SIZE(pto->kw);
390+
Py_ssize_t nkwds = kwnames == NULL ? 0 : PyTuple_GET_SIZE(kwnames);
391+
Py_ssize_t nargskw = nargs + nkwds;
392+
393+
/* Special cases */
394+
if (!pto_nkwds) {
395+
/* Fast path if we're called without arguments */
396+
if (nargskw == 0) {
397+
return _PyObject_VectorcallTstate(tstate, pto->fn, pto_args,
398+
pto_nargs, NULL);
399+
}
410400

411-
/* Fast path if we're called without arguments */
412-
if (nargskw == 0) {
413-
return _PyObject_VectorcallTstate(tstate, pto->fn,
414-
pto_args, pto_nargs, NULL);
401+
/* Use PY_VECTORCALL_ARGUMENTS_OFFSET to prepend a single
402+
* positional argument. */
403+
if (pto_nargs == 1 && (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET)) {
404+
PyObject **newargs = (PyObject **)args - 1;
405+
PyObject *tmp = newargs[0];
406+
newargs[0] = pto_args[0];
407+
PyObject *ret = _PyObject_VectorcallTstate(tstate, pto->fn, newargs,
408+
nargs + 1, kwnames);
409+
newargs[0] = tmp;
410+
return ret;
411+
}
415412
}
416413

417-
/* Fast path using PY_VECTORCALL_ARGUMENTS_OFFSET to prepend a single
418-
* positional argument */
419-
if (pto_nargs == 1 && (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET)) {
420-
PyObject **newargs = (PyObject **)args - 1;
421-
PyObject *tmp = newargs[0];
422-
newargs[0] = pto_args[0];
423-
PyObject *ret = _PyObject_VectorcallTstate(tstate, pto->fn,
424-
newargs, nargs + 1, kwnames);
425-
newargs[0] = tmp;
426-
return ret;
427-
}
414+
/* Total sizes */
415+
Py_ssize_t tot_nargs = pto_nargs + nargs - pto_phcount;
416+
Py_ssize_t tot_nkwds = pto_nkwds + nkwds;
417+
Py_ssize_t tot_nargskw = tot_nargs + tot_nkwds;
428418

429-
PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
430-
PyObject **stack;
419+
PyObject *pto_kw_merged = NULL; // pto_kw with duplicates merged (if any)
420+
PyObject *tot_kwnames;
431421

432-
Py_ssize_t tot_nargskw = pto_nargs + nargskw - pto_phcount;
433-
if (tot_nargskw <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
422+
/* Allocate Stack
423+
* Note, _PY_FASTCALL_SMALL_STACK is optimal for positional only
424+
* This case might have keyword arguments
425+
* furthermore, it might use extra stack space for temporary key storage
426+
* thus, double small_stack size is used, which is 10 * 8 = 80 bytes */
427+
PyObject *small_stack[_PY_FASTCALL_SMALL_STACK * 2];
428+
PyObject **tmp_stack, **stack;
429+
Py_ssize_t init_stack_size = tot_nargskw;
430+
if (pto_nkwds) {
431+
// If pto_nkwds, allocate additional space for temporary new keys
432+
init_stack_size += nkwds;
433+
}
434+
if (init_stack_size <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
434435
stack = small_stack;
435436
}
436437
else {
437-
stack = PyMem_Malloc(tot_nargskw * sizeof(PyObject *));
438+
stack = PyMem_Malloc(init_stack_size * sizeof(PyObject *));
438439
if (stack == NULL) {
439-
PyErr_NoMemory();
440-
return NULL;
440+
return PyErr_NoMemory();
441441
}
442442
}
443443

444-
Py_ssize_t tot_nargs;
444+
/* Copy keywords to stack */
445+
if (!pto_nkwds) {
446+
tot_kwnames = kwnames;
447+
if (nkwds) {
448+
/* if !pto_nkwds & nkwds, then simply append kw */
449+
memcpy(stack + tot_nargs, args + nargs, nkwds * sizeof(PyObject*));
450+
}
451+
}
452+
else {
453+
/* stack is now [<positionals>, <pto_kwds>, <kwds>, <kwds_keys>]
454+
* Will resize later to [<positionals>, <merged_kwds>] */
455+
PyObject *key, *val;
456+
457+
/* Merge kw to pto_kw or add to tail (if not duplicate) */
458+
Py_ssize_t n_tail = 0;
459+
for (Py_ssize_t i = 0; i < nkwds; ++i) {
460+
key = PyTuple_GET_ITEM(kwnames, i);
461+
val = args[nargs + i];
462+
if (PyDict_Contains(pto->kw, key)) {
463+
if (pto_kw_merged == NULL) {
464+
pto_kw_merged = PyDict_Copy(pto->kw);
465+
if (pto_kw_merged == NULL) {
466+
goto error;
467+
}
468+
}
469+
if (PyDict_SetItem(pto_kw_merged, key, val) < 0) {
470+
Py_DECREF(pto_kw_merged);
471+
goto error;
472+
}
473+
}
474+
else {
475+
/* Copy keyword tail to stack */
476+
stack[tot_nargs + pto_nkwds + n_tail] = val;
477+
stack[tot_nargskw + n_tail] = key;
478+
n_tail++;
479+
}
480+
}
481+
Py_ssize_t n_merges = nkwds - n_tail;
482+
483+
/* Create total kwnames */
484+
tot_kwnames = PyTuple_New(tot_nkwds - n_merges);
485+
if (tot_kwnames == NULL) {
486+
Py_XDECREF(pto_kw_merged);
487+
goto error;
488+
}
489+
for (Py_ssize_t i = 0; i < n_tail; ++i) {
490+
key = Py_NewRef(stack[tot_nargskw + i]);
491+
PyTuple_SET_ITEM(tot_kwnames, pto_nkwds + i, key);
492+
}
493+
494+
/* Copy pto_keywords with overlapping call keywords merged
495+
* Note, tail is already copied. */
496+
Py_ssize_t pos = 0, i = 0;
497+
while (PyDict_Next(n_merges ? pto_kw_merged : pto->kw, &pos, &key, &val)) {
498+
assert(i < pto_nkwds);
499+
PyTuple_SET_ITEM(tot_kwnames, i, Py_NewRef(key));
500+
stack[tot_nargs + i] = val;
501+
i++;
502+
}
503+
assert(i == pto_nkwds);
504+
Py_XDECREF(pto_kw_merged);
505+
506+
/* Resize Stack if the removing overallocation saves some noticable memory
507+
* NOTE: This whole block can be removed without breaking anything */
508+
Py_ssize_t noveralloc = n_merges + nkwds;
509+
if (stack != small_stack && noveralloc > 6 && noveralloc > init_stack_size / 10) {
510+
tmp_stack = PyMem_Realloc(stack, (tot_nargskw - n_merges) * sizeof(PyObject *));
511+
if (tmp_stack == NULL) {
512+
Py_DECREF(tot_kwnames);
513+
if (stack != small_stack) {
514+
PyMem_Free(stack);
515+
}
516+
return PyErr_NoMemory();
517+
}
518+
stack = tmp_stack;
519+
}
520+
}
521+
522+
/* Copy Positionals to stack */
445523
if (pto_phcount) {
446-
tot_nargs = pto_nargs + nargs - pto_phcount;
447524
Py_ssize_t j = 0; // New args index
448525
for (Py_ssize_t i = 0; i < pto_nargs; i++) {
449526
if (pto_args[i] == pto->placeholder) {
@@ -455,22 +532,31 @@ partial_vectorcall(PyObject *self, PyObject *const *args,
455532
}
456533
}
457534
assert(j == pto_phcount);
458-
if (nargskw > pto_phcount) {
459-
memcpy(stack + pto_nargs, args + j, (nargskw - j) * sizeof(PyObject*));
535+
/* Add remaining args from new_args */
536+
if (nargs > pto_phcount) {
537+
memcpy(stack + pto_nargs, args + j, (nargs - j) * sizeof(PyObject*));
460538
}
461539
}
462540
else {
463-
tot_nargs = pto_nargs + nargs;
464-
/* Copy to new stack, using borrowed references */
465541
memcpy(stack, pto_args, pto_nargs * sizeof(PyObject*));
466-
memcpy(stack + pto_nargs, args, nargskw * sizeof(PyObject*));
542+
memcpy(stack + pto_nargs, args, nargs * sizeof(PyObject*));
467543
}
468-
PyObject *ret = _PyObject_VectorcallTstate(tstate, pto->fn,
469-
stack, tot_nargs, kwnames);
544+
545+
PyObject *ret = _PyObject_VectorcallTstate(tstate, pto->fn, stack,
546+
tot_nargs, tot_kwnames);
470547
if (stack != small_stack) {
471548
PyMem_Free(stack);
472549
}
550+
if (pto_nkwds) {
551+
Py_DECREF(tot_kwnames);
552+
}
473553
return ret;
554+
555+
error:
556+
if (stack != small_stack) {
557+
PyMem_Free(stack);
558+
}
559+
return NULL;
474560
}
475561

476562
/* Set pto->vectorcall depending on the parameters of the partial object */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy