@@ -367,19 +367,6 @@ partial_descr_get(PyObject *self, PyObject *obj, PyObject *type)
     return PyMethod_New(self, obj);
 }
 
-/* Merging keyword arguments using the vectorcall convention is messy, so
- * if we would need to do that, we stop using vectorcall and fall back
- * to using partial_call() instead. */
-Py_NO_INLINE static PyObject *
-partial_vectorcall_fallback(PyThreadState *tstate, partialobject *pto,
-                            PyObject *const *args, size_t nargsf,
-                            PyObject *kwnames)
-{
-    pto->vectorcall = NULL;
-    Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
-    return _PyObject_MakeTpCall(tstate, (PyObject *)pto, args, nargs, kwnames);
-}
-
 static PyObject *
 partial_vectorcall(PyObject *self, PyObject *const *args,
                    size_t nargsf, PyObject *kwnames)
@@ -388,10 +375,7 @@ partial_vectorcall(PyObject *self, PyObject *const *args,
     PyThreadState *tstate = _PyThreadState_GET();
     Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
 
-    /* pto->kw is mutable, so need to check every time */
-    if (PyDict_GET_SIZE(pto->kw)) {
-        return partial_vectorcall_fallback(tstate, pto, args, nargsf, kwnames);
-    }
+    /* Placeholder check */
     Py_ssize_t pto_phcount = pto->phcount;
     if (nargs < pto_phcount) {
         PyErr_Format(PyExc_TypeError,
@@ -400,50 +384,143 @@ partial_vectorcall(PyObject *self, PyObject *const *args,
         return NULL;
     }
 
-    Py_ssize_t nargskw = nargs;
-    if (kwnames != NULL) {
-        nargskw += PyTuple_GET_SIZE(kwnames);
-    }
-
     PyObject **pto_args = _PyTuple_ITEMS(pto->args);
     Py_ssize_t pto_nargs = PyTuple_GET_SIZE(pto->args);
+    Py_ssize_t pto_nkwds = PyDict_GET_SIZE(pto->kw);
+    Py_ssize_t nkwds = kwnames == NULL ? 0 : PyTuple_GET_SIZE(kwnames);
+    Py_ssize_t nargskw = nargs + nkwds;
+
+    /* Special cases */
+    if (!pto_nkwds) {
+        /* Fast path if we're called without arguments */
+        if (nargskw == 0) {
+            return _PyObject_VectorcallTstate(tstate, pto->fn, pto_args,
+                                              pto_nargs, NULL);
+        }
 
-    /* Fast path if we're called without arguments */
-    if (nargskw == 0) {
-        return _PyObject_VectorcallTstate(tstate, pto->fn,
-                                          pto_args, pto_nargs, NULL);
+        /* Use PY_VECTORCALL_ARGUMENTS_OFFSET to prepend a single
+         * positional argument. */
+        if (pto_nargs == 1 && (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET)) {
+            PyObject **newargs = (PyObject **)args - 1;
+            PyObject *tmp = newargs[0];
+            newargs[0] = pto_args[0];
+            PyObject *ret = _PyObject_VectorcallTstate(tstate, pto->fn, newargs,
+                                                       nargs + 1, kwnames);
+            newargs[0] = tmp;
+            return ret;
+        }
     }
 
-    /* Fast path using PY_VECTORCALL_ARGUMENTS_OFFSET to prepend a single
-     * positional argument */
-    if (pto_nargs == 1 && (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET)) {
-        PyObject **newargs = (PyObject **)args - 1;
-        PyObject *tmp = newargs[0];
-        newargs[0] = pto_args[0];
-        PyObject *ret = _PyObject_VectorcallTstate(tstate, pto->fn,
-                                                   newargs, nargs + 1, kwnames);
-        newargs[0] = tmp;
-        return ret;
-    }
+    /* Total sizes */
+    Py_ssize_t tot_nargs = pto_nargs + nargs - pto_phcount;
+    Py_ssize_t tot_nkwds = pto_nkwds + nkwds;
+    Py_ssize_t tot_nargskw = tot_nargs + tot_nkwds;
 
-    PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
-    PyObject **stack;
+    PyObject *pto_kw_merged = NULL;  // pto_kw with duplicates merged (if any)
+    PyObject *tot_kwnames;
 
-    Py_ssize_t tot_nargskw = pto_nargs + nargskw - pto_phcount;
-    if (tot_nargskw <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
+    /* Allocate the stack.
+     * Note: _PY_FASTCALL_SMALL_STACK is optimal for positional-only calls,
+     * but this case may involve keyword arguments and, furthermore, may use
+     * extra stack space for temporary key storage; thus, double the
+     * small_stack size is used, which is 10 * 8 = 80 bytes. */
+    PyObject *small_stack[_PY_FASTCALL_SMALL_STACK * 2];
+    PyObject **tmp_stack, **stack;
+    Py_ssize_t init_stack_size = tot_nargskw;
+    if (pto_nkwds) {
+        // If pto_nkwds, allocate additional space for temporary new keys
+        init_stack_size += nkwds;
+    }
+    if (init_stack_size <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
         stack = small_stack;
     }
     else {
-        stack = PyMem_Malloc(tot_nargskw * sizeof(PyObject *));
+        stack = PyMem_Malloc(init_stack_size * sizeof(PyObject *));
         if (stack == NULL) {
-            PyErr_NoMemory();
-            return NULL;
+            return PyErr_NoMemory();
         }
     }
 
-    Py_ssize_t tot_nargs;
+    /* Copy keywords to stack */
+    if (!pto_nkwds) {
+        tot_kwnames = kwnames;
+        if (nkwds) {
+            /* if !pto_nkwds && nkwds, simply append the call keywords */
+            memcpy(stack + tot_nargs, args + nargs, nkwds * sizeof(PyObject *));
+        }
+    }
+    else {
+        /* stack is now [<positionals>, <pto_kwds>, <kwds>, <kwds_keys>];
+         * it will be resized later to [<positionals>, <merged_kwds>]. */
+        PyObject *key, *val;
+
+        /* Merge kw into pto_kw or add to the tail (if not a duplicate) */
+        Py_ssize_t n_tail = 0;
+        for (Py_ssize_t i = 0; i < nkwds; ++i) {
+            key = PyTuple_GET_ITEM(kwnames, i);
+            val = args[nargs + i];
+            if (PyDict_Contains(pto->kw, key)) {
+                if (pto_kw_merged == NULL) {
+                    pto_kw_merged = PyDict_Copy(pto->kw);
+                    if (pto_kw_merged == NULL) {
+                        goto error;
+                    }
+                }
+                if (PyDict_SetItem(pto_kw_merged, key, val) < 0) {
+                    Py_DECREF(pto_kw_merged);
+                    goto error;
+                }
+            }
+            else {
+                /* Copy the keyword tail to the stack */
+                stack[tot_nargs + pto_nkwds + n_tail] = val;
+                stack[tot_nargskw + n_tail] = key;
+                n_tail++;
+            }
+        }
+        Py_ssize_t n_merges = nkwds - n_tail;
+
+        /* Create the total kwnames tuple */
+        tot_kwnames = PyTuple_New(tot_nkwds - n_merges);
+        if (tot_kwnames == NULL) {
+            Py_XDECREF(pto_kw_merged);
+            goto error;
+        }
+        for (Py_ssize_t i = 0; i < n_tail; ++i) {
+            key = Py_NewRef(stack[tot_nargskw + i]);
+            PyTuple_SET_ITEM(tot_kwnames, pto_nkwds + i, key);
+        }
+
+        /* Copy pto->kw values with overlapping call keywords merged in.
+         * Note: the tail is already copied. */
+        Py_ssize_t pos = 0, i = 0;
+        while (PyDict_Next(n_merges ? pto_kw_merged : pto->kw, &pos, &key, &val)) {
+            assert(i < pto_nkwds);
+            PyTuple_SET_ITEM(tot_kwnames, i, Py_NewRef(key));
+            stack[tot_nargs + i] = val;
+            i++;
+        }
+        assert(i == pto_nkwds);
+        Py_XDECREF(pto_kw_merged);
+
+        /* Resize the stack if removing the overallocation saves noticeable memory.
+         * NOTE: This whole block can be removed without breaking anything. */
+        Py_ssize_t noveralloc = n_merges + nkwds;
+        if (stack != small_stack && noveralloc > 6 && noveralloc > init_stack_size / 10) {
+            tmp_stack = PyMem_Realloc(stack, (tot_nargskw - n_merges) * sizeof(PyObject *));
+            if (tmp_stack == NULL) {
+                Py_DECREF(tot_kwnames);
+                if (stack != small_stack) {
+                    PyMem_Free(stack);
+                }
+                return PyErr_NoMemory();
+            }
+            stack = tmp_stack;
+        }
+    }
+
+    /* Copy positionals to stack */
     if (pto_phcount) {
-        tot_nargs = pto_nargs + nargs - pto_phcount;
         Py_ssize_t j = 0;   // New args index
         for (Py_ssize_t i = 0; i < pto_nargs; i++) {
             if (pto_args[i] == pto->placeholder) {
@@ -455,22 +532,31 @@ partial_vectorcall(PyObject *self, PyObject *const *args,
             }
         }
         assert(j == pto_phcount);
-        if (nargskw > pto_phcount) {
-            memcpy(stack + pto_nargs, args + j, (nargskw - j) * sizeof(PyObject *));
+        /* Add remaining args from new_args */
+        if (nargs > pto_phcount) {
+            memcpy(stack + pto_nargs, args + j, (nargs - j) * sizeof(PyObject *));
         }
     }
     else {
-        tot_nargs = pto_nargs + nargs;
-        /* Copy to new stack, using borrowed references */
         memcpy(stack, pto_args, pto_nargs * sizeof(PyObject *));
-        memcpy(stack + pto_nargs, args, nargskw * sizeof(PyObject *));
+        memcpy(stack + pto_nargs, args, nargs * sizeof(PyObject *));
     }
-    PyObject *ret = _PyObject_VectorcallTstate(tstate, pto->fn,
-                                               stack, tot_nargs, kwnames);
+
+    PyObject *ret = _PyObject_VectorcallTstate(tstate, pto->fn, stack,
+                                               tot_nargs, tot_kwnames);
     if (stack != small_stack) {
         PyMem_Free(stack);
     }
+    if (pto_nkwds) {
+        Py_DECREF(tot_kwnames);
+    }
     return ret;
+
+error:
+    if (stack != small_stack) {
+        PyMem_Free(stack);
+    }
+    return NULL;
 }
 
 /* Set pto->vectorcall depending on the parameters of the partial object */
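
For context on the semantics this path must preserve: `partial` merges its stored keywords with the call-site keywords, letting the call site win on duplicate keys, and each stored placeholder is filled by a leading call-site positional argument. A minimal Python sketch of that behaviour (illustrative only, not part of the patch; it assumes `functools.Placeholder`, which ships with the placeholder support this code handles):

```python
from functools import partial, Placeholder  # Placeholder: recent CPython only

def f(*args, **kwargs):
    return args, kwargs

# Duplicate keyword: the call-site value (b=99) overrides the stored b=20,
# which is what the PyDict_Copy()/PyDict_SetItem() merge implements in C.
p = partial(f, 1, a=10, b=20)
assert p(2, b=99, c=3) == ((1, 2), {"a": 10, "b": 99, "c": 3})

# Placeholders: leading call arguments fill the stored placeholders
# (pto_phcount of them); supplying fewer raises TypeError, matching the
# "Placeholder check" at the top of partial_vectorcall().
q = partial(f, Placeholder, 2)
assert q(1, 3) == ((1, 2, 3), {})
try:
    q()
except TypeError:
    pass  # fewer positional arguments than placeholders
```

The `n_merges` counter corresponds to the duplicate `b` here: merged keys keep `pto->kw`'s insertion order at the front of `tot_kwnames`, while genuinely new keys such as `c` are appended to the tail.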