From 635b4afff1ca1ff4e488995e8a04f24cbbc9e19a Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sun, 13 Jul 2025 12:56:59 +0900 Subject: [PATCH 01/81] Fix derive(Traverse) --- derive-impl/src/pytraverse.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/derive-impl/src/pytraverse.rs b/derive-impl/src/pytraverse.rs index 728722b83a..c5c4bbd270 100644 --- a/derive-impl/src/pytraverse.rs +++ b/derive-impl/src/pytraverse.rs @@ -105,8 +105,19 @@ pub(crate) fn impl_pytraverse(mut item: DeriveInput) -> Result { let ty = &item.ident; + // Add Traverse bound to all type parameters + for param in &mut item.generics.params { + if let syn::GenericParam::Type(type_param) = param { + type_param + .bounds + .push(syn::parse_quote!(::rustpython_vm::object::Traverse)); + } + } + + let (impl_generics, ty_generics, where_clause) = item.generics.split_for_impl(); + let ret = quote! { - unsafe impl ::rustpython_vm::object::Traverse for #ty { + unsafe impl #impl_generics ::rustpython_vm::object::Traverse for #ty #ty_generics #where_clause { fn traverse(&self, tracer_fn: &mut ::rustpython_vm::object::TraverseFn) { #trace_code } From 09489712e6d801722e9c1cb30a99701da15009ae Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sun, 13 Jul 2025 14:34:46 +0900 Subject: [PATCH 02/81] PyPayload::payload_type_of --- vm/src/object/core.rs | 2 +- vm/src/object/payload.rs | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/vm/src/object/core.rs b/vm/src/object/core.rs index bb057f4906..b4b9557f2a 100644 --- a/vm/src/object/core.rs +++ b/vm/src/object/core.rs @@ -655,7 +655,7 @@ impl PyObject { #[inline(always)] pub fn payload_is(&self) -> bool { - self.0.typeid == TypeId::of::() + self.0.typeid == T::payload_type_id() } /// Force to return payload as T. diff --git a/vm/src/object/payload.rs b/vm/src/object/payload.rs index 6413d6ae06..f223af6e96 100644 --- a/vm/src/object/payload.rs +++ b/vm/src/object/payload.rs @@ -19,6 +19,10 @@ cfg_if::cfg_if! { pub trait PyPayload: std::fmt::Debug + MaybeTraverse + PyThreadingConstraint + Sized + 'static { + #[inline] + fn payload_type_id() -> std::any::TypeId { + std::any::TypeId::of::() + } fn class(ctx: &Context) -> &'static Py; #[inline] @@ -75,7 +79,7 @@ pub trait PyPayload: } pub trait PyObjectPayload: - std::any::Any + std::fmt::Debug + MaybeTraverse + PyThreadingConstraint + 'static + PyPayload + std::any::Any + std::fmt::Debug + MaybeTraverse + PyThreadingConstraint + 'static { } From 14ce76e6c81b59191f14f811621ada0bba599cbb Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sun, 13 Jul 2025 14:39:42 +0900 Subject: [PATCH 03/81] PyTupleTyped as alias of PyTuple --- vm/src/builtins/function.rs | 15 ++-- vm/src/builtins/tuple.rs | 120 ++++++++++++++++++------------- vm/src/builtins/type.rs | 22 +++--- vm/src/convert/try_from.rs | 8 +-- vm/src/frame.rs | 14 ++-- vm/src/object/core.rs | 44 +++++------- vm/src/object/traverse_object.rs | 5 +- vm/src/vm/context.rs | 7 ++ vm/src/vm/mod.rs | 8 +-- 9 files changed, 134 insertions(+), 109 deletions(-) diff --git a/vm/src/builtins/function.rs b/vm/src/builtins/function.rs index a29a077e50..45917adcf2 100644 --- a/vm/src/builtins/function.rs +++ b/vm/src/builtins/function.rs @@ -8,7 +8,6 @@ use super::{ #[cfg(feature = "jit")] use crate::common::lock::OnceCell; use crate::common::lock::PyMutex; -use crate::convert::{ToPyObject, TryFromObject}; use crate::function::ArgMapping; use crate::object::{Traverse, TraverseFn}; use crate::{ @@ -32,7 +31,7 @@ pub struct PyFunction { code: PyRef, globals: PyDictRef, builtins: PyObjectRef, - closure: Option>, + closure: Option>>, defaults_and_kwdefaults: PyMutex<(Option, Option)>, name: PyMutex, qualname: PyMutex, @@ -47,7 +46,9 @@ pub struct PyFunction { unsafe impl Traverse for PyFunction { fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { self.globals.traverse(tracer_fn); - self.closure.traverse(tracer_fn); + if let Some(closure) = self.closure.as_ref() { + closure.as_untyped().traverse(tracer_fn); + } self.defaults_and_kwdefaults.traverse(tracer_fn); } } @@ -58,7 +59,7 @@ impl PyFunction { pub(crate) fn new( code: PyRef, globals: PyDictRef, - closure: Option>, + closure: Option>>, defaults: Option, kw_only_defaults: Option, qualname: PyStrRef, @@ -326,6 +327,7 @@ impl Py { ) -> PyResult { #[cfg(feature = "jit")] if let Some(jitted_code) = self.jitted_code.get() { + use crate::convert::ToPyObject; match jit::get_jit_args(self, &func_args, jitted_code, vm) { Ok(args) => { return Ok(args.invoke().to_pyobject(vm)); @@ -427,7 +429,7 @@ impl PyFunction { #[pymember] fn __closure__(vm: &VirtualMachine, zelf: PyObjectRef) -> PyResult { let zelf = Self::_as_pyref(&zelf, vm)?; - Ok(vm.unwrap_or_none(zelf.closure.clone().map(|x| x.to_pyobject(vm)))) + Ok(vm.unwrap_or_none(zelf.closure.clone().map(|x| x.into()))) } #[pymember] @@ -612,8 +614,7 @@ impl Constructor for PyFunction { } // Validate that all items are cells and create typed tuple - let typed_closure = - PyTupleTyped::::try_from_object(vm, closure_tuple.into())?; + let typed_closure = closure_tuple.try_into_typed::(vm)?; Some(typed_closure) } else if !args.code.freevars.is_empty() { return Err(vm.new_type_error("arg 5 (closure) must be tuple")); diff --git a/vm/src/builtins/tuple.rs b/vm/src/builtins/tuple.rs index 2ee8497dda..9f589547f0 100644 --- a/vm/src/builtins/tuple.rs +++ b/vm/src/builtins/tuple.rs @@ -3,7 +3,7 @@ use crate::common::{ hash::{PyHash, PyUHash}, lock::PyMutex, }; -use crate::object::{Traverse, TraverseFn}; +use crate::object::{MaybeTraverse, Traverse, TraverseFn}; use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, atomic_func, @@ -449,6 +449,24 @@ impl Representable for PyTuple { } } +impl PyRef { + pub fn try_into_typed( + self, + vm: &VirtualMachine, + ) -> PyResult>>> { + PyRef::>>::try_from_untyped(self, vm) + } + /// # Safety + /// + /// The caller must ensure that all elements in the tuple are valid instances + /// of type `T` before calling this method. This is typically verified by + /// calling `try_into_typed` first. + unsafe fn into_typed_unchecked(self) -> PyRef>> { + let obj: PyObjectRef = self.into(); + unsafe { obj.downcast_unchecked::>>() } + } +} + #[pyclass(module = false, name = "tuple_iterator", traverse)] #[derive(Debug)] pub(crate) struct PyTupleIterator { @@ -500,53 +518,75 @@ pub(crate) fn init(context: &Context) { PyTupleIterator::extend_class(context, context.types.tuple_iterator_type); } -pub struct PyTupleTyped { +#[repr(transparent)] +pub struct PyTupleTyped { // SAFETY INVARIANT: T must be repr(transparent) over PyObjectRef, and the // elements must be logically valid when transmuted to T - tuple: PyTupleRef, - _marker: PhantomData>, + tuple: PyTuple, + _marker: PhantomData, } -unsafe impl Traverse for PyTupleTyped +unsafe impl Traverse for PyTupleTyped where - T: TransmuteFromObject + Traverse, + R: TransmuteFromObject, { fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { self.tuple.traverse(tracer_fn); } } -impl TryFromObject for PyTupleTyped { - fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { - let tuple = PyTupleRef::try_from_object(vm, obj)?; - for elem in &*tuple { - T::check(vm, elem)? - } - // SAFETY: the contract of TransmuteFromObject upholds the variant on `tuple` - Ok(Self { - tuple, - _marker: PhantomData, - }) +impl MaybeTraverse for PyTupleTyped { + const IS_TRACE: bool = true; + fn try_traverse(&self, tracer_fn: &mut TraverseFn<'_>) { + self.traverse(tracer_fn); } } -impl AsRef<[T]> for PyTupleTyped { - fn as_ref(&self) -> &[T] { - self.as_slice() +impl PyTupleTyped> { + pub fn new_ref(elements: Vec>, ctx: &Context) -> PyRef { + // SAFETY: PyRef has the same layout as PyObjectRef + unsafe { + let elements: Vec = + std::mem::transmute::>, Vec>(elements); + let tuple = PyTuple::new_ref(elements, ctx); + tuple.into_typed_unchecked::() + } } } -impl PyTupleTyped { - pub fn empty(vm: &VirtualMachine) -> Self { - Self { - tuple: vm.ctx.empty_tuple.clone(), - _marker: PhantomData, +impl PyRef>> { + pub fn into_untyped(self) -> PyRef { + // SAFETY: PyTupleTyped is transparent over PyTuple + unsafe { std::mem::transmute::>>, PyRef>(self) } + } + + pub fn try_from_untyped(tuple: PyTupleRef, vm: &VirtualMachine) -> PyResult { + // Check that all elements are of the correct type + for elem in tuple.as_slice() { + as TransmuteFromObject>::check(vm, elem)?; } + // SAFETY: We just verified all elements are of type T, and PyTupleTyped has the same layout as PyTuple + Ok(unsafe { std::mem::transmute::, PyRef>>>(tuple) }) } +} +impl Py>> { + pub fn as_untyped(&self) -> &Py { + // SAFETY: PyTupleTyped is transparent over PyTuple + unsafe { std::mem::transmute::<&Py>>, &Py>(self) } + } +} + +impl AsRef<[PyRef]> for PyTupleTyped> { + fn as_ref(&self) -> &[PyRef] { + self.as_slice() + } +} + +impl PyTupleTyped> { #[inline] - pub fn as_slice(&self) -> &[T] { - unsafe { &*(self.tuple.as_slice() as *const [PyObjectRef] as *const [T]) } + pub fn as_slice(&self) -> &[PyRef] { + unsafe { &*(self.tuple.as_slice() as *const [PyObjectRef] as *const [PyRef]) } } #[inline] @@ -560,32 +600,16 @@ impl PyTupleTyped { } } -impl Clone for PyTupleTyped { - fn clone(&self) -> Self { - Self { - tuple: self.tuple.clone(), - _marker: PhantomData, - } - } -} - -impl fmt::Debug for PyTupleTyped { +impl fmt::Debug for PyTupleTyped { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.as_slice().fmt(f) - } -} - -impl From> for PyTupleRef { - #[inline] - fn from(tup: PyTupleTyped) -> Self { - tup.tuple + self.tuple.as_slice().fmt(f) } } -impl ToPyObject for PyTupleTyped { +impl From>>> for PyTupleRef { #[inline] - fn to_pyobject(self, _vm: &VirtualMachine) -> PyObjectRef { - self.tuple.into() + fn from(tup: PyRef>>) -> Self { + tup.into_untyped() } } diff --git a/vm/src/builtins/type.rs b/vm/src/builtins/type.rs index 1e18d6fd63..5a8f853bf1 100644 --- a/vm/src/builtins/type.rs +++ b/vm/src/builtins/type.rs @@ -62,7 +62,7 @@ unsafe impl crate::object::Traverse for PyType { pub struct HeapTypeExt { pub name: PyRwLock, pub qualname: PyRwLock, - pub slots: Option>, + pub slots: Option>>, pub sequence_methods: PySequenceMethods, pub mapping_methods: PyMappingMethods, } @@ -1041,15 +1041,13 @@ impl Constructor for PyType { // TODO: Flags is currently initialized with HAS_DICT. Should be // updated when __slots__ are supported (toggling the flag off if // a class has __slots__ defined). - let heaptype_slots: Option> = + let heaptype_slots: Option>> = if let Some(x) = attributes.get(identifier!(vm, __slots__)) { - Some(if x.to_owned().class().is(vm.ctx.types.str_type) { - PyTupleTyped::::try_from_object( - vm, - vec![x.to_owned()].into_pytuple(vm).into(), - )? + let slots = if x.class().is(vm.ctx.types.str_type) { + let x = unsafe { x.downcast_unchecked_ref::() }; + PyTupleTyped::new_ref(vec![x.to_owned()], &vm.ctx) } else { - let iter = x.to_owned().get_iter(vm)?; + let iter = x.get_iter(vm)?; let elements = { let mut elements = Vec::new(); while let PyIterReturn::Return(element) = iter.next(vm)? { @@ -1057,8 +1055,10 @@ impl Constructor for PyType { } elements }; - PyTupleTyped::::try_from_object(vm, elements.into_pytuple(vm).into())? - }) + let tuple = elements.into_pytuple(vm); + tuple.try_into_typed(vm)? + }; + Some(slots) } else { None }; @@ -1082,7 +1082,7 @@ impl Constructor for PyType { let heaptype_ext = HeapTypeExt { name: PyRwLock::new(name), qualname: PyRwLock::new(qualname), - slots: heaptype_slots.to_owned(), + slots: heaptype_slots.clone(), sequence_methods: PySequenceMethods::default(), mapping_methods: PyMappingMethods::default(), }; diff --git a/vm/src/convert/try_from.rs b/vm/src/convert/try_from.rs index d2d83b36e7..a875ffa231 100644 --- a/vm/src/convert/try_from.rs +++ b/vm/src/convert/try_from.rs @@ -78,12 +78,12 @@ where #[inline] fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { let class = T::class(&vm.ctx); - if obj.fast_isinstance(class) { + let result = if obj.fast_isinstance(class) { obj.downcast() - .map_err(|obj| vm.new_downcast_runtime_error(class, &obj)) } else { - Err(vm.new_downcast_type_error(class, &obj)) - } + Err(obj) + }; + result.map_err(|obj| vm.new_downcast_type_error(class, &obj)) } } diff --git a/vm/src/frame.rs b/vm/src/frame.rs index a3e31c5c2b..460ba4392e 100644 --- a/vm/src/frame.rs +++ b/vm/src/frame.rs @@ -7,7 +7,7 @@ use crate::{ PySlice, PyStr, PyStrInterned, PyStrRef, PyTraceback, PyType, asyncgenerator::PyAsyncGenWrappedValue, function::{PyCell, PyCellRef, PyFunction}, - tuple::{PyTuple, PyTupleRef, PyTupleTyped}, + tuple::{PyTuple, PyTupleRef}, }, bytecode, convert::{IntoObject, ToPyResult}, @@ -1346,11 +1346,14 @@ impl ExecutingFrame<'_> { #[cfg_attr(feature = "flame-it", flame("Frame"))] fn import(&mut self, vm: &VirtualMachine, module_name: Option<&Py>) -> PyResult<()> { let module_name = module_name.unwrap_or(vm.ctx.empty_str); - let from_list = >>::try_from_object(vm, self.pop_value())? - .unwrap_or_else(|| PyTupleTyped::empty(vm)); + let top = self.pop_value(); + let from_list = match >::try_from_object(vm, top)? { + Some(from_list) => from_list.try_into_typed::(vm)?, + None => vm.ctx.empty_tuple_typed().to_owned(), + }; let level = usize::try_from_object(vm, self.pop_value())?; - let module = vm.import_from(module_name, from_list, level)?; + let module = vm.import_from(module_name, &from_list, level)?; self.push_value(module); Ok(()) @@ -1839,7 +1842,8 @@ impl ExecutingFrame<'_> { .expect("Second to top value on the stack must be a code object"); let closure = if flags.contains(bytecode::MakeFunctionFlags::CLOSURE) { - Some(PyTupleTyped::try_from_object(vm, self.pop_value()).unwrap()) + let tuple = PyTupleRef::try_from_object(vm, self.pop_value()).unwrap(); + Some(tuple.try_into_typed(vm).expect("This is a compiler bug")) } else { None }; diff --git a/vm/src/object/core.rs b/vm/src/object/core.rs index b4b9557f2a..5012855133 100644 --- a/vm/src/object/core.rs +++ b/vm/src/object/core.rs @@ -15,7 +15,7 @@ use super::{ ext::{AsObject, PyRefExact, PyResult}, payload::PyObjectPayload, }; -use crate::object::traverse::{Traverse, TraverseFn}; +use crate::object::traverse::{MaybeTraverse, Traverse, TraverseFn}; use crate::object::traverse_object::PyObjVTable; use crate::{ builtins::{PyDictRef, PyType, PyTypeRef}, @@ -121,7 +121,7 @@ impl fmt::Debug for PyInner { } } -unsafe impl Traverse for Py { +unsafe impl Traverse for Py { /// DO notice that call `trace` on `Py` means apply `tracer_fn` on `Py`'s children, /// not like call `trace` on `PyRef` which apply `tracer_fn` on `PyRef` itself fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { @@ -557,7 +557,7 @@ impl PyObjectRef { /// # Safety /// T must be the exact payload type #[inline(always)] - pub unsafe fn downcast_unchecked(self) -> PyRef { + pub unsafe fn downcast_unchecked(self) -> PyRef { // PyRef::from_obj_unchecked(self) // manual impl to avoid assertion let obj = ManuallyDrop::new(self); @@ -893,7 +893,7 @@ impl fmt::Debug for PyObjectRef { } #[repr(transparent)] -pub struct Py(PyInner); +pub struct Py(PyInner); impl Py { pub fn downgrade( @@ -908,7 +908,7 @@ impl Py { } } -impl ToOwned for Py { +impl ToOwned for Py { type Owned = PyRef; #[inline(always)] @@ -920,7 +920,7 @@ impl ToOwned for Py { } } -impl Deref for Py { +impl Deref for Py { type Target = T; #[inline(always)] @@ -984,24 +984,24 @@ impl fmt::Debug for Py { /// situations (such as when implementing in-place methods such as `__iadd__`) /// where a reference to the same object must be returned. #[repr(transparent)] -pub struct PyRef { +pub struct PyRef { ptr: NonNull>, } cfg_if::cfg_if! { if #[cfg(feature = "threading")] { - unsafe impl Send for PyRef {} - unsafe impl Sync for PyRef {} + unsafe impl Send for PyRef {} + unsafe impl Sync for PyRef {} } } -impl fmt::Debug for PyRef { +impl fmt::Debug for PyRef { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { (**self).fmt(f) } } -impl Drop for PyRef { +impl Drop for PyRef { #[inline] fn drop(&mut self) { if self.0.ref_count.dec() { @@ -1010,7 +1010,7 @@ impl Drop for PyRef { } } -impl Clone for PyRef { +impl Clone for PyRef { #[inline(always)] fn clone(&self) -> Self { (**self).to_owned() @@ -1070,10 +1070,7 @@ where } } -impl From> for PyObjectRef -where - T: PyObjectPayload, -{ +impl From> for PyObjectRef { #[inline] fn from(value: PyRef) -> Self { let me = ManuallyDrop::new(value); @@ -1081,30 +1078,21 @@ where } } -impl Borrow> for PyRef -where - T: PyObjectPayload, -{ +impl Borrow> for PyRef { #[inline(always)] fn borrow(&self) -> &Py { self } } -impl AsRef> for PyRef -where - T: PyObjectPayload, -{ +impl AsRef> for PyRef { #[inline(always)] fn as_ref(&self) -> &Py { self } } -impl Deref for PyRef -where - T: PyObjectPayload, -{ +impl Deref for PyRef { type Target = Py; #[inline(always)] diff --git a/vm/src/object/traverse_object.rs b/vm/src/object/traverse_object.rs index ee32785950..281b0e56eb 100644 --- a/vm/src/object/traverse_object.rs +++ b/vm/src/object/traverse_object.rs @@ -3,7 +3,8 @@ use std::fmt; use crate::{ PyObject, object::{ - Erased, InstanceDict, PyInner, PyObjectPayload, debug_obj, drop_dealloc_obj, try_trace_obj, + Erased, InstanceDict, MaybeTraverse, PyInner, PyObjectPayload, debug_obj, drop_dealloc_obj, + try_trace_obj, }, }; @@ -56,7 +57,7 @@ unsafe impl Traverse for PyInner { } } -unsafe impl Traverse for PyInner { +unsafe impl Traverse for PyInner { /// Type is known, so we can call `try_trace` directly instead of using erased type vtable fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { // 1. trace `dict` and `slots` field(`typ` can't trace for it's a AtomicRef while is leaked by design) diff --git a/vm/src/vm/context.rs b/vm/src/vm/context.rs index d35b5b7f7e..4c673831e0 100644 --- a/vm/src/vm/context.rs +++ b/vm/src/vm/context.rs @@ -11,6 +11,7 @@ use crate::{ }, getset::PyGetSet, object, pystr, + tuple::PyTupleTyped, type_::PyAttributes, }, class::{PyClassImpl, StaticType}, @@ -373,6 +374,12 @@ impl Context { self.not_implemented.clone().into() } + #[inline] + pub fn empty_tuple_typed(&self) -> &Py> { + let py: &Py = &self.empty_tuple; + unsafe { std::mem::transmute(py) } + } + // universal pyref constructor pub fn new_pyref(&self, value: T) -> PyRef

where diff --git a/vm/src/vm/mod.rs b/vm/src/vm/mod.rs index 4a319c9635..dbfa2147b3 100644 --- a/vm/src/vm/mod.rs +++ b/vm/src/vm/mod.rs @@ -599,7 +599,7 @@ impl VirtualMachine { #[inline] pub fn import<'a>(&self, module_name: impl AsPyStr<'a>, level: usize) -> PyResult { let module_name = module_name.as_pystr(&self.ctx); - let from_list = PyTupleTyped::empty(self); + let from_list = self.ctx.empty_tuple_typed(); self.import_inner(module_name, from_list, level) } @@ -609,7 +609,7 @@ impl VirtualMachine { pub fn import_from<'a>( &self, module_name: impl AsPyStr<'a>, - from_list: PyTupleTyped, + from_list: &Py>, level: usize, ) -> PyResult { let module_name = module_name.as_pystr(&self.ctx); @@ -619,7 +619,7 @@ impl VirtualMachine { fn import_inner( &self, module: &Py, - from_list: PyTupleTyped, + from_list: &Py>, level: usize, ) -> PyResult { // if the import inputs seem weird, e.g a package import or something, rather than just @@ -657,7 +657,7 @@ impl VirtualMachine { } else { (None, None) }; - let from_list = from_list.to_pyobject(self); + let from_list: PyObjectRef = from_list.to_owned().into(); import_func .call((module.to_owned(), globals, locals, from_list, level), self) .inspect_err(|exc| import::remove_importlib_frames(self, exc)) From 6342ad4fa7ec073aa85967e7f2a4c6bcfd2df103 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Mon, 14 Jul 2025 14:27:31 +0900 Subject: [PATCH 04/81] Fully integrate PyTupleTyped into PyTuple --- vm/src/builtins/function.rs | 8 +- vm/src/builtins/tuple.rs | 213 +++++++++++++----------------------- vm/src/builtins/type.rs | 10 +- vm/src/vm/context.rs | 3 +- vm/src/vm/mod.rs | 9 +- 5 files changed, 92 insertions(+), 151 deletions(-) diff --git a/vm/src/builtins/function.rs b/vm/src/builtins/function.rs index 45917adcf2..16cb3e420f 100644 --- a/vm/src/builtins/function.rs +++ b/vm/src/builtins/function.rs @@ -2,8 +2,8 @@ mod jit; use super::{ - PyAsyncGen, PyCode, PyCoroutine, PyDictRef, PyGenerator, PyStr, PyStrRef, PyTupleRef, PyType, - PyTypeRef, tuple::PyTupleTyped, + PyAsyncGen, PyCode, PyCoroutine, PyDictRef, PyGenerator, PyStr, PyStrRef, PyTuple, PyTupleRef, + PyType, PyTypeRef, }; #[cfg(feature = "jit")] use crate::common::lock::OnceCell; @@ -31,7 +31,7 @@ pub struct PyFunction { code: PyRef, globals: PyDictRef, builtins: PyObjectRef, - closure: Option>>, + closure: Option>>, defaults_and_kwdefaults: PyMutex<(Option, Option)>, name: PyMutex, qualname: PyMutex, @@ -59,7 +59,7 @@ impl PyFunction { pub(crate) fn new( code: PyRef, globals: PyDictRef, - closure: Option>>, + closure: Option>>, defaults: Option, kw_only_defaults: Option, qualname: PyStrRef, diff --git a/vm/src/builtins/tuple.rs b/vm/src/builtins/tuple.rs index 9f589547f0..2c3255b249 100644 --- a/vm/src/builtins/tuple.rs +++ b/vm/src/builtins/tuple.rs @@ -3,7 +3,6 @@ use crate::common::{ hash::{PyHash, PyUHash}, lock::PyMutex, }; -use crate::object::{MaybeTraverse, Traverse, TraverseFn}; use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, atomic_func, @@ -22,14 +21,14 @@ use crate::{ utils::collection_repr, vm::VirtualMachine, }; -use std::{fmt, marker::PhantomData, sync::LazyLock}; +use std::{fmt, sync::LazyLock}; #[pyclass(module = false, name = "tuple", traverse)] -pub struct PyTuple { - elements: Box<[PyObjectRef]>, +pub struct PyTuple { + elements: Box<[R]>, } -impl fmt::Debug for PyTuple { +impl fmt::Debug for PyTuple { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // TODO: implement more informational, non-recursive Debug formatter f.write_str("tuple") @@ -140,39 +139,60 @@ impl Constructor for PyTuple { } } -impl AsRef<[PyObjectRef]> for PyTuple { - fn as_ref(&self) -> &[PyObjectRef] { - self.as_slice() +impl AsRef<[R]> for PyTuple { + fn as_ref(&self) -> &[R] { + &self.elements } } -impl std::ops::Deref for PyTuple { - type Target = [PyObjectRef]; +impl std::ops::Deref for PyTuple { + type Target = [R]; - fn deref(&self) -> &[PyObjectRef] { - self.as_slice() + fn deref(&self) -> &[R] { + &self.elements } } -impl<'a> std::iter::IntoIterator for &'a PyTuple { - type Item = &'a PyObjectRef; - type IntoIter = std::slice::Iter<'a, PyObjectRef>; +impl<'a, R> std::iter::IntoIterator for &'a PyTuple { + type Item = &'a R; + type IntoIter = std::slice::Iter<'a, R>; fn into_iter(self) -> Self::IntoIter { self.iter() } } -impl<'a> std::iter::IntoIterator for &'a Py { - type Item = &'a PyObjectRef; - type IntoIter = std::slice::Iter<'a, PyObjectRef>; +impl<'a, R> std::iter::IntoIterator for &'a Py> { + type Item = &'a R; + type IntoIter = std::slice::Iter<'a, R>; fn into_iter(self) -> Self::IntoIter { self.iter() } } -impl PyTuple { +impl PyTuple { + pub const fn as_slice(&self) -> &[R] { + &self.elements + } + + #[inline] + pub fn len(&self) -> usize { + self.elements.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.elements.is_empty() + } + + #[inline] + pub fn iter(&self) -> std::slice::Iter<'_, R> { + self.elements.iter() + } +} + +impl PyTuple { pub fn new_ref(elements: Vec, ctx: &Context) -> PyRef { if elements.is_empty() { ctx.empty_tuple.clone() @@ -189,10 +209,6 @@ impl PyTuple { Self { elements } } - pub const fn as_slice(&self) -> &[PyObjectRef] { - &self.elements - } - fn repeat(zelf: PyRef, value: isize, vm: &VirtualMachine) -> PyResult> { Ok(if zelf.elements.is_empty() || value == 0 { vm.ctx.empty_tuple.clone() @@ -214,6 +230,18 @@ impl PyTuple { } } +impl PyTuple> { + pub fn new_ref_typed(elements: Vec>, ctx: &Context) -> PyRef>> { + // SAFETY: PyRef has the same layout as PyObjectRef + unsafe { + let elements: Vec = + std::mem::transmute::>, Vec>(elements); + let tuple = PyTuple::::new_ref(elements, ctx); + std::mem::transmute::, PyRef>>>(tuple) + } + } +} + #[pyclass( flags(BASETYPE), with( @@ -272,11 +300,6 @@ impl PyTuple { self.elements.len() } - #[inline] - pub const fn is_empty(&self) -> bool { - self.elements.is_empty() - } - #[pymethod(name = "__rmul__")] #[pymethod] fn __mul__(zelf: PyRef, value: ArgSize, vm: &VirtualMachine) -> PyResult> { @@ -449,21 +472,38 @@ impl Representable for PyTuple { } } -impl PyRef { +impl PyRef> { pub fn try_into_typed( self, vm: &VirtualMachine, - ) -> PyResult>>> { - PyRef::>>::try_from_untyped(self, vm) + ) -> PyResult>>> { + // Check that all elements are of the correct type + for elem in self.as_slice() { + as TransmuteFromObject>::check(vm, elem)?; + } + // SAFETY: We just verified all elements are of type T + Ok(unsafe { std::mem::transmute::, PyRef>>>(self) }) + } +} + +impl PyRef>> { + pub fn into_untyped(self) -> PyRef { + // SAFETY: PyTuple> has the same layout as PyTuple + unsafe { std::mem::transmute::>>, PyRef>(self) } } - /// # Safety - /// - /// The caller must ensure that all elements in the tuple are valid instances - /// of type `T` before calling this method. This is typically verified by - /// calling `try_into_typed` first. - unsafe fn into_typed_unchecked(self) -> PyRef>> { - let obj: PyObjectRef = self.into(); - unsafe { obj.downcast_unchecked::>>() } +} + +impl Py>> { + pub fn as_untyped(&self) -> &Py { + // SAFETY: PyTuple> has the same layout as PyTuple + unsafe { std::mem::transmute::<&Py>>, &Py>(self) } + } +} + +impl From>>> for PyTupleRef { + #[inline] + fn from(tup: PyRef>>) -> Self { + tup.into_untyped() } } @@ -518,101 +558,6 @@ pub(crate) fn init(context: &Context) { PyTupleIterator::extend_class(context, context.types.tuple_iterator_type); } -#[repr(transparent)] -pub struct PyTupleTyped { - // SAFETY INVARIANT: T must be repr(transparent) over PyObjectRef, and the - // elements must be logically valid when transmuted to T - tuple: PyTuple, - _marker: PhantomData, -} - -unsafe impl Traverse for PyTupleTyped -where - R: TransmuteFromObject, -{ - fn traverse(&self, tracer_fn: &mut TraverseFn<'_>) { - self.tuple.traverse(tracer_fn); - } -} - -impl MaybeTraverse for PyTupleTyped { - const IS_TRACE: bool = true; - fn try_traverse(&self, tracer_fn: &mut TraverseFn<'_>) { - self.traverse(tracer_fn); - } -} - -impl PyTupleTyped> { - pub fn new_ref(elements: Vec>, ctx: &Context) -> PyRef { - // SAFETY: PyRef has the same layout as PyObjectRef - unsafe { - let elements: Vec = - std::mem::transmute::>, Vec>(elements); - let tuple = PyTuple::new_ref(elements, ctx); - tuple.into_typed_unchecked::() - } - } -} - -impl PyRef>> { - pub fn into_untyped(self) -> PyRef { - // SAFETY: PyTupleTyped is transparent over PyTuple - unsafe { std::mem::transmute::>>, PyRef>(self) } - } - - pub fn try_from_untyped(tuple: PyTupleRef, vm: &VirtualMachine) -> PyResult { - // Check that all elements are of the correct type - for elem in tuple.as_slice() { - as TransmuteFromObject>::check(vm, elem)?; - } - // SAFETY: We just verified all elements are of type T, and PyTupleTyped has the same layout as PyTuple - Ok(unsafe { std::mem::transmute::, PyRef>>>(tuple) }) - } -} - -impl Py>> { - pub fn as_untyped(&self) -> &Py { - // SAFETY: PyTupleTyped is transparent over PyTuple - unsafe { std::mem::transmute::<&Py>>, &Py>(self) } - } -} - -impl AsRef<[PyRef]> for PyTupleTyped> { - fn as_ref(&self) -> &[PyRef] { - self.as_slice() - } -} - -impl PyTupleTyped> { - #[inline] - pub fn as_slice(&self) -> &[PyRef] { - unsafe { &*(self.tuple.as_slice() as *const [PyObjectRef] as *const [PyRef]) } - } - - #[inline] - pub fn len(&self) -> usize { - self.tuple.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.tuple.is_empty() - } -} - -impl fmt::Debug for PyTupleTyped { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.tuple.as_slice().fmt(f) - } -} - -impl From>>> for PyTupleRef { - #[inline] - fn from(tup: PyRef>>) -> Self { - tup.into_untyped() - } -} - pub(super) fn tuple_hash(elements: &[PyObjectRef], vm: &VirtualMachine) -> PyResult { #[cfg(target_pointer_width = "64")] const PRIME1: PyUHash = 11400714785074694791; diff --git a/vm/src/builtins/type.rs b/vm/src/builtins/type.rs index 5a8f853bf1..f2a4fde3b9 100644 --- a/vm/src/builtins/type.rs +++ b/vm/src/builtins/type.rs @@ -1,5 +1,5 @@ use super::{ - PyClassMethod, PyDictRef, PyList, PyStr, PyStrInterned, PyStrRef, PyTuple, PyTupleRef, PyWeak, + PyClassMethod, PyDictRef, PyList, PyStr, PyStrInterned, PyStrRef, PyTupleRef, PyWeak, mappingproxy::PyMappingProxy, object, union_, }; use crate::{ @@ -12,7 +12,7 @@ use crate::{ PyMemberDescriptor, }, function::PyCellRef, - tuple::{IntoPyTuple, PyTupleTyped}, + tuple::{IntoPyTuple, PyTuple}, }, class::{PyClassImpl, StaticType}, common::{ @@ -62,7 +62,7 @@ unsafe impl crate::object::Traverse for PyType { pub struct HeapTypeExt { pub name: PyRwLock, pub qualname: PyRwLock, - pub slots: Option>>, + pub slots: Option>>, pub sequence_methods: PySequenceMethods, pub mapping_methods: PyMappingMethods, } @@ -1041,11 +1041,11 @@ impl Constructor for PyType { // TODO: Flags is currently initialized with HAS_DICT. Should be // updated when __slots__ are supported (toggling the flag off if // a class has __slots__ defined). - let heaptype_slots: Option>> = + let heaptype_slots: Option>> = if let Some(x) = attributes.get(identifier!(vm, __slots__)) { let slots = if x.class().is(vm.ctx.types.str_type) { let x = unsafe { x.downcast_unchecked_ref::() }; - PyTupleTyped::new_ref(vec![x.to_owned()], &vm.ctx) + PyTuple::new_ref_typed(vec![x.to_owned()], &vm.ctx) } else { let iter = x.get_iter(vm)?; let elements = { diff --git a/vm/src/vm/context.rs b/vm/src/vm/context.rs index 4c673831e0..6707288151 100644 --- a/vm/src/vm/context.rs +++ b/vm/src/vm/context.rs @@ -11,7 +11,6 @@ use crate::{ }, getset::PyGetSet, object, pystr, - tuple::PyTupleTyped, type_::PyAttributes, }, class::{PyClassImpl, StaticType}, @@ -375,7 +374,7 @@ impl Context { } #[inline] - pub fn empty_tuple_typed(&self) -> &Py> { + pub fn empty_tuple_typed(&self) -> &Py> { let py: &Py = &self.empty_tuple; unsafe { std::mem::transmute(py) } } diff --git a/vm/src/vm/mod.rs b/vm/src/vm/mod.rs index dbfa2147b3..498c7e39d1 100644 --- a/vm/src/vm/mod.rs +++ b/vm/src/vm/mod.rs @@ -20,10 +20,7 @@ use crate::{ AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, builtins::{ PyBaseExceptionRef, PyDictRef, PyInt, PyList, PyModule, PyStr, PyStrInterned, PyStrRef, - PyTypeRef, - code::PyCode, - pystr::AsPyStr, - tuple::{PyTuple, PyTupleTyped}, + PyTypeRef, code::PyCode, pystr::AsPyStr, tuple::PyTuple, }, codecs::CodecsRegistry, common::{hash::HashSecret, lock::PyMutex, rc::PyRc}, @@ -609,7 +606,7 @@ impl VirtualMachine { pub fn import_from<'a>( &self, module_name: impl AsPyStr<'a>, - from_list: &Py>, + from_list: &Py>, level: usize, ) -> PyResult { let module_name = module_name.as_pystr(&self.ctx); @@ -619,7 +616,7 @@ impl VirtualMachine { fn import_inner( &self, module: &Py, - from_list: &Py>, + from_list: &Py>, level: usize, ) -> PyResult { // if the import inputs seem weird, e.g a package import or something, rather than just From 406be9cd15b36e1952ca8eb56408e83281b2aa09 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Mon, 14 Jul 2025 20:12:22 +0900 Subject: [PATCH 05/81] Upgrade radium to 1.1.1 --- Cargo.lock | 5 +++-- Cargo.toml | 3 +-- example_projects/barebone/Cargo.toml | 1 - example_projects/frozen_stdlib/Cargo.toml | 1 - wasm/wasm-unknown-test/Cargo.toml | 1 - 5 files changed, 4 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c6dee806c7..50ec28b1ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1958,8 +1958,9 @@ checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" [[package]] name = "radium" -version = "1.1.0" -source = "git+https://github.com/youknowone/ferrilab?branch=fix-nightly#4a301c3a223e096626a2773d1a1eed1fc4e21140" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1775bc532a9bfde46e26eba441ca1171b91608d14a3bae71fea371f18a00cffe" dependencies = [ "cfg-if", ] diff --git a/Cargo.toml b/Cargo.toml index 1fdc77d261..440855aba5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -82,7 +82,6 @@ opt-level = 3 lto = "thin" [patch.crates-io] -radium = { version = "1.1.0", git = "https://github.com/youknowone/ferrilab", branch = "fix-nightly" } # REDOX START, Uncomment when you want to compile/check with redoxer # REDOX END @@ -190,7 +189,7 @@ paste = "1.0.15" proc-macro2 = "1.0.93" pymath = "0.0.2" quote = "1.0.38" -radium = "1.1" +radium = "1.1.1" rand = "0.9" rand_core = { version = "0.9", features = ["os_rng"] } rustix = { version = "1.0", features = ["event"] } diff --git a/example_projects/barebone/Cargo.toml b/example_projects/barebone/Cargo.toml index a993277f31..8bc49c237f 100644 --- a/example_projects/barebone/Cargo.toml +++ b/example_projects/barebone/Cargo.toml @@ -9,4 +9,3 @@ rustpython-vm = { path = "../../vm", default-features = false } [workspace] [patch.crates-io] -radium = { version = "1.1.0", git = "https://github.com/youknowone/ferrilab", branch = "fix-nightly" } diff --git a/example_projects/frozen_stdlib/Cargo.toml b/example_projects/frozen_stdlib/Cargo.toml index be1b1eb16c..78a88988d8 100644 --- a/example_projects/frozen_stdlib/Cargo.toml +++ b/example_projects/frozen_stdlib/Cargo.toml @@ -11,4 +11,3 @@ rustpython-pylib = { path = "../../pylib", default-features = false, features = [workspace] [patch.crates-io] -radium = { version = "1.1.0", git = "https://github.com/youknowone/ferrilab", branch = "fix-nightly" } diff --git a/wasm/wasm-unknown-test/Cargo.toml b/wasm/wasm-unknown-test/Cargo.toml index 5945f69006..ed8c9fcb02 100644 --- a/wasm/wasm-unknown-test/Cargo.toml +++ b/wasm/wasm-unknown-test/Cargo.toml @@ -13,4 +13,3 @@ rustpython-vm = { path = "../../vm", default-features = false, features = ["comp [workspace] [patch.crates-io] -radium = { version = "1.1.0", git = "https://github.com/youknowone/ferrilab", branch = "fix-nightly" } From dd4f0c3a9f48a3ad28b46a11c266112465c39d98 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Mon, 14 Jul 2025 20:21:31 +0900 Subject: [PATCH 06/81] fix lint --- vm/src/vm/mod.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vm/src/vm/mod.rs b/vm/src/vm/mod.rs index 498c7e39d1..6993897598 100644 --- a/vm/src/vm/mod.rs +++ b/vm/src/vm/mod.rs @@ -14,8 +14,6 @@ mod vm_new; mod vm_object; mod vm_ops; -#[cfg(not(feature = "stdio"))] -use crate::builtins::PyNone; use crate::{ AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, builtins::{ @@ -337,7 +335,8 @@ impl VirtualMachine { Ok(stdio) }; #[cfg(not(feature = "stdio"))] - let make_stdio = |_name, _fd, _write| Ok(PyNone.into_pyobject(self)); + let make_stdio = + |_name, _fd, _write| Ok(crate::builtins::PyNone.into_pyobject(self)); let set_stdio = |name, fd, write| { let stdio = make_stdio(name, fd, write)?; From fd35c7a70634b78f85f2b3278f8da57b52ef6734 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Mon, 14 Jul 2025 22:54:44 +0900 Subject: [PATCH 07/81] Impl Drop for PyAtomicRef (#5970) --- vm/src/object/ext.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/vm/src/object/ext.rs b/vm/src/object/ext.rs index 2815d2b20e..1e2b78d9a9 100644 --- a/vm/src/object/ext.rs +++ b/vm/src/object/ext.rs @@ -245,6 +245,19 @@ pub struct PyAtomicRef { _phantom: PhantomData, } +impl Drop for PyAtomicRef { + fn drop(&mut self) { + // SAFETY: We are dropping the atomic reference, so we can safely + // release the pointer. + unsafe { + let ptr = Radium::swap(&self.inner, null_mut(), Ordering::Relaxed); + if let Some(ptr) = NonNull::::new(ptr.cast()) { + let _: PyObjectRef = PyObjectRef::from_raw(ptr); + } + } + } +} + cfg_if::cfg_if! { if #[cfg(feature = "threading")] { unsafe impl Send for PyAtomicRef {} From ed433837b309d00f307a1e453fd8d60efce441bd Mon Sep 17 00:00:00 2001 From: Jiseok CHOI Date: Tue, 15 Jul 2025 00:54:42 +0900 Subject: [PATCH 08/81] Introduce PyUtf8Str and fix(sqlite): validate surrogates in SQL statements (#5969) * fix(sqlite): validate surrogates in SQL statements * Add `PyUtf8Str` wrapper for safe conversion --- Lib/test/test_sqlite3/test_regression.py | 2 - stdlib/src/sqlite.rs | 9 +++-- vm/src/builtins/str.rs | 49 +++++++++++++++++++++--- 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_sqlite3/test_regression.py b/Lib/test/test_sqlite3/test_regression.py index dfcf3b11f5..870958ceee 100644 --- a/Lib/test/test_sqlite3/test_regression.py +++ b/Lib/test/test_sqlite3/test_regression.py @@ -343,8 +343,6 @@ def test_null_character(self): self.assertRaisesRegex(sqlite.ProgrammingError, "null char", cur.execute, query) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_surrogates(self): con = sqlite.connect(":memory:") self.assertRaises(UnicodeEncodeError, con, "select '\ud8ff'") diff --git a/stdlib/src/sqlite.rs b/stdlib/src/sqlite.rs index ce84ac2988..4e9620eeab 100644 --- a/stdlib/src/sqlite.rs +++ b/stdlib/src/sqlite.rs @@ -844,7 +844,7 @@ mod _sqlite { type Args = (PyStrRef,); fn call(zelf: &Py, args: Self::Args, vm: &VirtualMachine) -> PyResult { - if let Some(stmt) = Statement::new(zelf, &args.0, vm)? { + if let Some(stmt) = Statement::new(zelf, args.0, vm)? { Ok(stmt.into_ref(&vm.ctx).into()) } else { Ok(vm.ctx.none()) @@ -1480,7 +1480,7 @@ mod _sqlite { stmt.lock().reset(); } - let Some(stmt) = Statement::new(&zelf.connection, &sql, vm)? else { + let Some(stmt) = Statement::new(&zelf.connection, sql, vm)? else { drop(inner); return Ok(zelf); }; @@ -1552,7 +1552,7 @@ mod _sqlite { stmt.lock().reset(); } - let Some(stmt) = Statement::new(&zelf.connection, &sql, vm)? else { + let Some(stmt) = Statement::new(&zelf.connection, sql, vm)? else { drop(inner); return Ok(zelf); }; @@ -2291,9 +2291,10 @@ mod _sqlite { impl Statement { fn new( connection: &Connection, - sql: &PyStr, + sql: PyStrRef, vm: &VirtualMachine, ) -> PyResult> { + let sql = sql.try_into_utf8(vm)?; let sql_cstr = sql.to_cstring(vm)?; let sql_len = sql.byte_len() + 1; diff --git a/vm/src/builtins/str.rs b/vm/src/builtins/str.rs index 9f86da3da0..73349c6141 100644 --- a/vm/src/builtins/str.rs +++ b/vm/src/builtins/str.rs @@ -37,8 +37,8 @@ use rustpython_common::{ str::DeduceStrKind, wtf8::{CodePoint, Wtf8, Wtf8Buf, Wtf8Chunk}, }; -use std::sync::LazyLock; use std::{borrow::Cow, char, fmt, ops::Range}; +use std::{mem, sync::LazyLock}; use unic_ucd_bidi::BidiClass; use unic_ucd_category::GeneralCategory; use unic_ucd_ident::{is_xid_continue, is_xid_start}; @@ -80,6 +80,30 @@ impl fmt::Debug for PyStr { } } +#[repr(transparent)] +#[derive(Debug)] +pub struct PyUtf8Str(PyStr); + +// TODO: Remove this Deref which may hide missing optimized methods of PyUtf8Str +impl std::ops::Deref for PyUtf8Str { + type Target = PyStr; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl PyUtf8Str { + /// Returns the underlying string slice. + pub fn as_str(&self) -> &str { + debug_assert!( + self.0.is_utf8(), + "PyUtf8Str invariant violated: inner string is not valid UTF-8" + ); + // Safety: This is safe because the type invariant guarantees UTF-8 validity. + unsafe { self.0.to_str().unwrap_unchecked() } + } +} + impl AsRef for PyStr { #[track_caller] // <- can remove this once it doesn't panic fn as_ref(&self) -> &str { @@ -433,21 +457,29 @@ impl PyStr { self.data.as_str() } - pub fn try_to_str(&self, vm: &VirtualMachine) -> PyResult<&str> { - self.to_str().ok_or_else(|| { + fn ensure_valid_utf8(&self, vm: &VirtualMachine) -> PyResult<()> { + if self.is_utf8() { + Ok(()) + } else { let start = self .as_wtf8() .code_points() .position(|c| c.to_char().is_none()) .unwrap(); - vm.new_unicode_encode_error_real( + Err(vm.new_unicode_encode_error_real( identifier!(vm, utf_8).to_owned(), vm.ctx.new_str(self.data.clone()), start, start + 1, vm.ctx.new_str("surrogates not allowed"), - ) - }) + )) + } + } + + pub fn try_to_str(&self, vm: &VirtualMachine) -> PyResult<&str> { + self.ensure_valid_utf8(vm)?; + // SAFETY: ensure_valid_utf8 passed, so unwrap is safe. + Ok(unsafe { self.to_str().unwrap_unchecked() }) } pub fn to_string_lossy(&self) -> Cow<'_, str> { @@ -1486,6 +1518,11 @@ impl PyStrRef { s.push_wtf8(other); *self = PyStr::from(s).into_ref(&vm.ctx); } + + pub fn try_into_utf8(self, vm: &VirtualMachine) -> PyResult> { + self.ensure_valid_utf8(vm)?; + Ok(unsafe { mem::transmute::, PyRef>(self) }) + } } impl Representable for PyStr { From d4f85cf0737974cbae15f38d73cc0245ab59f0f9 Mon Sep 17 00:00:00 2001 From: Jiseok CHOI Date: Tue, 15 Jul 2025 01:45:42 +0900 Subject: [PATCH 09/81] Provide detailed error for circular `from` imports (#5972) --- Lib/test/test_import/__init__.py | 2 -- vm/src/frame.rs | 48 ++++++++++++++++++++++++++------ 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index 89e5ec1534..44e7da1033 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -1380,8 +1380,6 @@ def test_crossreference2(self): self.assertIn('partially initialized module', errmsg) self.assertIn('circular import', errmsg) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_circular_from_import(self): with self.assertRaises(ImportError) as cm: import test.test_import.data.circular_imports.from_cycle1 diff --git a/vm/src/frame.rs b/vm/src/frame.rs index 460ba4392e..28a6ece4da 100644 --- a/vm/src/frame.rs +++ b/vm/src/frame.rs @@ -1363,19 +1363,38 @@ impl ExecutingFrame<'_> { fn import_from(&mut self, vm: &VirtualMachine, idx: bytecode::NameIdx) -> PyResult { let module = self.top_value(); let name = self.code.names[idx as usize]; - let err = || vm.new_import_error(format!("cannot import name '{name}'"), name.to_owned()); + // Load attribute, and transform any error into import error. if let Some(obj) = vm.get_attribute_opt(module.to_owned(), name)? { return Ok(obj); } // fallback to importing '{module.__name__}.{name}' from sys.modules - let mod_name = module - .get_attr(identifier!(vm, __name__), vm) - .map_err(|_| err())?; - let mod_name = mod_name.downcast::().map_err(|_| err())?; - let full_mod_name = format!("{mod_name}.{name}"); - let sys_modules = vm.sys_module.get_attr("modules", vm).map_err(|_| err())?; - sys_modules.get_item(&full_mod_name, vm).map_err(|_| err()) + let fallback_module = (|| { + let mod_name = module.get_attr(identifier!(vm, __name__), vm).ok()?; + let mod_name = mod_name.downcast_ref::()?; + let full_mod_name = format!("{mod_name}.{name}"); + let sys_modules = vm.sys_module.get_attr("modules", vm).ok()?; + sys_modules.get_item(&full_mod_name, vm).ok() + })(); + + if let Some(sub_module) = fallback_module { + return Ok(sub_module); + } + + if is_module_initializing(module, vm) { + let module_name = module + .get_attr(identifier!(vm, __name__), vm) + .ok() + .and_then(|n| n.downcast_ref::().map(|s| s.as_str().to_owned())) + .unwrap_or_else(|| "".to_owned()); + + let msg = format!( + "cannot import name '{name}' from partially initialized module '{module_name}' (most likely due to a circular import)", + ); + Err(vm.new_import_error(msg, name.to_owned())) + } else { + Err(vm.new_import_error(format!("cannot import name '{name}'"), name.to_owned())) + } } #[cfg_attr(feature = "flame-it", flame("Frame"))] @@ -2372,3 +2391,16 @@ impl fmt::Debug for Frame { ) } } + +fn is_module_initializing(module: &PyObject, vm: &VirtualMachine) -> bool { + let Ok(spec) = module.get_attr(&vm.ctx.new_str("__spec__"), vm) else { + return false; + }; + if vm.is_none(&spec) { + return false; + } + let Ok(initializing_attr) = spec.get_attr(&vm.ctx.new_str("_initializing"), vm) else { + return false; + }; + initializing_attr.try_to_bool(vm).unwrap_or(false) +} From 1d3603419efb83088dcf96bcebebe91ad53ea6a5 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Tue, 15 Jul 2025 03:12:23 +0900 Subject: [PATCH 10/81] SetFunctionAttribute (#5968) * PyRef::into_non_null * SetFunctionAttribute * set_function_attribute * frame helper in PyFuncion * remove closure lock * cleanup unused args --- Lib/test/test_funcattrs.py | 2 - Lib/test/test_reprlib.py | 2 - Lib/test/test_typing.py | 2 - compiler/codegen/src/compile.rs | 347 +++++++++++++++++++--------- compiler/codegen/src/symboltable.rs | 1 + compiler/core/src/bytecode.rs | 22 +- jit/tests/common.rs | 45 +++- vm/src/builtins/function.rs | 146 ++++++++---- vm/src/frame.rs | 100 +++----- vm/src/object/core.rs | 23 +- vm/src/vm/mod.rs | 14 +- 11 files changed, 440 insertions(+), 264 deletions(-) diff --git a/Lib/test/test_funcattrs.py b/Lib/test/test_funcattrs.py index 5fd268fd90..3d5378092b 100644 --- a/Lib/test/test_funcattrs.py +++ b/Lib/test/test_funcattrs.py @@ -176,8 +176,6 @@ def test___name__(self): self.assertEqual(self.fi.a.__name__, 'a') self.cannot_set_attr(self.fi.a, "__name__", 'a', AttributeError) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test___qualname__(self): # PEP 3155 self.assertEqual(self.b.__qualname__, 'FuncAttrsTest.setUp..b') diff --git a/Lib/test/test_reprlib.py b/Lib/test/test_reprlib.py index 396be4b104..738b48f562 100644 --- a/Lib/test/test_reprlib.py +++ b/Lib/test/test_reprlib.py @@ -176,8 +176,6 @@ def test_instance(self): self.assertTrue(s.endswith(">")) self.assertIn(s.find("..."), [12, 13]) - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_lambda(self): r = repr(lambda x: x) self.assertTrue(r.startswith(". { Ok(()) } - fn enter_function( - &mut self, - name: &str, - parameters: &Parameters, - ) -> CompileResult { - let defaults: Vec<_> = std::iter::empty() - .chain(¶meters.posonlyargs) - .chain(¶meters.args) - .filter_map(|x| x.default.as_deref()) - .collect(); - let have_defaults = !defaults.is_empty(); - if have_defaults { - // Construct a tuple: - let size = defaults.len().to_u32(); - for element in &defaults { - self.compile_expression(element)?; - } - emit!(self, Instruction::BuildTuple { size }); - } - + fn enter_function(&mut self, name: &str, parameters: &Parameters) -> CompileResult<()> { // TODO: partition_in_place let mut kw_without_defaults = vec![]; let mut kw_with_defaults = vec![]; @@ -1513,31 +1494,6 @@ impl Compiler<'_> { } } - // let (kw_without_defaults, kw_with_defaults) = args.split_kwonlyargs(); - if !kw_with_defaults.is_empty() { - let default_kw_count = kw_with_defaults.len(); - for (arg, default) in kw_with_defaults.iter() { - self.emit_load_const(ConstantData::Str { - value: arg.name.as_str().into(), - }); - self.compile_expression(default)?; - } - emit!( - self, - Instruction::BuildMap { - size: default_kw_count.to_u32(), - } - ); - } - - let mut func_flags = bytecode::MakeFunctionFlags::empty(); - if have_defaults { - func_flags |= bytecode::MakeFunctionFlags::DEFAULTS; - } - if !kw_with_defaults.is_empty() { - func_flags |= bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS; - } - self.push_output( bytecode::CodeFlags::NEW_LOCALS | bytecode::CodeFlags::IS_OPTIMIZED, parameters.posonlyargs.len().to_u32(), @@ -1565,7 +1521,7 @@ impl Compiler<'_> { self.varname(name.name.as_str())?; } - Ok(func_flags) + Ok(()) } fn prepare_decorators(&mut self, decorator_list: &[Decorator]) -> CompileResult<()> { @@ -1869,7 +1825,57 @@ impl Compiler<'_> { self.push_symbol_table(); } - let mut func_flags = self.enter_function(name, parameters)?; + // Prepare defaults and kwdefaults before entering function + let defaults: Vec<_> = std::iter::empty() + .chain(¶meters.posonlyargs) + .chain(¶meters.args) + .filter_map(|x| x.default.as_deref()) + .collect(); + let have_defaults = !defaults.is_empty(); + + // Compile defaults before entering function scope + if have_defaults { + // Construct a tuple: + let size = defaults.len().to_u32(); + for element in &defaults { + self.compile_expression(element)?; + } + emit!(self, Instruction::BuildTuple { size }); + } + + // Prepare keyword-only defaults + let mut kw_with_defaults = vec![]; + for kwonlyarg in ¶meters.kwonlyargs { + if let Some(default) = &kwonlyarg.default { + kw_with_defaults.push((&kwonlyarg.parameter, default)); + } + } + + let have_kwdefaults = !kw_with_defaults.is_empty(); + if have_kwdefaults { + let default_kw_count = kw_with_defaults.len(); + for (arg, default) in kw_with_defaults.iter() { + self.emit_load_const(ConstantData::Str { + value: arg.name.as_str().into(), + }); + self.compile_expression(default)?; + } + emit!( + self, + Instruction::BuildMap { + size: default_kw_count.to_u32(), + } + ); + } + + self.enter_function(name, parameters)?; + let mut func_flags = bytecode::MakeFunctionFlags::empty(); + if have_defaults { + func_flags |= bytecode::MakeFunctionFlags::DEFAULTS; + } + if have_kwdefaults { + func_flags |= bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS; + } self.current_code_info() .flags .set(bytecode::CodeFlags::IS_COROUTINE, is_async); @@ -1888,7 +1894,7 @@ impl Compiler<'_> { }; // Set qualname using the new method - let qualname = self.set_qualname(); + self.set_qualname(); let (doc_str, body) = split_doc(body, &self.opts); @@ -1965,7 +1971,7 @@ impl Compiler<'_> { } // Create function with closure - self.make_closure(code, &qualname, func_flags)?; + self.make_closure(code, func_flags)?; if let Some(value) = doc_str { emit!(self, Instruction::Duplicate); @@ -1982,58 +1988,92 @@ impl Compiler<'_> { self.store_name(name) } + /// Determines if a variable should be CELL or FREE type + // = get_ref_type + fn get_ref_type(&self, name: &str) -> Result { + // Special handling for __class__ and __classdict__ in class scope + if self.ctx.in_class && (name == "__class__" || name == "__classdict__") { + return Ok(SymbolScope::Cell); + } + + let table = self.symbol_table_stack.last().unwrap(); + match table.lookup(name) { + Some(symbol) => match symbol.scope { + SymbolScope::Cell | SymbolScope::TypeParams => Ok(SymbolScope::Cell), + SymbolScope::Free => Ok(SymbolScope::Free), + _ if symbol.flags.contains(SymbolFlags::FREE_CLASS) => Ok(SymbolScope::Free), + _ => Err(CodegenErrorType::SyntaxError(format!( + "get_ref_type: invalid scope for '{name}'" + ))), + }, + None => Err(CodegenErrorType::SyntaxError(format!( + "get_ref_type: cannot find symbol '{name}'" + ))), + } + } + /// Loads closure variables if needed and creates a function object // = compiler_make_closure fn make_closure( &mut self, code: CodeObject, - qualname: &str, - mut flags: bytecode::MakeFunctionFlags, + flags: bytecode::MakeFunctionFlags, ) -> CompileResult<()> { // Handle free variables (closure) - if !code.freevars.is_empty() { + let has_freevars = !code.freevars.is_empty(); + if has_freevars { // Build closure tuple by loading free variables + for var in &code.freevars { - let table = self.symbol_table_stack.last().unwrap(); - let symbol = match table.lookup(var) { - Some(s) => s, - None => { - return Err(self.error(CodegenErrorType::SyntaxError(format!( - "compiler_make_closure: cannot find symbol '{var}'", - )))); - } - }; + // Special case: If a class contains a method with a + // free variable that has the same name as a method, + // the name will be considered free *and* local in the + // class. It should be handled by the closure, as + // well as by the normal name lookup logic. + + // Get reference type using our get_ref_type function + let ref_type = self.get_ref_type(var).map_err(|e| self.error(e))?; + // Get parent code info let parent_code = self.code_stack.last().unwrap(); - let vars = match symbol.scope { - SymbolScope::Free => &parent_code.metadata.freevars, - SymbolScope::Cell => &parent_code.metadata.cellvars, - SymbolScope::TypeParams => &parent_code.metadata.cellvars, - _ if symbol.flags.contains(SymbolFlags::FREE_CLASS) => { - &parent_code.metadata.freevars - } + let cellvars_len = parent_code.metadata.cellvars.len(); + + // Look up the variable index based on reference type + let idx = match ref_type { + SymbolScope::Cell => parent_code + .metadata + .cellvars + .get_index_of(var) + .or_else(|| { + parent_code + .metadata + .freevars + .get_index_of(var) + .map(|i| i + cellvars_len) + }) + .ok_or_else(|| { + self.error(CodegenErrorType::SyntaxError(format!( + "compiler_make_closure: cannot find '{var}' in parent vars", + ))) + })?, + SymbolScope::Free => parent_code + .metadata + .freevars + .get_index_of(var) + .map(|i| i + cellvars_len) + .or_else(|| parent_code.metadata.cellvars.get_index_of(var)) + .ok_or_else(|| { + self.error(CodegenErrorType::SyntaxError(format!( + "compiler_make_closure: cannot find '{var}' in parent vars", + ))) + })?, _ => { return Err(self.error(CodegenErrorType::SyntaxError(format!( - "compiler_make_closure: invalid scope for '{var}'", + "compiler_make_closure: unexpected ref_type {ref_type:?} for '{var}'", )))); } }; - let idx = match vars.get_index_of(var) { - Some(i) => i, - None => { - return Err(self.error(CodegenErrorType::SyntaxError(format!( - "compiler_make_closure: cannot find '{var}' in parent vars", - )))); - } - }; - - let idx = if let SymbolScope::Free = symbol.scope { - idx + parent_code.metadata.cellvars.len() - } else { - idx - }; - emit!(self, Instruction::LoadClosure(idx.to_u32())); } @@ -2044,22 +2084,73 @@ impl Compiler<'_> { size: code.freevars.len().to_u32(), } ); - - flags |= bytecode::MakeFunctionFlags::CLOSURE; } - // Load code object + // load code object and create function self.emit_load_const(ConstantData::Code { code: Box::new(code), }); - // Load qualified name - self.emit_load_const(ConstantData::Str { - value: qualname.into(), - }); + // Create function with no flags + emit!(self, Instruction::MakeFunction); + + // Now set attributes one by one using SET_FUNCTION_ATTRIBUTE + // Note: The order matters! Values must be on stack before calling SET_FUNCTION_ATTRIBUTE + + // Set closure if needed + if has_freevars { + // Closure tuple is already on stack + emit!( + self, + Instruction::SetFunctionAttribute { + attr: bytecode::MakeFunctionFlags::CLOSURE + } + ); + } + + // Set annotations if present + if flags.contains(bytecode::MakeFunctionFlags::ANNOTATIONS) { + // Annotations dict is already on stack + emit!( + self, + Instruction::SetFunctionAttribute { + attr: bytecode::MakeFunctionFlags::ANNOTATIONS + } + ); + } - // Make function with proper flags - emit!(self, Instruction::MakeFunction(flags)); + // Set kwdefaults if present + if flags.contains(bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS) { + // kwdefaults dict is already on stack + emit!( + self, + Instruction::SetFunctionAttribute { + attr: bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS + } + ); + } + + // Set defaults if present + if flags.contains(bytecode::MakeFunctionFlags::DEFAULTS) { + // defaults tuple is already on stack + emit!( + self, + Instruction::SetFunctionAttribute { + attr: bytecode::MakeFunctionFlags::DEFAULTS + } + ); + } + + // Set type_params if present + if flags.contains(bytecode::MakeFunctionFlags::TYPE_PARAMS) { + // type_params tuple is already on stack + emit!( + self, + Instruction::SetFunctionAttribute { + attr: bytecode::MakeFunctionFlags::TYPE_PARAMS + } + ); + } Ok(()) } @@ -2262,7 +2353,7 @@ impl Compiler<'_> { func_flags |= bytecode::MakeFunctionFlags::TYPE_PARAMS; // Create class function with closure - self.make_closure(class_code, name, func_flags)?; + self.make_closure(class_code, func_flags)?; self.emit_load_const(ConstantData::Str { value: name.into() }); // Compile original bases @@ -2311,19 +2402,14 @@ impl Compiler<'_> { let type_params_code = self.exit_scope(); // Execute the type params function - let type_params_name = format!(""); - self.make_closure( - type_params_code, - &type_params_name, - bytecode::MakeFunctionFlags::empty(), - )?; + self.make_closure(type_params_code, bytecode::MakeFunctionFlags::empty())?; emit!(self, Instruction::CallFunctionPositional { nargs: 0 }); } else { // Non-generic class: standard path emit!(self, Instruction::LoadBuildClass); // Create class function with closure - self.make_closure(class_code, name, bytecode::MakeFunctionFlags::empty())?; + self.make_closure(class_code, bytecode::MakeFunctionFlags::empty())?; self.emit_load_const(ConstantData::Str { value: name.into() }); let call = if let Some(arguments) = arguments { @@ -4033,10 +4119,59 @@ impl Compiler<'_> { parameters, body, .. }) => { let prev_ctx = self.ctx; - let name = "".to_owned(); - let func_flags = self - .enter_function(&name, parameters.as_deref().unwrap_or(&Default::default()))?; + let default_params = Default::default(); + let params = parameters.as_deref().unwrap_or(&default_params); + + // Prepare defaults before entering function + let defaults: Vec<_> = std::iter::empty() + .chain(¶ms.posonlyargs) + .chain(¶ms.args) + .filter_map(|x| x.default.as_deref()) + .collect(); + let have_defaults = !defaults.is_empty(); + + if have_defaults { + let size = defaults.len().to_u32(); + for element in &defaults { + self.compile_expression(element)?; + } + emit!(self, Instruction::BuildTuple { size }); + } + + // Prepare keyword-only defaults + let mut kw_with_defaults = vec![]; + for kwonlyarg in ¶ms.kwonlyargs { + if let Some(default) = &kwonlyarg.default { + kw_with_defaults.push((&kwonlyarg.parameter, default)); + } + } + + let have_kwdefaults = !kw_with_defaults.is_empty(); + if have_kwdefaults { + let default_kw_count = kw_with_defaults.len(); + for (arg, default) in kw_with_defaults.iter() { + self.emit_load_const(ConstantData::Str { + value: arg.name.as_str().into(), + }); + self.compile_expression(default)?; + } + emit!( + self, + Instruction::BuildMap { + size: default_kw_count.to_u32(), + } + ); + } + + self.enter_function(&name, params)?; + let mut func_flags = bytecode::MakeFunctionFlags::empty(); + if have_defaults { + func_flags |= bytecode::MakeFunctionFlags::DEFAULTS; + } + if have_kwdefaults { + func_flags |= bytecode::MakeFunctionFlags::KW_ONLY_DEFAULTS; + } // Set qualname for lambda self.set_qualname(); @@ -4057,7 +4192,7 @@ impl Compiler<'_> { let code = self.exit_scope(); // Create lambda function with closure - self.make_closure(code, &name, func_flags)?; + self.make_closure(code, func_flags)?; self.ctx = prev_ctx; } @@ -4602,7 +4737,7 @@ impl Compiler<'_> { self.ctx = prev_ctx; // Create comprehension function with closure - self.make_closure(code, name, bytecode::MakeFunctionFlags::empty())?; + self.make_closure(code, bytecode::MakeFunctionFlags::empty())?; // Evaluate iterated item: self.compile_expression(&generators[0].iter)?; diff --git a/compiler/codegen/src/symboltable.rs b/compiler/codegen/src/symboltable.rs index 16a65bca11..7f7355bd73 100644 --- a/compiler/codegen/src/symboltable.rs +++ b/compiler/codegen/src/symboltable.rs @@ -127,6 +127,7 @@ pub enum SymbolScope { GlobalImplicit, Free, Cell, + // TODO: wrong place. not a symbol scope, but a COMPILER_SCOPE_TYPEPARAMS TypeParams, } diff --git a/compiler/core/src/bytecode.rs b/compiler/core/src/bytecode.rs index 3e74fe6273..0a6f3bf20d 100644 --- a/compiler/core/src/bytecode.rs +++ b/compiler/core/src/bytecode.rs @@ -528,7 +528,10 @@ pub enum Instruction { JumpIfFalseOrPop { target: Arg

+ ''""" + s = f'