From 61cc0c34a7155fbc2c2eebb7930da91d8b0300a2 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Tue, 15 Jul 2025 23:29:15 +0900 Subject: [PATCH 1/4] loosen trait bound for PyInterned --- vm/src/intern.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/vm/src/intern.rs b/vm/src/intern.rs index 8463e3a1c1..a5b2a798d5 100644 --- a/vm/src/intern.rs +++ b/vm/src/intern.rs @@ -128,10 +128,7 @@ impl CachedPyStrRef { } } -pub struct PyInterned -where - T: PyPayload, -{ +pub struct PyInterned { inner: Py, } @@ -173,14 +170,14 @@ impl std::hash::Hash for PyInterned { } } -impl AsRef> for PyInterned { +impl AsRef> for PyInterned { #[inline(always)] fn as_ref(&self) -> &Py { &self.inner } } -impl Deref for PyInterned { +impl Deref for PyInterned { type Target = Py; #[inline(always)] fn deref(&self) -> &Self::Target { @@ -197,7 +194,7 @@ impl PartialEq for PyInterned { impl Eq for PyInterned {} -impl std::fmt::Debug for PyInterned { +impl std::fmt::Debug for PyInterned { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Debug::fmt(&**self, f)?; write!(f, "@{:p}", self.as_ptr()) From 6946a14ba0f55fc92780d7d26e1b5b80721315d0 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Tue, 15 Jul 2025 23:29:53 +0900 Subject: [PATCH 2/4] Remove Deref from PyUtf8Str --- stdlib/src/sqlite.rs | 2 +- vm/src/builtins/str.rs | 63 ++++++++++++++++++++++++++---------------- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/stdlib/src/sqlite.rs b/stdlib/src/sqlite.rs index 7eb8f86db2..917e1187b6 100644 --- a/stdlib/src/sqlite.rs +++ b/stdlib/src/sqlite.rs @@ -60,7 +60,7 @@ mod _sqlite { PyBaseException, PyBaseExceptionRef, PyByteArray, PyBytes, PyDict, PyDictRef, PyFloat, PyInt, PyIntRef, PySlice, PyStr, PyStrRef, PyTuple, PyTupleRef, PyType, PyTypeRef, }, - convert::IntoObject, + convert::{IntoObject, ToPyException}, function::{ArgCallable, ArgIterable, FsPath, FuncArgs, OptionalArg, PyComparisonValue}, object::{Traverse, 
TraverseFn}, protocol::{PyBuffer, PyIterReturn, PyMappingMethods, PySequence, PySequenceMethods}, diff --git a/vm/src/builtins/str.rs b/vm/src/builtins/str.rs index 73349c6141..17f9bc1a18 100644 --- a/vm/src/builtins/str.rs +++ b/vm/src/builtins/str.rs @@ -80,30 +80,6 @@ impl fmt::Debug for PyStr { } } -#[repr(transparent)] -#[derive(Debug)] -pub struct PyUtf8Str(PyStr); - -// TODO: Remove this Deref which may hide missing optimized methods of PyUtf8Str -impl std::ops::Deref for PyUtf8Str { - type Target = PyStr; - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl PyUtf8Str { - /// Returns the underlying string slice. - pub fn as_str(&self) -> &str { - debug_assert!( - self.0.is_utf8(), - "PyUtf8Str invariant violated: inner string is not valid UTF-8" - ); - // Safety: This is safe because the type invariant guarantees UTF-8 validity. - unsafe { self.0.to_str().unwrap_unchecked() } - } -} - impl AsRef for PyStr { #[track_caller] // <- can remove this once it doesn't panic fn as_ref(&self) -> &str { @@ -1940,6 +1916,45 @@ impl AnyStrWrapper for PyStrRef { } } +#[repr(transparent)] +#[derive(Debug)] +pub struct PyUtf8Str(PyStr); + +impl PyUtf8Str { + /// Returns the underlying string slice. + pub fn as_str(&self) -> &str { + debug_assert!( + self.0.is_utf8(), + "PyUtf8Str invariant violated: inner string is not valid UTF-8" + ); + // Safety: This is safe because the type invariant guarantees UTF-8 validity. + unsafe { self.0.to_str().unwrap_unchecked() } + } +} + +impl Py { + /// Upcast to PyStr. + pub fn as_pystr(&self) -> &Py { + unsafe { + // Safety: PyUtf8Str is a wrapper around PyStr, so this cast is safe. 
+ &*(self as *const Self as *const Py) + } + } +} + +impl PartialEq for PyUtf8Str { + fn eq(&self, other: &Self) -> bool { + self.as_str() == other.as_str() + } +} +impl Eq for PyUtf8Str {} + +impl std::borrow::Borrow for Py { + fn borrow(&self) -> &PyObject { + self.as_pystr().borrow() + } +} + impl AnyStrContainer for String { fn new() -> Self { Self::new() From eedd7d7eddd41be1fd74c3c481ca4e53763a9b07 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Wed, 16 Jul 2025 00:10:27 +0900 Subject: [PATCH 3/4] wtf8 --- vm/src/builtins/str.rs | 84 +++++++++++++++++++++++++++++++++++++-- vm/src/codecs.rs | 2 +- vm/src/protocol/object.rs | 6 +-- 3 files changed, 85 insertions(+), 7 deletions(-) diff --git a/vm/src/builtins/str.rs b/vm/src/builtins/str.rs index 17f9bc1a18..c8e0ebfab4 100644 --- a/vm/src/builtins/str.rs +++ b/vm/src/builtins/str.rs @@ -15,7 +15,7 @@ use crate::{ format::{format, format_map}, function::{ArgIterable, ArgSize, FuncArgs, OptionalArg, OptionalOption, PyComparisonValue}, intern::PyInterned, - object::{Traverse, TraverseFn}, + object::{MaybeTraverse, Traverse, TraverseFn}, protocol::{PyIterReturn, PyMappingMethods, PyNumberMethods, PySequenceMethods}, sequence::SequenceExt, sliceable::{SequenceIndex, SliceableSequenceOp}, @@ -350,7 +350,7 @@ impl Constructor for PyStr { type Args = StrArgs; fn py_new(cls: PyTypeRef, args: Self::Args, vm: &VirtualMachine) -> PyResult { - let string: PyStrRef = match args.object { + let string: PyRef = match args.object { OptionalArg::Present(input) => { if let OptionalArg::Present(enc) = args.encoding { vm.state.codec_registry.decode_text( @@ -364,7 +364,7 @@ impl Constructor for PyStr { } } OptionalArg::Missing => { - Self::from(String::new()).into_ref_with_type(vm, cls.clone())? 
+ Self::from(String::new()).into_ref_with_type(vm, cls.clone())?.into_wtf8() } }; if string.class().is(&cls) { @@ -1499,6 +1499,11 @@ impl PyStrRef { self.ensure_valid_utf8(vm)?; Ok(unsafe { mem::transmute::, PyRef>(self) }) } + + pub fn into_wtf8(self) -> PyRef { + // PyStr can always be safely cast to PyWtf8Str + unsafe { mem::transmute::, PyRef>(self) } + } } impl Representable for PyStr { @@ -1955,6 +1960,79 @@ impl std::borrow::Borrow for Py { } } +#[repr(transparent)] +#[derive(Debug)] +pub struct PyWtf8Str(PyStr); + +impl PyWtf8Str { + /// Returns the underlying WTF-8 slice. + pub fn as_wtf8(&self) -> &Wtf8 { + self.0.as_wtf8() + } +} + +impl MaybeTraverse for PyWtf8Str { + fn try_traverse(&self, traverse_fn: &mut TraverseFn<'_>) { + self.0.try_traverse(traverse_fn); + } +} + +impl PyPayload for PyWtf8Str { + fn class(ctx: &Context) -> &'static Py { + ctx.types.str_type + } + fn payload_type_id() -> std::any::TypeId { + std::any::TypeId::of::() + } +} + +impl From for PyWtf8Str { + fn from(s: PyStr) -> Self { + PyWtf8Str(s) + } +} + +impl<'a> From<&'a str> for PyWtf8Str { + fn from(s: &'a str) -> Self { + PyWtf8Str(PyStr::from(s)) + } +} + +impl<'a> From<&'a Wtf8> for PyWtf8Str { + fn from(s: &'a Wtf8) -> Self { + PyWtf8Str(PyStr::from(s)) + } +} + +impl From for PyWtf8Str { + fn from(s: String) -> Self { + PyWtf8Str(PyStr::from(s)) + } +} + +impl From for PyWtf8Str { + fn from(w: Wtf8Buf) -> Self { + PyWtf8Str(PyStr::from(w)) + } +} + +impl Py { + /// Upcast to PyStr. + pub fn as_pystr(&self) -> &Py { + unsafe { + // Safety: PyWtf8Str is a wrapper around PyStr, so this cast is safe. 
+ &*(self as *const Self as *const Py) + } + } +} + +impl PartialEq for PyWtf8Str { + fn eq(&self, other: &Self) -> bool { + self.as_wtf8() == other.as_wtf8() + } +} +impl Eq for PyWtf8Str {} + impl AnyStrContainer for String { fn new() -> Self { Self::new() diff --git a/vm/src/codecs.rs b/vm/src/codecs.rs index b31222cfee..eb5e60f98e 100644 --- a/vm/src/codecs.rs +++ b/vm/src/codecs.rs @@ -326,7 +326,7 @@ impl CodecsRegistry { encoding: &str, errors: Option, vm: &VirtualMachine, - ) -> PyResult { + ) -> PyResult> { let codec = self._lookup_text_encoding(encoding, "codecs.decode()", vm)?; codec.decode(obj, errors, vm)?.downcast().map_err(|obj| { vm.new_type_error(format!( diff --git a/vm/src/protocol/object.rs b/vm/src/protocol/object.rs index 498db0b26e..234083c1a8 100644 --- a/vm/src/protocol/object.rs +++ b/vm/src/protocol/object.rs @@ -352,8 +352,8 @@ impl PyObject { } // Container of the virtual machine state: - pub fn str(&self, vm: &VirtualMachine) -> PyResult { - let obj = match self.to_owned().downcast_exact::(vm) { + pub fn str(&self, vm: &VirtualMachine) -> PyResult> { + let obj = match self.to_owned().downcast_exact::(vm) { Ok(s) => return Ok(s.into_pyref()), Err(obj) => obj, }; @@ -363,7 +363,7 @@ impl PyObject { None => return obj.repr(vm), }; let s = str_method.invoke((), vm)?; - s.downcast::().map_err(|obj| { + s.downcast::().map_err(|obj| { vm.new_type_error(format!( "__str__ returned non-string (type {})", obj.class().name() From f08ca8e2b28777307c2c0f5e3498dfdabbaa854a Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Wed, 16 Jul 2025 00:45:57 +0900 Subject: [PATCH 4/4] going --- stdlib/src/sqlite.rs | 4 +- vm/src/builtins/bool.rs | 10 ++--- vm/src/builtins/bytearray.rs | 4 +- vm/src/builtins/bytes.rs | 4 +- vm/src/builtins/dict.rs | 10 ++--- vm/src/builtins/frame.rs | 6 +-- vm/src/builtins/mod.rs | 2 +- vm/src/builtins/module.rs | 10 +++-- vm/src/builtins/object.rs | 22 ++++----- vm/src/builtins/singletons.rs | 12 ++--- vm/src/builtins/slice.rs 
| 6 +-- vm/src/builtins/str.rs | 85 ++++++++++++++++++++++++----------- vm/src/builtins/tuple.rs | 6 +-- vm/src/builtins/weakproxy.rs | 6 +-- vm/src/bytes_inner.rs | 7 +-- vm/src/cformat.rs | 4 +- vm/src/codecs.rs | 8 ++-- vm/src/format.rs | 8 ++-- vm/src/frame.rs | 6 +-- vm/src/protocol/object.rs | 20 ++++++--- vm/src/stdlib/builtins.rs | 5 ++- vm/src/stdlib/io.rs | 8 ++-- vm/src/types/slot.rs | 18 ++++---- vm/src/types/structseq.rs | 8 ++-- vm/src/vm/mod.rs | 11 +++-- vm/src/vm/vm_ops.rs | 12 +++-- 26 files changed, 176 insertions(+), 126 deletions(-) diff --git a/stdlib/src/sqlite.rs b/stdlib/src/sqlite.rs index 917e1187b6..ed54be5318 100644 --- a/stdlib/src/sqlite.rs +++ b/stdlib/src/sqlite.rs @@ -60,7 +60,7 @@ mod _sqlite { PyBaseException, PyBaseExceptionRef, PyByteArray, PyBytes, PyDict, PyDictRef, PyFloat, PyInt, PyIntRef, PySlice, PyStr, PyStrRef, PyTuple, PyTupleRef, PyType, PyTypeRef, }, - convert::{IntoObject, ToPyException}, + convert::IntoObject, function::{ArgCallable, ArgIterable, FsPath, FuncArgs, OptionalArg, PyComparisonValue}, object::{Traverse, TraverseFn}, protocol::{PyBuffer, PyIterReturn, PyMappingMethods, PySequence, PySequenceMethods}, @@ -2301,7 +2301,7 @@ mod _sqlite { sql: PyStrRef, vm: &VirtualMachine, ) -> PyResult> { - let sql = sql.try_into_utf8(vm)?; + let _ = sql.try_to_str(vm)?; if sql.as_str().contains('\0') { return Err(new_programming_error( vm, diff --git a/vm/src/builtins/bool.rs b/vm/src/builtins/bool.rs index c21db81d56..79fd3e03e5 100644 --- a/vm/src/builtins/bool.rs +++ b/vm/src/builtins/bool.rs @@ -1,8 +1,8 @@ -use super::{PyInt, PyStrRef, PyType, PyTypeRef}; +use super::{PyInt, PyStrRef, PyType, PyTypeRef, PyWtf8Str}; use crate::common::format::FormatSpec; use crate::{ - AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyResult, TryFromBorrowedObject, - VirtualMachine, + AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, + TryFromBorrowedObject, VirtualMachine, class::PyClassImpl, 
convert::{IntoPyException, ToPyObject, ToPyResult}, function::OptionalArg, @@ -182,13 +182,13 @@ impl AsNumber for PyBool { impl Representable for PyBool { #[inline] - fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult { + fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult> { let name = if get_value(zelf.as_object()) { vm.ctx.names.True } else { vm.ctx.names.False }; - Ok(name.to_owned()) + Ok(name.to_owned().into_wtf8()) } #[cold] diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index ce48b2bd7c..5c64ad3186 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -1,7 +1,7 @@ //! Implementation of the python bytearray object. use super::{ PositionIterInternal, PyBytes, PyBytesRef, PyDictRef, PyGenericAlias, PyIntRef, PyStrRef, - PyTuple, PyTupleRef, PyType, PyTypeRef, + PyTuple, PyTupleRef, PyType, PyTypeRef, pystr::PyWtf8Str, }; use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, @@ -673,7 +673,7 @@ impl PyRef { } #[pymethod] - fn decode(self, args: DecodeArgs, vm: &VirtualMachine) -> PyResult { + fn decode(self, args: DecodeArgs, vm: &VirtualMachine) -> PyResult> { bytes_decode(self.into(), args, vm) } } diff --git a/vm/src/builtins/bytes.rs b/vm/src/builtins/bytes.rs index 22c93ee929..d8b6aa9609 100644 --- a/vm/src/builtins/bytes.rs +++ b/vm/src/builtins/bytes.rs @@ -1,6 +1,6 @@ use super::{ PositionIterInternal, PyDictRef, PyGenericAlias, PyIntRef, PyStrRef, PyTuple, PyTupleRef, - PyType, PyTypeRef, + PyType, PyTypeRef, PyWtf8Str, }; use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, @@ -547,7 +547,7 @@ impl PyRef { /// see https://docs.python.org/3/library/codecs.html#standard-encodings /// currently, only 'utf-8' and 'ascii' implemented #[pymethod] - fn decode(self, args: DecodeArgs, vm: &VirtualMachine) -> PyResult { + fn decode(self, args: DecodeArgs, vm: &VirtualMachine) -> PyResult> { 
bytes_decode(self.into(), args, vm) } } diff --git a/vm/src/builtins/dict.rs b/vm/src/builtins/dict.rs index e59aa5bcf7..83d0d53f15 100644 --- a/vm/src/builtins/dict.rs +++ b/vm/src/builtins/dict.rs @@ -1,6 +1,6 @@ use super::{ IterStatus, PositionIterInternal, PyBaseExceptionRef, PyGenericAlias, PyMappingProxy, PySet, - PyStr, PyStrRef, PyTupleRef, PyType, PyTypeRef, set::PySetInner, + PyStr, PyTupleRef, PyType, PyTypeRef, PyWtf8Str, set::PySetInner, }; use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyRefExact, PyResult, @@ -506,7 +506,7 @@ impl Iterable for PyDict { impl Representable for PyDict { #[inline] - fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult { + fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult> { let s = if let Some(_guard) = ReprGuard::enter(vm, zelf.as_object()) { let mut str_parts = Vec::with_capacity(zelf.__len__()); for (key, value) in zelf { @@ -519,7 +519,7 @@ impl Representable for PyDict { } else { vm.ctx.intern_str("{...}").to_owned() }; - Ok(s) + Ok(s.into_wtf8()) } #[cold] @@ -812,7 +812,7 @@ macro_rules! dict_view { impl Representable for $name { #[inline] - fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult { + fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult> { let s = if let Some(_guard) = ReprGuard::enter(vm, zelf.as_object()) { let mut str_parts = Vec::with_capacity(zelf.__len__()); for (key, value) in zelf.dict().clone() { @@ -824,7 +824,7 @@ macro_rules! 
dict_view { } else { vm.ctx.intern_str("{...}").to_owned() }; - Ok(s) + Ok(s.into_wtf8()) } #[cold] diff --git a/vm/src/builtins/frame.rs b/vm/src/builtins/frame.rs index 65ac3e798d..e682c8cfff 100644 --- a/vm/src/builtins/frame.rs +++ b/vm/src/builtins/frame.rs @@ -2,7 +2,7 @@ */ -use super::{PyCode, PyDictRef, PyIntRef, PyStrRef}; +use super::{PyCode, PyDictRef, PyIntRef, PyWtf8Str}; use crate::{ AsObject, Context, Py, PyObjectRef, PyRef, PyResult, VirtualMachine, class::PyClassImpl, @@ -20,9 +20,9 @@ impl Unconstructible for Frame {} impl Representable for Frame { #[inline] - fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult { + fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult> { const REPR: &str = ""; - Ok(vm.ctx.intern_str(REPR).to_owned()) + Ok(vm.ctx.intern_str(REPR).to_owned().into_wtf8()) } #[cold] diff --git a/vm/src/builtins/mod.rs b/vm/src/builtins/mod.rs index 8540e6887c..8f0b23db3d 100644 --- a/vm/src/builtins/mod.rs +++ b/vm/src/builtins/mod.rs @@ -59,7 +59,7 @@ pub(crate) mod bool_; pub use bool_::PyBool; #[path = "str.rs"] pub(crate) mod pystr; -pub use pystr::{PyStr, PyStrInterned, PyStrRef}; +pub use pystr::{PyStr, PyStrInterned, PyStrRef, PyWtf8Str, PyWtf8StrRef}; #[path = "super.rs"] pub(crate) mod super_; pub use super_::PySuper; diff --git a/vm/src/builtins/module.rs b/vm/src/builtins/module.rs index f8e42b28e0..e329cd91d3 100644 --- a/vm/src/builtins/module.rs +++ b/vm/src/builtins/module.rs @@ -1,4 +1,4 @@ -use super::{PyDict, PyDictRef, PyStr, PyStrRef, PyType, PyTypeRef}; +use super::{PyDict, PyDictRef, PyStr, PyStrRef, PyType, PyTypeRef, PyWtf8Str}; use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, builtins::{PyStrInterned, pystr::AsPyStr}, @@ -207,12 +207,14 @@ impl GetAttr for PyModule { impl Representable for PyModule { #[inline] - fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult { + fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult> { let importlib = 
vm.import("_frozen_importlib", 0)?; let module_repr = importlib.get_attr("_module_repr", vm)?; let repr = module_repr.call((zelf.to_owned(),), vm)?; - repr.downcast() - .map_err(|_| vm.new_type_error("_module_repr did not return a string")) + Ok(repr + .downcast::() + .map_err(|_| vm.new_type_error("_module_repr did not return a string"))? + .into_wtf8()) } #[cold] diff --git a/vm/src/builtins/object.rs b/vm/src/builtins/object.rs index fc39e2fb08..9e7f2c6a80 100644 --- a/vm/src/builtins/object.rs +++ b/vm/src/builtins/object.rs @@ -1,8 +1,8 @@ -use super::{PyDictRef, PyList, PyStr, PyStrRef, PyType, PyTypeRef}; +use super::{PyDictRef, PyList, PyStr, PyStrRef, PyType, PyTypeRef, PyWtf8Str}; use crate::common::hash::PyHash; use crate::types::PyTypeFlags; use crate::{ - AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyResult, VirtualMachine, + AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, class::PyClassImpl, convert::ToPyResult, function::{Either, FuncArgs, PyArithmeticValue, PyComparisonValue, PySetterValue}, @@ -333,13 +333,13 @@ impl PyBaseObject { /// Return str(self). #[pymethod] - fn __str__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult { + fn __str__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult> { // FIXME: try tp_repr first and fallback to object.__repr__ - zelf.repr(vm) + zelf.repr_wtf8(vm) } #[pyslot] - fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult { + fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult> { let class = zelf.class(); match ( class @@ -358,19 +358,21 @@ impl PyBaseObject { qualname, zelf.get_id() )) - .into_ref(&vm.ctx)), + .into_ref(&vm.ctx) + .into_wtf8()), _ => Ok(PyStr::from(format!( "<{} object at {:#x}>", class.slot_name(), zelf.get_id() )) - .into_ref(&vm.ctx)), + .into_ref(&vm.ctx) + .into_wtf8()), } } /// Return repr(self). 
#[pymethod] - fn __repr__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult { + fn __repr__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult> { Self::slot_repr(&zelf, vm) } @@ -392,14 +394,14 @@ impl PyBaseObject { obj: PyObjectRef, format_spec: PyStrRef, vm: &VirtualMachine, - ) -> PyResult { + ) -> PyResult> { if !format_spec.is_empty() { return Err(vm.new_type_error(format!( "unsupported format string passed to {}.__format__", obj.class().name() ))); } - obj.str(vm) + obj.str_wtf8(vm) } #[pyslot] diff --git a/vm/src/builtins/singletons.rs b/vm/src/builtins/singletons.rs index 7b674cb35b..7c9097cd1f 100644 --- a/vm/src/builtins/singletons.rs +++ b/vm/src/builtins/singletons.rs @@ -1,6 +1,6 @@ -use super::{PyStrRef, PyType, PyTypeRef}; +use super::{PyStrRef, PyType, PyTypeRef, PyWtf8Str}; use crate::{ - Context, Py, PyObjectRef, PyPayload, PyResult, VirtualMachine, + Context, Py, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, class::PyClassImpl, convert::ToPyObject, protocol::PyNumberMethods, @@ -53,8 +53,8 @@ impl PyNone { impl Representable for PyNone { #[inline] - fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult { - Ok(vm.ctx.names.None.to_owned()) + fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult> { + Ok(vm.ctx.names.None.to_owned().into_wtf8()) } #[cold] @@ -110,8 +110,8 @@ impl PyNotImplemented { impl Representable for PyNotImplemented { #[inline] - fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult { - Ok(vm.ctx.names.NotImplemented.to_owned()) + fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult> { + Ok(vm.ctx.names.NotImplemented.to_owned().into_wtf8()) } #[cold] diff --git a/vm/src/builtins/slice.rs b/vm/src/builtins/slice.rs index f77c8cb8e8..6cf37a7bae 100644 --- a/vm/src/builtins/slice.rs +++ b/vm/src/builtins/slice.rs @@ -1,6 +1,6 @@ // sliceobject.{h,c} in CPython // spell-checker:ignore sliceobject -use super::{PyGenericAlias, PyStrRef, PyTupleRef, PyType, PyTypeRef}; +use super::{PyGenericAlias, PyStrRef, PyTupleRef, 
PyType, PyTypeRef, PyWtf8Str}; use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, class::PyClassImpl, @@ -333,8 +333,8 @@ impl PyEllipsis { impl Representable for PyEllipsis { #[inline] - fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult { - Ok(vm.ctx.names.Ellipsis.to_owned()) + fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult> { + Ok(vm.ctx.names.Ellipsis.to_owned().into_wtf8()) } #[cold] diff --git a/vm/src/builtins/str.rs b/vm/src/builtins/str.rs index c8e0ebfab4..06ffb0d955 100644 --- a/vm/src/builtins/str.rs +++ b/vm/src/builtins/str.rs @@ -218,6 +218,7 @@ impl Default for PyStr { } pub type PyStrRef = PyRef; +pub type PyWtf8StrRef = PyRef; impl fmt::Display for PyStr { #[inline] @@ -360,12 +361,12 @@ impl Constructor for PyStr { vm, )? } else { - input.str(vm)? + input.str_wtf8(vm)? } } - OptionalArg::Missing => { - Self::from(String::new()).into_ref_with_type(vm, cls.clone())?.into_wtf8() - } + OptionalArg::Missing => Self::from(String::new()) + .into_ref_with_type(vm, cls.clone())? + .into_wtf8(), }; if string.class().is(&cls) { Ok(string.into()) @@ -433,9 +434,10 @@ impl PyStr { self.data.as_str() } - fn ensure_valid_utf8(&self, vm: &VirtualMachine) -> PyResult<()> { + pub fn try_to_str(&self, vm: &VirtualMachine) -> PyResult<&str> { if self.is_utf8() { - Ok(()) + // SAFETY: is_utf8() passed, so unwrap is safe. + Ok(unsafe { self.to_str().unwrap_unchecked() }) } else { let start = self .as_wtf8() @@ -452,12 +454,6 @@ impl PyStr { } } - pub fn try_to_str(&self, vm: &VirtualMachine) -> PyResult<&str> { - self.ensure_valid_utf8(vm)?; - // SAFETY: ensure_valid_utf8 passed, so unwrap is safe. 
- Ok(unsafe { self.to_str().unwrap_unchecked() }) - } - pub fn to_string_lossy(&self) -> Cow<'_, str> { self.to_str() .map(Cow::Borrowed) @@ -955,22 +951,26 @@ impl PyStr { } #[pymethod(name = "__format__")] - fn __format__(zelf: PyRef, spec: PyStrRef, vm: &VirtualMachine) -> PyResult { + fn __format__( + zelf: PyRef, + spec: PyStrRef, + vm: &VirtualMachine, + ) -> PyResult> { let spec = spec.as_str(); if spec.is_empty() { return if zelf.class().is(vm.ctx.types.str_type) { Ok(zelf) } else { - zelf.as_object().str(vm) + zelf.as_object().str_wtf8(vm) }; } - + let zelf = zelf.try_into_utf8(vm)?; let s = FormatSpec::parse(spec) .and_then(|format_spec| { format_spec.format_string(&CharLenStr(zelf.as_str(), zelf.char_len())) }) .map_err(|err| err.into_pyexception(vm))?; - Ok(vm.ctx.new_str(s)) + Ok(vm.ctx.new_str(s).into()) } /// Return a titlecased version of the string where words start with an @@ -1496,7 +1496,7 @@ impl PyStrRef { } pub fn try_into_utf8(self, vm: &VirtualMachine) -> PyResult> { - self.ensure_valid_utf8(vm)?; + self.try_to_str(vm)?; // This will check for surrogates Ok(unsafe { mem::transmute::, PyRef>(self) }) } @@ -1506,6 +1506,21 @@ impl PyStrRef { } } +impl PyRef { + pub fn try_into_utf8(self, vm: &VirtualMachine) -> PyResult { + // Check if the string contains surrogates + self.ensure_valid_utf8(vm)?; + // If no surrogates, we can safely cast to PyStr + Ok(unsafe { mem::transmute::, PyRef>(self) }) + } +} + +impl From> for PyRef { + fn from(s: PyRef) -> Self { + s.into_wtf8() + } +} + impl Representable for PyStr { #[inline] fn repr_str(zelf: &Py, vm: &VirtualMachine) -> PyResult { @@ -1969,6 +1984,25 @@ impl PyWtf8Str { pub fn as_wtf8(&self) -> &Wtf8 { self.0.as_wtf8() } + + fn ensure_valid_utf8(&self, vm: &VirtualMachine) -> PyResult<()> { + if self.0.is_utf8() { + Ok(()) + } else { + let start = self + .as_wtf8() + .code_points() + .position(|c| c.to_char().is_none()) + .unwrap(); + Err(vm.new_unicode_encode_error_real( + identifier!(vm, 
utf_8).to_owned(), + vm.ctx.new_str(self.0.data.clone()), + start, + start + 1, + vm.ctx.new_str("surrogates not allowed"), + )) + } + } } impl MaybeTraverse for PyWtf8Str { @@ -2016,16 +2050,6 @@ impl From for PyWtf8Str { } } -impl Py { - /// Upcast to PyStr. - pub fn as_pystr(&self) -> &Py { - unsafe { - // Safety: PyWtf8Str is a wrapper around PyStr, so this cast is safe. - &*(self as *const Self as *const Py) - } - } -} - impl PartialEq for PyWtf8Str { fn eq(&self, other: &Self) -> bool { self.as_wtf8() == other.as_wtf8() @@ -2033,6 +2057,13 @@ impl PartialEq for PyWtf8Str { } impl Eq for PyWtf8Str {} +impl fmt::Display for PyWtf8Str { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + impl AnyStrContainer for String { fn new() -> Self { Self::new() diff --git a/vm/src/builtins/tuple.rs b/vm/src/builtins/tuple.rs index 2c3255b249..7b3e28895b 100644 --- a/vm/src/builtins/tuple.rs +++ b/vm/src/builtins/tuple.rs @@ -1,4 +1,4 @@ -use super::{PositionIterInternal, PyGenericAlias, PyStrRef, PyType, PyTypeRef}; +use super::{PositionIterInternal, PyGenericAlias, PyType, PyTypeRef, PyWtf8Str}; use crate::common::{ hash::{PyHash, PyUHash}, lock::PyMutex, @@ -450,7 +450,7 @@ impl Iterable for PyTuple { impl Representable for PyTuple { #[inline] - fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult { + fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult> { let s = if zelf.is_empty() { vm.ctx.intern_str("()").to_owned() } else if let Some(_guard) = ReprGuard::enter(vm, zelf.as_object()) { @@ -463,7 +463,7 @@ impl Representable for PyTuple { } else { vm.ctx.intern_str("(...)").to_owned() }; - Ok(s) + Ok(s.into_wtf8()) } #[cold] diff --git a/vm/src/builtins/weakproxy.rs b/vm/src/builtins/weakproxy.rs index 6f01e5eb22..64931add00 100644 --- a/vm/src/builtins/weakproxy.rs +++ b/vm/src/builtins/weakproxy.rs @@ -1,4 +1,4 @@ -use super::{PyStr, PyStrRef, PyType, PyTypeRef, PyWeak}; +use super::{PyStr, PyStrRef, PyType, PyTypeRef, 
PyWeak, PyWtf8Str}; use crate::{ Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, atomic_func, class::PyClassImpl, @@ -220,8 +220,8 @@ impl AsMapping for PyWeakProxy { impl Representable for PyWeakProxy { #[inline] - fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult { - zelf.try_upgrade(vm)?.repr(vm) + fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult> { + zelf.try_upgrade(vm)?.repr_wtf8(vm) } #[cold] diff --git a/vm/src/bytes_inner.rs b/vm/src/bytes_inner.rs index db1e843091..4754252685 100644 --- a/vm/src/bytes_inner.rs +++ b/vm/src/bytes_inner.rs @@ -1,10 +1,11 @@ // spell-checker:ignore unchunked use crate::{ - AsObject, PyObject, PyObjectRef, PyPayload, PyResult, TryFromBorrowedObject, VirtualMachine, + AsObject, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromBorrowedObject, + VirtualMachine, anystr::{self, AnyStr, AnyStrContainer, AnyStrWrapper}, builtins::{ PyBaseExceptionRef, PyByteArray, PyBytes, PyBytesRef, PyInt, PyIntRef, PyStr, PyStrRef, - PyTypeRef, pystr, + PyTypeRef, PyWtf8Str, pystr, }, byte::bytes_from_object, cformat::cformat_bytes, @@ -1138,7 +1139,7 @@ pub fn bytes_decode( zelf: PyObjectRef, args: DecodeArgs, vm: &VirtualMachine, -) -> PyResult { +) -> PyResult> { let DecodeArgs { encoding, errors } = args; let encoding = encoding .as_ref() diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs index feca96c833..227db41fe6 100644 --- a/vm/src/cformat.rs +++ b/vm/src/cformat.rs @@ -132,8 +132,8 @@ fn spec_format_string( CFormatType::String(conversion) => { let result = match conversion { CFormatConversion::Ascii => builtins::ascii(obj, vm)?.into(), - CFormatConversion::Str => obj.str(vm)?.as_wtf8().to_owned(), - CFormatConversion::Repr => obj.repr(vm)?.as_wtf8().to_owned(), + CFormatConversion::Str => obj.str_wtf8(vm)?.as_wtf8().to_owned(), + CFormatConversion::Repr => obj.repr_wtf8(vm)?.as_wtf8().to_owned(), CFormatConversion::Bytes => { // idx is the position of the %, we want the position of the b 
return Err(vm.new_value_error(format!( diff --git a/vm/src/codecs.rs b/vm/src/codecs.rs index eb5e60f98e..64ffeb1dca 100644 --- a/vm/src/codecs.rs +++ b/vm/src/codecs.rs @@ -9,9 +9,11 @@ use rustpython_common::{ }; use crate::{ - AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyResult, TryFromBorrowedObject, - TryFromObject, VirtualMachine, - builtins::{PyBaseExceptionRef, PyBytes, PyBytesRef, PyStr, PyStrRef, PyTuple, PyTupleRef}, + AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, + TryFromBorrowedObject, TryFromObject, VirtualMachine, + builtins::{ + PyBaseExceptionRef, PyBytes, PyBytesRef, PyStr, PyStrRef, PyTuple, PyTupleRef, PyWtf8Str, + }, common::{ascii, lock::PyRwLock}, convert::ToPyObject, function::{ArgBytesLike, PyMethodDef}, diff --git a/vm/src/format.rs b/vm/src/format.rs index f95f161f7a..c429af0ce9 100644 --- a/vm/src/format.rs +++ b/vm/src/format.rs @@ -99,8 +99,8 @@ fn format_internal( let format_spec = format_internal(vm, &nested_format, field_func)?; let argument = match conversion_spec.and_then(FormatConversion::from_char) { - Some(FormatConversion::Str) => argument.str(vm)?.into(), - Some(FormatConversion::Repr) => argument.repr(vm)?.into(), + Some(FormatConversion::Str) => argument.str_wtf8(vm)?.into(), + Some(FormatConversion::Repr) => argument.repr_wtf8(vm)?.into(), Some(FormatConversion::Ascii) => { vm.ctx.new_str(builtins::ascii(argument, vm)?).into() } @@ -111,8 +111,8 @@ fn format_internal( }; // FIXME: compiler can intern specs using parser tree. 
Then this call can be interned_str - pystr = vm.format(&argument, vm.ctx.new_str(format_spec))?; - pystr.as_ref() + pystr = vm.format_wtf8(&argument, vm.ctx.new_str(format_spec))?; + pystr.as_wtf8() } FormatPart::Literal(literal) => literal, }; diff --git a/vm/src/frame.rs b/vm/src/frame.rs index bf07bcbc2b..d021f0c2e6 100644 --- a/vm/src/frame.rs +++ b/vm/src/frame.rs @@ -2033,14 +2033,14 @@ impl ExecutingFrame<'_> { use bytecode::ConversionFlag; let value = self.pop_value(); let value = match conversion { - ConversionFlag::Str => value.str(vm)?.into(), - ConversionFlag::Repr => value.repr(vm)?.into(), + ConversionFlag::Str => value.str_wtf8(vm)?.into(), + ConversionFlag::Repr => value.repr_wtf8(vm)?.into(), ConversionFlag::Ascii => vm.ctx.new_str(builtins::ascii(value, vm)?).into(), ConversionFlag::None => value, }; let spec = self.pop_value(); - let formatted = vm.format(&value, spec.downcast::().unwrap())?; + let formatted = vm.format_wtf8(&value, spec.downcast::().unwrap())?; self.push_value(formatted.into()); Ok(None) } diff --git a/vm/src/protocol/object.rs b/vm/src/protocol/object.rs index 234083c1a8..248f158cc3 100644 --- a/vm/src/protocol/object.rs +++ b/vm/src/protocol/object.rs @@ -2,10 +2,10 @@ //! 
use crate::{ - AsObject, Py, PyObject, PyObjectRef, PyResult, TryFromObject, VirtualMachine, + AsObject, Py, PyObject, PyObjectRef, PyRef, PyResult, TryFromObject, VirtualMachine, builtins::{ - PyAsyncGen, PyBytes, PyDict, PyDictRef, PyGenericAlias, PyInt, PyList, PyStr, PyStrRef, - PyTuple, PyTupleRef, PyType, PyTypeRef, pystr::AsPyStr, + PyAsyncGen, PyBytes, PyDict, PyDictRef, PyGenericAlias, PyInt, PyList, PyStr, PyTuple, + PyTupleRef, PyType, PyTypeRef, PyWtf8Str, pystr::AsPyStr, }, bytes_inner::ByteInnerNewOptions, common::{hash::PyHash, str::to_ascii}, @@ -328,7 +328,11 @@ impl PyObject { } } - pub fn repr(&self, vm: &VirtualMachine) -> PyResult { + pub fn repr(&self, vm: &VirtualMachine) -> PyResult> { + self.repr_wtf8(vm)?.try_into_utf8(vm) + } + + pub fn repr_wtf8(&self, vm: &VirtualMachine) -> PyResult> { vm.with_recursion("while getting the repr of an object", || { // TODO: RustPython does not implement type slots inheritance yet self.class() @@ -351,8 +355,10 @@ impl PyObject { Ok(ascii) } - // Container of the virtual machine state: - pub fn str(&self, vm: &VirtualMachine) -> PyResult> { + pub fn str(&self, vm: &VirtualMachine) -> PyResult> { + self.str_wtf8(vm)?.try_into_utf8(vm) + } + pub fn str_wtf8(&self, vm: &VirtualMachine) -> PyResult> { let obj = match self.to_owned().downcast_exact::(vm) { Ok(s) => return Ok(s.into_pyref()), Err(obj) => obj, @@ -360,7 +366,7 @@ impl PyObject { // TODO: replace to obj.class().slots.str let str_method = match vm.get_special_method(&obj, identifier!(vm, __str__))? 
{ Some(str_method) => str_method, - None => return obj.repr(vm), + None => return obj.repr_wtf8(vm), }; let s = str_method.invoke((), vm)?; s.downcast::().map_err(|obj| { diff --git a/vm/src/stdlib/builtins.rs b/vm/src/stdlib/builtins.rs index 52eb698fbb..d33386df41 100644 --- a/vm/src/stdlib/builtins.rs +++ b/vm/src/stdlib/builtins.rs @@ -13,6 +13,7 @@ mod builtins { AsObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, VirtualMachine, builtins::{ PyByteArray, PyBytes, PyDictRef, PyStr, PyStrRef, PyTuple, PyTupleRef, PyType, + PyWtf8StrRef, enumerate::PyReverseSequenceIterator, function::{PyCellRef, PyFunction}, int::PyIntRef, @@ -394,8 +395,8 @@ mod builtins { value: PyObjectRef, format_spec: OptionalArg, vm: &VirtualMachine, - ) -> PyResult { - vm.format(&value, format_spec.unwrap_or(vm.ctx.new_str(""))) + ) -> PyResult { + vm.format_wtf8(&value, format_spec.unwrap_or(vm.ctx.new_str(""))) } #[pyfunction] diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index c546a0c46f..3e760994fa 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -120,7 +120,7 @@ mod _io { TryFromBorrowedObject, TryFromObject, builtins::{ PyBaseExceptionRef, PyByteArray, PyBytes, PyBytesRef, PyIntRef, PyMemoryView, PyStr, - PyStrRef, PyTuple, PyTupleRef, PyType, PyTypeRef, + PyStrRef, PyTuple, PyTupleRef, PyType, PyTypeRef, PyWtf8Str, }, class::StaticType, common::lock::{ @@ -1574,7 +1574,7 @@ mod _io { } #[pyslot] - fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult { + fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult> { let name_repr = repr_file_obj_name(zelf, vm)?; let cls = zelf.class(); let slot_name = cls.slot_name(); @@ -1583,11 +1583,11 @@ mod _io { } else { format!("<{slot_name}>") }; - Ok(vm.ctx.new_str(repr)) + Ok(vm.ctx.new_str(repr).into_wtf8()) } #[pymethod] - fn __repr__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult { + fn __repr__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult> { Self::slot_repr(&zelf, vm) } diff 
--git a/vm/src/types/slot.rs b/vm/src/types/slot.rs index 704ec3edf0..4a64e49e47 100644 --- a/vm/src/types/slot.rs +++ b/vm/src/types/slot.rs @@ -1,6 +1,8 @@ use crate::{ AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, - builtins::{PyInt, PyStr, PyStrInterned, PyStrRef, PyType, PyTypeRef, type_::PointerSlot}, + builtins::{ + PyInt, PyStr, PyStrInterned, PyStrRef, PyType, PyTypeRef, PyWtf8Str, type_::PointerSlot, + }, bytecode::ComparisonOperator, common::hash::PyHash, convert::{ToPyObject, ToPyResult}, @@ -168,7 +170,7 @@ impl Default for PyTypeFlags { pub(crate) type GenericMethod = fn(&PyObject, FuncArgs, &VirtualMachine) -> PyResult; pub(crate) type HashFunc = fn(&PyObject, &VirtualMachine) -> PyResult; // CallFunc = GenericMethod -pub(crate) type StringifyFunc = fn(&PyObject, &VirtualMachine) -> PyResult; +pub(crate) type StringifyFunc = fn(&PyObject, &VirtualMachine) -> PyResult>; pub(crate) type GetattroFunc = fn(&PyObject, &Py, &VirtualMachine) -> PyResult; pub(crate) type SetattroFunc = fn(&PyObject, &Py, PySetterValue, &VirtualMachine) -> PyResult<()>; @@ -249,9 +251,9 @@ fn setitem_wrapper( .map(drop) } -fn repr_wrapper(zelf: &PyObject, vm: &VirtualMachine) -> PyResult { +fn repr_wrapper(zelf: &PyObject, vm: &VirtualMachine) -> PyResult> { let ret = vm.call_special_method(zelf, identifier!(vm, __repr__), ())?; - ret.downcast::().map_err(|obj| { + ret.downcast::().map_err(|obj| { vm.new_type_error(format!( "__repr__ returned non-string (type {})", obj.class() @@ -973,7 +975,7 @@ pub trait Hashable: PyPayload { pub trait Representable: PyPayload { #[inline] #[pyslot] - fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult { + fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult> { let zelf = zelf .downcast_ref() .ok_or_else(|| vm.new_type_error("unexpected payload for __repr__"))?; @@ -982,14 +984,14 @@ pub trait Representable: PyPayload { #[inline] #[pymethod] - fn __repr__(zelf: PyObjectRef, vm: 
&VirtualMachine) -> PyResult { + fn __repr__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult> { Self::slot_repr(&zelf, vm) } #[inline] - fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult { + fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult> { let repr = Self::repr_str(zelf, vm)?; - Ok(vm.ctx.new_str(repr)) + Ok(vm.ctx.new_str(repr).into_wtf8()) } fn repr_str(zelf: &Py, vm: &VirtualMachine) -> PyResult; diff --git a/vm/src/types/structseq.rs b/vm/src/types/structseq.rs index 318280f862..15527fb8d0 100644 --- a/vm/src/types/structseq.rs +++ b/vm/src/types/structseq.rs @@ -1,6 +1,6 @@ use crate::{ AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, - builtins::{PyBaseExceptionRef, PyStrRef, PyTuple, PyTupleRef, PyType}, + builtins::{PyBaseExceptionRef, PyTuple, PyTupleRef, PyType, PyWtf8Str}, class::{PyClassImpl, StaticType}, vm::Context, }; @@ -48,7 +48,7 @@ pub trait PyStructSequence: StaticType + PyClassImpl + Sized + 'static { } #[pyslot] - fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult { + fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult> { let zelf = zelf .downcast_ref::() .ok_or_else(|| vm.new_type_error("unexpected payload for __repr__"))?; @@ -75,11 +75,11 @@ pub trait PyStructSequence: StaticType + PyClassImpl + Sized + 'static { (String::new(), "...") }; let repr_str = format!("{}({}{})", Self::TP_NAME, body, suffix); - Ok(vm.ctx.new_str(repr_str)) + Ok(vm.ctx.new_str(repr_str).into_wtf8()) } #[pymethod] - fn __repr__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult { + fn __repr__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult> { Self::slot_repr(&zelf, vm) } diff --git a/vm/src/vm/mod.rs b/vm/src/vm/mod.rs index b147f8528b..910ac242cf 100644 --- a/vm/src/vm/mod.rs +++ b/vm/src/vm/mod.rs @@ -954,12 +954,11 @@ impl VirtualMachine { Err(s) => { let bytes = self.ctx.new_bytes(s.into_encoded_bytes()); let errors = self.fs_encode_errors().to_owned(); - let res = 
self.state.codec_registry.decode_text( - bytes.into(), - "utf-8", - Some(errors), - self, - ); + let res = self + .state + .codec_registry + .decode_text(bytes.into(), "utf-8", Some(errors), self) + .and_then(|s| s.try_into_utf8(self)); self.expect_pyresult(res, "fsdecode should be lossless and never fail") } } diff --git a/vm/src/vm/vm_ops.rs b/vm/src/vm/vm_ops.rs index 00ba2d0f66..71ed6733fc 100644 --- a/vm/src/vm/vm_ops.rs +++ b/vm/src/vm/vm_ops.rs @@ -1,7 +1,8 @@ use super::VirtualMachine; use crate::stdlib::warnings; use crate::{ - builtins::{PyInt, PyIntRef, PyStr, PyStrRef}, + PyRef, + builtins::{PyInt, PyIntRef, PyStrRef, pystr::PyWtf8Str}, object::{AsObject, PyObject, PyObjectRef, PyResult}, protocol::{PyIterReturn, PyNumberBinaryOp, PyNumberTernaryOp, PySequence}, types::PyComparisonOp, @@ -491,14 +492,14 @@ impl VirtualMachine { } // PyObject_Format - pub fn format(&self, obj: &PyObject, format_spec: PyStrRef) -> PyResult { + pub fn format_wtf8(&self, obj: &PyObject, format_spec: PyStrRef) -> PyResult> { if format_spec.is_empty() { - let obj = match obj.to_owned().downcast_exact::(self) { + let obj = match obj.to_owned().downcast_exact::(self) { Ok(s) => return Ok(s.into_pyref()), Err(obj) => obj, }; if obj.class().is(self.ctx.types.int_type) { - return obj.str(self); + return obj.str_wtf8(self); } } let bound_format = self @@ -517,6 +518,9 @@ impl VirtualMachine { )) }) } + pub fn format(&self, obj: &PyObject, format_spec: PyStrRef) -> PyResult { + self.format_wtf8(obj, format_spec)?.try_into_utf8(self) + } // https://docs.python.org/3/reference/expressions.html#membership-test-operations fn _membership_iter_search( pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy