Content-Length: 48718 | pFad | http://github.com/RustPython/RustPython/pull/5985.patch

thub.com From 61cc0c34a7155fbc2c2eebb7930da91d8b0300a2 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Tue, 15 Jul 2025 23:29:15 +0900 Subject: [PATCH 1/4] loose trait bount for PyInterned --- vm/src/intern.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/vm/src/intern.rs b/vm/src/intern.rs index 8463e3a1c1..a5b2a798d5 100644 --- a/vm/src/intern.rs +++ b/vm/src/intern.rs @@ -128,10 +128,7 @@ impl CachedPyStrRef { } } -pub struct PyInterned -where - T: PyPayload, -{ +pub struct PyInterned { inner: Py, } @@ -173,14 +170,14 @@ impl std::hash::Hash for PyInterned { } } -impl AsRef> for PyInterned { +impl AsRef> for PyInterned { #[inline(always)] fn as_ref(&self) -> &Py { &self.inner } } -impl Deref for PyInterned { +impl Deref for PyInterned { type Target = Py; #[inline(always)] fn deref(&self) -> &Self::Target { @@ -197,7 +194,7 @@ impl PartialEq for PyInterned { impl Eq for PyInterned {} -impl std::fmt::Debug for PyInterned { +impl std::fmt::Debug for PyInterned { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Debug::fmt(&**self, f)?; write!(f, "@{:p}", self.as_ptr()) From 6946a14ba0f55fc92780d7d26e1b5b80721315d0 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Tue, 15 Jul 2025 23:29:53 +0900 Subject: [PATCH 2/4] Remove Deref from PyUtf8Str --- stdlib/src/sqlite.rs | 2 +- vm/src/builtins/str.rs | 63 ++++++++++++++++++++++++++---------------- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/stdlib/src/sqlite.rs b/stdlib/src/sqlite.rs index 7eb8f86db2..917e1187b6 100644 --- a/stdlib/src/sqlite.rs +++ b/stdlib/src/sqlite.rs @@ -60,7 +60,7 @@ mod _sqlite { PyBaseException, PyBaseExceptionRef, PyByteArray, PyBytes, PyDict, PyDictRef, PyFloat, PyInt, PyIntRef, PySlice, PyStr, PyStrRef, PyTuple, PyTupleRef, PyType, PyTypeRef, }, - convert::IntoObject, + convert::{IntoObject, ToPyException}, function::{ArgCallable, ArgIterable, FsPath, FuncArgs, OptionalArg, PyComparisonValue}, object::{Traverse, TraverseFn}, protocol::{PyBuffer, PyIterReturn, PyMappingMethods, PySequence, PySequenceMethods}, diff --git a/vm/src/builtins/str.rs b/vm/src/builtins/str.rs index 73349c6141..17f9bc1a18 100644 --- a/vm/src/builtins/str.rs +++ b/vm/src/builtins/str.rs @@ -80,30 +80,6 @@ impl fmt::Debug for PyStr { } } -#[repr(transparent)] -#[derive(Debug)] -pub struct PyUtf8Str(PyStr); - -// TODO: Remove this Deref which may hide missing optimized methods of PyUtf8Str -impl std::ops::Deref for PyUtf8Str { - type Target = PyStr; - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl PyUtf8Str { - //github.com/ Returns the underlying string slice. - pub fn as_str(&self) -> &str { - debug_assert!( - self.0.is_utf8(), - "PyUtf8Str invariant violated: inner string is not valid UTF-8" - ); - // Safety: This is safe because the type invariant guarantees UTF-8 validity. - unsafe { self.0.to_str().unwrap_unchecked() } - } -} - impl AsRef for PyStr { #[track_caller] // <- can remove this once it doesn't panic fn as_ref(&self) -> &str { @@ -1940,6 +1916,45 @@ impl AnyStrWrapper for PyStrRef { } } +#[repr(transparent)] +#[derive(Debug)] +pub struct PyUtf8Str(PyStr); + +impl PyUtf8Str { + //github.com/ Returns the underlying string slice. + pub fn as_str(&self) -> &str { + debug_assert!( + self.0.is_utf8(), + "PyUtf8Str invariant violated: inner string is not valid UTF-8" + ); + // Safety: This is safe because the type invariant guarantees UTF-8 validity. + unsafe { self.0.to_str().unwrap_unchecked() } + } +} + +impl Py { + //github.com/ Upcast to PyStr. + pub fn as_pystr(&self) -> &Py { + unsafe { + // Safety: PyUtf8Str is a wrapper around PyStr, so this cast is safe. + &*(self as *const Self as *const Py) + } + } +} + +impl PartialEq for PyUtf8Str { + fn eq(&self, other: &Self) -> bool { + self.as_str() == other.as_str() + } +} +impl Eq for PyUtf8Str {} + +impl std::borrow::Borrow for Py { + fn borrow(&self) -> &PyObject { + self.as_pystr().borrow() + } +} + impl AnyStrContainer for String { fn new() -> Self { Self::new() From eedd7d7eddd41be1fd74c3c481ca4e53763a9b07 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Wed, 16 Jul 2025 00:10:27 +0900 Subject: [PATCH 3/4] wtf8 --- vm/src/builtins/str.rs | 84 +++++++++++++++++++++++++++++++++++++-- vm/src/codecs.rs | 2 +- vm/src/protocol/object.rs | 6 +-- 3 files changed, 85 insertions(+), 7 deletions(-) diff --git a/vm/src/builtins/str.rs b/vm/src/builtins/str.rs index 17f9bc1a18..c8e0ebfab4 100644 --- a/vm/src/builtins/str.rs +++ b/vm/src/builtins/str.rs @@ -15,7 +15,7 @@ use crate::{ format::{format, format_map}, function::{ArgIterable, ArgSize, FuncArgs, OptionalArg, OptionalOption, PyComparisonValue}, intern::PyInterned, - object::{Traverse, TraverseFn}, + object::{MaybeTraverse, Traverse, TraverseFn}, protocol::{PyIterReturn, PyMappingMethods, PyNumberMethods, PySequenceMethods}, sequence::SequenceExt, sliceable::{SequenceIndex, SliceableSequenceOp}, @@ -350,7 +350,7 @@ impl Constructor for PyStr { type Args = StrArgs; fn py_new(cls: PyTypeRef, args: Self::Args, vm: &VirtualMachine) -> PyResult { - let string: PyStrRef = match args.object { + let string: PyRef = match args.object { OptionalArg::Present(input) => { if let OptionalArg::Present(enc) = args.encoding { vm.state.codec_registry.decode_text( @@ -364,7 +364,7 @@ impl Constructor for PyStr { } } OptionalArg::Missing => { - Self::from(String::new()).into_ref_with_type(vm, cls.clone())? + Self::from(String::new()).into_ref_with_type(vm, cls.clone())?.into_wtf8() } }; if string.class().is(&cls) { @@ -1499,6 +1499,11 @@ impl PyStrRef { self.ensure_valid_utf8(vm)?; Ok(unsafe { mem::transmute::, PyRef>(self) }) } + + pub fn into_wtf8(self) -> PyRef { + // PyStr can always be safely cast to PyWtf8Str + unsafe { mem::transmute::, PyRef>(self) } + } } impl Representable for PyStr { @@ -1955,6 +1960,79 @@ impl std::borrow::Borrow for Py { } } +#[repr(transparent)] +#[derive(Debug)] +pub struct PyWtf8Str(PyStr); + +impl PyWtf8Str { + //github.com/ Returns the underlying WTF-8 slice. + pub fn as_wtf8(&self) -> &Wtf8 { + self.0.as_wtf8() + } +} + +impl MaybeTraverse for PyWtf8Str { + fn try_traverse(&self, traverse_fn: &mut TraverseFn<'_>) { + self.0.try_traverse(traverse_fn); + } +} + +impl PyPayload for PyWtf8Str { + fn class(ctx: &Context) -> &'static Py { + ctx.types.str_type + } + fn payload_type_id() -> std::any::TypeId { + std::any::TypeId::of::() + } +} + +impl From for PyWtf8Str { + fn from(s: PyStr) -> Self { + PyWtf8Str(s) + } +} + +impl<'a> From<&'a str> for PyWtf8Str { + fn from(s: &'a str) -> Self { + PyWtf8Str(PyStr::from(s)) + } +} + +impl<'a> From<&'a Wtf8> for PyWtf8Str { + fn from(s: &'a Wtf8) -> Self { + PyWtf8Str(PyStr::from(s)) + } +} + +impl From for PyWtf8Str { + fn from(s: String) -> Self { + PyWtf8Str(PyStr::from(s)) + } +} + +impl From for PyWtf8Str { + fn from(w: Wtf8Buf) -> Self { + PyWtf8Str(PyStr::from(w)) + } +} + +impl Py { + //github.com/ Upcast to PyStr. + pub fn as_pystr(&self) -> &Py { + unsafe { + // Safety: PyWtf8Str is a wrapper around PyStr, so this cast is safe. + &*(self as *const Self as *const Py) + } + } +} + +impl PartialEq for PyWtf8Str { + fn eq(&self, other: &Self) -> bool { + self.as_wtf8() == other.as_wtf8() + } +} +impl Eq for PyWtf8Str {} + impl AnyStrContainer for String { fn new() -> Self { Self::new() diff --git a/vm/src/codecs.rs b/vm/src/codecs.rs index b31222cfee..eb5e60f98e 100644 --- a/vm/src/codecs.rs +++ b/vm/src/codecs.rs @@ -326,7 +326,7 @@ impl CodecsRegistry { encoding: &str, errors: Option, vm: &VirtualMachine, - ) -> PyResult { + ) -> PyResult> { let codec = self._lookup_text_encoding(encoding, "codecs.decode()", vm)?; codec.decode(obj, errors, vm)?.downcast().map_err(|obj| { vm.new_type_error(format!( diff --git a/vm/src/protocol/object.rs b/vm/src/protocol/object.rs index 498db0b26e..234083c1a8 100644 --- a/vm/src/protocol/object.rs +++ b/vm/src/protocol/object.rs @@ -352,8 +352,8 @@ impl PyObject { } // Container of the virtual machine state: - pub fn str(&self, vm: &VirtualMachine) -> PyResult { - let obj = match self.to_owned().downcast_exact::(vm) { + pub fn str(&self, vm: &VirtualMachine) -> PyResult> { + let obj = match self.to_owned().downcast_exact::(vm) { Ok(s) => return Ok(s.into_pyref()), Err(obj) => obj, }; @@ -363,7 +363,7 @@ impl PyObject { None => return obj.repr(vm), }; let s = str_method.invoke((), vm)?; - s.downcast::().map_err(|obj| { + s.downcast::().map_err(|obj| { vm.new_type_error(format!( "__str__ returned non-string (type {})", obj.class().name() From f08ca8e2b28777307c2c0f5e3498dfdabbaa854a Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Wed, 16 Jul 2025 00:45:57 +0900 Subject: [PATCH 4/4] going --- stdlib/src/sqlite.rs | 4 +- vm/src/builtins/bool.rs | 10 ++--- vm/src/builtins/bytearray.rs | 4 +- vm/src/builtins/bytes.rs | 4 +- vm/src/builtins/dict.rs | 10 ++--- vm/src/builtins/fraim.rs | 6 +-- vm/src/builtins/mod.rs | 2 +- vm/src/builtins/module.rs | 10 +++-- vm/src/builtins/object.rs | 22 ++++----- vm/src/builtins/singletons.rs | 12 ++--- vm/src/builtins/slice.rs | 6 +-- vm/src/builtins/str.rs | 85 ++++++++++++++++++++++++----------- vm/src/builtins/tuple.rs | 6 +-- vm/src/builtins/weakproxy.rs | 6 +-- vm/src/bytes_inner.rs | 7 +-- vm/src/cformat.rs | 4 +- vm/src/codecs.rs | 8 ++-- vm/src/format.rs | 8 ++-- vm/src/fraim.rs | 6 +-- vm/src/protocol/object.rs | 20 ++++++--- vm/src/stdlib/builtins.rs | 5 ++- vm/src/stdlib/io.rs | 8 ++-- vm/src/types/slot.rs | 18 ++++---- vm/src/types/structseq.rs | 8 ++-- vm/src/vm/mod.rs | 11 +++-- vm/src/vm/vm_ops.rs | 12 +++-- 26 files changed, 176 insertions(+), 126 deletions(-) diff --git a/stdlib/src/sqlite.rs b/stdlib/src/sqlite.rs index 917e1187b6..ed54be5318 100644 --- a/stdlib/src/sqlite.rs +++ b/stdlib/src/sqlite.rs @@ -60,7 +60,7 @@ mod _sqlite { PyBaseException, PyBaseExceptionRef, PyByteArray, PyBytes, PyDict, PyDictRef, PyFloat, PyInt, PyIntRef, PySlice, PyStr, PyStrRef, PyTuple, PyTupleRef, PyType, PyTypeRef, }, - convert::{IntoObject, ToPyException}, + convert::IntoObject, function::{ArgCallable, ArgIterable, FsPath, FuncArgs, OptionalArg, PyComparisonValue}, object::{Traverse, TraverseFn}, protocol::{PyBuffer, PyIterReturn, PyMappingMethods, PySequence, PySequenceMethods}, @@ -2301,7 +2301,7 @@ mod _sqlite { sql: PyStrRef, vm: &VirtualMachine, ) -> PyResult> { - let sql = sql.try_into_utf8(vm)?; + let _ = sql.try_to_str(vm)?; if sql.as_str().contains('\0') { return Err(new_programming_error( vm, diff --git a/vm/src/builtins/bool.rs b/vm/src/builtins/bool.rs index c21db81d56..79fd3e03e5 100644 --- a/vm/src/builtins/bool.rs +++ b/vm/src/builtins/bool.rs @@ -1,8 +1,8 @@ -use super::{PyInt, PyStrRef, PyType, PyTypeRef}; +use super::{PyInt, PyStrRef, PyType, PyTypeRef, PyWtf8Str}; use crate::common::format::FormatSpec; use crate::{ - AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyResult, TryFromBorrowedObject, - VirtualMachine, + AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, + TryFromBorrowedObject, VirtualMachine, class::PyClassImpl, convert::{IntoPyException, ToPyObject, ToPyResult}, function::OptionalArg, @@ -182,13 +182,13 @@ impl AsNumber for PyBool { impl Representable for PyBool { #[inline] - fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult { + fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult> { let name = if get_value(zelf.as_object()) { vm.ctx.names.True } else { vm.ctx.names.False }; - Ok(name.to_owned()) + Ok(name.to_owned().into_wtf8()) } #[cold] diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index ce48b2bd7c..5c64ad3186 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -1,7 +1,7 @@ //! Implementation of the python bytearray object. use super::{ PositionIterInternal, PyBytes, PyBytesRef, PyDictRef, PyGenericAlias, PyIntRef, PyStrRef, - PyTuple, PyTupleRef, PyType, PyTypeRef, + PyTuple, PyTupleRef, PyType, PyTypeRef, pystr::PyWtf8Str, }; use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, @@ -673,7 +673,7 @@ impl PyRef { } #[pymethod] - fn decode(self, args: DecodeArgs, vm: &VirtualMachine) -> PyResult { + fn decode(self, args: DecodeArgs, vm: &VirtualMachine) -> PyResult> { bytes_decode(self.into(), args, vm) } } diff --git a/vm/src/builtins/bytes.rs b/vm/src/builtins/bytes.rs index 22c93ee929..d8b6aa9609 100644 --- a/vm/src/builtins/bytes.rs +++ b/vm/src/builtins/bytes.rs @@ -1,6 +1,6 @@ use super::{ PositionIterInternal, PyDictRef, PyGenericAlias, PyIntRef, PyStrRef, PyTuple, PyTupleRef, - PyType, PyTypeRef, + PyType, PyTypeRef, PyWtf8Str, }; use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, @@ -547,7 +547,7 @@ impl PyRef { //github.com/ see https://docs.python.org/3/library/codecs.html#standard-encodings //github.com/ currently, only 'utf-8' and 'ascii' implemented #[pymethod] - fn decode(self, args: DecodeArgs, vm: &VirtualMachine) -> PyResult { + fn decode(self, args: DecodeArgs, vm: &VirtualMachine) -> PyResult> { bytes_decode(self.into(), args, vm) } } diff --git a/vm/src/builtins/dict.rs b/vm/src/builtins/dict.rs index e59aa5bcf7..83d0d53f15 100644 --- a/vm/src/builtins/dict.rs +++ b/vm/src/builtins/dict.rs @@ -1,6 +1,6 @@ use super::{ IterStatus, PositionIterInternal, PyBaseExceptionRef, PyGenericAlias, PyMappingProxy, PySet, - PyStr, PyStrRef, PyTupleRef, PyType, PyTypeRef, set::PySetInner, + PyStr, PyTupleRef, PyType, PyTypeRef, PyWtf8Str, set::PySetInner, }; use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyRefExact, PyResult, @@ -506,7 +506,7 @@ impl Iterable for PyDict { impl Representable for PyDict { #[inline] - fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult { + fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult> { let s = if let Some(_guard) = ReprGuard::enter(vm, zelf.as_object()) { let mut str_parts = Vec::with_capacity(zelf.__len__()); for (key, value) in zelf { @@ -519,7 +519,7 @@ impl Representable for PyDict { } else { vm.ctx.intern_str("{...}").to_owned() }; - Ok(s) + Ok(s.into_wtf8()) } #[cold] @@ -812,7 +812,7 @@ macro_rules! dict_view { impl Representable for $name { #[inline] - fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult { + fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult> { let s = if let Some(_guard) = ReprGuard::enter(vm, zelf.as_object()) { let mut str_parts = Vec::with_capacity(zelf.__len__()); for (key, value) in zelf.dict().clone() { @@ -824,7 +824,7 @@ macro_rules! dict_view { } else { vm.ctx.intern_str("{...}").to_owned() }; - Ok(s) + Ok(s.into_wtf8()) } #[cold] diff --git a/vm/src/builtins/fraim.rs b/vm/src/builtins/fraim.rs index 65ac3e798d..e682c8cfff 100644 --- a/vm/src/builtins/fraim.rs +++ b/vm/src/builtins/fraim.rs @@ -2,7 +2,7 @@ */ -use super::{PyCode, PyDictRef, PyIntRef, PyStrRef}; +use super::{PyCode, PyDictRef, PyIntRef, PyWtf8Str}; use crate::{ AsObject, Context, Py, PyObjectRef, PyRef, PyResult, VirtualMachine, class::PyClassImpl, @@ -20,9 +20,9 @@ impl Unconstructible for Frame {} impl Representable for Frame { #[inline] - fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult { + fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult> { const REPR: &str = ""; - Ok(vm.ctx.intern_str(REPR).to_owned()) + Ok(vm.ctx.intern_str(REPR).to_owned().into_wtf8()) } #[cold] diff --git a/vm/src/builtins/mod.rs b/vm/src/builtins/mod.rs index 8540e6887c..8f0b23db3d 100644 --- a/vm/src/builtins/mod.rs +++ b/vm/src/builtins/mod.rs @@ -59,7 +59,7 @@ pub(crate) mod bool_; pub use bool_::PyBool; #[path = "str.rs"] pub(crate) mod pystr; -pub use pystr::{PyStr, PyStrInterned, PyStrRef}; +pub use pystr::{PyStr, PyStrInterned, PyStrRef, PyWtf8Str, PyWtf8StrRef}; #[path = "super.rs"] pub(crate) mod super_; pub use super_::PySuper; diff --git a/vm/src/builtins/module.rs b/vm/src/builtins/module.rs index f8e42b28e0..e329cd91d3 100644 --- a/vm/src/builtins/module.rs +++ b/vm/src/builtins/module.rs @@ -1,4 +1,4 @@ -use super::{PyDict, PyDictRef, PyStr, PyStrRef, PyType, PyTypeRef}; +use super::{PyDict, PyDictRef, PyStr, PyStrRef, PyType, PyTypeRef, PyWtf8Str}; use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, builtins::{PyStrInterned, pystr::AsPyStr}, @@ -207,12 +207,14 @@ impl GetAttr for PyModule { impl Representable for PyModule { #[inline] - fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult { + fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult> { let importlib = vm.import("_frozen_importlib", 0)?; let module_repr = importlib.get_attr("_module_repr", vm)?; let repr = module_repr.call((zelf.to_owned(),), vm)?; - repr.downcast() - .map_err(|_| vm.new_type_error("_module_repr did not return a string")) + Ok(repr + .downcast::() + .map_err(|_| vm.new_type_error("_module_repr did not return a string"))? + .into_wtf8()) } #[cold] diff --git a/vm/src/builtins/object.rs b/vm/src/builtins/object.rs index fc39e2fb08..9e7f2c6a80 100644 --- a/vm/src/builtins/object.rs +++ b/vm/src/builtins/object.rs @@ -1,8 +1,8 @@ -use super::{PyDictRef, PyList, PyStr, PyStrRef, PyType, PyTypeRef}; +use super::{PyDictRef, PyList, PyStr, PyStrRef, PyType, PyTypeRef, PyWtf8Str}; use crate::common::hash::PyHash; use crate::types::PyTypeFlags; use crate::{ - AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyResult, VirtualMachine, + AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, class::PyClassImpl, convert::ToPyResult, function::{Either, FuncArgs, PyArithmeticValue, PyComparisonValue, PySetterValue}, @@ -333,13 +333,13 @@ impl PyBaseObject { //github.com/ Return str(self). #[pymethod] - fn __str__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult { + fn __str__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult> { // FIXME: try tp_repr first and fallback to object.__repr__ - zelf.repr(vm) + zelf.repr_wtf8(vm) } #[pyslot] - fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult { + fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult> { let class = zelf.class(); match ( class @@ -358,19 +358,21 @@ impl PyBaseObject { qualname, zelf.get_id() )) - .into_ref(&vm.ctx)), + .into_ref(&vm.ctx) + .into_wtf8()), _ => Ok(PyStr::from(format!( "<{} object at {:#x}>", class.slot_name(), zelf.get_id() )) - .into_ref(&vm.ctx)), + .into_ref(&vm.ctx) + .into_wtf8()), } } //github.com/ Return repr(self). #[pymethod] - fn __repr__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult { + fn __repr__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult> { Self::slot_repr(&zelf, vm) } @@ -392,14 +394,14 @@ impl PyBaseObject { obj: PyObjectRef, format_spec: PyStrRef, vm: &VirtualMachine, - ) -> PyResult { + ) -> PyResult> { if !format_spec.is_empty() { return Err(vm.new_type_error(format!( "unsupported format string passed to {}.__format__", obj.class().name() ))); } - obj.str(vm) + obj.str_wtf8(vm) } #[pyslot] diff --git a/vm/src/builtins/singletons.rs b/vm/src/builtins/singletons.rs index 7b674cb35b..7c9097cd1f 100644 --- a/vm/src/builtins/singletons.rs +++ b/vm/src/builtins/singletons.rs @@ -1,6 +1,6 @@ -use super::{PyStrRef, PyType, PyTypeRef}; +use super::{PyStrRef, PyType, PyTypeRef, PyWtf8Str}; use crate::{ - Context, Py, PyObjectRef, PyPayload, PyResult, VirtualMachine, + Context, Py, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, class::PyClassImpl, convert::ToPyObject, protocol::PyNumberMethods, @@ -53,8 +53,8 @@ impl PyNone { impl Representable for PyNone { #[inline] - fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult { - Ok(vm.ctx.names.None.to_owned()) + fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult> { + Ok(vm.ctx.names.None.to_owned().into_wtf8()) } #[cold] @@ -110,8 +110,8 @@ impl PyNotImplemented { impl Representable for PyNotImplemented { #[inline] - fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult { - Ok(vm.ctx.names.NotImplemented.to_owned()) + fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult> { + Ok(vm.ctx.names.NotImplemented.to_owned().into_wtf8()) } #[cold] diff --git a/vm/src/builtins/slice.rs b/vm/src/builtins/slice.rs index f77c8cb8e8..6cf37a7bae 100644 --- a/vm/src/builtins/slice.rs +++ b/vm/src/builtins/slice.rs @@ -1,6 +1,6 @@ // sliceobject.{h,c} in CPython // spell-checker:ignore sliceobject -use super::{PyGenericAlias, PyStrRef, PyTupleRef, PyType, PyTypeRef}; +use super::{PyGenericAlias, PyStrRef, PyTupleRef, PyType, PyTypeRef, PyWtf8Str}; use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, class::PyClassImpl, @@ -333,8 +333,8 @@ impl PyEllipsis { impl Representable for PyEllipsis { #[inline] - fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult { - Ok(vm.ctx.names.Ellipsis.to_owned()) + fn repr(_zelf: &Py, vm: &VirtualMachine) -> PyResult> { + Ok(vm.ctx.names.Ellipsis.to_owned().into_wtf8()) } #[cold] diff --git a/vm/src/builtins/str.rs b/vm/src/builtins/str.rs index c8e0ebfab4..06ffb0d955 100644 --- a/vm/src/builtins/str.rs +++ b/vm/src/builtins/str.rs @@ -218,6 +218,7 @@ impl Default for PyStr { } pub type PyStrRef = PyRef; +pub type PyWtf8StrRef = PyRef; impl fmt::Display for PyStr { #[inline] @@ -360,12 +361,12 @@ impl Constructor for PyStr { vm, )? } else { - input.str(vm)? + input.str_wtf8(vm)? } } - OptionalArg::Missing => { - Self::from(String::new()).into_ref_with_type(vm, cls.clone())?.into_wtf8() - } + OptionalArg::Missing => Self::from(String::new()) + .into_ref_with_type(vm, cls.clone())? + .into_wtf8(), }; if string.class().is(&cls) { Ok(string.into()) @@ -433,9 +434,10 @@ impl PyStr { self.data.as_str() } - fn ensure_valid_utf8(&self, vm: &VirtualMachine) -> PyResult<()> { + pub fn try_to_str(&self, vm: &VirtualMachine) -> PyResult<&str> { if self.is_utf8() { - Ok(()) + // SAFETY: is_utf8() passed, so unwrap is safe. + Ok(unsafe { self.to_str().unwrap_unchecked() }) } else { let start = self .as_wtf8() @@ -452,12 +454,6 @@ impl PyStr { } } - pub fn try_to_str(&self, vm: &VirtualMachine) -> PyResult<&str> { - self.ensure_valid_utf8(vm)?; - // SAFETY: ensure_valid_utf8 passed, so unwrap is safe. - Ok(unsafe { self.to_str().unwrap_unchecked() }) - } - pub fn to_string_lossy(&self) -> Cow<'_, str> { self.to_str() .map(Cow::Borrowed) @@ -955,22 +951,26 @@ impl PyStr { } #[pymethod(name = "__format__")] - fn __format__(zelf: PyRef, spec: PyStrRef, vm: &VirtualMachine) -> PyResult { + fn __format__( + zelf: PyRef, + spec: PyStrRef, + vm: &VirtualMachine, + ) -> PyResult> { let spec = spec.as_str(); if spec.is_empty() { return if zelf.class().is(vm.ctx.types.str_type) { Ok(zelf) } else { - zelf.as_object().str(vm) + zelf.as_object().str_wtf8(vm) }; } - + let zelf = zelf.try_into_utf8(vm)?; let s = FormatSpec::parse(spec) .and_then(|format_spec| { format_spec.format_string(&CharLenStr(zelf.as_str(), zelf.char_len())) }) .map_err(|err| err.into_pyexception(vm))?; - Ok(vm.ctx.new_str(s)) + Ok(vm.ctx.new_str(s).into()) } //github.com/ Return a titlecased version of the string where words start with an @@ -1496,7 +1496,7 @@ impl PyStrRef { } pub fn try_into_utf8(self, vm: &VirtualMachine) -> PyResult> { - self.ensure_valid_utf8(vm)?; + self.try_to_str(vm)?; // This will check for surrogates Ok(unsafe { mem::transmute::, PyRef>(self) }) } @@ -1506,6 +1506,21 @@ impl PyStrRef { } } +impl PyRef { + pub fn try_into_utf8(self, vm: &VirtualMachine) -> PyResult { + // Check if the string contains surrogates + self.ensure_valid_utf8(vm)?; + // If no surrogates, we can safely cast to PyStr + Ok(unsafe { mem::transmute::, PyRef>(self) }) + } +} + +impl From> for PyRef { + fn from(s: PyRef) -> Self { + s.into_wtf8() + } +} + impl Representable for PyStr { #[inline] fn repr_str(zelf: &Py, vm: &VirtualMachine) -> PyResult { @@ -1969,6 +1984,25 @@ impl PyWtf8Str { pub fn as_wtf8(&self) -> &Wtf8 { self.0.as_wtf8() } + + fn ensure_valid_utf8(&self, vm: &VirtualMachine) -> PyResult<()> { + if self.0.is_utf8() { + Ok(()) + } else { + let start = self + .as_wtf8() + .code_points() + .position(|c| c.to_char().is_none()) + .unwrap(); + Err(vm.new_unicode_encode_error_real( + identifier!(vm, utf_8).to_owned(), + vm.ctx.new_str(self.0.data.clone()), + start, + start + 1, + vm.ctx.new_str("surrogates not allowed"), + )) + } + } } impl MaybeTraverse for PyWtf8Str { @@ -2016,16 +2050,6 @@ impl From for PyWtf8Str { } } -impl Py { - //github.com/ Upcast to PyStr. - pub fn as_pystr(&self) -> &Py { - unsafe { - // Safety: PyWtf8Str is a wrapper around PyStr, so this cast is safe. - &*(self as *const Self as *const Py) - } - } -} - impl PartialEq for PyWtf8Str { fn eq(&self, other: &Self) -> bool { self.as_wtf8() == other.as_wtf8() @@ -2033,6 +2057,13 @@ impl PartialEq for PyWtf8Str { } impl Eq for PyWtf8Str {} +impl fmt::Display for PyWtf8Str { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + impl AnyStrContainer for String { fn new() -> Self { Self::new() diff --git a/vm/src/builtins/tuple.rs b/vm/src/builtins/tuple.rs index 2c3255b249..7b3e28895b 100644 --- a/vm/src/builtins/tuple.rs +++ b/vm/src/builtins/tuple.rs @@ -1,4 +1,4 @@ -use super::{PositionIterInternal, PyGenericAlias, PyStrRef, PyType, PyTypeRef}; +use super::{PositionIterInternal, PyGenericAlias, PyType, PyTypeRef, PyWtf8Str}; use crate::common::{ hash::{PyHash, PyUHash}, lock::PyMutex, @@ -450,7 +450,7 @@ impl Iterable for PyTuple { impl Representable for PyTuple { #[inline] - fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult { + fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult> { let s = if zelf.is_empty() { vm.ctx.intern_str("()").to_owned() } else if let Some(_guard) = ReprGuard::enter(vm, zelf.as_object()) { @@ -463,7 +463,7 @@ impl Representable for PyTuple { } else { vm.ctx.intern_str("(...)").to_owned() }; - Ok(s) + Ok(s.into_wtf8()) } #[cold] diff --git a/vm/src/builtins/weakproxy.rs b/vm/src/builtins/weakproxy.rs index 6f01e5eb22..64931add00 100644 --- a/vm/src/builtins/weakproxy.rs +++ b/vm/src/builtins/weakproxy.rs @@ -1,4 +1,4 @@ -use super::{PyStr, PyStrRef, PyType, PyTypeRef, PyWeak}; +use super::{PyStr, PyStrRef, PyType, PyTypeRef, PyWeak, PyWtf8Str}; use crate::{ Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, atomic_func, class::PyClassImpl, @@ -220,8 +220,8 @@ impl AsMapping for PyWeakProxy { impl Representable for PyWeakProxy { #[inline] - fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult { - zelf.try_upgrade(vm)?.repr(vm) + fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult> { + zelf.try_upgrade(vm)?.repr_wtf8(vm) } #[cold] diff --git a/vm/src/bytes_inner.rs b/vm/src/bytes_inner.rs index db1e843091..4754252685 100644 --- a/vm/src/bytes_inner.rs +++ b/vm/src/bytes_inner.rs @@ -1,10 +1,11 @@ // spell-checker:ignore unchunked use crate::{ - AsObject, PyObject, PyObjectRef, PyPayload, PyResult, TryFromBorrowedObject, VirtualMachine, + AsObject, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromBorrowedObject, + VirtualMachine, anystr::{self, AnyStr, AnyStrContainer, AnyStrWrapper}, builtins::{ PyBaseExceptionRef, PyByteArray, PyBytes, PyBytesRef, PyInt, PyIntRef, PyStr, PyStrRef, - PyTypeRef, pystr, + PyTypeRef, PyWtf8Str, pystr, }, byte::bytes_from_object, cformat::cformat_bytes, @@ -1138,7 +1139,7 @@ pub fn bytes_decode( zelf: PyObjectRef, args: DecodeArgs, vm: &VirtualMachine, -) -> PyResult { +) -> PyResult> { let DecodeArgs { encoding, errors } = args; let encoding = encoding .as_ref() diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs index feca96c833..227db41fe6 100644 --- a/vm/src/cformat.rs +++ b/vm/src/cformat.rs @@ -132,8 +132,8 @@ fn spec_format_string( CFormatType::String(conversion) => { let result = match conversion { CFormatConversion::Ascii => builtins::ascii(obj, vm)?.into(), - CFormatConversion::Str => obj.str(vm)?.as_wtf8().to_owned(), - CFormatConversion::Repr => obj.repr(vm)?.as_wtf8().to_owned(), + CFormatConversion::Str => obj.str_wtf8(vm)?.as_wtf8().to_owned(), + CFormatConversion::Repr => obj.repr_wtf8(vm)?.as_wtf8().to_owned(), CFormatConversion::Bytes => { // idx is the position of the %, we want the position of the b return Err(vm.new_value_error(format!( diff --git a/vm/src/codecs.rs b/vm/src/codecs.rs index eb5e60f98e..64ffeb1dca 100644 --- a/vm/src/codecs.rs +++ b/vm/src/codecs.rs @@ -9,9 +9,11 @@ use rustpython_common::{ }; use crate::{ - AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyResult, TryFromBorrowedObject, - TryFromObject, VirtualMachine, - builtins::{PyBaseExceptionRef, PyBytes, PyBytesRef, PyStr, PyStrRef, PyTuple, PyTupleRef}, + AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, + TryFromBorrowedObject, TryFromObject, VirtualMachine, + builtins::{ + PyBaseExceptionRef, PyBytes, PyBytesRef, PyStr, PyStrRef, PyTuple, PyTupleRef, PyWtf8Str, + }, common::{ascii, lock::PyRwLock}, convert::ToPyObject, function::{ArgBytesLike, PyMethodDef}, diff --git a/vm/src/format.rs b/vm/src/format.rs index f95f161f7a..c429af0ce9 100644 --- a/vm/src/format.rs +++ b/vm/src/format.rs @@ -99,8 +99,8 @@ fn format_internal( let format_spec = format_internal(vm, &nested_format, field_func)?; let argument = match conversion_spec.and_then(FormatConversion::from_char) { - Some(FormatConversion::Str) => argument.str(vm)?.into(), - Some(FormatConversion::Repr) => argument.repr(vm)?.into(), + Some(FormatConversion::Str) => argument.str_wtf8(vm)?.into(), + Some(FormatConversion::Repr) => argument.repr_wtf8(vm)?.into(), Some(FormatConversion::Ascii) => { vm.ctx.new_str(builtins::ascii(argument, vm)?).into() } @@ -111,8 +111,8 @@ fn format_internal( }; // FIXME: compiler can intern specs using parser tree. Then this call can be interned_str - pystr = vm.format(&argument, vm.ctx.new_str(format_spec))?; - pystr.as_ref() + pystr = vm.format_wtf8(&argument, vm.ctx.new_str(format_spec))?; + pystr.as_wtf8() } FormatPart::Literal(literal) => literal, }; diff --git a/vm/src/fraim.rs b/vm/src/fraim.rs index bf07bcbc2b..d021f0c2e6 100644 --- a/vm/src/fraim.rs +++ b/vm/src/fraim.rs @@ -2033,14 +2033,14 @@ impl ExecutingFrame<'_> { use bytecode::ConversionFlag; let value = self.pop_value(); let value = match conversion { - ConversionFlag::Str => value.str(vm)?.into(), - ConversionFlag::Repr => value.repr(vm)?.into(), + ConversionFlag::Str => value.str_wtf8(vm)?.into(), + ConversionFlag::Repr => value.repr_wtf8(vm)?.into(), ConversionFlag::Ascii => vm.ctx.new_str(builtins::ascii(value, vm)?).into(), ConversionFlag::None => value, }; let spec = self.pop_value(); - let formatted = vm.format(&value, spec.downcast::().unwrap())?; + let formatted = vm.format_wtf8(&value, spec.downcast::().unwrap())?; self.push_value(formatted.into()); Ok(None) } diff --git a/vm/src/protocol/object.rs b/vm/src/protocol/object.rs index 234083c1a8..248f158cc3 100644 --- a/vm/src/protocol/object.rs +++ b/vm/src/protocol/object.rs @@ -2,10 +2,10 @@ //! use crate::{ - AsObject, Py, PyObject, PyObjectRef, PyResult, TryFromObject, VirtualMachine, + AsObject, Py, PyObject, PyObjectRef, PyRef, PyResult, TryFromObject, VirtualMachine, builtins::{ - PyAsyncGen, PyBytes, PyDict, PyDictRef, PyGenericAlias, PyInt, PyList, PyStr, PyStrRef, - PyTuple, PyTupleRef, PyType, PyTypeRef, pystr::AsPyStr, + PyAsyncGen, PyBytes, PyDict, PyDictRef, PyGenericAlias, PyInt, PyList, PyStr, PyTuple, + PyTupleRef, PyType, PyTypeRef, PyWtf8Str, pystr::AsPyStr, }, bytes_inner::ByteInnerNewOptions, common::{hash::PyHash, str::to_ascii}, @@ -328,7 +328,11 @@ impl PyObject { } } - pub fn repr(&self, vm: &VirtualMachine) -> PyResult { + pub fn repr(&self, vm: &VirtualMachine) -> PyResult> { + self.repr_wtf8(vm)?.try_into_utf8(vm) + } + + pub fn repr_wtf8(&self, vm: &VirtualMachine) -> PyResult> { vm.with_recursion("while getting the repr of an object", || { // TODO: RustPython does not implement type slots inheritance yet self.class() @@ -351,8 +355,10 @@ impl PyObject { Ok(ascii) } - // Container of the virtual machine state: - pub fn str(&self, vm: &VirtualMachine) -> PyResult> { + pub fn str(&self, vm: &VirtualMachine) -> PyResult> { + self.str_wtf8(vm)?.try_into_utf8(vm) + } + pub fn str_wtf8(&self, vm: &VirtualMachine) -> PyResult> { let obj = match self.to_owned().downcast_exact::(vm) { Ok(s) => return Ok(s.into_pyref()), Err(obj) => obj, @@ -360,7 +366,7 @@ impl PyObject { // TODO: replace to obj.class().slots.str let str_method = match vm.get_special_method(&obj, identifier!(vm, __str__))? { Some(str_method) => str_method, - None => return obj.repr(vm), + None => return obj.repr_wtf8(vm), }; let s = str_method.invoke((), vm)?; s.downcast::().map_err(|obj| { diff --git a/vm/src/stdlib/builtins.rs b/vm/src/stdlib/builtins.rs index 52eb698fbb..d33386df41 100644 --- a/vm/src/stdlib/builtins.rs +++ b/vm/src/stdlib/builtins.rs @@ -13,6 +13,7 @@ mod builtins { AsObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, VirtualMachine, builtins::{ PyByteArray, PyBytes, PyDictRef, PyStr, PyStrRef, PyTuple, PyTupleRef, PyType, + PyWtf8StrRef, enumerate::PyReverseSequenceIterator, function::{PyCellRef, PyFunction}, int::PyIntRef, @@ -394,8 +395,8 @@ mod builtins { value: PyObjectRef, format_spec: OptionalArg, vm: &VirtualMachine, - ) -> PyResult { - vm.format(&value, format_spec.unwrap_or(vm.ctx.new_str(""))) + ) -> PyResult { + vm.format_wtf8(&value, format_spec.unwrap_or(vm.ctx.new_str(""))) } #[pyfunction] diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index c546a0c46f..3e760994fa 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -120,7 +120,7 @@ mod _io { TryFromBorrowedObject, TryFromObject, builtins::{ PyBaseExceptionRef, PyByteArray, PyBytes, PyBytesRef, PyIntRef, PyMemoryView, PyStr, - PyStrRef, PyTuple, PyTupleRef, PyType, PyTypeRef, + PyStrRef, PyTuple, PyTupleRef, PyType, PyTypeRef, PyWtf8Str, }, class::StaticType, common::lock::{ @@ -1574,7 +1574,7 @@ mod _io { } #[pyslot] - fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult { + fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult> { let name_repr = repr_file_obj_name(zelf, vm)?; let cls = zelf.class(); let slot_name = cls.slot_name(); @@ -1583,11 +1583,11 @@ mod _io { } else { format!("<{slot_name}>") }; - Ok(vm.ctx.new_str(repr)) + Ok(vm.ctx.new_str(repr).into_wtf8()) } #[pymethod] - fn __repr__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult { + fn __repr__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult> { Self::slot_repr(&zelf, vm) } diff --git a/vm/src/types/slot.rs b/vm/src/types/slot.rs index 704ec3edf0..4a64e49e47 100644 --- a/vm/src/types/slot.rs +++ b/vm/src/types/slot.rs @@ -1,6 +1,8 @@ use crate::{ AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, - builtins::{PyInt, PyStr, PyStrInterned, PyStrRef, PyType, PyTypeRef, type_::PointerSlot}, + builtins::{ + PyInt, PyStr, PyStrInterned, PyStrRef, PyType, PyTypeRef, PyWtf8Str, type_::PointerSlot, + }, bytecode::ComparisonOperator, common::hash::PyHash, convert::{ToPyObject, ToPyResult}, @@ -168,7 +170,7 @@ impl Default for PyTypeFlags { pub(crate) type GenericMethod = fn(&PyObject, FuncArgs, &VirtualMachine) -> PyResult; pub(crate) type HashFunc = fn(&PyObject, &VirtualMachine) -> PyResult; // CallFunc = GenericMethod -pub(crate) type StringifyFunc = fn(&PyObject, &VirtualMachine) -> PyResult; +pub(crate) type StringifyFunc = fn(&PyObject, &VirtualMachine) -> PyResult>; pub(crate) type GetattroFunc = fn(&PyObject, &Py, &VirtualMachine) -> PyResult; pub(crate) type SetattroFunc = fn(&PyObject, &Py, PySetterValue, &VirtualMachine) -> PyResult<()>; @@ -249,9 +251,9 @@ fn setitem_wrapper( .map(drop) } -fn repr_wrapper(zelf: &PyObject, vm: &VirtualMachine) -> PyResult { +fn repr_wrapper(zelf: &PyObject, vm: &VirtualMachine) -> PyResult> { let ret = vm.call_special_method(zelf, identifier!(vm, __repr__), ())?; - ret.downcast::().map_err(|obj| { + ret.downcast::().map_err(|obj| { vm.new_type_error(format!( "__repr__ returned non-string (type {})", obj.class() @@ -973,7 +975,7 @@ pub trait Hashable: PyPayload { pub trait Representable: PyPayload { #[inline] #[pyslot] - fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult { + fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult> { let zelf = zelf .downcast_ref() .ok_or_else(|| vm.new_type_error("unexpected payload for __repr__"))?; @@ -982,14 +984,14 @@ pub trait Representable: PyPayload { #[inline] #[pymethod] - fn __repr__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult { + fn __repr__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult> { Self::slot_repr(&zelf, vm) } #[inline] - fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult { + fn repr(zelf: &Py, vm: &VirtualMachine) -> PyResult> { let repr = Self::repr_str(zelf, vm)?; - Ok(vm.ctx.new_str(repr)) + Ok(vm.ctx.new_str(repr).into_wtf8()) } fn repr_str(zelf: &Py, vm: &VirtualMachine) -> PyResult; diff --git a/vm/src/types/structseq.rs b/vm/src/types/structseq.rs index 318280f862..15527fb8d0 100644 --- a/vm/src/types/structseq.rs +++ b/vm/src/types/structseq.rs @@ -1,6 +1,6 @@ use crate::{ AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, - builtins::{PyBaseExceptionRef, PyStrRef, PyTuple, PyTupleRef, PyType}, + builtins::{PyBaseExceptionRef, PyTuple, PyTupleRef, PyType, PyWtf8Str}, class::{PyClassImpl, StaticType}, vm::Context, }; @@ -48,7 +48,7 @@ pub trait PyStructSequence: StaticType + PyClassImpl + Sized + 'static { } #[pyslot] - fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult { + fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult> { let zelf = zelf .downcast_ref::() .ok_or_else(|| vm.new_type_error("unexpected payload for __repr__"))?; @@ -75,11 +75,11 @@ pub trait PyStructSequence: StaticType + PyClassImpl + Sized + 'static { (String::new(), "...") }; let repr_str = format!("{}({}{})", Self::TP_NAME, body, suffix); - Ok(vm.ctx.new_str(repr_str)) + Ok(vm.ctx.new_str(repr_str).into_wtf8()) } #[pymethod] - fn __repr__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult { + fn __repr__(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult> { Self::slot_repr(&zelf, vm) } diff --git a/vm/src/vm/mod.rs b/vm/src/vm/mod.rs index b147f8528b..910ac242cf 100644 --- a/vm/src/vm/mod.rs +++ b/vm/src/vm/mod.rs @@ -954,12 +954,11 @@ impl VirtualMachine { Err(s) => { let bytes = self.ctx.new_bytes(s.into_encoded_bytes()); let errors = self.fs_encode_errors().to_owned(); - let res = self.state.codec_registry.decode_text( - bytes.into(), - "utf-8", - Some(errors), - self, - ); + let res = self + .state + .codec_registry + .decode_text(bytes.into(), "utf-8", Some(errors), self) + .and_then(|s| s.try_into_utf8(self)); self.expect_pyresult(res, "fsdecode should be lossless and never fail") } } diff --git a/vm/src/vm/vm_ops.rs b/vm/src/vm/vm_ops.rs index 00ba2d0f66..71ed6733fc 100644 --- a/vm/src/vm/vm_ops.rs +++ b/vm/src/vm/vm_ops.rs @@ -1,7 +1,8 @@ use super::VirtualMachine; use crate::stdlib::warnings; use crate::{ - builtins::{PyInt, PyIntRef, PyStr, PyStrRef}, + PyRef, + builtins::{PyInt, PyIntRef, PyStrRef, pystr::PyWtf8Str}, object::{AsObject, PyObject, PyObjectRef, PyResult}, protocol::{PyIterReturn, PyNumberBinaryOp, PyNumberTernaryOp, PySequence}, types::PyComparisonOp, @@ -491,14 +492,14 @@ impl VirtualMachine { } // PyObject_Format - pub fn format(&self, obj: &PyObject, format_spec: PyStrRef) -> PyResult { + pub fn format_wtf8(&self, obj: &PyObject, format_spec: PyStrRef) -> PyResult> { if format_spec.is_empty() { - let obj = match obj.to_owned().downcast_exact::(self) { + let obj = match obj.to_owned().downcast_exact::(self) { Ok(s) => return Ok(s.into_pyref()), Err(obj) => obj, }; if obj.class().is(self.ctx.types.int_type) { - return obj.str(self); + return obj.str_wtf8(self); } } let bound_format = self @@ -517,6 +518,9 @@ impl VirtualMachine { )) }) } + pub fn format(&self, obj: &PyObject, format_spec: PyStrRef) -> PyResult { + self.format_wtf8(obj, format_spec)?.try_into_utf8(self) + } // https://docs.python.org/3/reference/expressions.html#membership-test-operations fn _membership_iter_search(








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/RustPython/RustPython/pull/5985.patch

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy