Skip to content

Commit f82aeea

Browse files
authored
Simplify UTF8 StrPtr usage (#2374)
* Use non-BOM encodings
* Copy potential BOM to the output of PyString_FromString. The documentation of `PyUnicode_DecodeUTF16`, which is used here, states that when `byteorder` is not passed or `*byteorder` is 0, the first two bytes of the input, if they are a BOM (U+FEFF, zero-width no-break space), are interpreted as a byte-order mark and skipped instead of being copied to the result. That is incorrect when converting a string known to carry no BOM, which is the case for all strings coming from C#.
* Default to UTF8 for StrPtr
1 parent b112885 commit f82aeea

File tree

4 files changed

+27
-23
lines changed

4 files changed

+27
-23
lines changed

src/embed_tests/TestPyType.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ public void CanCreateHeapType()
2828
const string name = "nÁmæ";
2929
const string docStr = "dÁcæ";
3030

31-
using var doc = new StrPtr(docStr, Encodings.UTF8);
31+
using var doc = new StrPtr(docStr);
32+
3233
var spec = new TypeSpec(
3334
name: name,
3435
basicSize: Util.ReadInt32(Runtime.Runtime.PyBaseObjectType, TypeOffset.tp_basicsize),

src/runtime/Native/NativeTypeSpec.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public NativeTypeSpec(TypeSpec spec)
1717
{
1818
if (spec is null) throw new ArgumentNullException(nameof(spec));
1919

20-
this.Name = new StrPtr(spec.Name, Encodings.UTF8);
20+
this.Name = new StrPtr(spec.Name);
2121
this.BasicSize = spec.BasicSize;
2222
this.ItemSize = spec.ItemSize;
2323
this.Flags = (int)spec.Flags;

src/runtime/Native/StrPtr.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ struct StrPtr : IDisposable
1010
public IntPtr RawPointer { get; set; }
1111
unsafe byte* Bytes => (byte*)this.RawPointer;
1212

13+
public unsafe StrPtr(string value) : this(value, Encodings.UTF8) {}
14+
1315
public unsafe StrPtr(string value, Encoding encoding)
1416
{
1517
if (value is null) throw new ArgumentNullException(nameof(value));

src/runtime/Runtime.cs

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -795,13 +795,13 @@ public static int Py_Main(int argc, string[] argv)
795795

796796
internal static int PyRun_SimpleString(string code)
797797
{
798-
using var codePtr = new StrPtr(code, Encodings.UTF8);
798+
using var codePtr = new StrPtr(code);
799799
return Delegates.PyRun_SimpleStringFlags(codePtr, Utf8String);
800800
}
801801

802802
internal static NewReference PyRun_String(string code, RunFlagType st, BorrowedReference globals, BorrowedReference locals)
803803
{
804-
using var codePtr = new StrPtr(code, Encodings.UTF8);
804+
using var codePtr = new StrPtr(code);
805805
return Delegates.PyRun_StringFlags(codePtr, st, globals, locals, Utf8String);
806806
}
807807

@@ -813,14 +813,15 @@ internal static NewReference PyRun_String(string code, RunFlagType st, BorrowedR
813813
/// </summary>
814814
internal static NewReference Py_CompileString(string str, string file, int start)
815815
{
816-
using var strPtr = new StrPtr(str, Encodings.UTF8);
816+
using var strPtr = new StrPtr(str);
817+
817818
using var fileObj = new PyString(file);
818819
return Delegates.Py_CompileStringObject(strPtr, fileObj, start, Utf8String, -1);
819820
}
820821

821822
internal static NewReference PyImport_ExecCodeModule(string name, BorrowedReference code)
822823
{
823-
using var namePtr = new StrPtr(name, Encodings.UTF8);
824+
using var namePtr = new StrPtr(name);
824825
return Delegates.PyImport_ExecCodeModule(namePtr, code);
825826
}
826827

@@ -867,13 +868,13 @@ internal static bool PyObject_IsIterable(BorrowedReference ob)
867868

868869
internal static int PyObject_HasAttrString(BorrowedReference pointer, string name)
869870
{
870-
using var namePtr = new StrPtr(name, Encodings.UTF8);
871+
using var namePtr = new StrPtr(name);
871872
return Delegates.PyObject_HasAttrString(pointer, namePtr);
872873
}
873874

874875
internal static NewReference PyObject_GetAttrString(BorrowedReference pointer, string name)
875876
{
876-
using var namePtr = new StrPtr(name, Encodings.UTF8);
877+
using var namePtr = new StrPtr(name);
877878
return Delegates.PyObject_GetAttrString(pointer, namePtr);
878879
}
879880

@@ -884,12 +885,12 @@ internal static NewReference PyObject_GetAttrString(BorrowedReference pointer, S
884885
internal static int PyObject_DelAttr(BorrowedReference @object, BorrowedReference name) => Delegates.PyObject_SetAttr(@object, name, null);
885886
internal static int PyObject_DelAttrString(BorrowedReference @object, string name)
886887
{
887-
using var namePtr = new StrPtr(name, Encodings.UTF8);
888+
using var namePtr = new StrPtr(name);
888889
return Delegates.PyObject_SetAttrString(@object, namePtr, null);
889890
}
890891
internal static int PyObject_SetAttrString(BorrowedReference @object, string name, BorrowedReference value)
891892
{
892-
using var namePtr = new StrPtr(name, Encodings.UTF8);
893+
using var namePtr = new StrPtr(name);
893894
return Delegates.PyObject_SetAttrString(@object, namePtr, value);
894895
}
895896

@@ -1071,7 +1072,7 @@ internal static bool PyBool_CheckExact(BorrowedReference ob)
10711072

10721073
internal static NewReference PyLong_FromString(string value, int radix)
10731074
{
1074-
using var valPtr = new StrPtr(value, Encodings.UTF8);
1075+
using var valPtr = new StrPtr(value);
10751076
return Delegates.PyLong_FromString(valPtr, IntPtr.Zero, radix);
10761077
}
10771078

@@ -1274,7 +1275,7 @@ internal static NewReference EmptyPyBytes()
12741275
internal static NewReference PyByteArray_FromStringAndSize(IntPtr strPtr, nint len) => Delegates.PyByteArray_FromStringAndSize(strPtr, len);
12751276
internal static NewReference PyByteArray_FromStringAndSize(string s)
12761277
{
1277-
using var ptr = new StrPtr(s, Encodings.UTF8);
1278+
using var ptr = new StrPtr(s);
12781279
return PyByteArray_FromStringAndSize(ptr.RawPointer, checked((nint)ptr.ByteCount));
12791280
}
12801281

@@ -1302,7 +1303,7 @@ internal static IntPtr PyBytes_AsString(BorrowedReference ob)
13021303

13031304
internal static NewReference PyUnicode_InternFromString(string s)
13041305
{
1305-
using var ptr = new StrPtr(s, Encodings.UTF8);
1306+
using var ptr = new StrPtr(s);
13061307
return Delegates.PyUnicode_InternFromString(ptr);
13071308
}
13081309

@@ -1377,7 +1378,7 @@ internal static bool PyDict_Check(BorrowedReference ob)
13771378

13781379
internal static BorrowedReference PyDict_GetItemString(BorrowedReference pointer, string key)
13791380
{
1380-
using var keyStr = new StrPtr(key, Encodings.UTF8);
1381+
using var keyStr = new StrPtr(key);
13811382
return Delegates.PyDict_GetItemString(pointer, keyStr);
13821383
}
13831384

@@ -1393,7 +1394,7 @@ internal static BorrowedReference PyDict_GetItemString(BorrowedReference pointer
13931394
/// </summary>
13941395
internal static int PyDict_SetItemString(BorrowedReference dict, string key, BorrowedReference value)
13951396
{
1396-
using var keyPtr = new StrPtr(key, Encodings.UTF8);
1397+
using var keyPtr = new StrPtr(key);
13971398
return Delegates.PyDict_SetItemString(dict, keyPtr, value);
13981399
}
13991400

@@ -1402,7 +1403,7 @@ internal static int PyDict_SetItemString(BorrowedReference dict, string key, Bor
14021403

14031404
internal static int PyDict_DelItemString(BorrowedReference pointer, string key)
14041405
{
1405-
using var keyPtr = new StrPtr(key, Encodings.UTF8);
1406+
using var keyPtr = new StrPtr(key);
14061407
return Delegates.PyDict_DelItemString(pointer, keyPtr);
14071408
}
14081409

@@ -1517,7 +1518,7 @@ internal static bool PyIter_Check(BorrowedReference ob)
15171518

15181519
internal static NewReference PyModule_New(string name)
15191520
{
1520-
using var namePtr = new StrPtr(name, Encodings.UTF8);
1521+
using var namePtr = new StrPtr(name);
15211522
return Delegates.PyModule_New(namePtr);
15221523
}
15231524

@@ -1531,7 +1532,7 @@ internal static NewReference PyModule_New(string name)
15311532
/// <returns>Return -1 on error, 0 on success.</returns>
15321533
internal static int PyModule_AddObject(BorrowedReference module, string name, StolenReference value)
15331534
{
1534-
using var namePtr = new StrPtr(name, Encodings.UTF8);
1535+
using var namePtr = new StrPtr(name);
15351536
IntPtr valueAddr = value.DangerousGetAddressOrNull();
15361537
int res = Delegates.PyModule_AddObject(module, namePtr, valueAddr);
15371538
// We can't just exit here because the reference is stolen only on success.
@@ -1549,7 +1550,7 @@ internal static int PyModule_AddObject(BorrowedReference module, string name, St
15491550

15501551
internal static NewReference PyImport_ImportModule(string name)
15511552
{
1552-
using var namePtr = new StrPtr(name, Encodings.UTF8);
1553+
using var namePtr = new StrPtr(name);
15531554
return Delegates.PyImport_ImportModule(namePtr);
15541555
}
15551556

@@ -1558,7 +1559,7 @@ internal static NewReference PyImport_ImportModule(string name)
15581559

15591560
internal static BorrowedReference PyImport_AddModule(string name)
15601561
{
1561-
using var namePtr = new StrPtr(name, Encodings.UTF8);
1562+
using var namePtr = new StrPtr(name);
15621563
return Delegates.PyImport_AddModule(namePtr);
15631564
}
15641565

@@ -1586,13 +1587,13 @@ internal static void PySys_SetArgvEx(int argc, string[] argv, int updatepath)
15861587

15871588
internal static BorrowedReference PySys_GetObject(string name)
15881589
{
1589-
using var namePtr = new StrPtr(name, Encodings.UTF8);
1590+
using var namePtr = new StrPtr(name);
15901591
return Delegates.PySys_GetObject(namePtr);
15911592
}
15921593

15931594
internal static int PySys_SetObject(string name, BorrowedReference ob)
15941595
{
1595-
using var namePtr = new StrPtr(name, Encodings.UTF8);
1596+
using var namePtr = new StrPtr(name);
15961597
return Delegates.PySys_SetObject(namePtr, ob);
15971598
}
15981599

@@ -1691,7 +1692,7 @@ internal static IntPtr PyMem_Malloc(long size)
16911692

16921693
internal static void PyErr_SetString(BorrowedReference ob, string message)
16931694
{
1694-
using var msgPtr = new StrPtr(message, Encodings.UTF8);
1695+
using var msgPtr = new StrPtr(message);
16951696
Delegates.PyErr_SetString(ob, msgPtr);
16961697
}
16971698

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy