Skip to content

Commit 91f4380

Browse files
authored
bpo-36785: PEP 574 implementation (GH-7076)
1 parent 22ccb0b commit 91f4380

19 files changed

+1886
-240
lines changed

Doc/library/pickle.rst

Lines changed: 214 additions & 57 deletions
Large diffs are not rendered by default.

Include/Python.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,7 @@
124124
#include "weakrefobject.h"
125125
#include "structseq.h"
126126
#include "namespaceobject.h"
127+
#include "picklebufobject.h"
127128

128129
#include "codecs.h"
129130
#include "pyerrors.h"

Include/picklebufobject.h

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,31 @@
1+
/* PickleBuffer object. This is built-in for ease of use from third-party
2+
* C extensions.
3+
*/
4+
5+
#ifndef Py_PICKLEBUFOBJECT_H
6+
#define Py_PICKLEBUFOBJECT_H
7+
#ifdef __cplusplus
8+
extern "C" {
9+
#endif
10+
11+
#ifndef Py_LIMITED_API
12+
13+
PyAPI_DATA(PyTypeObject) PyPickleBuffer_Type;
14+
15+
#define PyPickleBuffer_Check(op) (Py_TYPE(op) == &PyPickleBuffer_Type)
16+
17+
/* Create a PickleBuffer redirecting to the given buffer-enabled object */
18+
PyAPI_FUNC(PyObject *) PyPickleBuffer_FromObject(PyObject *);
19+
/* Get the PickleBuffer's underlying view to the original object
20+
* (NULL if released)
21+
*/
22+
PyAPI_FUNC(const Py_buffer *) PyPickleBuffer_GetBuffer(PyObject *);
23+
/* Release the PickleBuffer. Returns 0 on success, -1 on error. */
24+
PyAPI_FUNC(int) PyPickleBuffer_Release(PyObject *);
25+
26+
#endif /* !Py_LIMITED_API */
27+
28+
#ifdef __cplusplus
29+
}
30+
#endif
31+
#endif /* !Py_PICKLEBUFOBJECT_H */

Lib/pickle.py

Lines changed: 139 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,10 @@
3636
import codecs
3737
import _compat_pickle
3838

39+
from _pickle import PickleBuffer
40+
3941
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
40-
"Unpickler", "dump", "dumps", "load", "loads"]
42+
"Unpickler", "dump", "dumps", "load", "loads", "PickleBuffer"]
4143

4244
# Shortcut for use in isinstance testing
4345
bytes_types = (bytes, bytearray)
@@ -51,10 +53,11 @@
5153
"2.0", # Protocol 2
5254
"3.0", # Protocol 3
5355
"4.0", # Protocol 4
56+
"5.0", # Protocol 5
5457
] # Old format versions we can read
5558

5659
# This is the highest protocol number we know how to read.
57-
HIGHEST_PROTOCOL = 4
60+
HIGHEST_PROTOCOL = 5
5861

5962
# The protocol we write by default. May be less than HIGHEST_PROTOCOL.
6063
# Only bump this if the oldest still supported version of Python already
@@ -167,6 +170,7 @@ def __init__(self, value):
167170
SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes
168171

169172
# Protocol 4
173+
170174
SHORT_BINUNICODE = b'\x8c' # push short string; UTF-8 length < 256 bytes
171175
BINUNICODE8 = b'\x8d' # push very long string
172176
BINBYTES8 = b'\x8e' # push very long bytes string
@@ -178,6 +182,12 @@ def __init__(self, value):
178182
MEMOIZE = b'\x94' # store top of the stack in memo
179183
FRAME = b'\x95' # indicate the beginning of a new frame
180184

185+
# Protocol 5
186+
187+
BYTEARRAY8 = b'\x96' # push bytearray
188+
NEXT_BUFFER = b'\x97' # push next out-of-band buffer
189+
READONLY_BUFFER = b'\x98' # make top of stack readonly
190+
181191
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
182192

183193

@@ -251,6 +261,23 @@ def __init__(self, file_read, file_readline, file_tell=None):
251261
self.file_readline = file_readline
252262
self.current_frame = None
253263

264+
def readinto(self, buf):
265+
if self.current_frame:
266+
n = self.current_frame.readinto(buf)
267+
if n == 0 and len(buf) != 0:
268+
self.current_frame = None
269+
n = len(buf)
270+
buf[:] = self.file_read(n)
271+
return n
272+
if n < len(buf):
273+
raise UnpicklingError(
274+
"pickle exhausted before end of frame")
275+
return n
276+
else:
277+
n = len(buf)
278+
buf[:] = self.file_read(n)
279+
return n
280+
254281
def read(self, n):
255282
if self.current_frame:
256283
data = self.current_frame.read(n)
@@ -371,7 +398,8 @@ def decode_long(data):
371398

372399
class _Pickler:
373400

374-
def __init__(self, file, protocol=None, *, fix_imports=True):
401+
def __init__(self, file, protocol=None, *, fix_imports=True,
402+
buffer_callback=None):
375403
"""This takes a binary file for writing a pickle data stream.
376404
377405
The optional *protocol* argument tells the pickler to use the
@@ -393,13 +421,27 @@ def __init__(self, file, protocol=None, *, fix_imports=True):
393421
will try to map the new Python 3 names to the old module names
394422
used in Python 2, so that the pickle data stream is readable
395423
with Python 2.
424+
425+
If *buffer_callback* is None (the default), buffer views are
426+
serialized into *file* as part of the pickle stream.
427+
428+
If *buffer_callback* is not None, then it can be called any number
429+
of times with a buffer view. If the callback returns a false value
430+
(such as None), the given buffer is out-of-band; otherwise the
431+
buffer is serialized in-band, i.e. inside the pickle stream.
432+
433+
It is an error if *buffer_callback* is not None and *protocol*
434+
is None or smaller than 5.
396435
"""
397436
if protocol is None:
398437
protocol = DEFAULT_PROTOCOL
399438
if protocol < 0:
400439
protocol = HIGHEST_PROTOCOL
401440
elif not 0 <= protocol <= HIGHEST_PROTOCOL:
402441
raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
442+
if buffer_callback is not None and protocol < 5:
443+
raise ValueError("buffer_callback needs protocol >= 5")
444+
self._buffer_callback = buffer_callback
403445
try:
404446
self._file_write = file.write
405447
except AttributeError:
@@ -756,6 +798,46 @@ def save_bytes(self, obj):
756798
self.memoize(obj)
757799
dispatch[bytes] = save_bytes
758800

801+
def save_bytearray(self, obj):
802+
if self.proto < 5:
803+
if not obj: # bytearray is empty
804+
self.save_reduce(bytearray, (), obj=obj)
805+
else:
806+
self.save_reduce(bytearray, (bytes(obj),), obj=obj)
807+
return
808+
n = len(obj)
809+
if n >= self.framer._FRAME_SIZE_TARGET:
810+
self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
811+
else:
812+
self.write(BYTEARRAY8 + pack("<Q", n) + obj)
813+
dispatch[bytearray] = save_bytearray
814+
815+
def save_picklebuffer(self, obj):
816+
if self.proto < 5:
817+
raise PicklingError("PickleBuffer can only pickled with "
818+
"protocol >= 5")
819+
with obj.raw() as m:
820+
if not m.contiguous:
821+
raise PicklingError("PickleBuffer can not be pickled when "
822+
"pointing to a non-contiguous buffer")
823+
in_band = True
824+
if self._buffer_callback is not None:
825+
in_band = bool(self._buffer_callback(obj))
826+
if in_band:
827+
# Write data in-band
828+
# XXX The C implementation avoids a copy here
829+
if m.readonly:
830+
self.save_bytes(m.tobytes())
831+
else:
832+
self.save_bytearray(m.tobytes())
833+
else:
834+
# Write data out-of-band
835+
self.write(NEXT_BUFFER)
836+
if m.readonly:
837+
self.write(READONLY_BUFFER)
838+
839+
dispatch[PickleBuffer] = save_picklebuffer
840+
759841
def save_str(self, obj):
760842
if self.bin:
761843
encoded = obj.encode('utf-8', 'surrogatepass')
@@ -1042,7 +1124,7 @@ def save_type(self, obj):
10421124
class _Unpickler:
10431125

10441126
def __init__(self, file, *, fix_imports=True,
1045-
encoding="ASCII", errors="strict"):
1127+
encoding="ASCII", errors="strict", buffers=None):
10461128
"""This takes a binary file for reading a pickle data stream.
10471129
10481130
The protocol version of the pickle is detected automatically, so
@@ -1061,7 +1143,17 @@ def __init__(self, file, *, fix_imports=True,
10611143
reading, a BytesIO object, or any other custom object that
10621144
meets this interface.
10631145
1064-
Optional keyword arguments are *fix_imports*, *encoding* and
1146+
If *buffers* is not None, it should be an iterable of buffer-enabled
1147+
objects that is consumed each time the pickle stream references
1148+
an out-of-band buffer view. Such buffers have been given in order
1149+
to the *buffer_callback* of a Pickler object.
1150+
1151+
If *buffers* is None (the default), then the buffers are taken
1152+
from the pickle stream, assuming they are serialized there.
1153+
It is an error for *buffers* to be None if the pickle stream
1154+
was produced with a non-None *buffer_callback*.
1155+
1156+
Other optional arguments are *fix_imports*, *encoding* and
10651157
*errors*, which are used to control compatibility support for
10661158
pickle stream generated by Python 2. If *fix_imports* is True,
10671159
pickle will try to map the old Python 2 names to the new names
@@ -1070,6 +1162,7 @@ def __init__(self, file, *, fix_imports=True,
10701162
default to 'ASCII' and 'strict', respectively. *encoding* can be
10711163
'bytes' to read these 8-bit string instances as bytes objects.
10721164
"""
1165+
self._buffers = iter(buffers) if buffers is not None else None
10731166
self._file_readline = file.readline
10741167
self._file_read = file.read
10751168
self.memo = {}
@@ -1090,6 +1183,7 @@ def load(self):
10901183
"%s.__init__()" % (self.__class__.__name__,))
10911184
self._unframer = _Unframer(self._file_read, self._file_readline)
10921185
self.read = self._unframer.read
1186+
self.readinto = self._unframer.readinto
10931187
self.readline = self._unframer.readline
10941188
self.metastack = []
10951189
self.stack = []
@@ -1276,6 +1370,34 @@ def load_binbytes8(self):
12761370
self.append(self.read(len))
12771371
dispatch[BINBYTES8[0]] = load_binbytes8
12781372

1373+
def load_bytearray8(self):
1374+
len, = unpack('<Q', self.read(8))
1375+
if len > maxsize:
1376+
raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
1377+
"of %d bytes" % maxsize)
1378+
b = bytearray(len)
1379+
self.readinto(b)
1380+
self.append(b)
1381+
dispatch[BYTEARRAY8[0]] = load_bytearray8
1382+
1383+
def load_next_buffer(self):
1384+
if self._buffers is None:
1385+
raise UnpicklingError("pickle stream refers to out-of-band data "
1386+
"but no *buffers* argument was given")
1387+
try:
1388+
buf = next(self._buffers)
1389+
except StopIteration:
1390+
raise UnpicklingError("not enough out-of-band buffers")
1391+
self.append(buf)
1392+
dispatch[NEXT_BUFFER[0]] = load_next_buffer
1393+
1394+
def load_readonly_buffer(self):
1395+
buf = self.stack[-1]
1396+
with memoryview(buf) as m:
1397+
if not m.readonly:
1398+
self.stack[-1] = m.toreadonly()
1399+
dispatch[READONLY_BUFFER[0]] = load_readonly_buffer
1400+
12791401
def load_short_binstring(self):
12801402
len = self.read(1)[0]
12811403
data = self.read(len)
@@ -1600,25 +1722,29 @@ def load_stop(self):
16001722

16011723
# Shorthands
16021724

1603-
def _dump(obj, file, protocol=None, *, fix_imports=True):
1604-
_Pickler(file, protocol, fix_imports=fix_imports).dump(obj)
1725+
def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None):
1726+
_Pickler(file, protocol, fix_imports=fix_imports,
1727+
buffer_callback=buffer_callback).dump(obj)
16051728

1606-
def _dumps(obj, protocol=None, *, fix_imports=True):
1729+
def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None):
16071730
f = io.BytesIO()
1608-
_Pickler(f, protocol, fix_imports=fix_imports).dump(obj)
1731+
_Pickler(f, protocol, fix_imports=fix_imports,
1732+
buffer_callback=buffer_callback).dump(obj)
16091733
res = f.getvalue()
16101734
assert isinstance(res, bytes_types)
16111735
return res
16121736

1613-
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict"):
1614-
return _Unpickler(file, fix_imports=fix_imports,
1737+
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict",
1738+
buffers=None):
1739+
return _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
16151740
encoding=encoding, errors=errors).load()
16161741

1617-
def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"):
1742+
def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict",
1743+
buffers=None):
16181744
if isinstance(s, str):
16191745
raise TypeError("Can't load pickle from unicode string")
16201746
file = io.BytesIO(s)
1621-
return _Unpickler(file, fix_imports=fix_imports,
1747+
return _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
16221748
encoding=encoding, errors=errors).load()
16231749

16241750
# Use the faster _pickle if possible

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy