Skip to content

Commit 878e0fb

Browse files
Rogdham, AA-Turner, ZeroIntensity
authored
gh-132983: Remove leftovers from EndlessZstdDecompressor (#133856)
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Peter Bierma <zintensitydev@gmail.com>
1 parent 1a87b6e commit 878e0fb

File tree

1 file changed

+30
-150
lines changed

1 file changed

+30
-150
lines changed

Modules/_zstd/decompressor.c

Lines changed: 30 additions & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -43,20 +43,11 @@ typedef struct {
4343
PyObject *unused_data;
4444

4545
/* 0 if decompressor has (or may have) unconsumed input data, 0 or 1. */
46-
char needs_input;
47-
48-
/* For decompress(), 0 or 1.
49-
1 when both input and output streams are at a frame edge, means a
50-
frame is completely decoded and fully flushed, or the decompressor
51-
just be initialized. */
52-
char at_frame_edge;
46+
bool needs_input;
5347

5448
/* For ZstdDecompressor, 0 or 1.
5549
1 means the end of the first frame has been reached. */
56-
char eof;
57-
58-
/* Used for fast reset above three variables */
59-
char _unused_char_for_align;
50+
bool eof;
6051

6152
/* __init__ has been called, 0 or 1. */
6253
bool initialized;
@@ -258,19 +249,13 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict)
258249
return 0;
259250
}
260251

261-
typedef enum {
262-
TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class
263-
TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function
264-
} decompress_type;
265-
266252
/*
267-
Given the two types of decompressors (defined above),
268-
decompress implementation for <D>, <E>, pseudo code:
253+
Decompress implementation in pseudo code:
269254
270255
initialize_output_buffer
271256
while True:
272257
decompress_data
273-
set_object_flag # .eof for <D>, .at_frame_edge for <E>.
258+
set_object_flag # .eof
274259
275260
if output_buffer_exhausted:
276261
if output_buffer_reached_max_length:
@@ -287,63 +272,19 @@ typedef enum {
287272
flushing to do to complete current frame.
288273
289274
Note, decompressing "an empty input" in any case will make it > 0.
290-
291-
<E> supports multiple frames, has an .at_frame_edge flag, it means both the
292-
input and output streams are at a frame edge. The flag can be set by this
293-
statement:
294-
295-
.at_frame_edge = (zstd_ret == 0) ? 1 : 0
296-
297-
But if decompressing "an empty input" at "a frame edge", zstd_ret will be
298-
non-zero, then .at_frame_edge will be wrongly set to false. To solve this
299-
problem, two AFE checks are needed to ensure that: when at "a frame edge",
300-
empty input will not be decompressed.
301-
302-
// AFE check
303-
if (self->at_frame_edge && in->pos == in->size) {
304-
finish
305-
}
306-
307-
In <E>, if .at_frame_edge is eventually set to true, but input stream has
308-
unconsumed data (in->pos < in->size), then the outer function
309-
stream_decompress() will set .at_frame_edge to false. In this case,
310-
although the output stream is at a frame edge, for the caller, the input
311-
stream is not at a frame edge, see below diagram. This behavior does not
312-
affect the next AFE check, since (in->pos < in->size).
313-
314-
input stream: --------------|---
315-
^
316-
output stream: ====================|
317-
^
318275
*/
319276
static PyObject *
320277
decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
321-
Py_ssize_t max_length,
322-
Py_ssize_t initial_size,
323-
decompress_type type)
278+
Py_ssize_t max_length)
324279
{
325280
size_t zstd_ret;
326281
ZSTD_outBuffer out;
327282
_BlocksOutputBuffer buffer = {.list = NULL};
328283
PyObject *ret;
329284

330-
/* The first AFE check for setting .at_frame_edge flag */
331-
if (type == TYPE_ENDLESS_DECOMPRESSOR) {
332-
if (self->at_frame_edge && in->pos == in->size) {
333-
return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
334-
}
335-
}
336-
337285
/* Initialize the output buffer */
338-
if (initial_size >= 0) {
339-
if (_OutputBuffer_InitWithSize(&buffer, &out, max_length, initial_size) < 0) {
340-
goto error;
341-
}
342-
}
343-
else {
344-
if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) {
345-
goto error;
346-
}
286+
if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) {
287+
goto error;
347288
}
348289
assert(out.pos == 0);
349290

@@ -362,22 +303,11 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
362303
goto error;
363304
}
364305

365-
/* Set .eof/.af_frame_edge flag */
366-
if (type == TYPE_DECOMPRESSOR) {
367-
/* ZstdDecompressor class stops when a frame is decompressed */
368-
if (zstd_ret == 0) {
369-
self->eof = 1;
370-
break;
371-
}
372-
}
373-
else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
374-
/* decompress() function supports multiple frames */
375-
self->at_frame_edge = (zstd_ret == 0) ? 1 : 0;
376-
377-
/* The second AFE check for setting .at_frame_edge flag */
378-
if (self->at_frame_edge && in->pos == in->size) {
379-
break;
380-
}
306+
/* Set .eof flag */
307+
if (zstd_ret == 0) {
308+
/* Stop when a frame is decompressed */
309+
self->eof = 1;
310+
break;
381311
}
382312

383313
/* Need to check out before in. Maybe zstd's internal buffer still has
@@ -415,8 +345,7 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
415345
}
416346

417347
static void
418-
decompressor_reset_session(ZstdDecompressor *self,
419-
decompress_type type)
348+
decompressor_reset_session(ZstdDecompressor *self)
420349
{
421350
// TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here
422351
// and ensure lock is always held
@@ -425,56 +354,28 @@ decompressor_reset_session(ZstdDecompressor *self,
425354
self->in_begin = 0;
426355
self->in_end = 0;
427356

428-
if (type == TYPE_DECOMPRESSOR) {
429-
Py_CLEAR(self->unused_data);
430-
}
357+
Py_CLEAR(self->unused_data);
431358

432359
/* Reset the needs_input and eof flags */
433360
self->needs_input = 1;
434-
self->at_frame_edge = 1;
435361
self->eof = 0;
436-
self->_unused_char_for_align = 0;
437362

438-
/* Resetting session never fail */
363+
/* Resetting session is guaranteed to never fail */
439364
ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only);
440365
}
441366

442367
static PyObject *
443-
stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length,
444-
decompress_type type)
368+
stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length)
445369
{
446-
Py_ssize_t initial_buffer_size = -1;
447370
ZSTD_inBuffer in;
448371
PyObject *ret = NULL;
449372
int use_input_buffer;
450373

451-
if (type == TYPE_DECOMPRESSOR) {
452-
/* Check .eof flag */
453-
if (self->eof) {
454-
PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame.");
455-
assert(ret == NULL);
456-
goto success;
457-
}
458-
}
459-
else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
460-
/* Fast path for the first frame */
461-
if (self->at_frame_edge && self->in_begin == self->in_end) {
462-
/* Read decompressed size */
463-
uint64_t decompressed_size = ZSTD_getFrameContentSize(data->buf, data->len);
464-
465-
/* These two zstd constants always > PY_SSIZE_T_MAX:
466-
ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1)
467-
ZSTD_CONTENTSIZE_ERROR is (0ULL - 2)
468-
469-
Use ZSTD_findFrameCompressedSize() to check complete frame,
470-
prevent allocating too much memory for small input chunk. */
471-
472-
if (decompressed_size <= (uint64_t) PY_SSIZE_T_MAX &&
473-
!ZSTD_isError(ZSTD_findFrameCompressedSize(data->buf, data->len)) )
474-
{
475-
initial_buffer_size = (Py_ssize_t) decompressed_size;
476-
}
477-
}
374+
/* Check .eof flag */
375+
if (self->eof) {
376+
PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame.");
377+
assert(ret == NULL);
378+
return NULL;
478379
}
479380

480381
/* Prepare input buffer w/wo unconsumed data */
@@ -561,30 +462,18 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
561462
assert(in.pos == 0);
562463

563464
/* Decompress */
564-
ret = decompress_impl(self, &in,
565-
max_length, initial_buffer_size,
566-
type);
465+
ret = decompress_impl(self, &in, max_length);
567466
if (ret == NULL) {
568467
goto error;
569468
}
570469

571470
/* Unconsumed input data */
572471
if (in.pos == in.size) {
573-
if (type == TYPE_DECOMPRESSOR) {
574-
if (Py_SIZE(ret) == max_length || self->eof) {
575-
self->needs_input = 0;
576-
}
577-
else {
578-
self->needs_input = 1;
579-
}
472+
if (Py_SIZE(ret) == max_length || self->eof) {
473+
self->needs_input = 0;
580474
}
581-
else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
582-
if (Py_SIZE(ret) == max_length && !self->at_frame_edge) {
583-
self->needs_input = 0;
584-
}
585-
else {
586-
self->needs_input = 1;
587-
}
475+
else {
476+
self->needs_input = 1;
588477
}
589478

590479
if (use_input_buffer) {
@@ -598,10 +487,6 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
598487

599488
self->needs_input = 0;
600489

601-
if (type == TYPE_ENDLESS_DECOMPRESSOR) {
602-
self->at_frame_edge = 0;
603-
}
604-
605490
if (!use_input_buffer) {
606491
/* Discard buffer if it's too small
607492
(resizing it may needlessly copy the current contents) */
@@ -634,16 +519,14 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
634519
}
635520
}
636521

637-
goto success;
522+
return ret;
638523

639524
error:
640525
/* Reset decompressor's states/session */
641-
decompressor_reset_session(self, type);
526+
decompressor_reset_session(self);
642527

643528
Py_CLEAR(ret);
644-
success:
645-
646-
return ret;
529+
return NULL;
647530
}
648531

649532

@@ -668,9 +551,6 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
668551
/* needs_input flag */
669552
self->needs_input = 1;
670553

671-
/* at_frame_edge flag */
672-
self->at_frame_edge = 1;
673-
674554
/* Decompression context */
675555
self->dctx = ZSTD_createDCtx();
676556
if (self->dctx == NULL) {
@@ -837,7 +717,7 @@ _zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self,
837717
/* Thread-safe code */
838718
Py_BEGIN_CRITICAL_SECTION(self);
839719

840-
ret = stream_decompress(self, data, max_length, TYPE_DECOMPRESSOR);
720+
ret = stream_decompress(self, data, max_length);
841721
Py_END_CRITICAL_SECTION();
842722
return ret;
843723
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy