Skip to content

Commit 0d339a1

Browse files
committed
Support slicing in str_index_to_ptr, and fix a bounds error
1 parent 24371c7 commit 0d339a1

File tree

1 file changed

+19
-10
lines changed

1 file changed

+19
-10
lines changed

py/objstr.c

Lines changed: 19 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -360,8 +360,9 @@ STATIC mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
360360
return MP_OBJ_NULL; // op not supported
361361
}
362362

363-
// Convert an index into a pointer to its lead byte, or raise IndexError if out of bounds
364-
STATIC const char *str_index_to_ptr(const char *self_data, uint self_len, mp_obj_t index) {
363+
// Convert an index into a pointer to its lead byte. Out of bounds indexing will raise IndexError or
364+
// be capped to the first/last character of the string, depending on is_slice.
365+
STATIC const char *str_index_to_ptr(const char *self_data, uint self_len, mp_obj_t index, bool is_slice) {
365366
machine_int_t i;
366367
// Copied from mp_get_index; I don't want bounds checking, just give me
367368
// the integer as-is. (I can't bounds-check without scanning the whole
@@ -377,29 +378,37 @@ STATIC const char *str_index_to_ptr(const char *self_data, uint self_len, mp_obj
377378
// Negative indexing is performed by counting from the end of the string.
378379
for (s = top - 1; i; --s) {
379380
if (s < self_data) {
381+
if (is_slice) {
382+
return self_data;
383+
}
380384
nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_IndexError, "string index out of range"));
381385
}
382386
if (!UTF8_IS_CONT(*s)) {
383387
++i;
384388
}
385389
}
386390
++s;
391+
} else if (!i) {
392+
return self_data; // Shortcut - str[0] is its base pointer
387393
} else {
388394
// Positive indexing, correspondingly, counts from the start of the string.
389395
// It's assumed that negative indexing will generally be used with small
390396
// absolute values (eg str[-1], not str[-1000000]), which means it'll be
391397
// more efficient this way.
392-
for (s = self_data; i; ++s) {
398+
for (s = self_data; true; ++s) {
393399
if (s >= top) {
400+
if (is_slice) {
401+
while (UTF8_IS_CONT(*--s));
402+
return s;
403+
}
394404
nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_IndexError, "string index out of range"));
395405
}
396-
if (!UTF8_IS_CONT(*s)) {
397-
--i;
406+
while (UTF8_IS_CONT(*s)) {
407+
++s;
408+
}
409+
if (!i--) {
410+
return s;
398411
}
399-
}
400-
// Skip continuation bytes after the last lead byte
401-
while (UTF8_IS_CONT(*s)) {
402-
++s;
403412
}
404413
}
405414
return s;
@@ -424,7 +433,7 @@ STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
424433
uint index_val = mp_get_index(type, self_len, index, false);
425434
return MP_OBJ_NEW_SMALL_INT((mp_small_int_t)self_data[index_val]);
426435
}
427-
const char *s = str_index_to_ptr((const char *)self_data, self_len, index);
436+
const char *s = str_index_to_ptr((const char *)self_data, self_len, index, false);
428437
int len = 1;
429438
if (UTF8_IS_NONASCII(*s)) {
430439
// Count the number of 1 bits (after the first)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy