@@ -52,9 +52,6 @@ const mp_obj_t mp_const_empty_bytes;
52
52
// Use this macro to extract a string object's data pointer and length.
53
53
/*
 * GET_STR_DATA_LEN(str_obj_in, str_data, str_len)
 *
 * Declares `const byte *str_data` and `uint str_len` in the current scope and
 * fills them in from str_obj_in:
 *   - when MP_OBJ_IS_QSTR(str_obj_in) is true, str_data is the result of
 *     qstr_data(), which is also handed &str_len to store the length;
 *   - otherwise str_obj_in is cast to mp_obj_str_t* and its `len` and `data`
 *     fields are read.
 *
 * The expansion is two declarations followed by an if/else, so it must be
 * used as a statement where declarations are permitted, and str_data/str_len
 * must be names not already declared in that scope. str_obj_in is evaluated
 * more than once; pass an expression without side effects.
 *
 * There must be no whitespace between the macro name and the opening
 * parenthesis of the parameter list: with a space the preprocessor defines
 * an object-like macro instead of a function-like one.
 */
#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) \
    const byte *str_data; \
    uint str_len; \
    if (MP_OBJ_IS_QSTR(str_obj_in)) { \
        str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); \
    } else { \
        str_len = ((mp_obj_str_t*)(str_obj_in))->len; \
        str_data = ((mp_obj_str_t*)(str_obj_in))->data; \
    }
54
54
55
- // use this macro to extract the string data and both lengths
56
- #define GET_STR_INFO (str_obj_in , str_data , str_len , str_charlen ) const byte *str_data; uint str_len, str_charlen; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); str_charlen = qstr_charlen(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_charlen = ((mp_obj_str_t*)str_obj_in)->charlen; str_data = ((mp_obj_str_t*)str_obj_in)->data; }
57
-
58
55
STATIC mp_obj_t mp_obj_new_str_iterator (mp_obj_t str );
59
56
STATIC mp_obj_t mp_obj_new_bytes_iterator (mp_obj_t str );
60
57
STATIC NORETURN void bad_implicit_conversion (mp_obj_t self_in );
@@ -365,7 +362,7 @@ STATIC mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
365
362
366
363
STATIC mp_obj_t str_subscr (mp_obj_t self_in , mp_obj_t index , mp_obj_t value ) {
367
364
mp_obj_type_t * type = mp_obj_get_type (self_in );
368
- GET_STR_INFO (self_in , self_data , self_len , self_charlen );
365
+ GET_STR_DATA_LEN (self_in , self_data , self_len );
369
366
if (value == MP_OBJ_SENTINEL ) {
370
367
// load
371
368
#if MICROPY_PY_BUILTINS_SLICE
@@ -378,7 +375,8 @@ STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
378
375
return mp_obj_new_str_of_type (type , self_data + slice .start , slice .stop - slice .start );
379
376
}
380
377
#endif
381
- uint index_val = mp_get_index (type , self_charlen , index , false);
378
+ // TODO: Don't use mp_get_index() here
379
+ uint index_val = mp_get_index (type , unichar_charlen ((const char * )self_data , self_len ), index , false);
382
380
if (type == & mp_type_bytes ) {
383
381
return MP_OBJ_NEW_SMALL_INT ((mp_small_int_t )self_data [index_val ]);
384
382
} else {
@@ -1734,7 +1732,7 @@ const mp_obj_type_t mp_type_bytes = {
1734
1732
};
1735
1733
1736
1734
// the zero-length bytes
1737
- STATIC const mp_obj_str_t empty_bytes_obj = {{& mp_type_bytes }, 0 , 0 , 0 , NULL };
1735
+ STATIC const mp_obj_str_t empty_bytes_obj = {{& mp_type_bytes }, 0 , 0 , NULL };
1738
1736
const mp_obj_t mp_const_empty_bytes = (mp_obj_t )& empty_bytes_obj ;
1739
1737
1740
1738
mp_obj_t mp_obj_str_builder_start (const mp_obj_type_t * type , uint len , byte * * data ) {
@@ -1761,20 +1759,6 @@ mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uin
1761
1759
o -> base .type = type ;
1762
1760
o -> len = len ;
1763
1761
if (data ) {
1764
- if (MP_OBJ_IS_STR (o )) {
1765
- // Count non-continuation bytes so we know how long the string is in characters.
1766
- const byte * endptr , * top = data + len ;
1767
- uint charlen = 0 ;
1768
- for (endptr = data ; endptr < top ; ++ endptr ) {
1769
- if (!UTF8_IS_CONT (* endptr )) {
1770
- ++ charlen ;
1771
- }
1772
- }
1773
- o -> charlen = charlen ;
1774
- } else {
1775
- // For byte strings, the 'character' length (really the "exposed length" or "Python length") equals the byte length.
1776
- o -> charlen = len ;
1777
- }
1778
1762
o -> hash = qstr_compute_hash (data , len );
1779
1763
byte * p = m_new (byte , len + 1 );
1780
1764
o -> data = p ;
@@ -1844,8 +1828,8 @@ uint mp_obj_str_get_hash(mp_obj_t self_in) {
1844
1828
uint mp_obj_str_get_len (mp_obj_t self_in ) {
1845
1829
// TODO: the type is checked twice, once in obj.c and once here
1846
1830
if (MP_OBJ_IS_STR (self_in ) || MP_OBJ_IS_TYPE (self_in , & mp_type_bytes )) {
1847
- GET_STR_INFO (self_in , self_data , self_len , self_charlen ); ( void ) self_data ;
1848
- return self_charlen ;
1831
+ GET_STR_DATA_LEN (self_in , self_data , self_len ) ;
1832
+ return unichar_charlen (( const char * ) self_data , self_len ) ;
1849
1833
} else {
1850
1834
bad_implicit_conversion (self_in );
1851
1835
}
0 commit comments