Skip to content

Commit 5858d8c

Browse files
committed
gh-87790: support underscore for formatting fractional part of floats
```pycon
>>> f"{123_456.123_456:_._f}"  # Whole and fractional
'123_456.123_456'
>>> f"{123_456.123_456:_f}"    # Integer component only
'123_456.123456'
>>> f"{123_456.123_456:._f}"   # Fractional component only
'123456.123_456'
>>> f"{123_456.123_456:.4_f}"  # with precision
'123456.1_235'
```
1 parent a726ce7 commit 5858d8c

File tree

4 files changed

+122
-28
lines changed

4 files changed

+122
-28
lines changed

Doc/library/string.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,12 +312,13 @@ non-empty format specification typically modifies the result.
312312
The general form of a *standard format specifier* is:
313313

314314
.. productionlist:: format-spec
315-
format_spec: [[`fill`]`align`][`sign`]["z"]["#"]["0"][`width`][`grouping_option`]["." `precision`][`type`]
315+
format_spec: [[`fill`]`align`][`sign`]["z"]["#"]["0"][`width`][`grouping_option`]["." `precision` [`fraction_grouping`]][`type`]
316316
fill: <any character>
317317
align: "<" | ">" | "=" | "^"
318318
sign: "+" | "-" | " "
319319
width: `~python-grammar:digit`+
320320
grouping_option: "_" | ","
321+
fraction_grouping: "_"
321322
precision: `~python-grammar:digit`+
322323
type: "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%"
323324

@@ -448,6 +449,13 @@ indicates the maximum field size - in other words, how many characters will be
448449
used from the field content. The *precision* is not allowed for integer
449450
presentation types.
450451

452+
The ``'_'`` option after *precision* signals the use of an underscore as a
453+
thousands separator in the fractional part for floating-point presentation
454+
types.
455+
456+
.. versionchanged:: 3.14
457+
Support underscore as a thousands separator for the fractional part.
458+
451459
Finally, the *type* determines how the data should be presented.
452460

453461
The available string presentation types are:

Lib/test/test_float.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,18 @@ def test_format(self):
754754
self.assertEqual(format(INF, 'f'), 'inf')
755755
self.assertEqual(format(INF, 'F'), 'INF')
756756

757+
# underscores
758+
x = 123_456.123_456
759+
self.assertEqual(format(x, '_f'), '123_456.123456')
760+
self.assertEqual(format(x, '._f'), '123456.123_456')
761+
self.assertEqual(format(x, '_._f'), '123_456.123_456')
762+
self.assertEqual(format(x, '.10_f'), '123456.1_234_560_000')
763+
self.assertEqual(format(x, '>21._f'), '       123456.123_456')
764+
self.assertEqual(format(x, '<21._f'), '123456.123_456       ')
765+
self.assertEqual(format(x, '+.11_e'), '+1.23_456_123_456e+05')
766+
767+
self.assertRaises(ValueError, format, x , '._6f')
768+
757769
@support.requires_IEEE_754
758770
def test_format_testfile(self):
759771
with open(format_testfile, encoding="utf-8") as testfile:
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Support underscore as a thousands separator in the fractional part for
2+
floating-point presentation types of the new-style string formatting (with
3+
:func:`format` or :ref:`f-strings`). Patch by Sergey B Kirpichev.

Python/formatter_unicode.c

Lines changed: 98 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ typedef struct {
135135
Py_ssize_t width;
136136
enum LocaleType thousands_separators;
137137
Py_ssize_t precision;
138+
enum LocaleType frac_thousands_separator;
138139
Py_UCS4 type;
139140
} InternalFormatSpec;
140141

@@ -171,6 +172,7 @@ parse_internal_render_format_spec(PyObject *obj,
171172
format->sign = '\0';
172173
format->width = -1;
173174
format->thousands_separators = LT_NO_LOCALE;
175+
format->frac_thousands_separator = LT_NO_LOCALE;
174176
format->precision = -1;
175177
format->type = default_type;
176178

@@ -260,7 +262,16 @@ parse_internal_render_format_spec(PyObject *obj,
260262
/* Overflow error. Exception already set. */
261263
return 0;
262264

263-
/* Not having a precision after a dot is an error. */
265+
if (end-pos && READ_spec(pos) == '_') {
266+
if (consumed == 0) {
267+
format->precision = -1;
268+
}
269+
format->frac_thousands_separator = LT_UNDERSCORE_LOCALE;
270+
++pos;
271+
++consumed;
272+
}
273+
274+
/* Not having a precision or underscore after a dot is an error. */
264275
if (consumed == 0) {
265276
PyErr_Format(PyExc_ValueError,
266277
"Format specifier missing precision");
@@ -402,6 +413,7 @@ fill_padding(_PyUnicodeWriter *writer,
402413
typedef struct {
403414
PyObject *decimal_point;
404415
PyObject *thousands_sep;
416+
PyObject *frac_thousands_sep;
405417
const char *grouping;
406418
char *grouping_buffer;
407419
} LocaleInfo;
@@ -423,6 +435,8 @@ typedef struct {
423435
Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
424436
excluding the decimal itself, if
425437
present. */
438+
Py_ssize_t n_frac;
439+
Py_ssize_t n_grouped_frac_digits;
426440

427441
/* These 2 are not the widths of fields, but are needed by
428442
STRINGLIB_GROUPING. */
@@ -445,24 +459,32 @@ typedef struct {
445459
*/
446460
static void
447461
parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
448-
Py_ssize_t *n_remainder, int *has_decimal)
462+
Py_ssize_t *n_remainder, Py_ssize_t *n_frac, int *has_decimal)
449463
{
450-
Py_ssize_t remainder;
464+
Py_ssize_t frac;
451465
int kind = PyUnicode_KIND(s);
452466
const void *data = PyUnicode_DATA(s);
453467

454-
while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
468+
while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos))) {
455469
++pos;
456-
remainder = pos;
470+
}
471+
frac = pos;
457472

458473
/* Does remainder start with a decimal point? */
459-
*has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
474+
*has_decimal = pos<end && PyUnicode_READ(kind, data, frac) == '.';
460475

461476
/* Skip the decimal point. */
462-
if (*has_decimal)
463-
remainder++;
477+
if (*has_decimal) {
478+
frac++;
479+
pos++;
480+
}
481+
482+
while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos))) {
483+
++pos;
484+
}
464485

465-
*n_remainder = end - remainder;
486+
*n_frac = pos - frac;
487+
*n_remainder = end - pos;
466488
}
467489

468490
/* not all fields of format are used. for example, precision is
@@ -473,18 +495,19 @@ parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
473495
static Py_ssize_t
474496
calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
475497
Py_UCS4 sign_char, Py_ssize_t n_start,
476-
Py_ssize_t n_end, Py_ssize_t n_remainder,
498+
Py_ssize_t n_end, Py_ssize_t n_remainder, Py_ssize_t n_frac,
477499
int has_decimal, const LocaleInfo *locale,
478500
const InternalFormatSpec *format, Py_UCS4 *maxchar)
479501
{
480502
Py_ssize_t n_non_digit_non_padding;
481503
Py_ssize_t n_padding;
482504

483-
spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
505+
spec->n_digits = n_end - n_start - n_frac - n_remainder - (has_decimal?1:0);
484506
spec->n_lpadding = 0;
485507
spec->n_prefix = n_prefix;
486508
spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
487509
spec->n_remainder = n_remainder;
510+
spec->n_frac = n_frac;
488511
spec->n_spadding = 0;
489512
spec->n_rpadding = 0;
490513
spec->sign = '\0';
@@ -530,7 +553,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
530553

531554
/* The number of chars used for non-digits and non-padding. */
532555
n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
533-
spec->n_remainder;
556+
+ spec->n_frac + spec->n_remainder;
534557

535558
/* min_width can go negative, that's okay. format->width == -1 means
536559
we don't care. */
@@ -557,12 +580,29 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
557580
*maxchar = Py_MAX(*maxchar, grouping_maxchar);
558581
}
559582

583+
if (spec->n_frac == 0) {
584+
spec->n_grouped_frac_digits = 0;
585+
}
586+
else {
587+
Py_UCS4 grouping_maxchar;
588+
spec->n_grouped_frac_digits = _PyUnicode_InsertThousandsGrouping(
589+
NULL, 0,
590+
NULL, 0, spec->n_frac,
591+
spec->n_frac,
592+
locale->grouping, locale->frac_thousands_sep, &grouping_maxchar);
593+
if (spec->n_grouped_frac_digits == -1) {
594+
return -1;
595+
}
596+
*maxchar = Py_MAX(*maxchar, grouping_maxchar);
597+
}
598+
560599
/* Given the desired width and the total of digit and non-digit
561600
space we consume, see if we need any padding. format->width can
562601
be negative (meaning no padding), but this code still works in
563602
that case. */
564603
n_padding = format->width -
565-
(n_non_digit_non_padding + spec->n_grouped_digits);
604+
(n_non_digit_non_padding + spec->n_grouped_digits
605+
+ spec->n_grouped_frac_digits - spec->n_frac);
566606
if (n_padding > 0) {
567607
/* Some padding is needed. Determine if it's left, space, or right. */
568608
switch (format->align) {
@@ -593,7 +633,7 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
593633

594634
return spec->n_lpadding + spec->n_sign + spec->n_prefix +
595635
spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
596-
spec->n_remainder + spec->n_rpadding;
636+
spec->n_grouped_frac_digits + spec->n_remainder + spec->n_rpadding;
597637
}
598638

599639
/* Fill in the digit parts of a number's string representation,
@@ -677,6 +717,19 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
677717
d_pos += 1;
678718
}
679719

720+
if (spec->n_frac) {
721+
r = _PyUnicode_InsertThousandsGrouping(
722+
writer, spec->n_grouped_frac_digits,
723+
digits, d_pos, spec->n_frac, spec->n_frac,
724+
locale->grouping, locale->frac_thousands_sep, NULL);
725+
if (r == -1) {
726+
return -1;
727+
}
728+
assert(r == spec->n_grouped_frac_digits);
729+
d_pos += spec->n_frac;
730+
writer->pos += spec->n_grouped_frac_digits;
731+
}
732+
680733
if (spec->n_remainder) {
681734
_PyUnicode_FastCopyCharacters(
682735
writer->buffer, writer->pos,
@@ -701,7 +754,8 @@ static const char no_grouping[1] = {CHAR_MAX};
701754
LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
702755
LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
703756
static int
704-
get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
757+
get_locale_info(enum LocaleType type, enum LocaleType frac_type,
758+
LocaleInfo *locale_info)
705759
{
706760
switch (type) {
707761
case LT_CURRENT_LOCALE: {
@@ -746,6 +800,15 @@ get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
746800
locale_info->grouping = no_grouping;
747801
break;
748802
}
803+
if (frac_type == LT_UNDERSCORE_LOCALE) {
804+
locale_info->frac_thousands_sep = PyUnicode_FromOrdinal('_');
805+
if (locale_info->grouping == no_grouping) {
806+
locale_info->grouping = "\3";
807+
}
808+
}
809+
else {
810+
locale_info->frac_thousands_sep = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
811+
}
749812
return 0;
750813
}
751814

@@ -754,6 +817,7 @@ free_locale_info(LocaleInfo *locale_info)
754817
{
755818
Py_XDECREF(locale_info->decimal_point);
756819
Py_XDECREF(locale_info->thousands_sep);
820+
Py_XDECREF(locale_info->frac_thousands_sep);
757821
PyMem_Free(locale_info->grouping_buffer);
758822
}
759823

@@ -1005,13 +1069,13 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
10051069

10061070
/* Determine the grouping, separator, and decimal point, if any. */
10071071
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1008-
format->thousands_separators,
1072+
format->thousands_separators, 0,
10091073
&locale) == -1)
10101074
goto done;
10111075

10121076
/* Calculate how much memory we'll need. */
10131077
n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
1014-
inumeric_chars + n_digits, n_remainder, 0,
1078+
inumeric_chars + n_digits, n_remainder, 0, 0,
10151079
&locale, format, &maxchar);
10161080
if (n_total == -1) {
10171081
goto done;
@@ -1046,6 +1110,7 @@ format_float_internal(PyObject *value,
10461110
char *buf = NULL; /* buffer returned from PyOS_double_to_string */
10471111
Py_ssize_t n_digits;
10481112
Py_ssize_t n_remainder;
1113+
Py_ssize_t n_frac;
10491114
Py_ssize_t n_total;
10501115
int has_decimal;
10511116
double val;
@@ -1125,7 +1190,8 @@ format_float_internal(PyObject *value,
11251190
if (format->sign != '+' && format->sign != ' '
11261191
&& format->width == -1
11271192
&& format->type != 'n'
1128-
&& !format->thousands_separators)
1193+
&& !format->thousands_separators
1194+
&& !format->frac_thousands_separator)
11291195
{
11301196
/* Fast path */
11311197
result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
@@ -1151,18 +1217,20 @@ format_float_internal(PyObject *value,
11511217

11521218
/* Determine if we have any "remainder" (after the digits, might include
11531219
decimal or exponent or both (or neither)) */
1154-
parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1220+
parse_number(unicode_tmp, index, index + n_digits,
1221+
&n_remainder, &n_frac, &has_decimal);
11551222

11561223
/* Determine the grouping, separator, and decimal point, if any. */
11571224
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
11581225
format->thousands_separators,
1226+
format->frac_thousands_separator,
11591227
&locale) == -1)
11601228
goto done;
11611229

11621230
/* Calculate how much memory we'll need. */
11631231
n_total = calc_number_widths(&spec, 0, sign_char, index,
1164-
index + n_digits, n_remainder, has_decimal,
1165-
&locale, format, &maxchar);
1232+
index + n_digits, n_remainder, n_frac,
1233+
has_decimal, &locale, format, &maxchar);
11661234
if (n_total == -1) {
11671235
goto done;
11681236
}
@@ -1202,6 +1270,8 @@ format_complex_internal(PyObject *value,
12021270
Py_ssize_t n_im_digits;
12031271
Py_ssize_t n_re_remainder;
12041272
Py_ssize_t n_im_remainder;
1273+
Py_ssize_t n_re_frac;
1274+
Py_ssize_t n_im_frac;
12051275
Py_ssize_t n_re_total;
12061276
Py_ssize_t n_im_total;
12071277
int re_has_decimal;
@@ -1330,13 +1400,14 @@ format_complex_internal(PyObject *value,
13301400
/* Determine if we have any "remainder" (after the digits, might include
13311401
decimal or exponent or both (or neither)) */
13321402
parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1333-
&n_re_remainder, &re_has_decimal);
1403+
&n_re_remainder, &n_re_frac, &re_has_decimal);
13341404
parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1335-
&n_im_remainder, &im_has_decimal);
1405+
&n_im_remainder, &n_im_frac, &im_has_decimal);
13361406

13371407
/* Determine the grouping, separator, and decimal point, if any. */
13381408
if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
13391409
format->thousands_separators,
1410+
format->frac_thousands_separator,
13401411
&locale) == -1)
13411412
goto done;
13421413

@@ -1349,8 +1420,8 @@ format_complex_internal(PyObject *value,
13491420
/* Calculate how much memory we'll need. */
13501421
n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
13511422
i_re, i_re + n_re_digits, n_re_remainder,
1352-
re_has_decimal, &locale, &tmp_format,
1353-
&maxchar);
1423+
n_re_frac, re_has_decimal, &locale,
1424+
&tmp_format, &maxchar);
13541425
if (n_re_total == -1) {
13551426
goto done;
13561427
}
@@ -1362,8 +1433,8 @@ format_complex_internal(PyObject *value,
13621433
tmp_format.sign = '+';
13631434
n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
13641435
i_im, i_im + n_im_digits, n_im_remainder,
1365-
im_has_decimal, &locale, &tmp_format,
1366-
&maxchar);
1436+
n_im_frac, im_has_decimal, &locale,
1437+
&tmp_format, &maxchar);
13671438
if (n_im_total == -1) {
13681439
goto done;
13691440
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy