From 0d49ccb66c544820c8b75e11c5a66eb8973b9e6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 19 Dec 2024 21:43:08 +0100 Subject: [PATCH 01/27] improve performance of UUIDs creation --- Lib/uuid.py | 84 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 9c6ad9643cf6d5..96d48ce04730a1 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -85,6 +85,14 @@ class SafeUUID: unknown = None +_RFC_4122_CLEARFLAGS_MASK = 0xffff_ffff_ffff_0fff_3fff_ffff_ffff_ffff +_RFC_4122_VERSION_1_FLAGS = 0x0000_0000_0000_1000_8000_0000_0000_0000 +_RFC_4122_VERSION_3_FLAGS = 0x0000_0000_0000_3000_8000_0000_0000_0000 +_RFC_4122_VERSION_4_FLAGS = 0x0000_0000_0000_4000_8000_0000_0000_0000 +_RFC_4122_VERSION_5_FLAGS = 0x0000_0000_0000_5000_8000_0000_0000_0000 +_RFC_4122_VERSION_8_FLAGS = 0x0000_0000_0000_8000_8000_0000_0000_0000 + + class UUID: """Instances of the UUID class represent UUIDs as specified in RFC 4122. UUID objects are immutable, hashable, and usable as dictionary keys. @@ -174,45 +182,49 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, if [hex, bytes, bytes_le, fields, int].count(None) != 4: raise TypeError('one of the hex, bytes, bytes_le, fields, ' 'or int arguments must be given') - if hex is not None: + if int is not None: + pass + elif hex is not None: hex = hex.replace('urn:', '').replace('uuid:', '') hex = hex.strip('{}').replace('-', '') if len(hex) != 32: raise ValueError('badly formed hexadecimal UUID string') int = int_(hex, 16) - if bytes_le is not None: + elif bytes_le is not None: if len(bytes_le) != 16: raise ValueError('bytes_le is not a 16-char string') + assert isinstance(bytes_le, bytes_), repr(bytes_le) bytes = (bytes_le[4-1::-1] + bytes_le[6-1:4-1:-1] + bytes_le[8-1:6-1:-1] + bytes_le[8:]) - if bytes is not None: + int = int_.from_bytes(bytes) + elif bytes is not None: if len(bytes) != 16: raise ValueError('bytes is not a 16-char string') assert isinstance(bytes, bytes_), repr(bytes) int = int_.from_bytes(bytes) # big endian - if fields is not None: + elif fields is not None: if len(fields) != 6: raise ValueError('fields is not a 6-tuple') (time_low, time_mid, time_hi_version, clock_seq_hi_variant, clock_seq_low, node) = fields - if not 0 <= time_low < 1<<32: + if time_low < 0 or time_low > 0xffff_ffff: raise ValueError('field 1 out of range (need a 32-bit value)') - if not 0 <= time_mid < 1<<16: + if time_mid < 0 or time_mid > 0xffff: raise ValueError('field 2 out of range (need a 16-bit value)') - if not 0 <= time_hi_version < 1<<16: + if time_hi_version < 0 or time_hi_version > 0xffff: raise ValueError('field 3 out of range (need a 16-bit value)') - if not 0 <= clock_seq_hi_variant < 1<<8: + if clock_seq_hi_variant < 0 or clock_seq_hi_variant > 0xff: raise ValueError('field 4 out of range (need an 8-bit value)') - if not 0 <= clock_seq_low < 1<<8: + if clock_seq_low < 0 or clock_seq_low > 0xff: raise ValueError('field 5 out of range (need an 8-bit value)') - if not 0 <= node < 1<<48: + if node < 0 or node > 0xffff_ffff_ffff: raise ValueError('field 6 out of range (need a 48-bit value)') clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low int = ((time_low << 96) | (time_mid << 80) | (time_hi_version << 64) | (clock_seq << 48) | node) - if int is not None: - if not 0 <= int < 1<<128: - raise ValueError('int is out of range (need a 128-bit value)') + # "x < a or int > b" is slightly faster than "not (a <= x <= b)" + if int < 0 or int > 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff: + raise ValueError('int is out of range (need a 128-bit value)') if version is not None: if not 1 <= version <= 8: raise ValueError('illegal version number') @@ -686,38 +698,52 @@ def uuid1(node=None, clock_seq=None): if clock_seq is None: import random clock_seq = random.getrandbits(14) # instead of stable storage + else: + clock_seq = clock_seq & 0x3fff time_low = timestamp & 0xffffffff time_mid = (timestamp >> 32) & 0xffff time_hi_version = (timestamp >> 48) & 0x0fff - clock_seq_low = clock_seq & 0xff - clock_seq_hi_variant = (clock_seq >> 8) & 0x3f if node is None: node = getnode() - return UUID(fields=(time_low, time_mid, time_hi_version, - clock_seq_hi_variant, clock_seq_low, node), version=1) + int_uuid_1 = ((time_low << 96) | (time_mid << 80) | + (time_hi_version << 64) | (clock_seq << 48) | node) + # by construction, the variant and version bits are already cleared + int_uuid_1 |= _RFC_4122_VERSION_1_FLAGS + return UUID(int=int_uuid_1, version=None) def uuid3(namespace, name): """Generate a UUID from the MD5 hash of a namespace UUID and a name.""" if isinstance(name, str): name = bytes(name, "utf-8") - from hashlib import md5 - digest = md5( - namespace.bytes + name, - usedforsecurity=False - ).digest() - return UUID(bytes=digest[:16], version=3) + # HACL*-based MD5 is slightly faster than its OpenSSL version, + # and 'import X; X.Y' is slightly faster than 'from X import Y'. + import _md5 + h = _md5.md5(namespace.bytes + name, usedforsecurity=False) + assert len(h.digest()) == 16 + int_uuid_3 = int_.from_bytes(h.digest()) + int_uuid_3 &= _RFC_4122_CLEARFLAGS_MASK + int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS + return UUID(int=int_uuid_3, version=None) def uuid4(): """Generate a random UUID.""" - return UUID(bytes=os.urandom(16), version=4) + int_uuid_4 = int_.from_bytes(os.urandom(16)) + int_uuid_4 &= _RFC_4122_CLEARFLAGS_MASK + int_uuid_4 |= _RFC_4122_VERSION_4_FLAGS + return UUID(int=int_uuid_4, version=None) def uuid5(namespace, name): """Generate a UUID from the SHA-1 hash of a namespace UUID and a name.""" if isinstance(name, str): name = bytes(name, "utf-8") - from hashlib import sha1 - hash = sha1(namespace.bytes + name).digest() - return UUID(bytes=hash[:16], version=5) + # OpenSSL-based SHA-1 is slightly faster than its HACL* version, + # and 'import X; X.Y' is slightly faster than 'from X import Y'. + import hashlib + h = hashlib.sha1(namespace.bytes + name, usedforsecurity=False) + int_uuid_5 = int_.from_bytes(h.digest()[:16]) + int_uuid_5 &= _RFC_4122_CLEARFLAGS_MASK + int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS + return UUID(int=int_uuid_5, version=None) def uuid8(a=None, b=None, c=None): """Generate a UUID from three custom blocks. @@ -740,7 +766,9 @@ def uuid8(a=None, b=None, c=None): int_uuid_8 = (a & 0xffff_ffff_ffff) << 80 int_uuid_8 |= (b & 0xfff) << 64 int_uuid_8 |= c & 0x3fff_ffff_ffff_ffff - return UUID(int=int_uuid_8, version=8) + # by construction, the variant and version bits are already cleared + int_uuid_8 |= _RFC_4122_VERSION_8_FLAGS + return UUID(int=int_uuid_8, version=None) def main(): """Run the uuid command line interface.""" From 603335fc13429a7dcf3358131557cdca3e8645b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 19 Dec 2024 22:59:47 +0100 Subject: [PATCH 02/27] add What's New entry --- Doc/whatsnew/3.14.rst | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index d13cd2d5173a04..013b7eb085a88e 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -659,6 +659,28 @@ io file's bytes in full. (Contributed by Cody Maloney and Victor Stinner in :gh:`120754` and :gh:`90102`.) + +uuid +---- + +* Improve generations of UUID objects via their dedicated functions: + + * For a given 48-bit hardware address *node* and a given 14-bit + clock sequence *clock_seq*, :func:`uuid1(node=node) ` + and :func:`uuid1(clock_seq=clock_seq) ` are 35% faster. + Performances for :func:`~uuid.uuid1` remain unchanged when neither + the hardware address nor the clock sequence is specified. + * :func:`~uuid.uuid3` is 27% faster for 16-byte names and 8% faster + for 1024-byte names. Performances for longer names remain unchanged. + * :func:`~uuid.uuid5` is 24% faster for 16-byte names and 11% faster + for 1024-byte names. Performances for longer names remain unchanged. + * :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 20% faster. + + Overall, dedicated generation of UUID objects is 20% faster. + + (Contributed by Bénédikt Tran in :gh:`XXX`.) + + Deprecated ========== From 154ff8b7b5f2bf51ecef886422100e1a5e466b60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Dec 2024 11:13:01 +0100 Subject: [PATCH 03/27] blurb --- .../next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst diff --git a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst new file mode 100644 index 00000000000000..b7c98469407fbb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst @@ -0,0 +1,2 @@ +Improve generations of UUID objects via their dedicated functions by 20%. +Patch by Bénédikt Tran. From b965887569ad422d304e53df14043a70c8e4030d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Dec 2024 11:34:43 +0100 Subject: [PATCH 04/27] fix issue number --- Doc/whatsnew/3.14.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 013b7eb085a88e..443636fe1bf408 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -678,7 +678,7 @@ uuid Overall, dedicated generation of UUID objects is 20% faster. - (Contributed by Bénédikt Tran in :gh:`XXX`.) + (Contributed by Bénédikt Tran in :gh:`128150`.) Deprecated From a8a1894a7f4fb4f034036c4ebcee043ffa4d057c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Dec 2024 11:34:59 +0100 Subject: [PATCH 05/27] fix typos --- Doc/whatsnew/3.14.rst | 3 ++- .../Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 443636fe1bf408..359cd1e26dd744 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -663,7 +663,8 @@ io uuid ---- -* Improve generations of UUID objects via their dedicated functions: +* Improve generation of :class:`~uuid.UUID` objects via their dedicated + functions: * For a given 48-bit hardware address *node* and a given 14-bit clock sequence *clock_seq*, :func:`uuid1(node=node) ` diff --git a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst index b7c98469407fbb..1a17fa27535004 100644 --- a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst +++ b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst @@ -1,2 +1,2 @@ -Improve generations of UUID objects via their dedicated functions by 20%. -Patch by Bénédikt Tran. +Improve generation of :class:`~uuid.UUID` objects via their dedicated +functions by 20%. Patch by Bénédikt Tran. From c8aa75256d80b1de87fb7c874b723e3f73ab3e38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Dec 2024 11:38:08 +0100 Subject: [PATCH 06/27] ensure 14-bit clock sequence --- Lib/uuid.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 96d48ce04730a1..b47d9fc64c0bc4 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -698,13 +698,12 @@ def uuid1(node=None, clock_seq=None): if clock_seq is None: import random clock_seq = random.getrandbits(14) # instead of stable storage - else: - clock_seq = clock_seq & 0x3fff time_low = timestamp & 0xffffffff time_mid = (timestamp >> 32) & 0xffff time_hi_version = (timestamp >> 48) & 0x0fff if node is None: node = getnode() + clock_seq = clock_seq & 0x3fff int_uuid_1 = ((time_low << 96) | (time_mid << 80) | (time_hi_version << 64) | (clock_seq << 48) | node) # by construction, the variant and version bits are already cleared From a2278b88337abfa2ea42bb000a081386fee3adb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Dec 2024 15:18:44 +0100 Subject: [PATCH 07/27] add dedicated private fast constructor --- Lib/uuid.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index b47d9fc64c0bc4..56071b58993f09 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -237,6 +237,15 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, object.__setattr__(self, 'int', int) object.__setattr__(self, 'is_safe', is_safe) + @classmethod + def _from_int(cls, int, *, is_safe=SafeUUID.unknown): + self = cls.__new__(cls) + if int < 0 or int > 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff: + raise ValueError('int is out of range (need a 128-bit value)') + object.__setattr__(self, 'int', int) + object.__setattr__(self, 'is_safe', is_safe) + return self + def __getstate__(self): d = {'int': self.int} if self.is_safe != SafeUUID.unknown: @@ -722,14 +731,14 @@ def uuid3(namespace, name): int_uuid_3 = int_.from_bytes(h.digest()) int_uuid_3 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS - return UUID(int=int_uuid_3, version=None) + return UUID._from_int(int_uuid_3) def uuid4(): """Generate a random UUID.""" int_uuid_4 = int_.from_bytes(os.urandom(16)) int_uuid_4 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_4 |= _RFC_4122_VERSION_4_FLAGS - return UUID(int=int_uuid_4, version=None) + return UUID._from_int(int_uuid_4) def uuid5(namespace, name): """Generate a UUID from the SHA-1 hash of a namespace UUID and a name.""" @@ -742,7 +751,7 @@ def uuid5(namespace, name): int_uuid_5 = int_.from_bytes(h.digest()[:16]) int_uuid_5 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS - return UUID(int=int_uuid_5, version=None) + return UUID._from_int(int_uuid_5) def uuid8(a=None, b=None, c=None): """Generate a UUID from three custom blocks. @@ -767,7 +776,7 @@ def uuid8(a=None, b=None, c=None): int_uuid_8 |= c & 0x3fff_ffff_ffff_ffff # by construction, the variant and version bits are already cleared int_uuid_8 |= _RFC_4122_VERSION_8_FLAGS - return UUID(int=int_uuid_8, version=None) + return UUID._from_int(int_uuid_8) def main(): """Run the uuid command line interface.""" From 0710549d83f22560726e5f09bef0c3013fd838c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Dec 2024 15:18:48 +0100 Subject: [PATCH 08/27] revert UUIDv1 construction --- Doc/whatsnew/3.14.rst | 14 +++++--------- Lib/uuid.py | 10 ++++------ .../2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst | 4 ++-- 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 7783064465c745..8d55e9e3b2af2c 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -675,18 +675,14 @@ uuid * Improve generation of :class:`~uuid.UUID` objects via their dedicated functions: - * For a given 48-bit hardware address *node* and a given 14-bit - clock sequence *clock_seq*, :func:`uuid1(node=node) ` - and :func:`uuid1(clock_seq=clock_seq) ` are 35% faster. - Performances for :func:`~uuid.uuid1` remain unchanged when neither - the hardware address nor the clock sequence is specified. - * :func:`~uuid.uuid3` is 27% faster for 16-byte names and 8% faster + * :func:`~uuid.uuid3` is 40% faster for 16-byte names and 10% faster for 1024-byte names. Performances for longer names remain unchanged. - * :func:`~uuid.uuid5` is 24% faster for 16-byte names and 11% faster + * :func:`~uuid.uuid5` is 38% faster for 16-byte names and 21% faster for 1024-byte names. Performances for longer names remain unchanged. - * :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 20% faster. + * :func:`~uuid.uuid4` is 31% faster and :func:`~uuid.uuid8` is 37% faster. - Overall, dedicated generation of UUID objects is 20% faster. + Overall, dedicated generation of UUID objects version 3, 4, 5, and 8 is + roughly 30% faster. (Contributed by Bénédikt Tran in :gh:`128150`.) diff --git a/Lib/uuid.py b/Lib/uuid.py index 56071b58993f09..d68007ba295ca2 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -710,14 +710,12 @@ def uuid1(node=None, clock_seq=None): time_low = timestamp & 0xffffffff time_mid = (timestamp >> 32) & 0xffff time_hi_version = (timestamp >> 48) & 0x0fff + clock_seq_low = clock_seq & 0xff + clock_seq_hi_variant = (clock_seq >> 8) & 0x3f if node is None: node = getnode() - clock_seq = clock_seq & 0x3fff - int_uuid_1 = ((time_low << 96) | (time_mid << 80) | - (time_hi_version << 64) | (clock_seq << 48) | node) - # by construction, the variant and version bits are already cleared - int_uuid_1 |= _RFC_4122_VERSION_1_FLAGS - return UUID(int=int_uuid_1, version=None) + return UUID(fields=(time_low, time_mid, time_hi_version, + clock_seq_hi_variant, clock_seq_low, node), version=1) def uuid3(namespace, name): """Generate a UUID from the MD5 hash of a namespace UUID and a name.""" diff --git a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst index 1a17fa27535004..04c744fb2ba54f 100644 --- a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst +++ b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst @@ -1,2 +1,2 @@ -Improve generation of :class:`~uuid.UUID` objects via their dedicated -functions by 20%. Patch by Bénédikt Tran. +Improve generation of :class:`~uuid.UUID` objects version 3, 4, 5, and 8 +via their dedicated functions by 30%. Patch by Bénédikt Tran. From 5b6922f554f55659944995403a739797c806ef89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 22 Dec 2024 11:08:19 +0100 Subject: [PATCH 09/27] change eager check into an assertion check for internal constructor --- Lib/uuid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index d68007ba295ca2..41283aebce91ca 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -239,9 +239,9 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, @classmethod def _from_int(cls, int, *, is_safe=SafeUUID.unknown): + """Internal use only.""" + assert int >= 0 and int <= 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff self = cls.__new__(cls) - if int < 0 or int > 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff: - raise ValueError('int is out of range (need a 128-bit value)') object.__setattr__(self, 'int', int) object.__setattr__(self, 'is_safe', is_safe) return self From e631593bfb46560a42afa27b2bd39c021467a68d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 22 Dec 2024 11:19:39 +0100 Subject: [PATCH 10/27] update performance results --- Doc/whatsnew/3.14.rst | 8 ++++---- .../2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 8d55e9e3b2af2c..4cfa829530cf6b 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -675,14 +675,14 @@ uuid * Improve generation of :class:`~uuid.UUID` objects via their dedicated functions: - * :func:`~uuid.uuid3` is 40% faster for 16-byte names and 10% faster + * :func:`~uuid.uuid3` is 47% faster for 16-byte names and 13% faster for 1024-byte names. Performances for longer names remain unchanged. - * :func:`~uuid.uuid5` is 38% faster for 16-byte names and 21% faster + * :func:`~uuid.uuid5` is 35% faster for 16-byte names and 24% faster for 1024-byte names. Performances for longer names remain unchanged. - * :func:`~uuid.uuid4` is 31% faster and :func:`~uuid.uuid8` is 37% faster. + * :func:`~uuid.uuid4` is 33% faster and :func:`~uuid.uuid8` is 38% faster. Overall, dedicated generation of UUID objects version 3, 4, 5, and 8 is - roughly 30% faster. + roughly 20% faster. (Contributed by Bénédikt Tran in :gh:`128150`.) diff --git a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst index 04c744fb2ba54f..5a1d65f044171e 100644 --- a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst +++ b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst @@ -1,2 +1,2 @@ Improve generation of :class:`~uuid.UUID` objects version 3, 4, 5, and 8 -via their dedicated functions by 30%. Patch by Bénédikt Tran. +via their dedicated functions by 20%. Patch by Bénédikt Tran. From 1c1090163b05b23d46260bc7fe8db00893a0aa16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:29:43 +0100 Subject: [PATCH 11/27] describe constants --- Lib/uuid.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Lib/uuid.py b/Lib/uuid.py index 41283aebce91ca..b35df37fe574ab 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -85,7 +85,14 @@ class SafeUUID: unknown = None +_UINT_128_MAX = 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff +# 128-bit mask to clear the variant and version bits of a UUID integral value +# +# This is equivalent to the 2-complement of '(0xc000 << 48) | (0xf000 << 64)'. _RFC_4122_CLEARFLAGS_MASK = 0xffff_ffff_ffff_0fff_3fff_ffff_ffff_ffff +# RFC 4122 variant bits and version bits to activate on a UUID integral value. +# +# The values are equivalent to '(version << 76) | (0x8000 << 48)'. _RFC_4122_VERSION_1_FLAGS = 0x0000_0000_0000_1000_8000_0000_0000_0000 _RFC_4122_VERSION_3_FLAGS = 0x0000_0000_0000_3000_8000_0000_0000_0000 _RFC_4122_VERSION_4_FLAGS = 0x0000_0000_0000_4000_8000_0000_0000_0000 From 0bc7321c7225bc888b060140f2b87305de872581 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:30:11 +0100 Subject: [PATCH 12/27] revert UUIDv1 optimizations to reduce the diff --- Lib/uuid.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index b35df37fe574ab..03987825617af8 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -214,17 +214,17 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, raise ValueError('fields is not a 6-tuple') (time_low, time_mid, time_hi_version, clock_seq_hi_variant, clock_seq_low, node) = fields - if time_low < 0 or time_low > 0xffff_ffff: + if not 0 <= time_low <= 0xffff_ffff: raise ValueError('field 1 out of range (need a 32-bit value)') - if time_mid < 0 or time_mid > 0xffff: + if not 0 <= time_mid <= 0xffff: raise ValueError('field 2 out of range (need a 16-bit value)') - if time_hi_version < 0 or time_hi_version > 0xffff: + if not 0 <= time_hi_version <= 0xffff: raise ValueError('field 3 out of range (need a 16-bit value)') - if clock_seq_hi_variant < 0 or clock_seq_hi_variant > 0xff: + if not 0 <= clock_seq_hi_variant <= 0xff: raise ValueError('field 4 out of range (need an 8-bit value)') - if clock_seq_low < 0 or clock_seq_low > 0xff: + if not 0 <= clock_seq_low <= 0xff: raise ValueError('field 5 out of range (need an 8-bit value)') - if node < 0 or node > 0xffff_ffff_ffff: + if not 0 <= node <= 0xffff_ffff_ffff: raise ValueError('field 6 out of range (need a 48-bit value)') clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low int = ((time_low << 96) | (time_mid << 80) | From 26b1eb1c6ff0f3ded790e5f80a6020c0a7ab4c17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:31:03 +0100 Subject: [PATCH 13/27] simplify `_from_int` private constructor as per Pieter's review --- Lib/uuid.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 03987825617af8..8ad9d1b715e4a0 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -245,12 +245,13 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, object.__setattr__(self, 'is_safe', is_safe) @classmethod - def _from_int(cls, int, *, is_safe=SafeUUID.unknown): + def _from_int(cls, value): """Internal use only.""" - assert int >= 0 and int <= 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff + assert isinstance(value, int), repr(value) + assert 0 <= value <= _UINT_128_MAX, repr(value) self = cls.__new__(cls) - object.__setattr__(self, 'int', int) - object.__setattr__(self, 'is_safe', is_safe) + object.__setattr__(self, 'int', value) + object.__setattr__(self, 'is_safe', SafeUUID.unknown) return self def __getstate__(self): From df50a7a8c819281033a0ec4bff34aa2cab20986c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:31:39 +0100 Subject: [PATCH 14/27] revert micro-optimization of `not a <= x <= b` --- Lib/uuid.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 8ad9d1b715e4a0..e63849c04c0812 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -229,8 +229,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low int = ((time_low << 96) | (time_mid << 80) | (time_hi_version << 64) | (clock_seq << 48) | node) - # "x < a or int > b" is slightly faster than "not (a <= x <= b)" - if int < 0 or int > 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff: + if not 0 <= int <= _UINT_128_MAX: raise ValueError('int is out of range (need a 128-bit value)') if version is not None: if not 1 <= version <= 8: From c1ffa7dd5f402a266f1c6f9fd6277da4653328b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:31:54 +0100 Subject: [PATCH 15/27] use built-in `int` when it is not shadowed --- Lib/uuid.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index e63849c04c0812..201201eadb38de 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -733,14 +733,14 @@ def uuid3(namespace, name): import _md5 h = _md5.md5(namespace.bytes + name, usedforsecurity=False) assert len(h.digest()) == 16 - int_uuid_3 = int_.from_bytes(h.digest()) + int_uuid_3 = int.from_bytes(h.digest()) int_uuid_3 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS return UUID._from_int(int_uuid_3) def uuid4(): """Generate a random UUID.""" - int_uuid_4 = int_.from_bytes(os.urandom(16)) + int_uuid_4 = int.from_bytes(os.urandom(16)) int_uuid_4 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_4 |= _RFC_4122_VERSION_4_FLAGS return UUID._from_int(int_uuid_4) @@ -753,7 +753,7 @@ def uuid5(namespace, name): # and 'import X; X.Y' is slightly faster than 'from X import Y'. import hashlib h = hashlib.sha1(namespace.bytes + name, usedforsecurity=False) - int_uuid_5 = int_.from_bytes(h.digest()[:16]) + int_uuid_5 = int.from_bytes(h.digest()[:16]) int_uuid_5 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS return UUID._from_int(int_uuid_5) From cff86e9e8d5318a2f933fafdbf191131c5ebf627 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:36:32 +0100 Subject: [PATCH 16/27] remove rationale comment for HACL* MD5 In this commit, we move the rationale for using HACL*-based MD5 instead of its OpenSSL implementation from the code to this note. HACL*-based MD5 is 2x faster than its OpenSSL implementation for creating the hash object via `h = md5(..., usedforsecurity=False)` but `h.digest()` is slightly (yet noticeably) slower. Overall, HACL*-based MD5 still remains faster than its OpenSSL-based implementation, whence the choice of `_md5.md5` over `hashlib.md5`. --- Lib/uuid.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 201201eadb38de..9da2ea83ad41b1 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -728,8 +728,6 @@ def uuid3(namespace, name): """Generate a UUID from the MD5 hash of a namespace UUID and a name.""" if isinstance(name, str): name = bytes(name, "utf-8") - # HACL*-based MD5 is slightly faster than its OpenSSL version, - # and 'import X; X.Y' is slightly faster than 'from X import Y'. import _md5 h = _md5.md5(namespace.bytes + name, usedforsecurity=False) assert len(h.digest()) == 16 From 7095aa4fa92a8e449a0de7a7503fac4bd7be0d60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:41:37 +0100 Subject: [PATCH 17/27] remove rationale comment for OpenSSL SHA-1 In this commit, we move the rationale for using OpenSSL-based SHA-1 instead of its HACL* implementation from the code to this note. HACL*-based SHA-1 is 2x faster than its OpenSSL implementation for creating the hash object via `h = sha1(..., usedforsecurity=False)` but `h.digest()` is almost 3x slower. Unlike HACL* MD5, HACL*-based SHA-1 is slower than its OpenSSL-based implementation, whence the choice of `hashlib.sha1` over `_sha1.sha1`. --- Lib/uuid.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 9da2ea83ad41b1..c8ad77c5c1b80f 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -747,8 +747,6 @@ def uuid5(namespace, name): """Generate a UUID from the SHA-1 hash of a namespace UUID and a name.""" if isinstance(name, str): name = bytes(name, "utf-8") - # OpenSSL-based SHA-1 is slightly faster than its HACL* version, - # and 'import X; X.Y' is slightly faster than 'from X import Y'. import hashlib h = hashlib.sha1(namespace.bytes + name, usedforsecurity=False) int_uuid_5 = int.from_bytes(h.digest()[:16]) From 4af15352d2732af7d4a5464eaa831e623b62ec06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:52:43 +0100 Subject: [PATCH 18/27] clear variant and version bits using dedicated mask --- Lib/uuid.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index c8ad77c5c1b80f..21ebad31eee491 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -203,7 +203,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, assert isinstance(bytes_le, bytes_), repr(bytes_le) bytes = (bytes_le[4-1::-1] + bytes_le[6-1:4-1:-1] + bytes_le[8-1:6-1:-1] + bytes_le[8:]) - int = int_.from_bytes(bytes) + int = int_.from_bytes(bytes) # big endian elif bytes is not None: if len(bytes) != 16: raise ValueError('bytes is not a 16-char string') @@ -234,11 +234,11 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, if version is not None: if not 1 <= version <= 8: raise ValueError('illegal version number') + # clear the variant and the version number bits + int &= _RFC_4122_CLEARFLAGS_MASK # Set the variant to RFC 4122/9562. - int &= ~(0xc000 << 48) - int |= 0x8000 << 48 + int |= 0x8000_0000_0000_0000 # (0x8000 << 48) # Set the version number. - int &= ~(0xf000 << 64) int |= version << 76 object.__setattr__(self, 'int', int) object.__setattr__(self, 'is_safe', is_safe) From 0d4c0088b613debd7fbcead030f66795ac4b9b8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Dec 2024 13:35:43 +0100 Subject: [PATCH 19/27] fix typos --- Doc/whatsnew/3.14.rst | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 4cfa829530cf6b..5e8e6630992127 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -676,14 +676,11 @@ uuid functions: * :func:`~uuid.uuid3` is 47% faster for 16-byte names and 13% faster - for 1024-byte names. Performances for longer names remain unchanged. + for 1024-byte names. Performance for longer names remains unchanged. * :func:`~uuid.uuid5` is 35% faster for 16-byte names and 24% faster - for 1024-byte names. Performances for longer names remain unchanged. + for 1024-byte names. Performance for longer names remains unchanged. * :func:`~uuid.uuid4` is 33% faster and :func:`~uuid.uuid8` is 38% faster. - Overall, dedicated generation of UUID objects version 3, 4, 5, and 8 is - roughly 20% faster. - (Contributed by Bénédikt Tran in :gh:`128150`.) From 9854f69e4980d05ec0e4115d7679c973e9a6822b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 25 Dec 2024 13:46:53 +0100 Subject: [PATCH 20/27] update benchmarks --- Doc/whatsnew/3.14.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 5e8e6630992127..cd1193e7b0e0fd 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -675,11 +675,11 @@ uuid * Improve generation of :class:`~uuid.UUID` objects via their dedicated functions: - * :func:`~uuid.uuid3` is 47% faster for 16-byte names and 13% faster + * :func:`~uuid.uuid3` is 40% faster for 16-byte names and 10% faster for 1024-byte names. Performance for longer names remains unchanged. - * :func:`~uuid.uuid5` is 35% faster for 16-byte names and 24% faster + * :func:`~uuid.uuid5` is 30% faster for 16-byte names and 20% faster for 1024-byte names. Performance for longer names remains unchanged. - * :func:`~uuid.uuid4` is 33% faster and :func:`~uuid.uuid8` is 38% faster. + * :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 30% faster. (Contributed by Bénédikt Tran in :gh:`128150`.) From 897902b799a576bb1f1aed9f73b496e0130e60f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 26 Dec 2024 09:40:30 +0100 Subject: [PATCH 21/27] remove un-necessary assertions --- Lib/uuid.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 21ebad31eee491..5c934d444a4a77 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -245,8 +245,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, @classmethod def _from_int(cls, value): - """Internal use only.""" - assert isinstance(value, int), repr(value) + """Create a UUID from an integer *value*. Internal use only.""" assert 0 <= value <= _UINT_128_MAX, repr(value) self = cls.__new__(cls) object.__setattr__(self, 'int', value) @@ -730,7 +729,6 @@ def uuid3(namespace, name): name = bytes(name, "utf-8") import _md5 h = _md5.md5(namespace.bytes + name, usedforsecurity=False) - assert len(h.digest()) == 16 int_uuid_3 = int.from_bytes(h.digest()) int_uuid_3 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS From a8a19e1343e392cef23ab9300bead9866008aa78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 26 Dec 2024 09:40:45 +0100 Subject: [PATCH 22/27] use `object.__new__` instead of `cls.__new__` --- Lib/uuid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 5c934d444a4a77..59b7b082bfb3bb 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -247,7 +247,7 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, def _from_int(cls, value): """Create a UUID from an integer *value*. Internal use only.""" assert 0 <= value <= _UINT_128_MAX, repr(value) - self = cls.__new__(cls) + self = object.__new__(cls) object.__setattr__(self, 'int', value) object.__setattr__(self, 'is_safe', SafeUUID.unknown) return self From e2b8b08b5ff8d5fc29c9cd40f0e5268f69fe8c14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Dec 2024 11:37:49 +0100 Subject: [PATCH 23/27] remove dedicated constant folding --- Lib/uuid.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 59b7b082bfb3bb..1ef15a575ce00f 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -85,19 +85,15 @@ class SafeUUID: unknown = None -_UINT_128_MAX = 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff +_UINT_128_MAX = (1 << 128) - 1 # 128-bit mask to clear the variant and version bits of a UUID integral value -# -# This is equivalent to the 2-complement of '(0xc000 << 48) | (0xf000 << 64)'. -_RFC_4122_CLEARFLAGS_MASK = 0xffff_ffff_ffff_0fff_3fff_ffff_ffff_ffff +_RFC_4122_CLEARFLAGS_MASK = ~((0xf000 << 64) | (0xc000 << 48)) # RFC 4122 variant bits and version bits to activate on a UUID integral value. -# -# The values are equivalent to '(version << 76) | (0x8000 << 48)'. -_RFC_4122_VERSION_1_FLAGS = 0x0000_0000_0000_1000_8000_0000_0000_0000 -_RFC_4122_VERSION_3_FLAGS = 0x0000_0000_0000_3000_8000_0000_0000_0000 -_RFC_4122_VERSION_4_FLAGS = 0x0000_0000_0000_4000_8000_0000_0000_0000 -_RFC_4122_VERSION_5_FLAGS = 0x0000_0000_0000_5000_8000_0000_0000_0000 -_RFC_4122_VERSION_8_FLAGS = 0x0000_0000_0000_8000_8000_0000_0000_0000 +_RFC_4122_VERSION_1_FLAGS = ((1 << 76) | (0x8000 << 48)) +_RFC_4122_VERSION_3_FLAGS = ((3 << 76) | (0x8000 << 48)) +_RFC_4122_VERSION_4_FLAGS = ((4 << 76) | (0x8000 << 48)) +_RFC_4122_VERSION_5_FLAGS = ((5 << 76) | (0x8000 << 48)) +_RFC_4122_VERSION_8_FLAGS = ((8 << 76) | (0x8000 << 48)) class UUID: @@ -214,17 +210,17 @@ def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None, raise ValueError('fields is not a 6-tuple') (time_low, time_mid, time_hi_version, clock_seq_hi_variant, clock_seq_low, node) = fields - if not 0 <= time_low <= 0xffff_ffff: + if not 0 <= time_low < (1 << 32): raise ValueError('field 1 out of range (need a 32-bit value)') - if not 0 <= time_mid <= 0xffff: + if not 0 <= time_mid < (1 << 16): raise ValueError('field 2 out of range (need a 16-bit value)') - if not 0 <= time_hi_version <= 0xffff: + if not 0 <= time_hi_version < (1 << 16): raise ValueError('field 3 out of range (need a 16-bit value)') - if not 0 <= clock_seq_hi_variant <= 0xff: + if not 0 <= clock_seq_hi_variant < (1 << 8): raise ValueError('field 4 out of range (need an 8-bit value)') - if not 0 <= clock_seq_low <= 0xff: + if not 0 <= clock_seq_low < (1 << 8): raise ValueError('field 5 out of range (need an 8-bit value)') - if not 0 <= node <= 0xffff_ffff_ffff: + if not 0 <= node < (1 << 48): raise ValueError('field 6 out of range (need a 48-bit value)') clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low int = ((time_low << 96) | (time_mid << 80) | From 1d4216aebdfed46508f1d6641653a71b7d10b25e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Dec 2024 11:37:55 +0100 Subject: [PATCH 24/27] update benchmarks --- Doc/whatsnew/3.14.rst | 7 ++++--- .../Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index f65871a9fc127d..9411586f47d8b1 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -683,11 +683,12 @@ uuid * Improve generation of :class:`~uuid.UUID` objects via their dedicated functions: - * :func:`~uuid.uuid3` is 40% faster for 16-byte names and 10% faster + * :func:`~uuid.uuid3` is 70% faster for 16-byte names and 20% faster for 1024-byte names. Performance for longer names remains unchanged. - * :func:`~uuid.uuid5` is 30% faster for 16-byte names and 20% faster + * :func:`~uuid.uuid5` is 40% faster for 16-byte names and 30% faster for 1024-byte names. Performance for longer names remains unchanged. - * :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 30% faster. + * :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 30% and 45% faster + respectively. (Contributed by Bénédikt Tran in :gh:`128150`.) diff --git a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst index 5a1d65f044171e..9bcfc12f5a0a5d 100644 --- a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst +++ b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst @@ -1,2 +1,2 @@ Improve generation of :class:`~uuid.UUID` objects version 3, 4, 5, and 8 -via their dedicated functions by 20%. Patch by Bénédikt Tran. +via their dedicated functions by 25%. Patch by Bénédikt Tran. From 5c87adfad4cf44832df4d0594b6188630906290d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 12 Jan 2025 12:17:37 +0100 Subject: [PATCH 25/27] Always use `hashlib.md5` for consistency as per Petr's comment. --- Lib/uuid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/uuid.py b/Lib/uuid.py index 1ef15a575ce00f..cd1f3530ab63e1 100644 --- a/Lib/uuid.py +++ b/Lib/uuid.py @@ -723,8 +723,8 @@ def uuid3(namespace, name): """Generate a UUID from the MD5 hash of a namespace UUID and a name.""" if isinstance(name, str): name = bytes(name, "utf-8") - import _md5 - h = _md5.md5(namespace.bytes + name, usedforsecurity=False) + import hashlib + h = hashlib.md5(namespace.bytes + name, usedforsecurity=False) int_uuid_3 = int.from_bytes(h.digest()) int_uuid_3 &= _RFC_4122_CLEARFLAGS_MASK int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS From ea23629bb368783e69caafe2c019576c80826358 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 12 Jan 2025 12:17:43 +0100 Subject: [PATCH 26/27] update benchmarks --- Doc/whatsnew/3.14.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 9411586f47d8b1..66c92cd450cda3 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -683,11 +683,10 @@ uuid * Improve generation of :class:`~uuid.UUID` objects via their dedicated functions: - * :func:`~uuid.uuid3` is 70% faster for 16-byte names and 20% faster - for 1024-byte names. Performance for longer names remains unchanged. - * :func:`~uuid.uuid5` is 40% faster for 16-byte names and 30% faster - for 1024-byte names. Performance for longer names remains unchanged. - * :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 30% and 45% faster + * :func:`~uuid.uuid3` and :func:`~uuid.uuid5` are both roughly 40% faster + for 16-byte names and 20% faster for 1024-byte names. Performance for + longer names remains unchanged. + * :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 30% and 40% faster respectively. (Contributed by Bénédikt Tran in :gh:`128150`.) From bdf7c6efd123c45b2e6d9f20f52d0e02a2c6bd15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 13 Jan 2025 12:16:33 +0100 Subject: [PATCH 27/27] update NEWS --- .../next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst index 9bcfc12f5a0a5d..04c744fb2ba54f 100644 --- a/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst +++ b/Misc/NEWS.d/next/Library/2024-12-21-11-12-50.gh-issue-128151.aq7vpG.rst @@ -1,2 +1,2 @@ Improve generation of :class:`~uuid.UUID` objects version 3, 4, 5, and 8 -via their dedicated functions by 25%. Patch by Bénédikt Tran. +via their dedicated functions by 30%. Patch by Bénédikt Tran. pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy