From 23c468767f6fefba654ee1860a01afe8bf075010 Mon Sep 17 00:00:00 2001 From: daxfo Date: Wed, 1 Jan 2025 11:36:10 -0800 Subject: [PATCH 1/8] Optimize qubit hash for Set operations Improves amortized `Set` operations perf by around 50%, though with the caveat that sets with qudits of different dimensions but the same index will always have the same key (not just the same bucket), and thus have to check `__eq__`, causing degenerate perf impact. It seems unlikely that anyone would intentionally do this though. ```python s = set() for q in cirq.GridQubit.square(100): s = s.union({q}) ``` --- cirq-core/cirq/devices/grid_qubit.py | 31 ++++++++++++++++++++++- cirq-core/cirq/devices/grid_qubit_test.py | 6 +++++ cirq-core/cirq/devices/line_qubit.py | 6 ++++- 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/cirq-core/cirq/devices/grid_qubit.py b/cirq-core/cirq/devices/grid_qubit.py index 4af18585cef..709d0feefab 100644 --- a/cirq-core/cirq/devices/grid_qubit.py +++ b/cirq-core/cirq/devices/grid_qubit.py @@ -38,7 +38,36 @@ class _BaseGridQid(ops.Qid): def __hash__(self) -> int: if self._hash is None: - self._hash = hash((self._row, self._col, self._dimension)) + # This approach seems to perform better than traditional "random" hash in `Set` + # operations for typical circuits, as it reduces bucket collisions. Caveat: it does not + # include dimension, so sets with qudits of different dimensions but same location will + # have degenerate performance. + # Indexes the plane by squares around the origin. + # | -2 -1 0 1 2 + # ---+--------------- + # -2 | 9 10 11 12 13 + # -1 | 19 1 2 3 22 + # 0 | 20 7 0 8 23 + # 1 | 21 4 5 6 24 + # 2 | 14 15 16 17 18 + row = self._row + col = self._col + if row == 0 and col == 0: + self._hash = 0 + return 0 + abs_row = abs(row) + abs_col = abs(col) + square_index = max(abs_row, abs_col) + inner_square_side_len = square_index * 2 - 1 + outer_square_side_len = inner_square_side_len + 2 + inner_square_area = inner_square_side_len ** 2 + if abs_row == square_index: + offset = 0 if row < 0 else outer_square_side_len + i = inner_square_area + offset + (col + square_index) + else: + offset = (2 * outer_square_side_len) + (0 if col < 0 else inner_square_side_len) + i = inner_square_area + offset + (row + (square_index - 1)) + self._hash = hash(i) return self._hash def __eq__(self, other) -> bool: diff --git a/cirq-core/cirq/devices/grid_qubit_test.py b/cirq-core/cirq/devices/grid_qubit_test.py index b6c51f68b39..67019005e9d 100644 --- a/cirq-core/cirq/devices/grid_qubit_test.py +++ b/cirq-core/cirq/devices/grid_qubit_test.py @@ -71,6 +71,12 @@ def _test_qid_pickled_hash(q: 'cirq.Qid', q_bad: 'cirq.Qid') -> None: assert hash(q_ok) == hash(q) +def test_hash_fills_plane(): + qubits = cirq.GridQubit.square(7, -3, -3) + hashes = [hash(q) for q in qubits] + assert sorted(hashes) == list(range(49)) + + def test_str(): assert str(cirq.GridQubit(5, 2)) == 'q(5, 2)' assert str(cirq.GridQid(5, 2, dimension=3)) == 'q(5, 2) (d=3)' diff --git a/cirq-core/cirq/devices/line_qubit.py b/cirq-core/cirq/devices/line_qubit.py index c975d35e664..f56fd16b840 100644 --- a/cirq-core/cirq/devices/line_qubit.py +++ b/cirq-core/cirq/devices/line_qubit.py @@ -33,8 +33,12 @@ class _BaseLineQid(ops.Qid): _hash: Optional[int] = None def __hash__(self) -> int: + # This approach seems to perform better than traditional "random" hash in `Set` + # operations for typical circuits, as it reduces bucket collisions. Caveat: it does not + # include dimension, so sets with qudits of different dimensions but same location will + # have degenerate performance. if self._hash is None: - self._hash = hash((self._x, self._dimension)) + self._hash = hash(self._x) # hash(i) returns i except for huge numbers return self._hash def __eq__(self, other) -> bool: From 08100cca8c9ef6cde62c75579bafe8156e5cdd40 Mon Sep 17 00:00:00 2001 From: daxfo Date: Thu, 2 Jan 2025 09:37:32 -0800 Subject: [PATCH 2/8] format --- cirq-core/cirq/devices/grid_qubit.py | 2 +- cirq-core/cirq/devices/line_qubit.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cirq-core/cirq/devices/grid_qubit.py b/cirq-core/cirq/devices/grid_qubit.py index 709d0feefab..8a1f8d771c8 100644 --- a/cirq-core/cirq/devices/grid_qubit.py +++ b/cirq-core/cirq/devices/grid_qubit.py @@ -60,7 +60,7 @@ def __hash__(self) -> int: square_index = max(abs_row, abs_col) inner_square_side_len = square_index * 2 - 1 outer_square_side_len = inner_square_side_len + 2 - inner_square_area = inner_square_side_len ** 2 + inner_square_area = inner_square_side_len**2 if abs_row == square_index: offset = 0 if row < 0 else outer_square_side_len i = inner_square_area + offset + (col + square_index) diff --git a/cirq-core/cirq/devices/line_qubit.py b/cirq-core/cirq/devices/line_qubit.py index f56fd16b840..4b2dc65d3cb 100644 --- a/cirq-core/cirq/devices/line_qubit.py +++ b/cirq-core/cirq/devices/line_qubit.py @@ -38,7 +38,7 @@ def __hash__(self) -> int: # include dimension, so sets with qudits of different dimensions but same location will # have degenerate performance. if self._hash is None: - self._hash = hash(self._x) # hash(i) returns i except for huge numbers + self._hash = hash(self._x) # hash(i) returns i except for huge numbers return self._hash def __eq__(self, other) -> bool: From 08f3b8798ffb35a9c49abb00f2c4de624634084c Mon Sep 17 00:00:00 2001 From: Dax Fohl Date: Thu, 2 Jan 2025 12:52:07 -0800 Subject: [PATCH 3/8] Slightly cleaner code --- cirq-core/cirq/devices/grid_qubit.py | 41 ++++++++++++++++------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/cirq-core/cirq/devices/grid_qubit.py b/cirq-core/cirq/devices/grid_qubit.py index 8a1f8d771c8..28a62a20af7 100644 --- a/cirq-core/cirq/devices/grid_qubit.py +++ b/cirq-core/cirq/devices/grid_qubit.py @@ -42,32 +42,37 @@ def __hash__(self) -> int: # operations for typical circuits, as it reduces bucket collisions. Caveat: it does not # include dimension, so sets with qudits of different dimensions but same location will # have degenerate performance. - # Indexes the plane by squares around the origin. + # Indexes the plane by concentric squares around the origin. # | -2 -1 0 1 2 # ---+--------------- # -2 | 9 10 11 12 13 - # -1 | 19 1 2 3 22 - # 0 | 20 7 0 8 23 - # 1 | 21 4 5 6 24 - # 2 | 14 15 16 17 18 + # -1 | 24 1 2 3 14 + # 0 | 23 8 0 4 15 + # 1 | 22 7 6 5 16 + # 2 | 21 20 19 18 17 row = self._row col = self._col if row == 0 and col == 0: self._hash = 0 return 0 - abs_row = abs(row) - abs_col = abs(col) - square_index = max(abs_row, abs_col) - inner_square_side_len = square_index * 2 - 1 - outer_square_side_len = inner_square_side_len + 2 - inner_square_area = inner_square_side_len**2 - if abs_row == square_index: - offset = 0 if row < 0 else outer_square_side_len - i = inner_square_area + offset + (col + square_index) - else: - offset = (2 * outer_square_side_len) + (0 if col < 0 else inner_square_side_len) - i = inner_square_area + offset + (row + (square_index - 1)) - self._hash = hash(i) + + # The index of the square containing this point + n = max(abs(row), abs(col)) + + # Determine the area of the inner square + start = (2 * n - 1)**2 if n > 0 else 0 + + # Determine the offset within the outer square + if row == -n: # Top edge + offset = n + col + elif col == n: # Right edge + offset = 3 * n + row + elif row == n: # Bottom edge + offset = 5 * n - col + else: # Left edge + offset = 7 * n - row + + self._hash = hash(start + offset) return self._hash def __eq__(self, other) -> bool: From ac5a752f8018f6d72c38357a963cdb276f375f6f Mon Sep 17 00:00:00 2001 From: Dax Fohl Date: Thu, 2 Jan 2025 14:41:00 -0800 Subject: [PATCH 4/8] format --- cirq-core/cirq/devices/grid_qubit.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cirq-core/cirq/devices/grid_qubit.py b/cirq-core/cirq/devices/grid_qubit.py index 28a62a20af7..834900a134a 100644 --- a/cirq-core/cirq/devices/grid_qubit.py +++ b/cirq-core/cirq/devices/grid_qubit.py @@ -52,15 +52,15 @@ def __hash__(self) -> int: # 2 | 21 20 19 18 17 row = self._row col = self._col - if row == 0 and col == 0: - self._hash = 0 - return 0 # The index of the square containing this point n = max(abs(row), abs(col)) + if n == 0: + self._hash = 0 + return 0 # Determine the area of the inner square - start = (2 * n - 1)**2 if n > 0 else 0 + start = (2 * n - 1) ** 2 # Determine the offset within the outer square if row == -n: # Top edge From e89ddb333220dc11b92bc27d010e98976d5ea50c Mon Sep 17 00:00:00 2001 From: daxfo Date: Fri, 10 Jan 2025 23:28:14 -0800 Subject: [PATCH 5/8] Improve / simplify GridQubit hash implementation to be the same as a complex number. --- cirq-core/cirq/devices/grid_qubit.py | 42 +++++----------------------- cirq-core/cirq/devices/line_qubit.py | 9 +++--- 2 files changed, 12 insertions(+), 39 deletions(-) diff --git a/cirq-core/cirq/devices/grid_qubit.py b/cirq-core/cirq/devices/grid_qubit.py index 834900a134a..80914830477 100644 --- a/cirq-core/cirq/devices/grid_qubit.py +++ b/cirq-core/cirq/devices/grid_qubit.py @@ -14,6 +14,7 @@ import abc import functools +import sys import weakref from typing import Any, Dict, Iterable, List, Optional, Tuple, Set, TYPE_CHECKING, Union from typing_extensions import Self @@ -38,41 +39,12 @@ class _BaseGridQid(ops.Qid): def __hash__(self) -> int: if self._hash is None: - # This approach seems to perform better than traditional "random" hash in `Set` - # operations for typical circuits, as it reduces bucket collisions. Caveat: it does not - # include dimension, so sets with qudits of different dimensions but same location will - # have degenerate performance. - # Indexes the plane by concentric squares around the origin. - # | -2 -1 0 1 2 - # ---+--------------- - # -2 | 9 10 11 12 13 - # -1 | 24 1 2 3 14 - # 0 | 23 8 0 4 15 - # 1 | 22 7 6 5 16 - # 2 | 21 20 19 18 17 - row = self._row - col = self._col - - # The index of the square containing this point - n = max(abs(row), abs(col)) - if n == 0: - self._hash = 0 - return 0 - - # Determine the area of the inner square - start = (2 * n - 1) ** 2 - - # Determine the offset within the outer square - if row == -n: # Top edge - offset = n + col - elif col == n: # Right edge - offset = 3 * n + row - elif row == n: # Bottom edge - offset = 5 * n - col - else: # Left edge - offset = 7 * n - row - - self._hash = hash(start + offset) + # Use the same hash algorithm as complex numbers, which performs better than a tuple + # hash. Note this does not include dimension, so sets with qubits at the same grid + # position but different dimensions will have poor performance, but such use cases are + # not anticipated. (Similarly, sets with grid qubits and raw complex numbers will + # perform poorly, but are not anticipated.) + self._hash = hash(self._row + self._col * sys.hash_info.imag) return self._hash def __eq__(self, other) -> bool: diff --git a/cirq-core/cirq/devices/line_qubit.py b/cirq-core/cirq/devices/line_qubit.py index 4b2dc65d3cb..5a63e58a4e0 100644 --- a/cirq-core/cirq/devices/line_qubit.py +++ b/cirq-core/cirq/devices/line_qubit.py @@ -33,11 +33,12 @@ class _BaseLineQid(ops.Qid): _hash: Optional[int] = None def __hash__(self) -> int: - # This approach seems to perform better than traditional "random" hash in `Set` - # operations for typical circuits, as it reduces bucket collisions. Caveat: it does not - # include dimension, so sets with qudits of different dimensions but same location will - # have degenerate performance. if self._hash is None: + # Use the line index integer itself as the hash. This performs better than a tuple + # hash. Note this does not include dimension, so sets with qubits at the same line + # index but different dimensions will have poor performance, but such use cases are + # not anticipated. (Similarly, sets with line qubits and raw integers will perform + # poorly, but are not anticipated.) self._hash = hash(self._x) # hash(i) returns i except for huge numbers return self._hash From c0ce4a0bb7c41faafcc989ebb333ee54ec47233d Mon Sep 17 00:00:00 2001 From: daxfo Date: Fri, 10 Jan 2025 23:34:53 -0800 Subject: [PATCH 6/8] Remove irrelevant test --- cirq-core/cirq/devices/grid_qubit_test.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/cirq-core/cirq/devices/grid_qubit_test.py b/cirq-core/cirq/devices/grid_qubit_test.py index 67019005e9d..b6c51f68b39 100644 --- a/cirq-core/cirq/devices/grid_qubit_test.py +++ b/cirq-core/cirq/devices/grid_qubit_test.py @@ -71,12 +71,6 @@ def _test_qid_pickled_hash(q: 'cirq.Qid', q_bad: 'cirq.Qid') -> None: assert hash(q_ok) == hash(q) -def test_hash_fills_plane(): - qubits = cirq.GridQubit.square(7, -3, -3) - hashes = [hash(q) for q in qubits] - assert sorted(hashes) == list(range(49)) - - def test_str(): assert str(cirq.GridQubit(5, 2)) == 'q(5, 2)' assert str(cirq.GridQid(5, 2, dimension=3)) == 'q(5, 2) (d=3)' From 1dbfd4909aa5c7f95eb5295201e8f1dbd50ef4be Mon Sep 17 00:00:00 2001 From: daxfo Date: Sat, 11 Jan 2025 22:21:27 -0800 Subject: [PATCH 7/8] Include dimension --- cirq-core/cirq/devices/grid_qubit.py | 7 +------ cirq-core/cirq/devices/line_qubit.py | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/cirq-core/cirq/devices/grid_qubit.py b/cirq-core/cirq/devices/grid_qubit.py index 80914830477..43bdc23a4a3 100644 --- a/cirq-core/cirq/devices/grid_qubit.py +++ b/cirq-core/cirq/devices/grid_qubit.py @@ -39,12 +39,7 @@ class _BaseGridQid(ops.Qid): def __hash__(self) -> int: if self._hash is None: - # Use the same hash algorithm as complex numbers, which performs better than a tuple - # hash. Note this does not include dimension, so sets with qubits at the same grid - # position but different dimensions will have poor performance, but such use cases are - # not anticipated. (Similarly, sets with grid qubits and raw complex numbers will - # perform poorly, but are not anticipated.) - self._hash = hash(self._row + self._col * sys.hash_info.imag) + self._hash = ((self._dimension - 2) * 1_000_003 + self._col) * 1_000_003 + self._row return self._hash def __eq__(self, other) -> bool: diff --git a/cirq-core/cirq/devices/line_qubit.py b/cirq-core/cirq/devices/line_qubit.py index 5a63e58a4e0..8097730ac64 100644 --- a/cirq-core/cirq/devices/line_qubit.py +++ b/cirq-core/cirq/devices/line_qubit.py @@ -34,12 +34,7 @@ class _BaseLineQid(ops.Qid): def __hash__(self) -> int: if self._hash is None: - # Use the line index integer itself as the hash. This performs better than a tuple - # hash. Note this does not include dimension, so sets with qubits at the same line - # index but different dimensions will have poor performance, but such use cases are - # not anticipated. (Similarly, sets with line qubits and raw integers will perform - # poorly, but are not anticipated.) - self._hash = hash(self._x) # hash(i) returns i except for huge numbers + self._hash = (self._dimension - 2) * 1_000_003 + self._x return self._hash def __eq__(self, other) -> bool: From 04c6436260347722a719df6d0325a38c7a6c9109 Mon Sep 17 00:00:00 2001 From: daxfo Date: Sat, 11 Jan 2025 22:43:41 -0800 Subject: [PATCH 8/8] lint --- cirq-core/cirq/devices/grid_qubit.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cirq-core/cirq/devices/grid_qubit.py b/cirq-core/cirq/devices/grid_qubit.py index 43bdc23a4a3..ee2854769ce 100644 --- a/cirq-core/cirq/devices/grid_qubit.py +++ b/cirq-core/cirq/devices/grid_qubit.py @@ -14,7 +14,6 @@ import abc import functools -import sys import weakref from typing import Any, Dict, Iterable, List, Optional, Tuple, Set, TYPE_CHECKING, Union from typing_extensions import Self