From 860e69d76e80ca0628bdec287ffac707a8495ae3 Mon Sep 17 00:00:00 2001 From: jahnavik186 Date: Sun, 16 Nov 2025 09:49:50 -0800 Subject: [PATCH 1/4] Fix: Incorrect Binary Search output for duplicate elements (#13886, #13840) --- matrix/binary_search_matrix.py | 12 +++++++++- other/number_container_system.py | 39 ++++++++++++++++++++------------ searches/binary_search.py | 39 ++++++++++++++++++++++++-------- searches/exponential_search.py | 17 ++++++++++---- searches/simple_binary_search.py | 8 +++++++ sorts/tim_sort.py | 15 ++++++++---- 6 files changed, 97 insertions(+), 33 deletions(-) diff --git a/matrix/binary_search_matrix.py b/matrix/binary_search_matrix.py index 6f203b7a3484..27a4b2392d76 100644 --- a/matrix/binary_search_matrix.py +++ b/matrix/binary_search_matrix.py @@ -9,10 +9,20 @@ def binary_search(array: list, lower_bound: int, upper_bound: int, value: int) - 0 >>> binary_search(matrix, 0, len(matrix) - 1, 23) -1 + >>> matrix_dup = [1, 4, 4, 4, 7, 11, 15] + >>> binary_search(matrix_dup, 0, len(matrix_dup) - 1, 4) + 1 + >>> binary_search(matrix_dup, 0, len(matrix_dup) - 1, 7) + 4 + >>> binary_search(matrix_dup, 0, len(matrix_dup) - 1, 0) + -1 """ - r = int((lower_bound + upper_bound) // 2) + r = (lower_bound + upper_bound) // 2 if array[r] == value: + # Move left to find the first occurrence of duplicates + while r > 0 and array[r - 1] == value: + r -= 1 return r if lower_bound >= upper_bound: return -1 diff --git a/other/number_container_system.py b/other/number_container_system.py index 6c95dd0a3544..010f2b574f63 100644 --- a/other/number_container_system.py +++ b/other/number_container_system.py @@ -50,6 +50,10 @@ def binary_search_delete(self, array: list | str | range, item: int) -> list[int Traceback (most recent call last): ... TypeError: binary_search_delete() only accepts either a list, range or str + >>> NumberContainer().binary_search_delete([1,2,2,3], 2) + [1, 2, 3] + >>> NumberContainer().binary_search_delete([0,0,1,1,1], 1) + [0, 0, 1, 1] """ if isinstance(array, (range, str)): array = list(array) @@ -60,19 +64,27 @@ def binary_search_delete(self, array: list | str | range, item: int) -> list[int low = 0 high = len(array) - 1 + result_index = -1 + # Find the first occurrence of `item` while low <= high: mid = (low + high) // 2 - if array[mid] == item: - array.pop(mid) - return array - elif array[mid] < item: + if array[mid] < item: low = mid + 1 + elif array[mid] > item: + high = mid - 1 else: + # Found the item, move left to find the first occurrence + result_index = mid high = mid - 1 - raise ValueError( - "Either the item is not in the array or the array was unsorted" - ) + + if result_index == -1: + raise ValueError( + "Either the item is not in the array or the array was unsorted" + ) + + array.pop(result_index) + return array def binary_search_insert(self, array: list | str | range, index: int) -> list[int]: """ @@ -95,6 +107,10 @@ def binary_search_insert(self, array: list | str | range, index: int) -> list[in Traceback (most recent call last): ... TypeError: binary_search_insert() only accepts either a list, range or str + >>> NumberContainer().binary_search_insert([1,2,2,3], 2) + [1, 2, 2, 2, 3] + >>> NumberContainer().binary_search_insert([0,0,1,1], 1) + [0, 0, 1, 1, 1] """ if isinstance(array, (range, str)): array = list(array) @@ -106,19 +122,14 @@ def binary_search_insert(self, array: list | str | range, index: int) -> list[in low = 0 high = len(array) - 1 + # Find the correct insertion position while low <= high: mid = (low + high) // 2 - if array[mid] == index: - # If the item already exists in the array, - # insert it after the existing item - array.insert(mid + 1, index) - return array - elif array[mid] < index: + if array[mid] < index: low = mid + 1 else: high = mid - 1 - # If the item doesn't exist in the array, insert it at the appropriate position array.insert(low, index) return array diff --git a/searches/binary_search.py b/searches/binary_search.py index 2e66b672d5b4..37db2e5fe400 100644 --- a/searches/binary_search.py +++ b/searches/binary_search.py @@ -197,21 +197,28 @@ def binary_search(sorted_collection: list[int], item: int) -> int: 1 >>> binary_search([0, 5, 7, 10, 15], 6) -1 + >>> binary_search([1, 2, 2, 2, 3], 2) + 2 + >>> binary_search([4, 4, 4, 4], 4) + 1 """ if list(sorted_collection) != sorted(sorted_collection): raise ValueError("sorted_collection must be sorted in ascending order") + left = 0 right = len(sorted_collection) - 1 while left <= right: midpoint = left + (right - left) // 2 current_item = sorted_collection[midpoint] + if current_item == item: - return midpoint - elif item < current_item: - right = midpoint - 1 - else: + return midpoint # correct handling even with duplicates + elif current_item < item: left = midpoint + 1 + else: + right = midpoint - 1 + return -1 @@ -234,12 +241,17 @@ def binary_search_std_lib(sorted_collection: list[int], item: int) -> int: 1 >>> binary_search_std_lib([0, 5, 7, 10, 15], 6) -1 + >>> binary_search_std_lib([1, 2, 2, 2, 3], 2) + 1 + >>> binary_search_std_lib([4, 4, 4, 4], 4) + 0 """ if list(sorted_collection) != sorted(sorted_collection): raise ValueError("sorted_collection must be sorted in ascending order") + index = bisect.bisect_left(sorted_collection, item) if index != len(sorted_collection) and sorted_collection[index] == item: - return index + return index # bisect_left handles duplicates correctly return -1 @@ -265,23 +277,30 @@ def binary_search_by_recursion( 1 >>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4) -1 + >>> binary_search_by_recursion([1, 2, 2, 2, 3], 2, 0, 4) + 2 + >>> binary_search_by_recursion([4, 4, 4, 4], 4, 0, 3) + 1 """ if right < 0: right = len(sorted_collection) - 1 + if list(sorted_collection) != sorted(sorted_collection): raise ValueError("sorted_collection must be sorted in ascending order") - if right < left: + + if left > right: return -1 midpoint = left + (right - left) // 2 + mid_value = sorted_collection[midpoint] - if sorted_collection[midpoint] == item: - return midpoint - elif sorted_collection[midpoint] > item: + if mid_value == item: + return midpoint # valid index even with duplicates + elif mid_value > item: return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1) else: return binary_search_by_recursion(sorted_collection, item, midpoint + 1, right) - + def exponential_search(sorted_collection: list[int], item: int) -> int: """Pure implementation of an exponential search algorithm in Python diff --git a/searches/exponential_search.py b/searches/exponential_search.py index ed09b14e101c..5dab29d08727 100644 --- a/searches/exponential_search.py +++ b/searches/exponential_search.py @@ -39,6 +39,10 @@ def binary_search_by_recursion( 1 >>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4) -1 + >>> binary_search_by_recursion([1, 2, 2, 2, 3], 2, 0, 4) + 1 + >>> binary_search_by_recursion([2, 2, 2, 2], 2, 0, 3) + 0 """ if right < 0: right = len(sorted_collection) - 1 @@ -48,10 +52,15 @@ def binary_search_by_recursion( return -1 midpoint = left + (right - left) // 2 - - if sorted_collection[midpoint] == item: - return midpoint - elif sorted_collection[midpoint] > item: + mid_value = sorted_collection[midpoint] + + if mid_value == item: + # check if this is the first occurrence + if midpoint == 0 or sorted_collection[midpoint - 1] < item: + return midpoint + else: + return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1) + elif mid_value > item: return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1) else: return binary_search_by_recursion(sorted_collection, item, midpoint + 1, right) diff --git a/searches/simple_binary_search.py b/searches/simple_binary_search.py index 00e83ff9e4a3..593941dc0027 100644 --- a/searches/simple_binary_search.py +++ b/searches/simple_binary_search.py @@ -40,6 +40,14 @@ def binary_search(a_list: list[int], item: int) -> bool: False >>> binary_search(range(0, 10000, 5), 2) False + >>> binary_search([1, 1, 1, 2, 2, 3], 1) + True + >>> binary_search([1, 1, 1, 2, 2, 3], 2) + True + >>> binary_search([1, 1, 1, 2, 2, 3], 3) + True + >>> binary_search([1, 1, 1, 2, 2, 3], 4) + False """ if len(a_list) == 0: return False diff --git a/sorts/tim_sort.py b/sorts/tim_sort.py index 41ab4a10a87b..25f80edc9eee 100644 --- a/sorts/tim_sort.py +++ b/sorts/tim_sort.py @@ -1,17 +1,24 @@ def binary_search(lst, item, start, end): - if start == end: - return start if lst[start] > item else start + 1 + """ + Binary search that returns the index of the first occurrence of `item` if present, + or the correct insertion index if not present, even with duplicates. + """ if start > end: return start mid = (start + end) // 2 + if lst[mid] < item: return binary_search(lst, item, mid + 1, end) elif lst[mid] > item: return binary_search(lst, item, start, mid - 1) else: - return mid - + # Move left to find the first occurrence + if mid == start or lst[mid - 1] != item: + return mid + else: + return binary_search(lst, item, start, mid - 1) + def insertion_sort(lst): length = len(lst) From 42bc80d280b8035cdb5cfd75857918a4df00ef7c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 16 Nov 2025 17:56:58 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- searches/binary_search.py | 2 +- searches/exponential_search.py | 4 +++- sorts/tim_sort.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/searches/binary_search.py b/searches/binary_search.py index 37db2e5fe400..6c864e029c2c 100644 --- a/searches/binary_search.py +++ b/searches/binary_search.py @@ -300,7 +300,7 @@ def binary_search_by_recursion( return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1) else: return binary_search_by_recursion(sorted_collection, item, midpoint + 1, right) - + def exponential_search(sorted_collection: list[int], item: int) -> int: """Pure implementation of an exponential search algorithm in Python diff --git a/searches/exponential_search.py b/searches/exponential_search.py index 5dab29d08727..5fbad406deee 100644 --- a/searches/exponential_search.py +++ b/searches/exponential_search.py @@ -59,7 +59,9 @@ def binary_search_by_recursion( if midpoint == 0 or sorted_collection[midpoint - 1] < item: return midpoint else: - return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1) + return binary_search_by_recursion( + sorted_collection, item, left, midpoint - 1 + ) elif mid_value > item: return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1) else: diff --git a/sorts/tim_sort.py b/sorts/tim_sort.py index 25f80edc9eee..cda969c487c4 100644 --- a/sorts/tim_sort.py +++ b/sorts/tim_sort.py @@ -18,7 +18,7 @@ def binary_search(lst, item, start, end): return mid else: return binary_search(lst, item, start, mid - 1) - + def insertion_sort(lst): length = len(lst) From f9c16c680cb708046556e5fc2a320f1a689d3b95 Mon Sep 17 00:00:00 2001 From: jahnavik186 Date: Sun, 16 Nov 2025 10:03:10 -0800 Subject: [PATCH 3/4] update tim_sort.py --- sorts/tim_sort.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sorts/tim_sort.py b/sorts/tim_sort.py index 25f80edc9eee..76d0094e0c0f 100644 --- a/sorts/tim_sort.py +++ b/sorts/tim_sort.py @@ -12,13 +12,12 @@ def binary_search(lst, item, start, end): return binary_search(lst, item, mid + 1, end) elif lst[mid] > item: return binary_search(lst, item, start, mid - 1) - else: + elif mid == start or lst[mid - 1] != item: # Move left to find the first occurrence - if mid == start or lst[mid - 1] != item: - return mid - else: - return binary_search(lst, item, start, mid - 1) - + return mid + else: + return binary_search(lst, item, start, mid - 1) + def insertion_sort(lst): length = len(lst) From 45901f3f9f07be12db4bfef4a8f5a5f00d23e0a9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 16 Nov 2025 18:05:55 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sorts/tim_sort.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sorts/tim_sort.py b/sorts/tim_sort.py index 6e3934722e7f..76d0094e0c0f 100644 --- a/sorts/tim_sort.py +++ b/sorts/tim_sort.py @@ -18,6 +18,7 @@ def binary_search(lst, item, start, end): else: return binary_search(lst, item, start, mid - 1) + def insertion_sort(lst): length = len(lst)