The following looks complicated, but if M is the sum of the logs of len(list)+2, then the average case is O(M) and the worst case is O(M^2). (The reason for the +2 is that even if the array has no elements, we need to do work, which we do by making the log to be of at least 2.) The worst case is very unlikely.
The performance is independent of k.
The idea the same as Quickselect. We are picking pivots, and splitting data around the pivot. But we do not look at each elements, we only figure out what chunk of each array that is still under consideration is before/after/landed at the pivot. The average case is because every time we look at an array, with positive probability we get rid of half of what remains. The worst case is because every time we look at the array we got a pivot from, we will get rid of half that array but may have to binary search every other array to decide we got rid of nothing else.
from collections import deque
def kth_of_sorted (k, arrays):
# Initialize some global variables.
known_low = 0
known_high = 0
total_size = 0
# in_flight will be a double-ended queue of
# (array, iteration, i, j, min_i, min_j)
# Where:
# array is an input array
# iteration is which median it was compared to
# low is the lower bound on where kth might be
# high is the upper bound on where kth might be
in_flight = deque()
for a in arrays:
if 0 < len(a):
total_size += len(a)
in_flight.append((a, 0, len(a)-1))
# Sanity check.
if k < 1 or total_size < k:
return None
while 0 < len(in_flight):
start_a, start_low, start_high = in_flight.popleft()
start_mid = (start_low + start_high) // 2
pivot = start_a[start_mid]
# If pivot is placed, how many are known?
maybe_low = start_mid - start_low
maybe_high = start_high - start_mid
# This will be arrays taken from in_flight with:
#
# (array, low, high, orig_low, orig_high)
#
# We are binary searching in these to figure out where the pivot
# is going to go. Then we copy back to in_flight.
to_process = deque()
# This will be arrays taken from in_flight with:
#
# (array, orig_low, mid, orig_high)
#
# where at mid we match the pivot.
is_match = deque()
# And we know an array with a pivot!
is_match.append((start_a, start_low, start_mid, start_high))
# This will be arrays taken from in_flight which we know do not have the pivot:
#
# (array, low, high, orig_low, orig_high)
#
no_pivot = deque()
while 0 < len(in_flight):
a, low, high = in_flight.popleft()
mid = (low + high) // 2
if a[mid] < pivot:
# all of low, low+1, ..., mid are below the pivot
maybe_low += mid + 1 - low
if mid < high:
to_process.append((a, mid+1, high, low, high))
else:
no_pivot.append((a, mid+1, high, low, high))
elif pivot < a[mid]:
# all of mid, mid+1, ..., high are above the pivot.
maybe_high += high + 1 - mid
if low < mid:
to_process.append((a, low, mid-1, low, high))
else:
no_pivot.append((a, low, mid-1, low, high))
else:
# mid is at pivot
maybe_low += mid - low
maybe_high += high - mid
is_match.append((a, low, mid, high))
# We do not yet know where the pivot_pos is.
pivot_pos = None
if k <= known_low + maybe_low:
pivot_pos = 'right'
elif total_size - known_high - maybe_high < k:
pivot_pos = 'left'
elif k <= known_low + maybe_low + len(is_match) and total_size < k + known_high + maybe_high + len(is_match):
return pivot # WE FOUND IT!
while pivot_pos is None:
# This is very similar to how we processed in_flight.
a, low, high, orig_low, orig_high = to_process.popleft()
mid = (low + high) // 2
if a[mid] < pivot:
# all of low, low+1, ..., mid are below the pivot
maybe_low += mid + 1 - low
if mid < high:
to_process.append((a, mid+1, high, orig_low, orig_high))
else:
no_pivot.append((a, mid+1, high, orig_low, orig_high))
elif pivot < a[mid]:
# all of mid, mid+1, ..., high are above the pivot.
maybe_high += high + 1 - mid
if low < mid:
to_process.append((a, low, mid-1, orig_low, orig_high))
else:
no_pivot.append((a, low, mid-1, orig_low, orig_high))
else:
# mid is at pivot
maybe_low += mid - low
maybe_high += high - mid
is_match.append((a, orig_low, mid, orig_high))
if k <= known_low + maybe_low:
pivot_pos = 'right'
elif total_size - known_high - maybe_high < k:
pivot_pos = 'left'
a, low, high = in_flight.popleft()
mid = (low + high) // 2
if a[mid] < pivot:
# all of low, low+1, ..., mid are below the pivot
maybe_low += mid + 1 - low
if mid < high:
to_process.append((a, mid+1, high, low, high))
else:
no_pivot.append((a, mid+1, high, low, high))
elif pivot < a[mid]:
# all of mid, mid+1, ..., high are above the pivot.
maybe_high += high + 1 - mid
if low < mid:
to_process.append((a, low, mid-1, low, high))
else:
no_pivot.append((a, low, mid-1, low, high))
else:
# mid is at pivot
maybe_low += mid - low
maybe_high += high - mid
is_match.append((a, low, mid, high))
# We do not yet know where the pivot_pos is.
pivot_pos = None
if k <= known_low + maybe_low:
pivot_pos = 'right'
elif total_size - known_high - maybe_high < k:
pivot_pos = 'left'
elif k <= known_low + maybe_low + len(is_match) and total_size < k + known_high + maybe_high + len(is_match):
return pivot # WE FOUND IT!
while pivot_pos is None:
# This is very similar to how we processed in_flight.
a, low, high, orig_low, orig_high = to_process.popleft()
mid = (low + high) // 2
if a[mid] < pivot:
# all of low, low+1, ..., mid are below the pivot
maybe_low += mid + 1 - low
if mid < high:
to_process.append((a, mid+1, high, orig_low, orig_high))
else:
no_pivot.append((a, mid+1, high, orig_low, orig_high))
elif pivot < a[mid]:
# all of mid, mid+1, ..., high are above the pivot.
maybe_high += high + 1 - mid
if low < mid:
to_process.append((a, low, mid-1, orig_low, orig_high))
else:
no_pivot.append((a, low, mid-1, orig_low, orig_high))
else:
# mid is at pivot
maybe_low += mid - low
maybe_high += high - mid
is_match.append((a, orig_low, mid, orig_high))
if k <= known_low + maybe_low:
pivot_pos = 'right'
elif total_size - known_high - maybe_high < k:
pivot_pos = 'left'
elif k <= known_low + maybe_low + len(is_match) and total_size < k + known_high + maybe_high + len(is_match):
return pivot # WE FOUND IT!
elif k <= known_low + maybe_low + len(is_match) and total_size < k + known_high + maybe_high + len(is_match):
return pivot # WE FOUND IT!
# And now place the pivot in the right position.
if pivot_pos == 'right':
known_high += maybe_high + len(is_match)
# And put back the left side of each nonemptied array.
for q in (to_process, no_pivot):
while 0 < len(q):
a, low, high, orig_low, orig_high = q.popleft()
if orig_low <= high:
in_flight.append((a, orig_low, high))
while 0 < len(is_match):
a, low, mid, high = is_match.popleft()
if low < mid:
in_flight.append((a, low, mid-1))
else:
known_low += maybe_low + len(is_match)
# And put back the right side of each nonemptied array.
for q in (to_process, no_pivot):
while 0 < len(q):
a, low, high, orig_low, orig_high = q.popleft()
if low <= orig_high:
in_flight.append((a, low, orig_high))
while 0 < len(is_match):
a, low, mid, high = is_match.popleft()
if mid < high:
in_flight.append((a, mid+1, high))
list1 = [2,3,6,7,9]
list2 = [1,4,8,10]
list3 = [2,3,5,7]
print(list1, list2, list3)
for i in range(1, len(list1) + len(list2) + len(list3)):
print(i, kth_of_sorted(i,[list1, list2, list3]))
sorted(array1+array2+array3)[X]