Python3, Score: 762/50 = 15.24 in ~30.51 seconds
import collections, re, random
def test_data():
with open('inp_d.txt') as f:
while True:
l1 = [j.strip('\n') for _ in range(10) if (j:=next(f, None))]
l2 = [j.strip('\n') for _ in range(10) if (j:=next(f, None))]
if len(l1) < 10: return
yield l1, l2
if len(l2) < 10: return
def substrings(l1, l2):
C = collections.defaultdict(set)
C1 = collections.defaultdict(set)
for I, l in enumerate(l1):
for i in range(len(l)):
for j in range(i+1, len(l)):
L = l[i:j]
if all(L not in k for k in l2):
C[j-i].add(L)
C1[L].add(I)
return {a:sorted(b) for a,b in C.items()}, {a:sorted(b) for a, b in C1.items()}
def to_regexp(k):
return ['', '['+''.join(k[0])+']'][k[0]!=tuple()]+k[1]+['', '['+''.join(k[2])+']'][k[2]!=tuple()]
def full_regexp(regexp):
return regexp[0] if len(regexp) == 1 else '('+'|'.join(regexp)+')'
def merges(d, d1, depth = 2):
for D in range(1, depth + 1):
b = d[D]
results = collections.defaultdict(dict)
for x in range(D+1):
for y in range(x+1, D+1):
for i, a in enumerate(b):
if a[x:y] not in results[(x, y)]:
results[(x, y)][a[x:y]] = [i]
else:
results[(x, y)][a[x:y]].append(i)
for (x, y), vals in results.items():
for base, options in vals.items():
for i, a in enumerate(options):
queue = [(options[i+1:], {*b[a][:x]}, base, {*b[a][y:]}, d1[b[a]])]
while queue:
r_options, l, base, r, o = queue.pop(0)
yield (tuple(sorted(filter(None, l))), base, tuple(sorted(filter(None, r)))), o
if r_options:
queue.append((r_options[1:], l, base, r, o))
n_b = b[r_options[0]]
if any(j not in o for j in d1[n_b]):
queue.append((r_options[1:], {*l, n_b[:x]}, base, {*r, n_b[y:]}, {*o, *d1[n_b]}))
def solutions(l1, l2, depth = 2):
d, d1 = substrings(l1, l2)
merge = dict(merges(d, d1))
scores = collections.defaultdict(dict)
for a, b in merge.items():
T = to_regexp(a)
if tuple(b) not in scores[10 - len(b)]:
scores[10 - len(b)][tuple(b)] = [T]
else:
scores[10 - len(b)][tuple(b)].append(T)
new_scores = {}
vals = []
for a, b in scores.items():
subscores = {}
for j, k in b.items():
subscores[j] = min(k, key=len)
vals.append((j, min(k, key=len)))
new_scores[a] = subscores
def contains(j, k, nj, nk):
if nj == nk:
return False
return all(J in nj for J in j) and len(k) > len(nk)
scores = {a:{j:k for j, k in b.items() if not any(contains(j, k, nj, nk) for nj, nk in vals)} for a, b in new_scores.items()}
r_score, r_regexp = None, None
queue = [(score, {*vals}, [regexp]) for score, container in sorted(scores.items(), key=lambda x:x[0]) for vals, regexp in container.items()]
seen = []
while queue:
score, vals, regexp = queue.pop(0)
if r_score is not None:
if len(full_regexp(regexp)) >= r_score:
continue
if not score:
if r_score is None:
r_score, r_regexp = len(T:=full_regexp(regexp)), T
elif len(T:=full_regexp(regexp)) < r_score:
r_score, r_regexp = len(T), T
continue
remainder_vals = {*range(10)} - vals
full_options = []
for score, options in scores.items():
for _vals, r_o in options.items():
if any(i in remainder_vals for i in _vals) and len(r_o) <= len(regexp[-1]) and tuple(P:=sorted(regexp + [r_o])) not in seen:
if r_score is None or len(full_regexp(regexp + [r_o])) < r_score:
full_options.append(({*_vals} - {*vals}, _vals, r_o))
for _, _vals, r_o in sorted(full_options, key=lambda x:len(x[0]), reverse=True)[:5]:
seen.append(tuple(P:=sorted(regexp + [r_o])))
queue.append((len(remainder_vals - {*_vals}), {*vals, *_vals}, regexp + [r_o]))
return r_score + 4, '.*'+r_regexp+'.*'
if __name__ == '__main__':
import time
T = time.time()
s, c = 0, 0
for l1, l2 in test_data():
a, b = solutions(l1, l2)
s += a
c += 1
print(b)
print(time.time() - T)
print(s, c, s/c)
Rather long solution, but more optimized for speed and regexp length minimization.