My objective is to implement a reasonably-efficient algorithm.
Let's first simplify and rearrange the array somewhat.
grapes = [{ part:"toto", grape: "Cabernet" },
{ part:"tutu", grape: "Merlot" },
{ part:"titi", grape: "Cabernet Sauvignon" }]
We can then do the following to obtain the desired array in a reasonably-efficient manner.
grapes.each_with_index.
sort_by { |g,_i| -g[:grape].size }.
each_with_object([]) { |(g,i),a| a << [g,i] unless a.any? { |f,_i|
f[:grape].include?(g[:grape]) } }.
sort_by(&:last).
map(&:first)
#=> [{:part=>"tutu", :grape=>"Merlot"},
# {:part=>"titi", :grape=>"Cabernet Sauvignon"}]
The steps are as follows.
Add an index to each hash so that their original order in grapes can be determined later.
e = grapes.each_with_index
#=> #<Enumerator: [{:part=>"toto", :grape=>"Cabernet"},
# {:part=>"tutu", :grape=>"Merlot"},
# {:part=>"titi", :grape=>"Cabernet Sauvignon"}]:each_with_index>
Sort the hash/index pairs by decreasing size of g[:grape].
b = e.sort_by { |g,_i| -g[:grape].size }
#=> [[{:part=>"titi", :grape=>"Cabernet Sauvignon"}, 2],
# [{:part=>"toto", :grape=>"Cabernet"}, 0],
# [{:part=>"tutu", :grape=>"Merlot"}, 1]]
Add each hash/index pair [g,i] to an initially-empty array a, unless f[:grape] includes g[:grape] for a hash f already in a.
c = b.each_with_object([]) { |(g,i),a| a << [g,i] unless a.any? { |f,_i|
f[:grape].include?(g[:grape]) } }
#=> [[{:part=>"titi", :grape=>"Cabernet Sauvignon"}, 2],
# [{:part=>"tutu", :grape=>"Merlot"}, 1]]
To obtain the desired order of the hashes in c, sort them by their indexes in the original array grapes (which has no effect for this example).
d = c.sort_by(&:last)
#=> [[{:part=>"tutu", :grape=>"Merlot"}, 1],
# [{:part=>"titi", :grape=>"Cabernet Sauvignon"}, 2]]
Remove the indices.
d.map(&:first)
#=> [{:part=>"tutu", :grape=>"Merlot"},
# {:part=>"titi", :grape=>"Cabernet Sauvignon"}]
Depending on requirements, it may be preferable to replace f[:grape].include?(g[:grape]) with f[:grape].begin_with?(g[:grape]) || f[:grape].end_with?(g[:grape]).
A simple benchmark comparing @Max' solution with mine follows.
def max_way(grapes_matched)
grapes_matched.select do |grape_matched|
grapes_matched.none? { |gm| gm[:grape] != grape_matched[:grape] &&
grape_matched[:grape].include?(gm[:grape]) }
end
end
def cary_way(grapes)
grapes.each_with_index.
sort_by { |g,_i| -g[:grape].size }.
each_with_object([]) { |(g,i),a| a << [g,i] unless a.any? { |f,_i|
f[:grape].include?(g[:grape]) } }.
sort_by(&:last).
map(&:first)
end
ALPHA = ('a'..'z').to_a
def rnd5
' '.gsub(' ') { ALPHA.sample }
end
def grapes(n, m)
n.times.each_with_object([]) do |i,a|
s1, s2 = rnd5, rnd5
a << { grape: "%s %s" % [s1, s2] }
a << { grape: i.even? ? s1 : s2 } if i < m
end.shuffle
end
require 'fruity'
def bench(n, m)
(grapes_matched = grapes(n, m)).size
compare do
Max { max_way(grapes_matched) }
Cary { cary_way(grapes_matched) }
end
end
bench 95, 5
Running each test once. Test will take about 1 second.
Cary is faster than Max by 3x ± 1.0
bench 950, 50
Running each test once. Test will take about 13 seconds.
Cary is faster than Max by 3x ± 1.0
bench 950, 500
Running each test once. Test will take about 23 seconds.
Cary is faster than Max by 4x ± 0.1