See if this helps.
The solution is to flatten the inner arrays and then filter with the org.apache.spark.sql.functions.array_contains function.
If you are using Spark 2.4+, you can use the built-in function org.apache.spark.sql.functions.flatten instead of the UDF shown in the solution below (the UDF approach is for Spark 2.3).
// Sample data: a single column "lvl_1" where every row is an array of
// (label, numbers) pairs, i.e. array<struct<_1:string,_2:array<int>>>.
// NOTE: toDF presumably relies on the session implicits being in scope
// (as they are in spark-shell) -- confirm when running elsewhere.
val df = {
  val firstRow = Seq(
    "a" -> Seq(2, 4, 6, 8, 10, 12),
    "b" -> Seq(3, 6, 9, 12),
    "c" -> Seq(1, 2, 3, 4)
  )
  val secondRow = Seq(
    "e" -> Seq(4, 8, 12),
    "f" -> Seq(1, 3, 6),
    "g" -> Seq(3, 4, 5, 6)
  )
  Seq(firstRow, secondRow).toDF("lvl_1")
}
df: org.apache.spark.sql.DataFrame = [lvl_1: array<struct<_1:string,_2:array<int>>>]
scala> df.show(false)
+------------------------------------------------------------------+
|lvl_1 |
+------------------------------------------------------------------+
|[[a, [2, 4, 6, 8, 10, 12]], [b, [3, 6, 9, 12]], [c, [1, 2, 3, 4]]]|
|[[e, [4, 8, 12]], [f, [1, 3, 6]], [g, [3, 4, 5, 6]]] |
+------------------------------------------------------------------+
scala> def flattenSeqOfSeq[S](x:Seq[Seq[S]]): Seq[S] = { x.flatten }
flattenSeqOfSeq: [S](x: Seq[Seq[S]])Seq[S]
scala> val myUdf = udf { flattenSeqOfSeq[Int] _}
myUdf: org.apache.spark.sql.expressions.UserDefinedFunction = UserDefinedFunction(<function1>,ArrayType(IntegerType,false),Some(List(ArrayType(ArrayType(IntegerType,false),true))))
scala> df.withColumn("flattnedinnerarrays", myUdf($"lvl_1".apply("_2")))
res66: org.apache.spark.sql.DataFrame = [lvl_1: array<struct<_1:string,_2:array<int>>>, flattnedinnerarrays: array<int>]
scala> res66.show(false)
+------------------------------------------------------------------+---------------------------------------------+
|lvl_1 |flattnedinnerarrays |
+------------------------------------------------------------------+---------------------------------------------+
|[[a, [2, 4, 6, 8, 10, 12]], [b, [3, 6, 9, 12]], [c, [1, 2, 3, 4]]]|[2, 4, 6, 8, 10, 12, 3, 6, 9, 12, 1, 2, 3, 4]|
|[[e, [4, 8, 12]], [f, [1, 3, 6]], [g, [3, 4, 5, 6]]] |[4, 8, 12, 1, 3, 6, 3, 4, 5, 6] |
+------------------------------------------------------------------+---------------------------------------------+
scala> res66.filter(array_contains($"flattnedinnerarrays", 10)).show(false)
+------------------------------------------------------------------+---------------------------------------------+
|lvl_1 |flattnedinnerarrays |
+------------------------------------------------------------------+---------------------------------------------+
|[[a, [2, 4, 6, 8, 10, 12]], [b, [3, 6, 9, 12]], [c, [1, 2, 3, 4]]]|[2, 4, 6, 8, 10, 12, 3, 6, 9, 12, 1, 2, 3, 4]|
+------------------------------------------------------------------+---------------------------------------------+
scala> res66.filter(array_contains($"flattnedinnerarrays", 3)).show(false)
+------------------------------------------------------------------+---------------------------------------------+
|lvl_1 |flattnedinnerarrays |
+------------------------------------------------------------------+---------------------------------------------+
|[[a, [2, 4, 6, 8, 10, 12]], [b, [3, 6, 9, 12]], [c, [1, 2, 3, 4]]]|[2, 4, 6, 8, 10, 12, 3, 6, 9, 12, 1, 2, 3, 4]|
|[[e, [4, 8, 12]], [f, [1, 3, 6]], [g, [3, 4, 5, 6]]] |[4, 8, 12, 1, 3, 6, 3, 4, 5, 6] |
+------------------------------------------------------------------+---------------------------------------------+