I have a JSON file that looks like this:
[{"name":"john", "food":"tomato", "weight": 1},
{"name":"john", "food":"carrot", "weight": 4},
{"name":"bill", "food":"apple", "weight": 1},
{"name":"john", "food":"tomato", "weight": 2},
{"name":"bill", "food":"taco", "weight": 2},
{"name":"bill", "food":"taco", "weight": 4}]
I need to create a new JSON document like this:
[
{"name":"john",
"buy": [{"tomato": 3},{"carrot": 4}]
},
{"name":"bill",
"buy": [{"apple": 1},{"taco": 6}]
}
]
This is my DataFrame:
// Sample purchases, one (name, food, weight) tuple per row. Some (name, food)
// pairs appear more than once, so their weights have to be summed later.
// NOTE(review): toDF on a local collection presumably relies on spark.implicits._
// being imported elsewhere — confirm.
val columnNames = Seq("name", "food", "weight")
val df = List(
  ("john", "tomato", 1),
  ("john", "carrot", 4),
  ("bill", "apple", 1),
  ("john", "tomato", 2),
  ("bill", "taco", 2),
  ("bill", "taco", 4)
).toDF(columnNames: _*)
How can I get a DataFrame with the final structure? Using groupBy and agg gives me the wrong structure:
import org.apache.spark.sql.functions._
// Step 1: total the weight for every distinct (name, food) pair.
// Step 2: regroup by name and gather each (food, weight) struct into the "acc" array column.
df.groupBy(col("name"), col("food"))
  .agg(sum(col("weight")).alias("weight"))
  .groupBy(col("name"))
  .agg(collect_list(struct(col("food"), col("weight"))).alias("acc"))
+----+------------------------+
|name|acc |
+----+------------------------+
|john|[[carrot,4], [tomato,3]]|
|bill|[[taco,6], [apple,1]] |
+----+------------------------+
{"name":"john","acc":[{"food":"carrot","weight":4},{"food":"tomato","weight":3}]}
{"name":"bill","acc":[{"food":"taco","weight":6},{"food":"apple","weight":1}]}
Please point me in the right direction to solve this.