from pyspark.sql import SparkSession
from pyspark.sql.functions import col
spark = SparkSession.builder.getOrCreate()
simpleData = [("James", "Sales", "NY", 90000, 34, 10000),
("Michael", "Sales", "NY", 86000, 56, 20000),
("Robert", "Sales", "CA", 81000, 30, 23000),
("Maria", "Finance", "CA", 90000, 24, 23000),
("Raman", "Finance", "CA", 99000, 40, 24000),
("Scott", "Finance", "NY", 83000, 36, 19000),
("Jen", "Finance", "NY", 79000, 53, 15000),
("Jeff", "Marketing", "CA", 80000, 25, 18000),
("Kumar", "Marketing", "NY", 91000, 50, 21000)
]
schema = ["employee_name", "department",
"state", "salary", "age", "bonus"]
df = spark.createDataFrame(data=simpleData, schema=schema)
data = df.groupBy("department").count() \
.select(col("department").alias("name"), col("count").alias("value")) \
.toJSON().collect()
print(data)
spark.stop()
When I run the code it gives me a list of JSON strings:
[
'{"name":"Sales","value":3}',
'{"name":"Finance","value":4}',
'{"name":"Marketing","value":2}'
]
but I don't want a list of strings, I want a list of objects that I can send to the frontend:
[
{"name":"Sales","value":3},
{"name":"Finance","value":4},
{"name":"Marketing","value":2}
]
Can anyone help me?
Update: it was suggested in the comments to call .collect() before .toJSON(), but that raises AttributeError: 'list' object has no attribute 'toJSON'.
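Would parsing each string back with Python's json module be the right approach here? A rough sketch of what I have in mind, replacing the data = ... lines above (before spark.stop()); I'm not sure whether this or Row.asDict() is the more idiomatic way:

import json

# Option 1 (sketch): parse each JSON string returned by toJSON() into a dict
json_strings = df.groupBy("department").count() \
    .select(col("department").alias("name"), col("count").alias("value")) \
    .toJSON().collect()
data = [json.loads(s) for s in json_strings]

# Option 2 (sketch): skip the JSON round trip and convert the collected Rows to dicts
grouped = df.groupBy("department").count() \
    .select(col("department").alias("name"), col("count").alias("value"))
data = [row.asDict() for row in grouped.collect()]

print(data)  # e.g. [{'name': 'Sales', 'value': 3}, {'name': 'Finance', 'value': 4}, ...]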