from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
# Explicit schema for the person rows: two scalar columns plus a nested
# "address" struct, so column types are declared rather than inferred.
schema = StructType(
    [
        StructField("name", StringType()),
        StructField("age", IntegerType()),
        StructField(
            "address",
            StructType(
                [
                    StructField("street", StringType()),
                    StructField("city", StringType()),
                    # zip is declared as an integer column here.
                    StructField("zip", IntegerType()),
                ]
            ),
        ),
    ]
)
# Sample rows as (name, age, address) tuples, in the same order as the
# top-level schema fields. Each address is a dict whose keys match the
# nested struct's field names: "street", "city" and "zip".
data = [
    (
        "Alice",
        25,
        {"street": "Main St", "city": "Anytown", "zip": 12345},
    ),
    (
        "Bob",
        30,
        {"street": "Park Ave", "city": "New York", "zip": 56789},
    ),
]
# `spark` is not defined anywhere in this file. getOrCreate() returns the
# already-running session when there is one (pyspark shell, notebook) and
# otherwise starts a new one, so the script also works when run standalone.
spark = SparkSession.builder.getOrCreate()

# Build the DataFrame from the sample rows using the explicit schema.
df = spark.createDataFrame(data, schema)

# Print the rows, then the schema tree (including the nested address struct).
df.show()
df.printSchema()