import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import lit

# Build (or reuse) the SparkSession for this example application.
spark = SparkSession.builder.appName('sparkdf').getOrCreate()
# First DataFrame: five rows of (ID, NAME, Address).
data = [
    ["1", "sravan", "kakumanu"],
    ["2", "ojaswi", "hyd"],
    ["3", "rohith", "delhi"],
    ["4", "sridevi", "kakumanu"],
    ["5", "bobby", "guntur"],
]
columns = ['ID', 'NAME', 'Address']
dataframe1 = spark.createDataFrame(data, columns)
# Second DataFrame: three rows of (ID, Age). Note that Age is an int,
# unlike the all-string columns of the first dataset.
# `data`/`columns` are deliberately rebound; dataframe1 is already built.
data = [
    ["1", 23],
    ["2", 21],
    ["3", 32],
]
columns = ['ID', 'Age']
dataframe2 = spark.createDataFrame(data, columns)
# Columns present in dataframe2 but absent from dataframe1, in
# dataframe2's column order. Each is added to dataframe1 as a null
# column so both frames end up with the same set of columns.
missing_in_df1 = [c for c in dataframe2.columns if c not in dataframe1.columns]
for col_name in missing_in_df1:
    dataframe1 = dataframe1.withColumn(col_name, lit(None))
# Mirror step: add dataframe1-only columns to dataframe2 as nulls,
# preserving dataframe1's column order for the additions.
missing_in_df2 = [c for c in dataframe1.columns if c not in dataframe2.columns]
for col_name in missing_in_df2:
    dataframe2 = dataframe2.withColumn(col_name, lit(None))
# Show that both DataFrames now expose the same set of columns
# (ordering may differ: each frame appends its missing columns last).
print(dataframe1.columns)
print(dataframe2.columns)