Structured Streaming 在同一个进程中将数据写入 MySQL 的两张表时，第二张表的写入出现阻塞

本文介绍了一种使用 Spark Structured Streaming 从 Socket 接收数据，并将其转换为 Weblog 对象进行处理的方法。通过对数据分组并计数，实现了按时间和搜索名称的统计，最后将结果写入 MySQL 数据库。展示了实时数据处理与数据库集成的技术细节。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

/**
 * Structured Streaming job that reads comma-separated text lines from a TCP
 * socket, turns each line into a `Weblog`, and keeps two running counts
 * (grouped by `datetime` and by `searchname`). Each count is written through
 * its own `JDBCSink` in its own streaming query.
 */
object SocketToMysql {

  /**
   * Builds the session, starts both streaming queries and blocks until one of
   * them terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[4]")
      .appName("ToMysql")
      .getOrCreate()
    import spark.implicits._

    // NOTE(review): the reported symptom ("the second table is never written")
    // is probably not caused by start() blocking -- start() returns immediately
    // and both queries below are launched. Each started query opens its own
    // connection to the socket source; if the server behind host:port serves
    // only one client at a time (e.g. `nc -lk`), the other query receives no
    // data. Confirm against the actual server; a source that supports several
    // independent readers avoids the problem.
    val lines = spark.readStream
      .format("socket")
      .option("host", "x.x.x.x")
      .option("port", 9999)
      .load()
      .selectExpr("CAST(value as String)")
      .as[String]

    // limit = -1 keeps trailing empty fields; lines with fewer than six fields
    // are dropped instead of failing the query with an index-out-of-bounds error.
    val words = lines
      .map(line => line.split(",", -1))
      .filter(fields => fields.length >= 6)
      .map(x => Weblog(x(0), x(1), x(2), x(3), x(4), x(5)))

    // NOTE(review): connection settings and credentials are hard-coded;
    // consider reading them from configuration.
    val url = "jdbc:mysql://bigdata-pro04.mysql.com:3306/test"
    val user = "root"
    val password = "123456"

    // Query 1: running count per datetime.
    val timeCounts = words.groupBy("datetime").count().toDF("value", "count")
    val timeSink = new JDBCSink(url, user, password, "test", "timecount", "time", "count")
    timeCounts.writeStream
      .foreach(timeSink)
      .outputMode("update")
      .start()

    // Query 2: running count per searchname, triggered every 10 seconds.
    val searchCounts = words.groupBy("searchname").count().toDF("value", "count")
    val searchSink =
      new JDBCSink(url, user, password, "test", "searchNameCount", "searchName", "count")
    searchCounts.writeStream
      .foreach(searchSink)
      .outputMode("update")
      .trigger(ProcessingTime("10 seconds"))
      .start()

    // Wait on all active queries at once: a failure in either query is
    // reported here instead of being hidden behind a wait on the other one.
    spark.streams.awaitAnyTermination()
  }
}

控制台打印的日志：