本文共 959 字,大约阅读时间需要 3 分钟。
注意点
使用 foreachPartition:每个分区只创建一次 JDBC 连接,避免了直接遍历 RDD(foreach)时为每条记录都创建一次连接的开销
使用批量插入(addBatch / executeBatch)代替逐条插入(executeUpdate),减少与数据库的交互次数
import java.sql.DriverManager

import scala.util.control.NonFatal

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Example Spark job that writes the elements of an RDD into a MySQL table
 * over JDBC, using one connection per partition and a single batched insert
 * per partition.
 */
object Driver {

  /**
   * Entry point: builds a local SparkContext (`local[2]`), parallelizes the
   * integers 1 to 4 and inserts each of them into table `dr`.
   *
   * Any non-fatal failure is reported by printing its message; the
   * SparkContext is stopped in all cases.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.WARN)
    val conf = new SparkConf().setMaster("local[2]").setAppName("Driver")
    val ssc = new SparkContext(conf)
    try {
      val rdd = ssc.parallelize(List(1, 2, 3, 4))
      rdd.foreachPartition { itor =>
        // Skip empty partitions so no connection is opened for nothing.
        if (itor.hasNext) {
          val url = "jdbc:mysql://hadoop:3306/driver"
          // The connection is created inside the partition closure: one
          // connection per partition rather than one per record.
          val con = DriverManager.getConnection(url, "root", "1234")
          try {
            val sql = "insert into dr values(?)"
            val driver = con.prepareStatement(sql)
            try {
              itor.foreach { x =>
                driver.setInt(1, x)
                // Queue the row; the whole batch is sent below instead of
                // calling executeUpdate() once per row.
                driver.addBatch()
              }
              driver.executeBatch()
            } finally {
              // Close the statement even if binding or the batch fails.
              driver.close()
            }
          } finally {
            // Close the connection even if preparing or executing fails.
            con.close()
          }
        }
      }
    } catch {
      // NonFatal lets fatal JVM errors (e.g. OutOfMemoryError) propagate.
      case NonFatal(exception) => println(exception.getMessage)
    } finally {
      // Stop the context whether or not the job succeeded.
      ssc.stop()
    }
  }
}
转载地址:http://gxazi.baihongyu.com/