
Using Spark's basic operator flatMap

flatMap applies a function that returns a collection to each element and flattens the results into a single RDD, whereas map keeps the nested structure. The demo below contrasts the two, then flattens progressively deeper nestings.
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object RDD算子_flatmap {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local[*]")
    conf.setAppName("flatMap-demo")
    val sc = new SparkContext(conf)
    val rdd: RDD[String] = sc.parallelize(Seq("a b c", "b b c", "c c p"))
    // map keeps the nesting: each line becomes one Array[String]
    val rdd1: RDD[Array[String]] = rdd.map(s => s.split("\\s+"))
    rdd1.foreach(arr => println(arr.mkString("[", ",", "]")))
    println("-------------- divider -------------")

    // flatMap flattens the per-line arrays into a single RDD of words
    val rdd2: RDD[String] = rdd.flatMap(s => s.split("\\s+"))
    rdd2.foreach(println)
    println("-------------- divider -------------")
    val rdd3: RDD[Array[Int]] = sc.parallelize(Seq(
      Array(1, 2, 3),
      Array(3, 4, 5),
      Array(2, 6, 8)))
    // flatten: the identity function exposes each array's elements
    val rdd4: RDD[Int] = rdd3.flatMap(arr => arr)
    rdd4.foreach(println)
    println("-------------- divider -------------")
    // two levels of nesting take two flatMap calls
    val rdd5: RDD[List[Array[Int]]] = sc.parallelize(Seq(
      List(Array(1, 2), Array(2, 3)),
      List(Array(2, 2), Array(4, 3)),
      List(Array(10, 2), Array(2, 30)),
      List(Array(51, 2), Array(6, 3))))
    val rdd6: RDD[Array[Int]] = rdd5.flatMap(lst => lst) // RDD[List[Array[Int]]] -> RDD[Array[Int]]
    val rdd7: RDD[Int] = rdd6.flatMap(arr => arr)        // RDD[Array[Int]] -> RDD[Int]
    rdd7.foreach(println)
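    // Aside (my addition, not in the original post): the two flatMap calls
    // above can be fused, since flattening each List[Array[Int]] yields the
    // inner Ints directly.
    val rdd7b: RDD[Int] = rdd5.flatMap(lst => lst.flatten)
    rdd7b.foreach(println)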
println("--------------华丽的分割线-------------")
    // flatMap also flattens Lists of Maps into an RDD of Maps
    val rdd8 = sc.parallelize(Seq(
      List(Map("a" -> 1, "b" -> 2), Map("c" -> 3)),
      List(Map("d" -> 6, "c" -> 4), Map("x" -> 2))))
    val rdd9: RDD[Map[String, Int]] = rdd8.flatMap(lst => lst)
    rdd9.foreach(println)
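    // Aside (my addition, not in the original post): a Map is an Iterable of
    // key/value pairs, so one more flatMap turns the RDD of Maps into an RDD
    // of (String, Int) tuples.
    val rdd10: RDD[(String, Int)] = rdd9.flatMap(m => m)
    rdd10.foreach(println)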
    sc.stop()
  }
}
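
As a follow-up, flatMap is the first step of the classic word count: split lines into words, pair each word with a 1, and sum per key. Below is a minimal sketch under the same local setup; the object name and sample lines are my own, not from the original post.

import org.apache.spark.{SparkConf, SparkContext}

object FlatMapWordCount {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[*]").setAppName("wordcount-sketch"))
    val counts = sc.parallelize(Seq("a b c", "b b c", "c c p"))
      .flatMap(_.split("\\s+")) // flatten lines into words
      .map(word => (word, 1))   // pair each word with a count of 1
      .reduceByKey(_ + _)       // sum the counts per word
    counts.foreach(println)
    sc.stop()
  }
}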