Scala 写法：使用 Spark RDD 算子统计词频并取出现次数最多的前 3 个单词
/**
 * Reads a text file, counts word frequencies with Spark RDD operators,
 * and prints the three most frequent (word, count) pairs.
 *
 * Fixes over the original:
 *  - `reduceByKey(_ + _)` replaces `groupByKey().mapValues(_.size)`:
 *    same result, but values are pre-aggregated on the map side instead of
 *    shuffling every single (word, 1) pair across the network.
 *  - `sc.stop()` in a `finally` block so the SparkContext is released
 *    even when the job throws.
 */
object Top3GroupBykey {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("Top3")
    val sc = new SparkContext(conf)
    try {
      // NOTE(review): hard-coded local path — assumes a Windows dev machine.
      val lines: RDD[String] = sc.textFile("D:\\code\\Spark\\input\\1.txt")
      val words: RDD[String] = lines.flatMap(_.split(" "))
      val pairs: RDD[(String, Int)] = words.map((_, 1))
      // Map-side combine before the shuffle; equivalent to counting group sizes.
      val counts: RDD[(String, Int)] = pairs.reduceByKey(_ + _)
      // Descending by count, then take the top three.
      val result: Array[(String, Int)] = counts.sortBy(_._2, ascending = false).take(3)
      result.foreach(println)
    } finally {
      sc.stop()
    }
  }
}
网友评论