/**
 * Returns an RDD holding each distinct element of this RDD once.
 *
 * Every element is paired with a `null` placeholder value, the pairs are
 * reduced by key (always keeping the first value seen), and the keys are
 * then projected back out.
 *
 * @param numPartitions forwarded to `reduceByKey` as its partition count
 * @param ord           not referenced explicitly in this body; presumably picked up
 *                      implicitly by the pair-RDD operations below (TODO confirm)
 * @return an RDD built from the keys that remain after the reduction
 */
def distinct(numPartitions: Int)(implicit ord: Ordering[T] = null): RDD[T] = withScope {
  // Use the element itself as the key; the value carries no information.
  val keyed = map(elem => (elem, null))
  // Collapse all pairs sharing a key into one, discarding the second value.
  val deduplicated = keyed.reduceByKey((kept, _) => kept, numPartitions)
  deduplicated.map { case (elem, _) => elem }
}
一个分区也就是一个 task
// Example driver: build a local SparkContext, de-duplicate a small list of
// integers with RDD.distinct(), and print the surviving values.
val conf: SparkConf = new SparkConf().setAppName("Trans").setMaster("local[*]")
val sc = new SparkContext(conf)
val numRDD: RDD[Int] = sc.makeRDD(List(1,2,1,1,1,1,2,2,23,3,4,5,6,6,7))
val distinctRDD: RDD[Int] = numRDD.distinct()
try {
  // collect() brings the results back to the driver before printing them.
  distinctRDD.collect().foreach(println)
} finally {
  // Always release the context, even if the job above fails.
  sc.stop()
}
4
1
5
6
2
23
3
7
网友评论