// spark-shell transcript: word count over the files under hdfs://hadoop21:9000/wc/,
// followed by caching and checkpointing the resulting RDD.
// `sc` is presumably the SparkContext that spark-shell predefines -- it is not created in this transcript.

// Word count: split every line on a single space, pair each token with 1, then sum the 1s per token.
// The expression is not bound to a name, so the shell echoes it under the auto-generated name res10.
scala> sc.textFile("hdfs://hadoop21:9000/wc/").flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_)
res10: org.apache.spark.rdd.RDD[(String, Int)] = ShuffledRDD[29] at reduceByKey at <console>:25
// Choose the directory for checkpoint data (an HDFS path here); done before checkpoint() is called below.
scala> sc.setCheckpointDir("hdfs://hadoop21:9000/ck0001")
// Same word-count pipeline as above, this time bound to `res` so later commands can refer to it.
scala> val res = sc.textFile("hdfs://hadoop21:9000/wc/").flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_)
res: org.apache.spark.rdd.RDD[(String, Int)] = ShuffledRDD[34] at reduceByKey at <console>:24
// cache() hands back the same RDD (the echo below shows res.type / ShuffledRDD[34]).
// NOTE(review): per the Spark RDD docs, persisting before checkpointing avoids recomputing the
// lineage when the checkpoint file is written -- confirm against the Spark version in use.
scala> res.cache()
res12: res.type = ShuffledRDD[34] at reduceByKey at <console>:24
// checkpoint() echoes no result in this transcript.
// NOTE(review): per the Spark docs it only marks the RDD; the data is written when a job runs on it.
scala> res.checkpoint()
// First action on `res` in this transcript; the result comes back as a local Array of (word, count) pairs.
scala> res.collect
res14: Array[(String, Int)] = Array((tom,11), (hello,22), (jerry,6), (kitty,1), (hanmeimei,2), (lilei,2))
网友评论