解决加载的数据乱码
object test {
  /**
   * Word-count example that loads a GBK-encoded text file without mojibake.
   *
   * `sc.textFile` decodes bytes as UTF-8, which garbles GBK input; instead we
   * read raw `Text` records via `hadoopFile` and decode the bytes as GBK
   * ourselves before running the usual word count.
   */
  def main(args: Array[String]): Unit = {
    // NOTE(review): original code declared `inputpath` twice (a compile error);
    // kept the concrete path. Adjust for your environment.
    val inputPath = "/input/123"

    val sparkConf: SparkConf = new SparkConf()
      .setMaster("local[2]")
      .setAppName(test.getClass.getSimpleName)
    val sc: SparkContext = new SparkContext(sparkConf)

    try {
      // Decode each record's raw bytes as GBK to fix the garbled-character issue.
      // `Text.getBytes` may return a buffer longer than the content, so the
      // length from `getLength` must be honored.
      val data: RDD[String] = sc
        .hadoopFile(inputPath, classOf[TextInputFormat], classOf[LongWritable], classOf[Text])
        .map { case (_, text) => new String(text.getBytes, 0, text.getLength, "GBK") }

      // Classic word count: split on tabs, pair each token with 1, sum per key.
      val words: RDD[String] = data.flatMap(_.split("\t"))
      val pairs: RDD[(String, Int)] = words.map((_, 1))
      val result: RDD[(String, Int)] = pairs.reduceByKey(_ + _)
      result.foreach(println)
    } finally {
      // Always release the SparkContext, even when the job fails.
      sc.stop()
    }
  }
}
网友评论