1.集合操作练习
//创建一个List
val lst0 = List(1,7,9,8,0,3,5,4,6,2)
//将lst0中每个元素乘以10后生成一个新的集合
lst0.map(x => x*10 ) OR lst0.map(_*10 )
//将lst0中的偶数取出来生成一个新的集合
lst0.filter(x => x%2==0)
lst0.filter(x => x%2==0).map(_*5)
//将lst0排序后生成一个新的集合
lst0.sorted
lst0.sortBy(x => x)
lst0.sortWith((x,y) => x<y) OR lst0.sortWith(_ < _)
//反转顺序
lst0.reverse
//将lst0中的元素4个一组,类型为Iterator[List[Int]]
lst0.grouped(4)
res5: Iterator[List[Int]] = non-empty iterator
//将Iterator转换成List
lst0.grouped(4).toList
res6: List[List[Int]] = List(List(1, 7, 9, 8), List(0, 3, 5, 4), List(6, 2))
//将多个list压扁成一个List
lst0.grouped(4).toList.flatten
res7: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2)
//遍历数组
scala> val arr = Array(1,2,3,4,5)
scala> for(i <-arr)println(i)
1
2
3
4
5
//map:遍历数组,返回值放到新的数组
scala> arr.map(x => println(x))//println方法转成函数
1
2
3
4
5
res5: Array[Unit] = Array((), (), (), (), ())
//foreach:遍历数组,返回值不放到新的数组
scala> arr.foreach(x=>println(x))
1
2
3
4
5
2.聚合
scala> val arr = List(List(1, 2, 3), List(3, 4, 5), List(2), List(0))
scala> arr.aggregate(0)(_+_.sum,_+_)
res14: Int = 20
scala> arr.aggregate(10)(_+_.sum,_+_)
res16: Int = 30
//par并行聚合时,初始值10会在每个分区各使用一次;此处4个元素被分成4个分区,所以结果是10+10+10+10+20=60(具体结果取决于分区数,并不固定)
scala> arr.par.aggregate(10)(_+_.sum,_+_)
res17: Int = 60
val l1 = List(5,6,4,7)
val l2 = List(1,2,3,4)
//求并集(注意:List的union只是把两个集合拼接起来,不会去重,所以下面结果中4出现了两次;需要去重时再调用distinct)
scala> l1.union(l2)
res11: List[Int] = List(5, 6, 4, 7, 1, 2, 3, 4)
scala> l1 union l2
res10: List[Int] = List(5, 6, 4, 7, 1, 2, 3, 4)
//求交集
scala> l1.intersect(l2)
res12: List[Int] = List(4)
//求差集
scala> l1.diff(l2)
res13: List[Int] = List(5, 6, 7)
3.单机版WordCount
scala> val lines = List("hello tom hello jerry", "hello jerry", "hello kitty", "hello tom hello")
//先按空格切分,再压平
scala> lines.map(_.split(" "))
res9: List[Array[String]] = List(Array(hello, tom, hello, jerry), Array(hello, jerry), Array(hello, kitty), Array(hello, tom, hello))
scala> res9.flatten
res10: List[String] = List(hello, tom, hello, jerry, hello, jerry, hello, kitty, hello, tom, hello)
//map和flatten操作一气呵成flatMap
scala> lines.flatMap(_.split(" "))
res11: List[String] = List(hello, tom, hello, jerry, hello, jerry, hello, kitty, hello, tom, hello)
//按照(word,1)形式构建元组
scala> res11.map((_,1))
res12: List[(String, Int)] = List((hello,1), (tom,1), (hello,1), (jerry,1), (hello,1), (jerry,1), (hello,1), (kitty,1), (hello,1), (tom,1), (hello,1))
//按照单词groupBy
scala> res12.groupBy(_._1)
res13: scala.collection.immutable.Map[String,List[(String, Int)]] = Map(tom -> List((tom,1), (tom,1)), kitty -> List((kitty,1)), jerry -> List((jerry,1), (jerry,1)), hello -> List((hello,1), (hello,1), (hello,1), (hello,1), (hello,1), (hello,1)))
scala> res13.map(_._1)
res14: scala.collection.immutable.Iterable[String] = List(tom, kitty, jerry, hello)
//错误
scala> res13.map(_._1,_._2.size)
<console>:12: error: too many arguments for method map: (f: ((String, List[(String, Int)])) => B)(implicit bf: scala.collection.generic.CanBuildFrom[scala.collection.immutable.Map[String,List[(String, Int)]],B,That])That
res13.map(_._1,_._2.size)
//正确
scala> res13.map(t => (t._1,t._2.size))
res16: scala.collection.immutable.Map[String,Int] = Map(tom -> 2, kitty -> 1, jerry -> 2, hello -> 6)
//错误,map没有sortBy方法
scala> res16.sortBy(_._2)
<console>:13: error: value sortBy is not a member of scala.collection.immutable.Map[String,Int]
res16.sortBy(_._2)
//先转成List
scala> res16.toList.sortBy(_._2)
res18: List[(String, Int)] = List((kitty,1), (tom,2), (jerry,2), (hello,6))
scala> res16.toList.sortBy(_._2).reverse
res19: List[(String, Int)] = List((hello,6), (jerry,2), (tom,2), (kitty,1))
4.mapValues对取出来的每一对K-V的V进行操作
scala> val lines = List("hello tom hello jerry", "hello jerry", "hello kitty")
scala> lines.flatMap(_.split(" " )).map((_,1)).groupBy(_._1).mapValues(_.size)
res22: scala.collection.immutable.Map[String,Int] = Map(tom -> 1, kitty -> 1, jerry -> 2, hello -> 4)
问题:如果对数据先做局部聚合、再做整体汇总,那么每一个元组里面的V就不再是1,此时对分组后的列表取size得到的结果是不对的,应该对每一个V求和(见下面的foldLeft写法)。
scala> val a = Array(1,2,3,4,5,6)
a: Array[Int] = Array(1, 2, 3, 4, 5, 6)
scala> a.sum
res23: Int = 21
//整体汇总
//实际上调用的是reduceLeft
scala> a.reduce(_ + _)
res24: Int = 21
//并行支持
scala> a.par
res25: scala.collection.parallel.mutable.ParArray[Int] = ParArray(1, 2, 3, 4, 5, 6)
scala> a.par.reduce(_ + _)
res26: Int = 21
//折叠:有初始值(无特定顺序)
//fold(default value)(_ + _)
scala> a.fold(10)(_ + _)
res27: Int = 31
//双核四线程的电脑环境,并行计算时初始值10会在每个分区各加一次,结果取决于分区数(不固定);此处结果51 = 21 + 10 * 3,即数据被分成了3个分区
scala> a.par.fold(10)(_ + _)
res28: Int = 51
//折叠:有初始值(有特定顺序)
//foldLeft()(_ + _)
//foldRight()(_ + _)
scala> lines.flatMap(_.split(" " )).map((_,1)).groupBy(_._1)
res29: scala.collection.immutable.Map[String,List[(String, Int)]] = Map(tom -> List((tom,1)), kitty -> List((kitty,1)), jerry -> List((jerry,1), (jerry,1)), hello -> List((hello,1), (hello,1), (hello,1), (hello,1)))
scala> res29.mapValues(_.foldLeft(0)(_ + _._2))
res31: scala.collection.immutable.Map[String,Int] = Map(tom -> 1, kitty -> 1, jerry -> 2, hello -> 4)
网友评论