Spark custom sorting (自定义排序) — two approaches to sorting an RDD by a compound key.
Approach 1: make the sort-key class itself extend Ordered.
package day02
import org.apache.spark.{SparkConf, SparkContext}
//sort rule: order by favevalue first; when favevalue is equal, break ties by age
//record layout: (name, favevalue, age, id)
object User_D_Sort {
  /** Entry point: sorts sample records using Girl, which extends Ordered. */
  def main(args: Array[String]): Unit = {
    // Local Spark context with two worker threads.
    val conf = new SparkConf().setAppName("UrlCount").setMaster("local[2]")
    val sc = new SparkContext(conf)

    // Sample (name, favevalue, age, id) tuples.
    val records = List(
      ("yuihatano", 90, 28, 1),
      ("angelababy", 90, 27, 2),
      ("JuJingYi", 95, 22, 3))
    val rdd1 = sc.parallelize(records)

    // Girl extends Ordered, so sortBy can use it directly as the key.
    // Ascending here; pass ascending = false for the reverse order.
    val sorted = rdd1.sortBy(t => Girl(t._2, t._3), ascending = true)
    println(sorted.collect().toBuffer)
  }
}
//Approach 1: the sort key knows how to compare itself
/**
 * Approach 1: the sort key itself defines the ordering by extending Ordered.
 * Orders ascending by favevalue; ties are broken by age in descending order.
 * Serializable so Spark can ship instances to executors.
 */
case class Girl(favevalue: Int, age: Int) extends Ordered[Girl] with Serializable {
  override def compare(that: Girl): Int = {
    // Integer.compare avoids the sign-flip that raw subtraction suffers
    // on overflow (e.g. Int.MinValue - Int.MaxValue wraps to a positive value).
    if (this.favevalue == that.favevalue) {
      // Equal favevalue: older (larger age) sorts first, i.e. age descending.
      Integer.compare(that.age, this.age)
    } else {
      Integer.compare(this.favevalue, that.favevalue)
    }
  }
}
Approach 2: keep the key a plain case class and supply the ordering via an implicit Ordering.
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Approach 2: the ordering lives outside the data class and is picked up
 * implicitly with `import OrderContext._`.
 */
object OrderContext {
  implicit val girlOrdering: Ordering[Girl] = new Ordering[Girl] {
    // Ascending by faceValue; ties broken by age descending (older first
    // under this ordering's "smaller" side, matching the original sign logic).
    // Returns 0 for fully-equal values — the original returned 1 in that case,
    // violating Ordering's contract that compare(x, x) == 0, which can make
    // sorts unstable or incorrect.
    override def compare(x: Girl, y: Girl): Int = {
      val byFace = Integer.compare(x.faceValue, y.faceValue)
      if (byFace != 0) byFace
      else Integer.compare(y.age, x.age)
    }
  }
}
//sort rule: order by faceValue first, then break ties by age
//record layout: (name, faceValue, age, id)
object CustomSort {
  /** Entry point: sorts sample records with the implicit Ordering[Girl] from OrderContext. */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CustomSort").setMaster("local[2]")
    val sc = new SparkContext(conf)

    // Sample (name, faceValue, age, id) tuples.
    val records = List(
      ("yuihatano", 90, 28, 1),
      ("angelababy", 90, 27, 2),
      ("JuJingYi", 95, 22, 3))
    val rdd1 = sc.parallelize(records)

    // Brings girlOrdering into implicit scope for sortBy.
    import OrderContext._
    // Descending: highest faceValue first.
    val sorted = rdd1.sortBy(t => Girl(t._2, t._3), ascending = false)
    println(sorted.collect().toBuffer)

    sc.stop()
  }
}
/**
 * Approach 2: Girl is a plain data holder; its ordering is supplied
 * externally through the implicit Ordering[Girl] in OrderContext.
 *
 * NOTE: the original closed this comment with a bare `/`, which does not
 * terminate a block comment — the class definition was swallowed and the
 * file could not compile. Fixed to `*/`.
 */
case class Girl(faceValue: Int, age: Int) extends Serializable