Implementing the FP-Growth algorithm in code:
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.fpm.FPGrowth
object FP {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("FP") // set the application name
    val sc = new SparkContext(conf)                                // create the Spark context
    val data_path = "D:\\a\\b\\c\\abc.txt"                        // one transaction per line, items separated by spaces
    val data = sc.textFile(data_path)
    val examples = data.map(_.split(" ")).cache()                  // split each line into an array of items; cache, since FP-Growth scans the data more than once
    val minSupport = 0.2                                           // an itemset must appear in at least 20% of the transactions
    val model = new FPGrowth().setMinSupport(minSupport).run(examples) // build the FP-Growth model

    // Print the number of frequent itemsets found
    println(s"Number of frequent itemsets: ${model.freqItemsets.count()}")

    // Print every association rule that meets the minimum confidence (0.8),
    // together with its confidence
    model.generateAssociationRules(0.8).collect().foreach { rule =>
      println("[" + rule.antecedent.mkString(",")
        + "=>"
        + rule.consequent.mkString(",") + "]," + rule.confidence)
    }

    // Print all frequent itemsets with their frequencies
    model.freqItemsets.collect().foreach { itemset =>
      println(itemset.items.mkString("[", ",", "]") + ", " + itemset.freq)
    }

    sc.stop()
  }
}
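If no data file is at hand, the same pipeline can be sketched with an in-memory RDD. The transactions below are made up purely for illustration, and the snippet reuses the sc and the FPGrowth import from the listing above:

val transactions = sc.parallelize(Seq(
  Array("bread", "milk"),
  Array("bread", "diapers", "beer"),
  Array("milk", "diapers", "beer"),
  Array("bread", "milk", "diapers"),
  Array("bread", "milk", "beer")
))
// With 5 transactions and minSupport = 0.6, an itemset must appear in at
// least 3 of them: e.g. {bread} (4), {milk} (4) and {bread, milk} (3) are
// frequent, while {beer, diapers} (2) is not
val demoModel = new FPGrowth().setMinSupport(0.6).run(transactions)
demoModel.freqItemsets.collect().foreach { itemset =>
  println(itemset.items.mkString("[", ",", "]") + ", " + itemset.freq)
}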
Apriori-style association rule generation in code (note: Spark MLlib does not ship an Apriori implementation; the AssociationRules class below performs only the rule-generation step, here run on hand-built frequent itemsets):
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.fpm.AssociationRules
import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset
object AssociationRule {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("ap")
    val sc = new SparkContext(conf)

    // Hand-built frequent itemsets: {a} occurs 15 times, {b} 35 times,
    // and {a, b} together 12 times
    val freqItemsets = sc.parallelize(Seq(
      new FreqItemset(Array("a"), 15L),
      new FreqItemset(Array("b"), 35L),
      new FreqItemset(Array("a", "b"), 12L)
    ))

    // Keep only rules whose confidence is at least 0.8
    val ar = new AssociationRules().setMinConfidence(0.8)
    val results = ar.run(freqItemsets)

    // Print each rule as [antecedent=>consequent],confidence
    results.collect().foreach { rule =>
      println("[" + rule.antecedent.mkString(",")
        + "=>"
        + rule.consequent.mkString(",") + "]," + rule.confidence)
    }

    sc.stop()
  }
}
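To check the result by hand: confidence(X => Y) = freq(X ∪ Y) / freq(X). Here confidence({a} => {b}) = 12 / 15 = 0.8, which just meets the 0.8 threshold, while the reverse rule {b} => {a} has confidence 12 / 35 ≈ 0.343 and is filtered out. The program should therefore print a single rule:

[a=>b],0.8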