package fptree;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
public class FPTree {
private int minSup; // 最小支持度
private static int sum = 0;
public int getMinSup() {
return minSup;
}
public void setMinSup(int minSup) {
this.minSup = minSup;
}
// public static void constructF1(TreeNode node, List<String> l){
// List<TreeNode> children = node.getChildren();
// if (children != null && children.size() > 0){
// for(TreeNode child : children){
// if(children.size() >= 2&& node.getName()!= null){
// List<String> templ = new ArrayList<String>(l);
// templ.add(child.getName());
// constructF1(child, templ);
// } else {
// l.add(child.getName());
// constructF1(child,l);
// }
// }
// } else {
// for(int i =0; i < l.size(); i++)
// {
// System.out.print(l.get(i) + " ");
// }
// System.out.println("\n");
// }
// }
/**
* 1.读入事务记录
*
* @param filenames
* @return
*/
public List<List<String>> readTransData(String... filenames) {
List<List<String>> records = new LinkedList<List<String>>();
List<String> record;
// 从文件读入
if (filenames.length > 0) {
for (String filename : filenames) {
try {
FileReader fr = new FileReader(new File(filename));
BufferedReader br = new BufferedReader(fr);
String line = null;
while ((line = br.readLine()) != null) {
if (line.trim() != "") {
record = new LinkedList<String>();
String[] items = line.split("[,|,]");
for (String item : items) {
record.add(item);
}
records.add(record);
}
}
} catch (IOException e) {
System.out.println("读取事务数据库失败。");
System.exit(-2);
}
}
}
// 直接在代码里指定
else {
record = new LinkedList<String>();
String[] trans = new String[] { "f", "a", "c", "d", "g", "i", "m",
"p" };
for (String t : trans)
record.add(t);
records.add(record);
record = new LinkedList<String>();
trans = new String[] { "a", "b", "c", "f", "l", "m", "o" };
for (String t : trans)
record.add(t);
records.add(record);
record = new LinkedList<String>();
trans = new String[] { "b", "f", "h", "j", "o" };
for (String t : trans)
record.add(t);
records.add(record);
record = new LinkedList<String>();
trans = new String[] { "b", "c", "k", "s", "p" };
for (String t : trans)
record.add(t);
records.add(record);
record = new LinkedList<String>();
trans = new String[] { "a", "f", "c", "e", "l", "p", "m", "n" };
for (String t : trans)
record.add(t);
records.add(record);
}
return records;
}
/**
* 2.构造频繁1项集
*
* @param transRecords
* @return
*/
public ArrayList<TreeNode> buildF1Items(List<List<String>> transRecords) {
ArrayList<TreeNode> F1 = null;
if (transRecords.size() > 0) {
F1 = new ArrayList<TreeNode>();
Map<String, TreeNode> map = new HashMap<String, TreeNode>();
// 计算事务数据库中各项的支持度
for (List<String> record : transRecords) {
for (String item : record) {
if (!map.keySet().contains(item)) {
TreeNode node = new TreeNode(item);
node.setCount(1);
map.put(item, node);
} else {
map.get(item).countIncrement(1);
}
}
}
// 把支持度大于(或等于)minSup的项加入到F1中
Set<String> names = map.keySet();
for (String name : names) {
TreeNode tnode = map.get(name);
if (tnode.getCount() >= minSup) {
F1.add(tnode);
}
}
Collections.sort(F1);
return F1;
} else {
return null;
}
}
/**
* 3.建立FP-Tree
*
* @param transRecords
* @param F1
* @return
*/
public TreeNode buildFPTree(List<List<String>> transRecords,
ArrayList<TreeNode> F1) {
TreeNode root = new TreeNode(); // 创建树的根节点
for (List<String> transRecord : transRecords) {
LinkedList<String> record = sortByF1(transRecord, F1);
TreeNode subTreeRoot = root;
TreeNode tmpRoot = null;
if (root.getChildren() != null) {
while (!record.isEmpty()
&& (tmpRoot = subTreeRoot.findChild(record.peek())) != null) {
tmpRoot.countIncrement(1);
subTreeRoot = tmpRoot;
record.poll();
}
}
addNodes(subTreeRoot, record, F1);
}
return root;
}
/**
* 3.1把事务数据库中的一条记录按照F1(频繁1项集)中的顺序排序
*
* @param transRecord
* @param F1
* @return
*/
public LinkedList<String> sortByF1(List<String> transRecord,
ArrayList<TreeNode> F1) {
Map<String, Integer> map = new HashMap<String, Integer>();
for (String item : transRecord) {
// 由于F1已经是按降序排列的,
for (int i = 0; i < F1.size(); i++) {
TreeNode tnode = F1.get(i);
if (tnode.getName().equals(item)) {
map.put(item, i);
}
}
}
ArrayList<Entry<String, Integer>> al = new ArrayList<Entry<String, Integer>>(
map.entrySet());
Collections.sort(al, new Comparator<Map.Entry<String, Integer>>() {
// @Override
public int compare(Entry<String, Integer> arg0,Entry<String, Integer> arg1) {
// 降序排列
return arg0.getValue() - arg1.getValue();
}
});
LinkedList<String> rest = new LinkedList<String>();
for (Entry<String, Integer> entry : al) {
rest.add(entry.getKey());
}
return rest;
}
/**
* 3.2 把若干个节点作为指定指定节点的后代插入树中
*
* @param ancestor
* @param record
* @param F1
*/
public void addNodes(TreeNode ancestor, LinkedList<String> record,
ArrayList<TreeNode> F1) {
if (record.size() > 0) {
while (record.size() > 0) {
String item = record.poll();
TreeNode leafnode = new TreeNode(item);
leafnode.setCount(1);