HanLP 的 HMM 代码,涵盖隐马尔可夫模型的三个基本问题:概率计算(给定模型参数,计算观测序列的条件概率)、学习(估计最有可能的模型参数)、预测(给定观测序列和模型参数,求最有可能的状态序列)。
/*
* <author>Han He</author>
* <email>[email protected]</email>
* <create-date>2018-06-09 7:47 PM</create-date>
*
* <copyright file="HiddenMarkovModel.java">
* Copyright (c) 2018, Han He. All Rights Reserved, http://www.hankcs.com/
* This source is subject to Han He. Please contact Han He for more information.
* </copyright>
*/
package com.hankcs.hanlp.model.hmm;
import com.hankcs.hanlp.utility.MathUtility;
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
/**
* @author hankcs
*/
public abstract class HiddenMarkovModel
{
/**
 * Initial state probability vector.
 */
public float[] start_probability;
/**
 * Observation (emission) probability matrix.
 */
public float[][] emission_probability;
/**
 * State transition probability matrix.
 */
public float[][] transition_probability;
/**
* 构造隐马模型
*
* @param start_probability 初始状态概率向量
* @param transition_probability 状态转移概率矩阵
* @param emission_probability 观测概率矩阵
*/
public HiddenMarkovModel(float[] start_probability, float[][] transition_probability, float[][] emission_probability)
{
this.start_probability = (float[]) deepCopy(start_probability);
this.transition_probability = (float[][]) deepCopy(transition_probability);
this.emission_probability = (float[][]) deepCopy(emission_probability);
}
/**
 * Converts a vector of log-probabilities into a cumulative distribution function.
 * <p>
 * Entry {@code i} of the result (for every index except the last) is the running sum
 * {@code exp(log[0]) + ... + exp(log[i])}. The last entry is not accumulated; it is set
 * to exactly {@code 1.0}, whatever the inputs sum to.
 * <p>
 * An empty input yields an empty result instead of failing with an
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @param log log-probabilities, one per outcome; must not be {@code null}
 * @return a new array of the same length as {@code log} holding the cumulative distribution
 */
protected static double[] logToCdf(float[] log)
{
    double[] cdf = new double[log.length];
    if (cdf.length == 0)
    {
        // Nothing to accumulate; indexing cdf[0] below would be out of bounds.
        return cdf;
    }
    final int last = cdf.length - 1;
    double running = 0.0;
    for (int i = 0; i < last; i++)
    {
        running += Math.exp(log[i]);
        cdf[i] = running;
    }
    // Pin the final entry to 1.0 (presumably so rounding error cannot leave the CDF short of 1).
    cdf[last] = 1.0;
    return cdf;
}
/**
* 对数概率转化为累积分布函数
*
* @param log
* @return
*/
protected static double[][] logToCdf(float[][] log)
{
double[][] cdf = new double[log.length][log[0].length];
for (int i = 0; i < log.length; i++)