获取文字或者段落的拼音算法
public class HanDict {
public static final char HAN_MIN = '一';
public static final char HAN_MAX = '龥';
public static final String[] HAN_DATA = new String[HAN_MAX - HAN_MIN + 1];
private static final String HAN_DATA_FILE = "远程文档地址";
private static final Charset FILE_CHARSET = Charset.forName("utf-8");
private static final int INDEX_PY = 0;
private static final int INDEX_BS = 1;
private static final int INDEX_BH = 2;
private static final int INDEX_PY_HAN = 0;
private static final int INDEX_PY_EN = 1;
static {
try {
loadHanData();
} catch (IOException e) {
System.err.println("载入汉字数据错误:" + e.getMessage());
}
}
public static String getBH(String str) {
if (str == null || str.isEmpty()) {
return "";
}
return getBH(str.charAt(0));
}
public static String getBH(char ch) {
if (isHan(ch)) {
return HAN_DATA[ch - HAN_MIN].split("\\|")[INDEX_BH];
}
return "";
}
public static String getBS(String str) {
if (str == null || str.isEmpty()) {
return "";
}
return getBS(str.charAt(0));
}
public static String getBS(char ch) {
if (isHan(ch)) {
return HAN_DATA[ch - HAN_MIN].split("\\|")[INDEX_BS];
}
return "";
}
public static List<String> getPY(char ch, boolean useHanFormat) {
List<String> list = new ArrayList<String>();
if (isHan(ch)) {
int i = useHanFormat ? INDEX_PY_HAN : INDEX_PY_EN;
String pyStr = HAN_DATA[ch - HAN_MIN].split("\\|")[INDEX_PY];
for (String py : pyStr.split(";")) {
list.add(py.split(",")[i]);
}
}
return list;
}
public static String getPY(String str, boolean useHanFormat) {
if (str == null) {
return "";
}
boolean lastBlank = true;
StringBuffer sb = new StringBuffer();
for (char ch : str.toCharArray()) {
if (isHan(ch)) {
List<String> pyList = getPY(ch, useHanFormat);
if (!pyList.isEmpty()) {
if (!lastBlank) {
sb.append(" ");
}
sb.append(pyList.get(0)).append(' ');
lastBlank = true;
}
} else {
sb.append(ch);
lastBlank = false;
}
}
return sb.toString();
}
private static boolean isHan(char ch) {
if (ch >= HAN_MIN && ch <= HAN_MAX) {
return true;
}
return false;
}
private static void loadHanData() throws IOException {
URL url = new URL(HAN_DATA_FILE);
InputStream in = url.openStream();
if (in == null) {
throw new IOException(HAN_DATA_FILE + "汉字数据文件不存在!");
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(in, FILE_CHARSET));
String line = null;
int index = 0;
while ((line = br.readLine()) != null) {
HAN_DATA[index++] = line;
}
} finally {
if (in != null) {
in.close();
}
}
}
public static void main(String[] args) {
char ch = '法';
System.out.println(ch + "的拼音(中式注音)为:" + HanDict.getPY(ch, true));
System.out.println(ch + "的拼音(英式注音)为:" + HanDict.getPY(ch, false));
System.out.println(ch + "的部首为 :" + HanDict.getBS(ch));
System.out.println(ch + "的部首笔画为 :" + HanDict.getBH(HanDict.getBS(ch)));
System.out.println(ch + "的笔画顺序为 :" + HanDict.getBH(ch));
System.out.println(ch + "的笔画数为 :" + HanDict.getBH(ch).length());
System.out.println();
ch = '大';
System.out.println(ch + "的拼音(中式注音)为:" + HanDict.getPY(ch, true));
System.out.println(ch + "的拼音(英式注音)为:" + HanDict.getPY(ch, false));
System.out.println(ch + "的部首为 :" + HanDict.getBS(ch));
System.out.println(ch + "的部首笔画为 :" + HanDict.getBH(HanDict.getBS(ch)));
System.out.println(ch + "的笔画顺序为 :" + HanDict.getBH(ch));
System.out.println(ch + "的笔画数为 :" + HanDict.getBH(ch).length());
System.out.println();
String str = "今年的收入为123万。";
System.out.println(str + " 的拼音(中式)为:" + getPY(str, true));
System.out.println(str + " 的拼音(英式)为:" + getPY(str, false));
}
}
