Java代码
package org.axman.xml.regex;
import java.util.regex.*;
import java.util.*;
/**
 * A tiny regex-based helper for pulling elements, attributes and text out of
 * an XML string.
 *
 * <p>This is not an XML parser. It matches tags with regular expressions, so
 * it does not understand comments, CDATA sections, or elements nested inside
 * an element of the same name (the lazy match stops at the first closing
 * tag). Paths use {@code #} as the separator, e.g. {@code ROOT#PARENT#CHILD}.
 */
public class Document {
    /** One step of a multi path: a word-character tag name with an optional {@code [index]}. */
    private static final Pattern PATH_STEP = Pattern.compile("(\\w+)(\\[(\\d+)\\])?");
    /** The first tag-looking span ({@code <...>}) of a string. */
    private static final Pattern FIRST_TAG = Pattern.compile("<[^>]+>");
    /** A double-quoted, non-empty attribute: {@code name = "value"}. */
    private static final Pattern ATTRIBUTE = Pattern.compile("(\\w+)\\s*=\\s*\"([^\"]+)\"");
    /** The first run of characters between a {@code >} and the following {@code <}. */
    private static final Pattern FIRST_TEXT = Pattern.compile(">([^<>]*)<");

    private final String xmlString;

    /**
     * Wraps an XML string for querying.
     *
     * @param xmlString the XML text; must be neither {@code null} nor empty
     * @throws IllegalArgumentException if {@code xmlString} is {@code null} or empty
     */
    public Document(String xmlString) throws IllegalArgumentException {
        if (xmlString == null || xmlString.length() == 0) {
            throw new IllegalArgumentException("Input string error!");
        }
        this.xmlString = xmlString;
    }

    /**
     * Finds every element with the given tag name in this document.
     *
     * @param tagName the tag name to look for
     * @return the matched elements (opening tag through closing tag, or the
     *         self-closing tag) in document order; empty if none match
     */
    public String[] getElementsByTag(String tagName) {
        return getElementsByTag(this.xmlString, tagName);
    }

    /**
     * Resolves a {@code #}-separated path of plain tag names against this document.
     *
     * @param singlePath the path, e.g. {@code ROOT#PARENT#CHILD}
     * @return the text of the element named by the last path step, or
     *         {@code ""} if the path does not match
     * @see #getElementBySinglePath(String, String)
     */
    public String getElementBySinglePath(String singlePath) {
        return getElementBySinglePath(this.xmlString, singlePath);
    }

    /**
     * Resolves a {@code #}-separated path whose steps may carry an index
     * (e.g. {@code ROOT[0]#PARENT#CHILD[2]}) against this document.
     *
     * @param singlePath the path to follow
     * @return the text of the selected element, or {@code null} if any step
     *         cannot be resolved
     * @see #getElementByMultiPath(String, String)
     */
    public String getElementByMultiPath(String singlePath) {
        return getElementByMultiPath(this.xmlString, singlePath);
    }

    /**
     * Finds every element with the given tag name inside a piece of XML text.
     *
     * @param parentElementString the XML text to search
     * @param tagName the tag name to look for; it is matched literally and
     *        as a whole name, so {@code A} does not match {@code <AB>}
     * @return the matched elements in order of appearance; an empty array if
     *         nothing matches or either argument is {@code null} or empty
     */
    public static String[] getElementsByTag(String parentElementString, String tagName) {
        if (parentElementString == null || tagName == null || tagName.length() == 0) {
            return new String[0];
        }
        // DOTALL lets element content span several lines.
        Matcher m = Pattern.compile(elementRegex(tagName), Pattern.DOTALL)
                .matcher(parentElementString);
        List<String> found = new ArrayList<String>();
        while (m.find()) {
            found.add(m.group());
        }
        return found.toArray(new String[0]);
    }

    /**
     * Resolves a {@code #}-separated path of plain tag names inside a piece of XML text.
     *
     * <p>Every step but the last must appear as an opening/closing tag pair
     * that encloses the next step. The last step may be either
     * {@code <x>v</x>} or {@code <x/>}. Because the gaps between steps are
     * matched greedily, the last matching leaf is returned when several
     * siblings qualify.
     *
     * @param parentElementString the XML text to search
     * @param singlePath the path, e.g. {@code ROOT#PARENT#CHILD}
     * @return the text of the leaf element, or {@code ""} if the path does
     *         not match or an argument is {@code null}
     */
    public static String getElementBySinglePath(String parentElementString, String singlePath) {
        if (parentElementString == null || singlePath == null) {
            return "";
        }
        String[] path = singlePath.split("#");
        if (path.length == 0) {
            // A path made only of separators has no steps to resolve.
            return "";
        }
        // The last element may be of the form <x>v</x> or <x/>; it is the only capturing group.
        String regex = "(" + elementRegex(path[path.length - 1]) + ")";
        // Wrap the leaf in its ancestors, innermost first.
        for (int i = path.length - 2; i >= 0; i--) {
            String tag = Pattern.quote(path[i]);
            regex = "<" + tag + "(?:\\s[^>]*)?>.*" + regex + ".*</" + tag + "\\s*>";
        }
        Matcher m = Pattern.compile(regex, Pattern.DOTALL).matcher(parentElementString);
        return m.find() ? m.group(1) : "";
    }

    /**
     * Resolves a {@code #}-separated path whose steps may carry an index
     * inside a piece of XML text.
     *
     * <p>Each step selects the n-th element (default 0) with the step's tag
     * name from the element selected by the previous step. A step that
     * contains no word characters is skipped.
     *
     * @param parentElementString the XML text to start from
     * @param singlePath the path, e.g. {@code ROOT[0]#PARENT#CHILD}
     * @return the text of the selected element, or {@code null} if an
     *         argument is {@code null}, an index is out of range, or a tag is
     *         not found
     */
    public static String getElementByMultiPath(String parentElementString, String singlePath) {
        if (parentElementString == null || singlePath == null) {
            return null;
        }
        String current = parentElementString;
        for (String step : singlePath.split("#")) {
            Matcher m = PATH_STEP.matcher(step);
            if (!m.find()) {
                continue;
            }
            int index = 0;
            if (m.group(3) != null) {
                try {
                    index = Integer.parseInt(m.group(3));
                } catch (NumberFormatException tooLarge) {
                    // Only digits can reach here, so this is an index beyond int range.
                    return null;
                }
            }
            String[] candidates = getElementsByTag(current, m.group(1));
            if (index >= candidates.length) {
                return null;
            }
            current = candidates[index];
        }
        return current;
    }

    /**
     * Collects the double-quoted attributes of the first tag in the given text.
     *
     * @param elementString the element text
     * @return a map from attribute name to trimmed value; empty if the text
     *         has no tag or the tag has no attributes
     */
    public HashMap<String, String> getAttributes(String elementString) {
        HashMap<String, String> attributes = new HashMap<String, String>();
        Matcher m = ATTRIBUTE.matcher(firstTag(elementString));
        while (m.find()) {
            attributes.put(m.group(1).trim(), m.group(2).trim());
        }
        return attributes;
    }

    /**
     * Looks up one double-quoted attribute on the first tag in the given text.
     *
     * @param elementString the element text
     * @param attributeName the attribute to look up
     * @return the trimmed attribute value, or {@code ""} if it is not present
     */
    public static String getAttribute(String elementString, String attributeName) {
        Matcher m = ATTRIBUTE.matcher(firstTag(elementString));
        while (m.find()) {
            if (m.group(1).trim().equals(attributeName)) {
                return m.group(2).trim();
            }
        }
        return "";
    }

    /**
     * Returns the first run of text that sits between two tags.
     *
     * @param elementString the element text
     * @return the characters between the first {@code >} and the next
     *         {@code <} with no tag in between, or {@code ""} if there is none
     */
    public static String getElementText(String elementString) {
        if (elementString == null) {
            return "";
        }
        Matcher m = FIRST_TEXT.matcher(elementString);
        return m.find() ? m.group(1) : "";
    }

    /** Small demo: prints the CHILD element of a sample document. */
    public static void main(String[] args) {
        String child = new Document(
                "<ROOT>sss <PARENT>sss <CHILD>aaaa</CHILD>ss </PARENT>sss </ROOT>")
                .getElementByMultiPath("ROOT[0]#PARENT#CHILD");
        System.out.println(child);
    }

    /**
     * Builds the regex for one element with the given tag name, either
     * {@code <tag ...>...</tag>} or {@code <tag .../>}. The name is quoted so
     * regex metacharacters in it are literal, and a lookahead requires the
     * name to end right there. The regex has no capturing groups.
     */
    private static String elementRegex(String tagName) {
        String tag = Pattern.quote(tagName);
        return "<" + tag + "(?=[\\s/>])[^>]*?(?:>.*?</" + tag + "\\s*>|/>)";
    }

    /** Returns the first {@code <...>} span of the text, or {@code ""} if there is none. */
    private static String firstTag(String elementString) {
        if (elementString == null) {
            return "";
        }
        Matcher m = FIRST_TAG.matcher(elementString);
        return m.find() ? m.group() : "";
    }
}
本文来自CSDN博客,转载请标明出处:http://blog.csdn.net/axman/archive/2005/07/11/420910.aspx
package org.axman.xml.regex;
import java.util.regex.*;
import java.util.*;
/**
 * A tiny regex-based helper for pulling elements, attributes and text out of
 * an XML string.
 *
 * <p>This is not an XML parser. It matches tags with regular expressions, so
 * it does not understand comments, CDATA sections, or elements nested inside
 * an element of the same name (the lazy match stops at the first closing
 * tag). Paths use {@code #} as the separator, e.g. {@code ROOT#PARENT#CHILD}.
 */
public class Document {
    /** One step of a multi path: a word-character tag name with an optional {@code [index]}. */
    private static final Pattern PATH_STEP = Pattern.compile("(\\w+)(\\[(\\d+)\\])?");
    /** The first tag-looking span ({@code <...>}) of a string. */
    private static final Pattern FIRST_TAG = Pattern.compile("<[^>]+>");
    /** A double-quoted, non-empty attribute: {@code name = "value"}. */
    private static final Pattern ATTRIBUTE = Pattern.compile("(\\w+)\\s*=\\s*\"([^\"]+)\"");
    /** The first run of characters between a {@code >} and the following {@code <}. */
    private static final Pattern FIRST_TEXT = Pattern.compile(">([^<>]*)<");

    private final String xmlString;

    /**
     * Wraps an XML string for querying.
     *
     * @param xmlString the XML text; must be neither {@code null} nor empty
     * @throws IllegalArgumentException if {@code xmlString} is {@code null} or empty
     */
    public Document(String xmlString) throws IllegalArgumentException {
        if (xmlString == null || xmlString.length() == 0) {
            throw new IllegalArgumentException("Input string error!");
        }
        this.xmlString = xmlString;
    }

    /**
     * Finds every element with the given tag name in this document.
     *
     * @param tagName the tag name to look for
     * @return the matched elements (opening tag through closing tag, or the
     *         self-closing tag) in document order; empty if none match
     */
    public String[] getElementsByTag(String tagName) {
        return getElementsByTag(this.xmlString, tagName);
    }

    /**
     * Resolves a {@code #}-separated path of plain tag names against this document.
     *
     * @param singlePath the path, e.g. {@code ROOT#PARENT#CHILD}
     * @return the text of the element named by the last path step, or
     *         {@code ""} if the path does not match
     * @see #getElementBySinglePath(String, String)
     */
    public String getElementBySinglePath(String singlePath) {
        return getElementBySinglePath(this.xmlString, singlePath);
    }

    /**
     * Resolves a {@code #}-separated path whose steps may carry an index
     * (e.g. {@code ROOT[0]#PARENT#CHILD[2]}) against this document.
     *
     * @param singlePath the path to follow
     * @return the text of the selected element, or {@code null} if any step
     *         cannot be resolved
     * @see #getElementByMultiPath(String, String)
     */
    public String getElementByMultiPath(String singlePath) {
        return getElementByMultiPath(this.xmlString, singlePath);
    }

    /**
     * Finds every element with the given tag name inside a piece of XML text.
     *
     * @param parentElementString the XML text to search
     * @param tagName the tag name to look for; it is matched literally and
     *        as a whole name, so {@code A} does not match {@code <AB>}
     * @return the matched elements in order of appearance; an empty array if
     *         nothing matches or either argument is {@code null} or empty
     */
    public static String[] getElementsByTag(String parentElementString, String tagName) {
        if (parentElementString == null || tagName == null || tagName.length() == 0) {
            return new String[0];
        }
        // DOTALL lets element content span several lines.
        Matcher m = Pattern.compile(elementRegex(tagName), Pattern.DOTALL)
                .matcher(parentElementString);
        List<String> found = new ArrayList<String>();
        while (m.find()) {
            found.add(m.group());
        }
        return found.toArray(new String[0]);
    }

    /**
     * Resolves a {@code #}-separated path of plain tag names inside a piece of XML text.
     *
     * <p>Every step but the last must appear as an opening/closing tag pair
     * that encloses the next step. The last step may be either
     * {@code <x>v</x>} or {@code <x/>}. Because the gaps between steps are
     * matched greedily, the last matching leaf is returned when several
     * siblings qualify.
     *
     * @param parentElementString the XML text to search
     * @param singlePath the path, e.g. {@code ROOT#PARENT#CHILD}
     * @return the text of the leaf element, or {@code ""} if the path does
     *         not match or an argument is {@code null}
     */
    public static String getElementBySinglePath(String parentElementString, String singlePath) {
        if (parentElementString == null || singlePath == null) {
            return "";
        }
        String[] path = singlePath.split("#");
        if (path.length == 0) {
            // A path made only of separators has no steps to resolve.
            return "";
        }
        // The last element may be of the form <x>v</x> or <x/>; it is the only capturing group.
        String regex = "(" + elementRegex(path[path.length - 1]) + ")";
        // Wrap the leaf in its ancestors, innermost first.
        for (int i = path.length - 2; i >= 0; i--) {
            String tag = Pattern.quote(path[i]);
            regex = "<" + tag + "(?:\\s[^>]*)?>.*" + regex + ".*</" + tag + "\\s*>";
        }
        Matcher m = Pattern.compile(regex, Pattern.DOTALL).matcher(parentElementString);
        return m.find() ? m.group(1) : "";
    }

    /**
     * Resolves a {@code #}-separated path whose steps may carry an index
     * inside a piece of XML text.
     *
     * <p>Each step selects the n-th element (default 0) with the step's tag
     * name from the element selected by the previous step. A step that
     * contains no word characters is skipped.
     *
     * @param parentElementString the XML text to start from
     * @param singlePath the path, e.g. {@code ROOT[0]#PARENT#CHILD}
     * @return the text of the selected element, or {@code null} if an
     *         argument is {@code null}, an index is out of range, or a tag is
     *         not found
     */
    public static String getElementByMultiPath(String parentElementString, String singlePath) {
        if (parentElementString == null || singlePath == null) {
            return null;
        }
        String current = parentElementString;
        for (String step : singlePath.split("#")) {
            Matcher m = PATH_STEP.matcher(step);
            if (!m.find()) {
                continue;
            }
            int index = 0;
            if (m.group(3) != null) {
                try {
                    index = Integer.parseInt(m.group(3));
                } catch (NumberFormatException tooLarge) {
                    // Only digits can reach here, so this is an index beyond int range.
                    return null;
                }
            }
            String[] candidates = getElementsByTag(current, m.group(1));
            if (index >= candidates.length) {
                return null;
            }
            current = candidates[index];
        }
        return current;
    }

    /**
     * Collects the double-quoted attributes of the first tag in the given text.
     *
     * @param elementString the element text
     * @return a map from attribute name to trimmed value; empty if the text
     *         has no tag or the tag has no attributes
     */
    public HashMap<String, String> getAttributes(String elementString) {
        HashMap<String, String> attributes = new HashMap<String, String>();
        Matcher m = ATTRIBUTE.matcher(firstTag(elementString));
        while (m.find()) {
            attributes.put(m.group(1).trim(), m.group(2).trim());
        }
        return attributes;
    }

    /**
     * Looks up one double-quoted attribute on the first tag in the given text.
     *
     * @param elementString the element text
     * @param attributeName the attribute to look up
     * @return the trimmed attribute value, or {@code ""} if it is not present
     */
    public static String getAttribute(String elementString, String attributeName) {
        Matcher m = ATTRIBUTE.matcher(firstTag(elementString));
        while (m.find()) {
            if (m.group(1).trim().equals(attributeName)) {
                return m.group(2).trim();
            }
        }
        return "";
    }

    /**
     * Returns the first run of text that sits between two tags.
     *
     * @param elementString the element text
     * @return the characters between the first {@code >} and the next
     *         {@code <} with no tag in between, or {@code ""} if there is none
     */
    public static String getElementText(String elementString) {
        if (elementString == null) {
            return "";
        }
        Matcher m = FIRST_TEXT.matcher(elementString);
        return m.find() ? m.group(1) : "";
    }

    /** Small demo: prints the CHILD element of a sample document. */
    public static void main(String[] args) {
        String child = new Document(
                "<ROOT>sss <PARENT>sss <CHILD>aaaa</CHILD>ss </PARENT>sss </ROOT>")
                .getElementByMultiPath("ROOT[0]#PARENT#CHILD");
        System.out.println(child);
    }

    /**
     * Builds the regex for one element with the given tag name, either
     * {@code <tag ...>...</tag>} or {@code <tag .../>}. The name is quoted so
     * regex metacharacters in it are literal, and a lookahead requires the
     * name to end right there. The regex has no capturing groups.
     */
    private static String elementRegex(String tagName) {
        String tag = Pattern.quote(tagName);
        return "<" + tag + "(?=[\\s/>])[^>]*?(?:>.*?</" + tag + "\\s*>|/>)";
    }

    /** Returns the first {@code <...>} span of the text, or {@code ""} if there is none. */
    private static String firstTag(String elementString) {
        if (elementString == null) {
            return "";
        }
        Matcher m = FIRST_TAG.matcher(elementString);
        return m.find() ? m.group() : "";
    }
}
本文来自CSDN博客,转载请标明出处:http://blog.csdn.net/axman/archive/2005/07/11/420910.aspx