提供的代码应被视为是草图,而不是权威的文章。我不是SAX方面的专家,可以改进实现以获得更好的性能,更简单的代码等。这表示SAX应该能够处理流较大的XML文件。
我将使用SAX解析器通过2次传递来解决此问题。(顺便说一句,我还将使用CSV生成库来创建输出,因为这将处理CSV涉及的所有巧妙的字符转义,但我并未在草图中实现)。
第一遍: 建立标题列数
第二次通过: 输出CSV
我认为XML文件格式正确。我假设我们没有预定义顺序的方案/ DTD。
在第一遍中,我假设将为包含文本内容的每个XML元素或任何属性添加CSV列(我假设属性将包含某些内容!)。
确定目标列数的第二遍将执行实际的CSV输出。
根据您的示例XML,我的代码草图将产生:
ItemID,StartTime,EndTime,ViewItemURL,AverageTime,category,category,type,type,AveragePrice4504216603,10:00:10.000Z,10:00:30.000Z,http://url,,,,,,4504216604,10:30:10.000Z,11:00:10.000Z,http://url,value1,9823,9112,TX,TY,value2
请注意,我使用了Google集合linkedHashMultimap,因为这在将多个值与单个键关联时非常有用。希望这个对你有帮助!
import com.google.common.collect.linkedHashMultimap;import java.io.FileNotFoundException;import java.io.FileReader;import java.io.IOException;import java.util.linkedHashMap;import java.util.Map.Entry;import org.xml.sax.Attributes;import org.xml.sax.InputSource;import org.xml.sax.SAXException;import org.xml.sax.XMLReader;import org.xml.sax.helpers.DefaultHandler;import org.xml.sax.helpers.XMLReaderFactory;public class App { public static void main(String[] args) throws SAXException, FileNotFoundException, IOException { // First pass - to determine headers XMLReader xr = XMLReaderFactory.createXMLReader(); HeaderHandler handler = new HeaderHandler(); xr.setContentHandler(handler); xr.setErrorHandler(handler); FileReader r = new FileReader("test1.xml"); xr.parse(new InputSource(r)); linkedHashMap<String, Integer> headers = handler.getHeaders(); int totalnumberofcolumns = 0; for (int headercount : headers.values()) { totalnumberofcolumns += headercount; } String[] columnheaders = new String[totalnumberofcolumns]; int i = 0; for (Entry<String, Integer> entry : headers.entrySet()) { for (int j = 0; j < entry.getValue(); j++) { columnheaders[i] = entry.getKey(); i++; } } StringBuilder sb = new StringBuilder(); for (String h : columnheaders) { sb.append(h); sb.append(','); } System.out.println(sb.substring(0, sb.length() - 1)); // Second pass - collect and output data xr = XMLReaderFactory.createXMLReader(); DataHandler datahandler = new DataHandler(); datahandler.setHeaderArray(columnheaders); xr.setContentHandler(datahandler); xr.setErrorHandler(datahandler); r = new FileReader("test1.xml"); xr.parse(new InputSource(r)); } public static class HeaderHandler extends DefaultHandler { private String content; private String currentElement; private boolean insideElement = false; private Attributes attribs; private linkedHashMap<String, Integer> itemHeader; private linkedHashMap<String, Integer> accumulativeHeader = new linkedHashMap<String, Integer>(); public HeaderHandler() { super(); } private linkedHashMap<String, Integer> getHeaders() { return accumulativeHeader; } private void addItemHeader(String headerName) { if (itemHeader.containsKey(headerName)) { itemHeader.put(headerName, itemHeader.get(headerName) + 1); } else { itemHeader.put(headerName, 1); } } @Override public void startElement(String uri, String name, String qName, Attributes atts) { if ("item".equalsIgnoreCase(qName)) { itemHeader = new linkedHashMap<String, Integer>(); } currentElement = qName; content = null; insideElement = true; attribs = atts; } @Override public void endElement(String uri, String name, String qName) { if (!"item".equalsIgnoreCase(qName) && !"root".equalsIgnoreCase(qName)) { if (content != null && qName.equals(currentElement) && content.trim().length() > 0) { addItemHeader(qName); } if (attribs != null) { int attsLength = attribs.getLength(); if (attsLength > 0) { for (int i = 0; i < attsLength; i++) { String attName = attribs.getLocalName(i); addItemHeader(attName); } } } } if ("item".equalsIgnoreCase(qName)) { for (Entry<String, Integer> entry : itemHeader.entrySet()) { String headerName = entry.getKey(); Integer count = entry.getValue(); //System.out.println(entry.getKey() + ":" + entry.getValue()); if (accumulativeHeader.containsKey(headerName)) { if (count > accumulativeHeader.get(headerName)) { accumulativeHeader.put(headerName, count); } } else { accumulativeHeader.put(headerName, count); } } } insideElement = false; currentElement = null; attribs = null; } @Override public void characters(char ch[], int start, int length) { if (insideElement) { content = new String(ch, start, length); } } } public static class DataHandler extends DefaultHandler { private String content; private String currentElement; private boolean insideElement = false; private Attributes attribs; private linkedHashMultimap dataMap; private String[] headerArray; public DataHandler() { super(); } @Override public void startElement(String uri, String name, String qName, Attributes atts) { if ("item".equalsIgnoreCase(qName)) { dataMap = linkedHashMultimap.create(); } currentElement = qName; content = null; insideElement = true; attribs = atts; } @Override public void endElement(String uri, String name, String qName) { if (!"item".equalsIgnoreCase(qName) && !"root".equalsIgnoreCase(qName)) { if (content != null && qName.equals(currentElement) && content.trim().length() > 0) { dataMap.put(qName, content); } if (attribs != null) { int attsLength = attribs.getLength(); if (attsLength > 0) { for (int i = 0; i < attsLength; i++) { String attName = attribs.getLocalName(i); dataMap.put(attName, attribs.getValue(i)); } } } } if ("item".equalsIgnoreCase(qName)) { String data[] = new String[headerArray.length]; int i = 0; for (String h : headerArray) { if (dataMap.containsKey(h)) { Object[] values = dataMap.get(h).toArray(); data[i] = (String) values[0]; if (values.length > 1) { dataMap.removeAll(h); for (int j = 1; j < values.length; j++) { dataMap.put(h, values[j]); } } else { dataMap.removeAll(h); } } else { data[i] = ""; } i++; } StringBuilder sb = new StringBuilder(); for (String d : data) { sb.append(d); sb.append(','); } System.out.println(sb.substring(0, sb.length() - 1)); } insideElement = false; currentElement = null; attribs = null; } @Override public void characters(char ch[], int start, int length) { if (insideElement) { content = new String(ch, start, length); } } public void setHeaderArray(String[] headerArray) { this.headerArray = headerArray; } }}


