栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 面试经验 > 面试问答

在Java中将XML文件转换为CSV

面试问答 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

在Java中将XML文件转换为CSV

下面提供的代码应被视为一份草图,而不是最终定稿。我不是SAX方面的专家,这个实现还可以继续改进,以获得更好的性能、更简洁的代码等。话虽如此,SAX应该能够以流的方式处理较大的XML文件。

我会使用SAX解析器,分两遍(two passes)扫描来解决这个问题。(顺便说一句,我还会使用一个CSV生成库来创建输出,因为它能处理CSV涉及的所有棘手的字符转义,但这一点我并未在草图中实现。)

第一遍:确定标题列的数量

第二遍:输出CSV

我假设XML文件是格式良好的(well-formed)。我还假设我们没有预先定义元素顺序的模式(schema)/DTD。

在第一遍中,我假设将为包含文本内容的每个XML元素或任何属性添加CSV列(我假设属性将包含某些内容!)。

在确定了目标列数之后,第二遍将执行实际的CSV输出。

根据您的示例XML,我的代码草图将产生:

ItemID,StartTime,EndTime,ViewItemURL,AverageTime,category,category,type,type,AveragePrice
4504216603,10:00:10.000Z,10:00:30.000Z,http://url,,,,,,
4504216604,10:30:10.000Z,11:00:10.000Z,http://url,value1,9823,9112,TX,TY,value2

请注意,我使用了Google Collections(Guava)中的LinkedHashMultimap,因为它在需要将多个值与单个键关联时非常有用。希望这对你有帮助!

import com.google.common.collect.linkedHashMultimap;import java.io.FileNotFoundException;import java.io.FileReader;import java.io.IOException;import java.util.linkedHashMap;import java.util.Map.Entry;import org.xml.sax.Attributes;import org.xml.sax.InputSource;import org.xml.sax.SAXException;import org.xml.sax.XMLReader;import org.xml.sax.helpers.DefaultHandler;import org.xml.sax.helpers.XMLReaderFactory;public class App {    public static void main(String[] args) throws SAXException, FileNotFoundException, IOException {        // First pass - to determine headers        XMLReader xr = XMLReaderFactory.createXMLReader();        HeaderHandler handler = new HeaderHandler();        xr.setContentHandler(handler);        xr.setErrorHandler(handler);        FileReader r = new FileReader("test1.xml");        xr.parse(new InputSource(r));        linkedHashMap<String, Integer> headers = handler.getHeaders();        int totalnumberofcolumns = 0;        for (int headercount : headers.values()) { totalnumberofcolumns += headercount;        }        String[] columnheaders = new String[totalnumberofcolumns];        int i = 0;        for (Entry<String, Integer> entry : headers.entrySet()) { for (int j = 0; j < entry.getValue(); j++) {     columnheaders[i] = entry.getKey();     i++; }        }        StringBuilder sb = new StringBuilder();        for (String h : columnheaders) { sb.append(h); sb.append(',');        }        System.out.println(sb.substring(0, sb.length() - 1));        // Second pass - collect and output data        xr = XMLReaderFactory.createXMLReader();        DataHandler datahandler = new DataHandler();        datahandler.setHeaderArray(columnheaders);        xr.setContentHandler(datahandler);        xr.setErrorHandler(datahandler);        r = new FileReader("test1.xml");        xr.parse(new InputSource(r));    }    public static class HeaderHandler extends DefaultHandler {        private String content;        private String currentElement;        private 
boolean insideElement = false;        private Attributes attribs;        private linkedHashMap<String, Integer> itemHeader;        private linkedHashMap<String, Integer> accumulativeHeader = new linkedHashMap<String, Integer>();        public HeaderHandler() { super();        }        private linkedHashMap<String, Integer> getHeaders() { return accumulativeHeader;        }        private void addItemHeader(String headerName) { if (itemHeader.containsKey(headerName)) {     itemHeader.put(headerName, itemHeader.get(headerName) + 1); } else {     itemHeader.put(headerName, 1); }        }        @Override        public void startElement(String uri, String name,     String qName, Attributes atts) { if ("item".equalsIgnoreCase(qName)) {     itemHeader = new linkedHashMap<String, Integer>(); } currentElement = qName; content = null; insideElement = true; attribs = atts;        }        @Override        public void endElement(String uri, String name, String qName) { if (!"item".equalsIgnoreCase(qName) && !"root".equalsIgnoreCase(qName)) {     if (content != null && qName.equals(currentElement) && content.trim().length() > 0) {         addItemHeader(qName);     }     if (attribs != null) {         int attsLength = attribs.getLength();         if (attsLength > 0) {  for (int i = 0; i < attsLength; i++) {      String attName = attribs.getLocalName(i);      addItemHeader(attName);  }         }     } } if ("item".equalsIgnoreCase(qName)) {     for (Entry<String, Integer> entry : itemHeader.entrySet()) {         String headerName = entry.getKey();         Integer count = entry.getValue();         //System.out.println(entry.getKey() + ":" + entry.getValue());         if (accumulativeHeader.containsKey(headerName)) {  if (count > accumulativeHeader.get(headerName)) {      accumulativeHeader.put(headerName, count);  }         } else {  accumulativeHeader.put(headerName, count);         }     } } insideElement = false; currentElement = null; attribs = null;        }        @Override 
       public void characters(char ch[], int start, int length) { if (insideElement) {     content = new String(ch, start, length); }        }    }    public static class DataHandler extends DefaultHandler {        private String content;        private String currentElement;        private boolean insideElement = false;        private Attributes attribs;        private linkedHashMultimap dataMap;        private String[] headerArray;        public DataHandler() { super();        }        @Override        public void startElement(String uri, String name,     String qName, Attributes atts) { if ("item".equalsIgnoreCase(qName)) {     dataMap = linkedHashMultimap.create(); } currentElement = qName; content = null; insideElement = true; attribs = atts;        }        @Override        public void endElement(String uri, String name, String qName) { if (!"item".equalsIgnoreCase(qName) && !"root".equalsIgnoreCase(qName)) {     if (content != null && qName.equals(currentElement) && content.trim().length() > 0) {         dataMap.put(qName, content);     }     if (attribs != null) {         int attsLength = attribs.getLength();         if (attsLength > 0) {  for (int i = 0; i < attsLength; i++) {      String attName = attribs.getLocalName(i);      dataMap.put(attName, attribs.getValue(i));  }         }     } } if ("item".equalsIgnoreCase(qName)) {     String data[] = new String[headerArray.length];     int i = 0;     for (String h : headerArray) {         if (dataMap.containsKey(h)) {  Object[] values = dataMap.get(h).toArray();  data[i] = (String) values[0];  if (values.length > 1) {      dataMap.removeAll(h);      for (int j = 1; j < values.length; j++) {          dataMap.put(h, values[j]);      }  } else {      dataMap.removeAll(h);  }         } else {  data[i] = "";         }         i++;     }     StringBuilder sb = new StringBuilder();     for (String d : data) {         sb.append(d);         sb.append(',');     }     System.out.println(sb.substring(0, sb.length() - 
1)); } insideElement = false; currentElement = null; attribs = null;        }        @Override        public void characters(char ch[], int start, int length) { if (insideElement) {     content = new String(ch, start, length); }        }        public void setHeaderArray(String[] headerArray) { this.headerArray = headerArray;        }    }}


转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/507282.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号