使用JAVA将CSV转换为XML

2023年1月6日 161次阅读

我有一组CSV数据需要转换为XML.代码看起来没有问题,但输出并不理想:某些列因为没有值而被整个省略,而且生成的XML全部挤在很长的一行里,没有换行.

这是我的CSV数据示例：

Name  Age Sex
chi   23   
kay   19  male
John      male

我的代码：

/**
 * Converts a delimited text file into an XML document using the JAXP DOM and
 * Transformer APIs. The first line of the input supplies the element names.
 */
public class XMLCreators {
  // Protected Properties
  // Both remain null if the constructor's setup fails.
  protected DocumentBuilderFactory domFactory = null;
  protected DocumentBuilder domBuilder = null;

  /**
   * Creates the DOM factory and builder. Any failure is only printed to
   * System.err, so domBuilder may still be null afterwards.
   */
  public XMLCreators() {
    try {
      domFactory = DocumentBuilderFactory.newInstance();
      domBuilder = domFactory.newDocumentBuilder();
    } catch (FactoryConfigurationError exp) {
      System.err.println(exp.toString());
    } catch (ParserConfigurationException exp) {
      System.err.println(exp.toString());
    } catch (Exception exp) {
      System.err.println(exp.toString());
    }

  }

  /**
   * Reads csvFileName, uses its first line as column names and writes every
   * following line that has at least one token as a {@code <row>} element to
   * xmlFileName.
   *
   * @param csvFileName path of the delimited input file
   * @param xmlFileName path of the XML file to write
   * @param delimiter   delimiter characters handed to StringTokenizer
   * @return a counter that starts at -1, is incremented once per row added and
   *         once more after the transform completes; exceptions are printed to
   *         System.err and the counter's value at that point is returned
   */
  public int convertFile(String csvFileName, String xmlFileName,
      String delimiter) {

    int rowsCount = -1;
    try {
      Document newDoc = domBuilder.newDocument();
      // Root element
      Element rootElement = newDoc.createElement("XMLCreators");
      newDoc.appendChild(rootElement);
      // Read csv file
      BufferedReader csvReader;
      csvReader = new BufferedReader(new FileReader(csvFileName));
      int fieldCount = 0;
      String[] csvFields = null;
      StringTokenizer stringTokenizer = null;

      // Assumes the first line in CSV file is column/field names
      // The column names are used to name the elements in the XML file,
      // avoid the use of Space or other characters not suitable for XML element
      // naming

      String curLine = csvReader.readLine();
      if (curLine != null) {
        // Open question from the author: how about other forms of CSV files?
        stringTokenizer = new StringTokenizer(curLine, delimiter);
        fieldCount = stringTokenizer.countTokens();
        if (fieldCount > 0) {
          csvFields = new String[fieldCount];
          int i = 0;
          while (stringTokenizer.hasMoreElements())
            csvFields[i++] = String.valueOf(stringTokenizer.nextElement());
        }
      }

      // At this point the columns are known, now read data by lines
      while ((curLine = csvReader.readLine()) != null) {
        // NOTE: StringTokenizer skips delimiters and never returns an empty
        // token, so a line with an empty field yields fewer tokens and the
        // remaining values are assigned to earlier columns.
        stringTokenizer = new StringTokenizer(curLine, delimiter);
        fieldCount = stringTokenizer.countTokens();
        if (fieldCount > 0) {
          Element rowElement = newDoc.createElement("row");
          int i = 0;
          // Only as many child elements as there are tokens are created;
          // columns without a token get no element at all.
          while (stringTokenizer.hasMoreElements()) {
            try {
              String curValue = String.valueOf(stringTokenizer.nextElement());
              Element curElement = newDoc.createElement(csvFields[i++]);
              curElement.appendChild(newDoc.createTextNode(curValue));
              rowElement.appendChild(curElement);
            } catch (Exception exp) {
              // Silently ignored: e.g. csvFields[i++] running past the header
              // count, or csvFields still being null because the header line
              // was missing or had no tokens.
            }
          }
          rootElement.appendChild(rowElement);
          rowsCount++;
        }
      }
      // Not reached when an exception is thrown above; the reader then stays open.
      csvReader.close();

      // Save the document to the disk file
      // No output properties are set, so the transformer's defaults apply.
      TransformerFactory tranFactory = TransformerFactory.newInstance();
      Transformer aTransformer = tranFactory.newTransformer();
      Source src = new DOMSource(newDoc);
      Result result = new StreamResult(new File(xmlFileName));
      aTransformer.transform(src, result);
      // Offsets the initial -1 so a fully successful run returns the row count.
      rowsCount++;

      // Output to console for testing
      // Result result = new StreamResult(System.out);

    } catch (IOException exp) {
      System.err.println(exp.toString());
    } catch (Exception exp) {
      System.err.println(exp.toString());
    }
    return rowsCount;
    // "XML Document has been created" + rowsCount;
  }
}

当对上述数据执行此代码时,它会产生：

<?xml version="1.0" encoding="UTF-8"?>
<XMLCreators>
<row>
<Name>chi</Name>
<Age>23</Age>
</row>
<row>
<Name>kay</Name>
<Age>19</Age>
<Sex>male</Sex>
</row>
<row>
<Name>John</Name>
<Age>male</Age>
</row>
</XMLCreators>

上面的输出是我手动整理成这种格式的,程序实际生成的是很长的一行.期望生成的输出应该是：

<?xml version="1.0" encoding="UTF-8"?>
<XMLCreators>
<row>
<Name>chi</Name>
<Age>23</Age>
<Sex></Sex>
</row>
<row>
<Name>kay</Name>
<Age>19</Age>
<Sex>male</Sex>
</row>
<row>
<Name>John</Name>
<Age></Age>
<Sex>male</Sex>
</row>
</XMLCreators>

最佳答案：我同意肯尼特的观点.

我只需补充以下几行设置：

// Pretty-print the serialized XML: emit it as XML with line breaks between
// elements and an indent amount of 4.
aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
aTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

这样会在元素之间添加换行并启用缩进.

更新

首先要指出的是,您给出的文件并不是CSV(逗号分隔值)文件,这个问题就留给您自己去处理了……

// Column names, read from the first line of the input file.
List<String> headers = new ArrayList<String>(5);

File file = new File("Names2.csv");
BufferedReader reader = null;

try {

    DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder domBuilder = domFactory.newDocumentBuilder();

    Document newDoc = domBuilder.newDocument();
    // Root element
    Element rootElement = newDoc.createElement("XMLCreators");
    newDoc.appendChild(rootElement);

    reader = new BufferedReader(new FileReader(file));
    int line = 0;

    String text = null;
    while ((text = reader.readLine()) != null) {

        // The sample file is space-aligned rather than comma-separated.
        // StringTokenizer never returns an empty token, so a blank column in the
        // middle of a line shifts the later values to the left.
        // For a real comma-separated file, text.split(",", -1) keeps empty fields.
        StringTokenizer st = new StringTokenizer(text, " ", false);
        String[] rowValues = new String[st.countTokens()];
        int index = 0;
        while (st.hasMoreTokens()) {
            rowValues[index++] = st.nextToken();
        }

        if (line == 0) { // Header row
            for (String col : rowValues) {
                headers.add(col);
            }
        } else { // Data row
            Element rowElement = newDoc.createElement("row");
            rootElement.appendChild(rowElement);
            // Always emit one element per header so short rows keep every column.
            for (int col = 0; col < headers.size(); col++) {
                String header = headers.get(col);
                // Missing trailing values default to the empty string.
                String value = col < rowValues.length ? rowValues[col] : "";

                Element curElement = newDoc.createElement(header);
                curElement.appendChild(newDoc.createTextNode(value));
                rowElement.appendChild(curElement);
            }
        }
        line++;
    }

    // Serialize into memory and print the result. The bytes are written and read
    // back as UTF-8 explicitly so they match the encoding named in the XML
    // declaration instead of depending on the platform default charset.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    OutputStreamWriter osw = new OutputStreamWriter(baos, "UTF-8");
    try {
        TransformerFactory tranFactory = TransformerFactory.newInstance();
        Transformer aTransformer = tranFactory.newTransformer();
        aTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
        aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
        aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

        Source src = new DOMSource(newDoc);
        Result result = new StreamResult(osw);
        aTransformer.transform(src, result);

        osw.flush();
        System.out.println(baos.toString("UTF-8"));
    } finally {
        // Closing the writer also closes the underlying byte buffer.
        osw.close();
    }
} catch (Exception e) {
    e.printStackTrace();
} finally {
    // Release the input file even when parsing or serialization failed.
    if (reader != null) {
        try {
            reader.close();
        } catch (Exception closeError) {
            closeError.printStackTrace();
        }
    }
}

注意,我在这里使用的是List而不是Map.您需要自行决定如何最好地处理缺失值的问题.在事先不知道文件结构的情况下,这个问题没有简单的解决办法.

无论如何,我最终得到了

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<XMLCreators>
    <row>
        <Name>chi</Name>
        <Age>23</Age>
        <Sex/>
    </row>
    <row>
        <Name>kay</Name>
        <Age>19</Age>
        <Sex>male</Sex>
    </row>
    <row>
        <Name>John</Name>
        <Age>male</Age>
        <Sex/>
    </row>
</XMLCreators>

更新合并

/**
 * Converts a delimited text file into an XML file. The first non-blank line
 * supplies the column names; every following non-blank line becomes one
 * {@code <row>} element with one child element per column.
 */
public class XMLCreators {
    // Protected Properties

    protected DocumentBuilderFactory domFactory = null;
    protected DocumentBuilder domBuilder = null;

    /**
     * Creates the DOM factory and builder. A failure is reported on System.err
     * and leaves domBuilder null, in which case convertFile returns -1.
     */
    public XMLCreators() {
        try {
            domFactory = DocumentBuilderFactory.newInstance();
            domBuilder = domFactory.newDocumentBuilder();
        } catch (FactoryConfigurationError exp) {
            System.err.println(exp.toString());
        } catch (ParserConfigurationException exp) {
            System.err.println(exp.toString());
        } catch (Exception exp) {
            System.err.println(exp.toString());
        }

    }

    /**
     * Converts the delimited file csvFileName into the XML file xmlFileName.
     *
     * <p>Every row gets one element per header column; a column without a value
     * is written as an empty element, so no column is dropped. Blank lines are
     * skipped.
     *
     * @param csvFileName path of the input file; its first non-blank line holds
     *                    the column names, which must be valid XML element names
     * @param xmlFileName path of the XML file to create or overwrite
     * @param delimiter   set of delimiter characters; see splitFields for how
     *                    whitespace and non-whitespace delimiters differ
     * @return the number of data rows written, or -1 if reading, building or
     *         writing failed (the cause is printed to System.err)
     */
    public int convertFile(String csvFileName, String xmlFileName,
                    String delimiter) {

        BufferedReader csvReader = null;
        try {
            Document newDoc = domBuilder.newDocument();
            // Root element
            Element rootElement = newDoc.createElement("XMLCreators");
            newDoc.appendChild(rootElement);

            // Read csv file
            csvReader = new BufferedReader(new FileReader(csvFileName));

            List<String> headers = null; // null until the header line was read
            int rowsCount = 0;

            String text;
            while ((text = csvReader.readLine()) != null) {

                if (text.trim().length() == 0) {
                    continue; // a blank line is neither a header nor a row
                }

                String[] rowValues = splitFields(text, delimiter);

                if (headers == null) { // Header row
                    headers = new ArrayList<String>(rowValues.length);
                    for (String col : rowValues) {
                        headers.add(col);
                    }
                    continue;
                }

                // Data row
                Element rowElement = newDoc.createElement("row");
                rootElement.appendChild(rowElement);
                for (int col = 0; col < headers.size(); col++) {
                    String header = headers.get(col);
                    if (header.length() == 0) {
                        continue; // an empty name cannot be used as an element name
                    }
                    // Missing trailing values default to the empty string.
                    String value = col < rowValues.length ? rowValues[col] : "";

                    Element curElement = newDoc.createElement(header);
                    curElement.appendChild(newDoc.createTextNode(value));
                    rowElement.appendChild(curElement);
                }
                rowsCount++;
            }

            // Save the document to the disk file, indented for readability.
            // A File-based result lets the transformer write the bytes in the
            // encoding it declares in the XML header.
            TransformerFactory tranFactory = TransformerFactory.newInstance();
            Transformer aTransformer = tranFactory.newTransformer();
            aTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
            aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
            aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

            Source src = new DOMSource(newDoc);
            Result result = new StreamResult(new File(xmlFileName));
            aTransformer.transform(src, result);

            return rowsCount;

        } catch (Exception exp) {
            // Boundary of the conversion: report the problem and signal failure.
            System.err.println(exp.toString());
            return -1;
        } finally {
            if (csvReader != null) {
                try {
                    csvReader.close();
                } catch (IOException exp) {
                    System.err.println(exp.toString());
                }
            }
        }
    }

    /**
     * Splits one line into its fields.
     *
     * <p>If delimiter consists only of whitespace (or is empty), runs of
     * delimiters count as a single separator, which suits space-aligned files.
     * Otherwise every delimiter character ends a field, so two adjacent
     * delimiters produce an empty field and later values stay in their column.
     */
    private static String[] splitFields(String line, String delimiter) {
        List<String> fields = new ArrayList<String>();
        if (delimiter.trim().length() == 0) {
            StringTokenizer st = new StringTokenizer(line, delimiter, false);
            while (st.hasMoreTokens()) {
                fields.add(st.nextToken());
            }
        } else {
            int start = 0;
            for (int i = 0; i < line.length(); i++) {
                if (delimiter.indexOf(line.charAt(i)) >= 0) {
                    fields.add(line.substring(start, i));
                    start = i + 1;
                }
            }
            fields.add(line.substring(start));
        }
        return fields.toArray(new String[fields.size()]);
    }
}

使用OpenCSV更新

/**
 * Converts a delimited text file into an XML file, using OpenCSV to parse the
 * input. The first record supplies the column names; every following record
 * becomes one {@code <row>} element with one child element per column.
 */
public class XMLCreators {
    // Protected Properties

    protected DocumentBuilderFactory domFactory = null;
    protected DocumentBuilder domBuilder = null;

    /**
     * Creates the DOM factory and builder. A failure is reported on System.err
     * and leaves domBuilder null, in which case convertFile returns -1.
     */
    public XMLCreators() {
        try {
            domFactory = DocumentBuilderFactory.newInstance();
            domBuilder = domFactory.newDocumentBuilder();
        } catch (FactoryConfigurationError exp) {
            System.err.println(exp.toString());
        } catch (ParserConfigurationException exp) {
            System.err.println(exp.toString());
        } catch (Exception exp) {
            System.err.println(exp.toString());
        }

    }

    /**
     * Converts the delimited file csvFileName into the XML file xmlFileName.
     *
     * <p>Every row gets one element per header column. A column without a
     * value is written as an empty element, and values beyond the last header
     * column are ignored. Header names and values are trimmed.
     *
     * @param csvFileName path of the input file; its first record holds the
     *                    column names, which must be valid XML element names
     * @param xmlFileName path of the XML file to create or overwrite
     * @param delimiter   separator; only its first character is used
     * @return the number of data rows written, or -1 if reading, building or
     *         writing failed (the cause is printed to System.err)
     */
    public int convertFile(String csvFileName, String xmlFileName,
                    String delimiter) {

        CSVReader reader = null;
        try {
            Document newDoc = domBuilder.newDocument();
            // Root element
            Element rootElement = newDoc.createElement("XMLCreators");
            newDoc.appendChild(rootElement);

            //** Now using the OpenCSV **//
            // NOTE(review): this (Reader, char) constructor is the one the code
            // already relied on; confirm it exists in the OpenCSV version in use.
            reader = new CSVReader(new FileReader(csvFileName), delimiter.charAt(0));

            List<String> headers = new ArrayList<String>(5);
            boolean headerRead = false;
            int rowsCount = 0;

            String[] nextLine;
            while ((nextLine = reader.readNext()) != null) {

                if (!headerRead) { // Header row
                    for (String col : nextLine) {
                        headers.add(col.trim());
                    }
                    headerRead = true;
                    continue;
                }

                // Data row
                Element rowElement = newDoc.createElement("row");
                rootElement.appendChild(rowElement);

                // Iterate over the headers, not the values, so a short row still
                // gets every column and a long row cannot index past the headers.
                for (int col = 0; col < headers.size(); col++) {
                    String header = headers.get(col);
                    if (header.length() == 0) {
                        continue; // an empty name cannot be used as an element name
                    }
                    String value = col < nextLine.length ? nextLine[col].trim() : "";

                    Element curElement = newDoc.createElement(header);
                    curElement.appendChild(newDoc.createTextNode(value));
                    rowElement.appendChild(curElement);
                }
                rowsCount++;
            }
            //** End of CSV parsing**//

            // Save the document to the disk file, indented for readability.
            // A File-based result lets the transformer write the bytes in the
            // encoding it declares in the XML header.
            TransformerFactory tranFactory = TransformerFactory.newInstance();
            Transformer aTransformer = tranFactory.newTransformer();
            aTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
            aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
            aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

            Source src = new DOMSource(newDoc);
            Result result = new StreamResult(new File(xmlFileName));
            aTransformer.transform(src, result);

            return rowsCount;

        } catch (Exception exp) {
            // Boundary of the conversion: report the problem and signal failure.
            System.err.println(exp.toString());
            return -1;
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException exp) {
                    System.err.println(exp.toString());
                }
            }
        }
    }
}