[jdom-interest] outputElementContent and lineSeparator

Klotz, Leigh Leigh.Klotz at xerox.com
Wed Mar 1 19:35:25 PST 2006


I am using outputElementContent to get serialized text content for a
wire representation of XML data, which is then fed back into JDOM
later.
I found that if I didn't call format.setLineSeparator("\n"), newlines in
element content text nodes were doubled, even though I used
Format.getRawFormat().

The JDOM 1.0 documentation for setLineSeparator says
	Note that if the "newlines" property is false, this value is
irrelevant. 

I think this refers to a pre-1.0 "newlines property" that appears to
have been an argument to the XMLOutputter constructor.

The JDOM 1.0 documentation for setLineSeparator also says
	Note that this only applies to newlines generated by the
outputter.
	If you parse an XML document that contains newlines embedded
inside a text node, 
	and you do not set TextMode.NORMALIZE, then the newlines will be
output verbatim, 
	as "\n" which is how parsers normalize them. 

It seems to me that I am getting additional characters inside text nodes
unless I do setLineSeparator("\n")

Below is a test case that passes an Element through
outputElementContent, SAXBuilder, and element.addContent.

It shows that unless you call format.setLineSeparator("\n"), the
end-of-line characters get doubled, are turned into &#xD;, and start
growing.
Call with command-line arg "bug" or "workaround".

Leigh L. Klotz, Jr.
Xerox Corporation
----------------------
Results of running the test case with JDOM 1.0:
$ java -classpath .\;c:/ds/src/DocuShare3/dist/lib/jdom.jar jdomws bug
<?xml version="1.0" encoding="UTF-8"?>
<a>x
y
z
</a>

Line Separator length is 2
a='x
y
z
' whose length is 9
<?xml version="1.0" encoding="UTF-8"?>
<a>x&#xD;
y&#xD;
z&#xD;
</a>

$ java -classpath .\;jdom.jar jdomws workaround
<?xml version="1.0" encoding="UTF-8"?>
<a>x
y
z
</a>

Line Separator length is 1
a='x
y
z
' whose length is 6
<?xml version="1.0" encoding="UTF-8"?>
<a>x
y
z
</a>


----------------------

import java.io.IOException;
import java.io.StringReader;
import java.io.Reader;
import java.io.StringWriter;
import java.util.List;
import java.util.Iterator;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;

public class jdomws {
    
    boolean doWorkaround = false;

    public static void main(String args[]) throws Exception {
        if (args.length == 0) {
            System.err.println("usage: workaround|bug");
        } else {
            boolean doWorkaround = args[0].equals("workaround");
            new jdomws(doWorkaround).doit();
        }
    }

    public jdomws(boolean doWorkaround) {
        this.doWorkaround=doWorkaround;
    }

    void doit() throws Exception {
        Document d = stringToDocument("<?xml
version='1.0'?>\n<a>x\ny\nz\n</a>");
        System.out.println(documentToString(d));
        updateDoc(d);
        System.out.println(documentToString(d));
    }

    void updateDoc(Document document) {
        Element element = document.getRootElement();
        String name = element.getName();
        String value = elementContentToString(element);
        System.out.println(name+"='"+value+"' whose length is " +
value.length());
        element.removeContent();
        element.addContent(value);
    }

    String elementContentToString(Element element) {
        StringWriter strWriter = null;
        try {
            strWriter = new StringWriter();
            XMLOutputter outputter = null;
            Format format = null;
            format = Format.getRawFormat();
            format.setOmitDeclaration(true);
            if (doWorkaround)
                format.setLineSeparator("\n");
            outputter = new XMLOutputter(format);
            outputter.outputElementContent(element, strWriter);
            System.out.println("Line Separator length is
"+format.getLineSeparator().length());
            strWriter.close();
            return strWriter.toString();
        } catch (IOException ioe) {
            ioe.printStackTrace();
            throw new RuntimeException("XMLWebRequest: a StringWriter
threw an exception!");
        }
    }

    Document stringToDocument(String string) throws Exception {
        SAXBuilder builder = new SAXBuilder();
        Document document = builder.build(new StringReader(string));
        return document;
    }


    String documentToString(Document d) {
        Format format = Format.getRawFormat();
        XMLOutputter outputter = new XMLOutputter(format);
        return outputter.outputString(d);
    }

}



More information about the jdom-interest mailing list