[jdom-interest] getTextNormalize bug?
    Tim Daly 
    tdaly at ans.net
       
    Mon Nov 26 20:07:18 PST 2001
    
    
  
I wrote a simple program to read an XML file and write it to stdout.
The text of each Element is printed with the call:
  Element element; 
  ...
  System.out.println(element.getTextNormalize());
During my testing I ran it on the build.xml file in jdom, thus:
  java XMLCopy build.xml >foo.xml
Then I renamed foo.xml to build.xml and did:
  ./build.sh
The Ant program died with the message:
 Character conversion error: "Unconvertible UTF-8 character beginning 
 with 0xa9" (line number may be too low).
The original line in the original build.xml file contains:
  Copyright ©
(aside: we knew the problem was the copyright :-))
which got converted by getTextNormalize() into
  Copyright (someStrangeCharacter)
Is this a bug in getTextNormalize?
My source code follows.
Tim Daly
daly at idsi.net
=====================================================================
package samples;
import org.jdom.*;
import org.jdom.input.SAXBuilder;
import org.jdom.input.DOMBuilder;
import org.jdom.output.*;
import java.util.*;
/**
 * Sample program that parses an XML document with JDOM and prints an
 * indented outline of its element tree to standard output.
 *
 * <p>All output is written as UTF-8 instead of the platform default
 * encoding. With a single-byte default charset, a non-ASCII character such
 * as the copyright sign is emitted as one byte (0xA9); a consumer that reads
 * the redirected output as UTF-8 then fails with an "unconvertible UTF-8
 * character" error. Encoding explicitly as UTF-8 avoids that.
 *
 * <p>Raw collection types are kept on purpose: the file is written in
 * pre-generics style.
 */
public class Count 
{
  /** Names of the elements currently being printed, innermost on top. */
  static Stack stack = new Stack();

  /** Number of spaces printed by {@link #doIndent(int)}. */
  static int indent = 0;

  /** Standard output wrapped so that characters are always encoded as UTF-8. */
  private static final java.io.PrintStream out = utf8Stdout();

  /**
   * Wraps {@code System.out} in an auto-flushing stream that encodes as UTF-8.
   *
   * @return the UTF-8 encoding print stream
   * @throws IllegalStateException if the runtime does not know the UTF-8 charset
   */
  private static java.io.PrintStream utf8Stdout()
  {
    try
    {
      return new java.io.PrintStream(System.out, true, "UTF-8");
    }
    catch (java.io.UnsupportedEncodingException e)
    {
      throw new IllegalStateException("UTF-8 encoding is not available", e);
    }
  }

  /**
   * Prints the current indentation and adjusts it by {@code count}.
   *
   * <p>A negative {@code count} is applied before the spaces are printed, a
   * positive one after; zero prints the spaces without changing the level.
   *
   * @param count amount to add to the indentation level
   */
  public static void doIndent(int count)
  {
    if (count < 0)
    {
      indent = indent + count;
    }
    for (int i = 0; i < indent; i++)
    {
      out.print(" ");
    }
    if (count > 0)
    {
      indent = indent + count;
    }
  }

  /**
   * Builds a JDOM document from {@code args[0]}, converts its root element to
   * a DOM element and back to a JDOM element, and prints the resulting tree.
   *
   * @param args command-line arguments; only the first one is read
   */
  public static void main(String[] args) 
  {
    if (args.length == 0) 
    {
      out.println("Usage: java Count URL1 URL2..."); 
      return;
    } 
    SAXBuilder saxBuilder = new SAXBuilder();
    DOMBuilder domBuilder = new DOMBuilder();
    DOMOutputter domOutputter = new DOMOutputter();
    Document jdomDocument;
    org.w3c.dom.Element domElement;
    org.jdom.Element jdomElement;
    org.w3c.dom.Document domDocument;
    try 
    {
      jdomDocument = saxBuilder.build(args[0]);
      domElement = domOutputter.output(jdomDocument.getRootElement());
      jdomElement = domBuilder.build(domElement);
      count(jdomElement);
    }
    catch (JDOMException e) 
    {
      out.println(args[0] + " is not a well formed XML document.");
      out.println(e.getMessage());
    }
    // Make sure nothing stays buffered in the wrapper when the program ends.
    out.flush();
  }  

  /**
   * Prints every attribute in the list as {@code  name = "value"}.
   *
   * @param attributes list whose entries are cast to {@code Attribute}
   */
  public static void printAttributes(List attributes)
  {
    Iterator iterator = attributes.iterator();
    while (iterator.hasNext()) 
    {
      Attribute attribute = (Attribute) iterator.next();
      out.print(" " + attribute.getName() + " = \"" +
                attribute.getValue() + "\"");
    }
  }

  /**
   * Prints the element as an indented opening tag with its attributes, its
   * normalized text when that text is not empty, all child elements
   * recursively, and finally the matching closing tag.
   *
   * @param element element to print
   */
  public static void count(Element element) 
  {
    doIndent(1);
    out.print("<" + element.getName());
    stack.push(element.getName());
    printAttributes(element.getAttributes());
    out.println(">");
    String text = element.getTextNormalize();
    if (!text.equals(""))
    {
      doIndent(0);
      out.println(" " + text);
    }
    List children = element.getContent();
    Iterator iterator = children.iterator();
    while (iterator.hasNext()) 
    {
      Object o = iterator.next();
      // Content may hold non-element nodes; only elements are descended into.
      if (o instanceof Element) 
      {
        count((Element) o);
      }
    }
    doIndent(-1);
    // The closing tag uses the name pushed when this element was opened.
    out.println("</" + (String) stack.pop() + ">");
  }  
}
    
    
More information about the jdom-interest
mailing list