/* Copyright 2002-2005 Elliotte Rusty Harold
This library is free software; you can redistribute it and/or modify
it under the terms of version 2.1 of the GNU Lesser General Public
License as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330,
Boston, MA 02111-1307 USA
You can contact Elliotte Rusty Harold by sending e-mail to
elharo@metalab.unc.edu. Please include the word "XOM" in the
subject line. The XOM home page is located at http://www.xom.nu/
*/
package nu.xom;
import java.io.IOException;
import java.io.Writer;
/**
* <p>
* This class is responsible for writing strings with the
* necessary escaping for their context.
* </p>
*
* @author Elliotte Rusty Harold
* @version 1.1b7
*
*/
abstract class TextWriter {
protected final Writer out;
protected final String encoding;
private String lineSeparator = "\r\n";
// true if the user has requested a specific
// line separator
boolean lineSeparatorSet = false;
private boolean inDocType = false;
private int maxLength = 0;
private int indent = 0;
private String indentString = "";
protected int column = 0;
// Is an xml:space="preserve" attribute in scope?
private boolean preserveSpace = false;
protected boolean normalize = false;
protected TextWriter(Writer out, String encoding) {
this.out = out;
this.encoding = encoding;
}
void reset() {
column = 0;
fakeIndents = 0;
lastCharacterWasSpace = false;
skipFollowingLinefeed = false;
}
protected boolean lastCharacterWasSpace = false;
/**
* Indicates whether a linefeed is just half of a \r\n pair
* used for a line break.
*/
protected boolean skipFollowingLinefeed = false;
// Needed for memory between calls.
private char highSurrogate;
private boolean isHighSurrogate(int c) {
return c >= 0xD800 && c <= 0xDBFF;
}
private boolean isLowSurrogate(int c) {
return c >= 0xDC00 && c <= 0xDFFF;
}
final void writePCDATA(char c) throws IOException {
switch(c) {
case '\r':
if (!adjustingWhiteSpace() && !lineSeparatorSet) {
out.write("
");
column += 6;
justBroke=false;
}
else {
breakLine();
lastCharacterWasSpace = true;
}
skipFollowingLinefeed = true;
break;
case 14: // unreachable
case 15: // unreachable
case 16: // unreachable
case 17: // unreachable
case 18: // unreachable
case 19: // unreachable
case 20: // unreachable
case 21: // unreachable
case 22: // unreachable
case 23: // unreachable
case 24: // unreachable
case 25: // unreachable
case 26: // unreachable
case 27: // unreachable
case 28: // unreachable
case 29: // unreachable
case 30: // unreachable
case 31: // unreachable
throw new XMLException("Bad character snuck into document");
case ' ':
write(c);
break;
case '!':
write(c);
break;
case '"':
write(c);
break;
case '#':
write(c);
break;
case '$':
write(c);
break;
case '%':
write(c);
break;
case '&':
out.write("&");
column += 5;
lastCharacterWasSpace = false;
skipFollowingLinefeed = false;
justBroke = false;
break;
case '\'':
write(c);
break;
case '(':
write(c);
break;
case ')':
write(c);
break;
case '*':
write(c);
break;
case '+':
write(c);
break;
case ',':
write(c);
break;
case '-':
write(c);
break;
case '.':
write(c);
break;
case '/':
write(c);
break;
case '0':
write(c);
break;
case '1':
write(c);
break;
case '2':
write(c);
break;
case '3':
write(c);
break;
case '4':
write(c);
break;
case '5':
write(c);
break;
case '6':
write(c);
break;
case '7':
write(c);
break;
case '8':
write(c);
break;
case '9':
write(c);
break;
case ':':
write(c);
break;
case ';':
write(c);
break;
case '<':
out.write("<");
column += 4;
lastCharacterWasSpace = false;
skipFollowingLinefeed = false;
justBroke = false;
break;
case '=':
write(c);
break;
case '>':
out.write(">");
column += 4;
lastCharacterWasSpace = false;
skipFollowingLinefeed = false;
justBroke = false;
break;
default:
if (needsEscaping(c)) writeEscapedChar(c);
else write(c);
}
}
private void writeEscapedChar(char c) throws IOException {
if (isHighSurrogate(c)) {
//store and wait for low half
highSurrogate = c;
}
else if (isLowSurrogate(c)) {
// decode and write entity reference
// I am assuming here that nothing allows the
// text to be created with a malformed surrogate
// pair such as a low surrogate that is not immediately
// preceded by a high surrogate
int uchar = UnicodeUtil.combineSurrogatePair(highSurrogate, c);
String s = "&#x" + Integer.toHexString(uchar).toUpperCase() + ';';
out.write(s);
column += s.length();
lastCharacterWasSpace = false;
skipFollowingLinefeed = false;
justBroke = false;
}
else {
String s = "&#x" + Integer.toHexString(c).toUpperCase() + ';';
out.write(s);
column += s.length();
lastCharacterWasSpace = false;
skipFollowingLinefeed = false;
justBroke=false;
}
}
private boolean adjustingWhiteSpace() {
return maxLength > 0 || indent > 0;
}
// This is the same as writePCDATA except that it
// also needs to escape " as " and tab as "	".
// I'm not escaping the single quote because Serializer
// always uses double quotes to contain
// values.
final void writeAttributeValue(char c)
throws IOException {
switch(c) {
// Handle white space that the parser might normalize
// on roundtrip. We only escape them if the serializer
// is not adjusting white space; that is indent is 0
// and maxLength is 0.
case '\t':
if (!adjustingWhiteSpace()) {
out.write("	");
column += 6;
lastCharacterWasSpace = true;
skipFollowingLinefeed = false;
justBroke=false;
}
else {
write(' ');
}
break;
case '\n':
if (skipFollowingLinefeed) {
skipFollowingLinefeed = false;
return;
}
else if (adjustingWhiteSpace()) {
out.write(" ");
lastCharacterWasSpace = true;
justBroke=false;
}
else {
if (lineSeparatorSet) {
escapeBreakLine();
}
else {
out.write("
");
column += 6;
justBroke=false;
}
lastCharacterWasSpace = true;
}
break;
case 11:
// unreachable
case 12:
// unreachable
throw new XMLException("Bad character snuck into document");
case '\r':
if (adjustingWhiteSpace()) {
out.write(" ");
lastCharacterWasSpace = true;
skipFollowingLinefeed = true;
justBroke=false;
}
else {
if (lineSeparatorSet) {
escapeBreakLine();
skipFollowingLinefeed = true;
}
else {
out.write("
");
column += 6;
justBroke=false;
}
}
break;
case 14:
// unreachable
case 15:
// unreachable
case 16:
// unreachable
case 17:
// unreachable
case 18:
// unreachable
case 19:
// unreachable
case 20:
// unreachable
case 21:
// unreachable
case 22:
// unreachable
case 23:
// unreachable
case 24:
// unreachable
case 25:
// unreachable
case 26:
// unreachable
case 27:
// unreachable
case 28:
// unreachable
case 29:
// unreachable
case 30:
// unreachable
case 31:
// unreachable
throw new XMLException("Bad character snuck into document");
case ' ':
write(c);
break;
case '!':
write(c);
break;
case '"':
out.write(""");
column += 6;
lastCharacterWasSpace = false;
skipFollowingLinefeed = false;
justBroke=false;
break;
case '#':
write(c);
break;
case '$':
write(c);
break;
case '%':
write(c);
break;
case '&':
out.write("&");
column += 5;
lastCharacterWasSpace = false;
skipFollowingLinefeed = false;
justBroke = false;
break;
case '\'':
write(c);
break;
case '(':
write(c);
break;
case ')':
write(c);
break;
case '*':
write(c);
break;
case '+':
write(c);
break;
case ',':
write(c);
break;
case '-':
write(c);
break;
case '.':
write(c);
break;
case '/':
write(c);
break;
case '0':
write(c);
break;
case '1':
write(c);
break;
case '2':
write(c);
break;
case '3':
write(c);
break;
case '4':
write(c);
break;
case '5':
write(c);
break;
case '6':
write(c);
break;
case '7':
write(c);
break;
case '8':
write(c);
break;
case '9':
write(c);
break;
case ':':
write(c);
break;
case ';':
write(c);
break;
case '<':
out.write("<");
column += 4;
lastCharacterWasSpace = false;
skipFollowingLinefeed = false;
justBroke = false;
break;
case '=':
write(c);
break;
case '>':
out.write(">");
column += 4;
lastCharacterWasSpace = false;
skipFollowingLinefeed = false;
justBroke = false;
break;
default:
if (needsEscaping(c)) writeEscapedChar(c);
else write(c);
}
}
// XXX We might be able to optimize this by using switch statements
// in the methods that call this to separate out the special cases.
// --\n, \t, space, etc.--and passing them to a different method
// thsu avoiding the if tests here. See if this method shows up as
// a HotSpot in profiling.
private void write(char c) throws IOException {
// Carriage returns are completely handled by
// writePCDATA and writeAttributeValue. They never
// enter this method.
if ((c == ' ' || c == '\n' || c == '\t')) {
if (needsBreak()) {
breakLine();
skipFollowingLinefeed = false;
}
else if (preserveSpace || (indent <= 0 && maxLength <= 0)) {
// We're neither indenting nor wrapping
// so we need to preserve white space
if (c == ' ' || c == '\t') {
out.write(c);
skipFollowingLinefeed = false;
column++;
justBroke=false;
}
else { // (c == '\n')
if (!lineSeparatorSet ||
!skipFollowingLinefeed) {
writeLineSeparator(c);
}
skipFollowingLinefeed = false;
column = 0;
}
}
else if (!lastCharacterWasSpace) {
out.write(' ');
column++;
skipFollowingLinefeed = false;
justBroke=false;
}
lastCharacterWasSpace = true;
}
else {
out.write(c);
// don't increment column for high surrogate, only low surrogate
if (c < 0xd800 || c > 0xDBFF) column++;
lastCharacterWasSpace = false;
skipFollowingLinefeed = false;
justBroke=false;
}
}
private void writeLineSeparator(char c)
throws IOException {
if (!inDocType && (!lineSeparatorSet || preserveSpace)) out.write(c);
else if (lineSeparator.equals("\r\n")) {
out.write("\r\n");
}
else if (lineSeparator.equals("\n")) {
out.write('\n');
}
else { // lineSeparator.equals("\r"))
out.write('\r');
}
// Remember, there are only three possible line separators
}
private boolean needsBreak() {
if (maxLength <= 0 || preserveSpace) return false;
// Better algorithm needed: Should look ahead in the
// stream, see if there's a white space character
// between here and the maxLength, Then again, simple is good.
// Here we just assume there's probably space somewhere
// within the next ten characters
return column >= maxLength - 10;
}
protected boolean justBroke = false;
boolean justBroke() {
return justBroke;
}
final void breakLine() throws IOException {
out.write(lineSeparator);
out.write(indentString);
column = indentString.length();
lastCharacterWasSpace = true;
justBroke = true;
}
private final void escapeBreakLine() throws IOException {
if ("\n".equals(lineSeparator)) {
out.write("
");
column += 6;
}
else if ("\r\n".equals(lineSeparator)) {
out.write("
");
column += 12;
}
else {
out.write("
");
column += 6;
}
lastCharacterWasSpace = true;
}
// Note that when this method is called directly, then
// normalization is not performed on c. Currently this is
// only called for ASCII characters like <, >, and the space,
// which should be OK
protected final void writeMarkup(char c) throws IOException {
if (needsEscaping(c)) {
throw new UnavailableCharacterException(c, encoding);
}
write(c);
}
// XXX should we have a special package protected
// method to be used only for ASCII characters we know don't need escaping or
// normalization such as <, /, A-Z, etc.?
void writePCDATA(String s) throws IOException {
s = normalize(s);
int length = s.length();
for (int i=0; i < length; i++) {
writePCDATA(s.charAt(i));
}
}
void writeAttributeValue(String s)
throws IOException {
s = normalize(s);
int length = s.length();
for (int i=0; i < length; i++) {
writeAttributeValue(s.charAt(i));
}
}
void writeMarkup(String s) throws IOException {
s = normalize(s);
int length = s.length();
for (int i=0; i < length; i++) {
writeMarkup(s.charAt(i));
}
}
protected String normalize(String s) {
if (normalize) {
return UnicodeUtil.normalize(s);
}
return s;
}
boolean isIndenting() {
return indentString.length() > 0;
}
private int fakeIndents = 0;
private final static String _128_SPACES=" ";
private final static int _128 = 128;
void incrementIndent() {
if (indent == 0) return;
String newIndent;
int length = indentString.length() + indent;
if (indentString.length() + indent < _128) {
newIndent = _128_SPACES.substring(0, length);
}
else {
StringBuffer sb = new StringBuffer(length);
sb.append(_128_SPACES);
for (int i = _128; i < length; i++) {
sb.append(' ');
}
newIndent = sb.toString();
}
// limit maximum indent to half of maximum line length
if (maxLength > 0 && newIndent.length() > maxLength / 2) {
fakeIndents++;
}
else this.indentString = newIndent;
}
void decrementIndent() {
if (indent == 0) return;
else if (fakeIndents > 0) fakeIndents--;
else {
indentString = indentString.substring(
0, indentString.length()-indent
);
}
}
String getEncoding() {
return this.encoding;
}
/**
* <p>
* Returns the String used as a line separator.
* This is always "\n", "\r", or "\r\n".
* </p>
*
* @return the line separator
*/
String getLineSeparator() {
return lineSeparator;
}
/**
* <p>
* Sets the lineSeparator. This
* can only be one of the three
* strings "\n", "\r", or "\r\n".
* All other values are forbidden.
* </p>
*
* @param lineSeparator the lineSeparator to set
*
* @throws IllegalArgumentException if you attempt to use
* any line separator other than "\n", "\r", or "\r\n".
*
*/
void setLineSeparator(String lineSeparator) {
if (lineSeparator.equals("\n")
|| lineSeparator.equals("\r")
|| lineSeparator.equals("\r\n")) {
this.lineSeparator = lineSeparator;
this.lineSeparatorSet = true;
}
else {
throw new IllegalArgumentException(
"Illegal Line Separator");
}
}
void setInDocType(boolean inDocType) {
this.inDocType = inDocType;
}
/**
* <p>
* Returns the number of spaces this serializer indents.
* </p>
*
* @return the number of spaces this serializer indents
*/
int getIndent() {
return indent;
}
/**
* <p>
* Returns the maximum line length.
* </p>
*
* @return the maximum line length.
*/
int getMaxLength() {
return maxLength;
}
/**
* <p>
* Sets the suggested maximum line length for this serializer.
* In some circumstances this may not be respected.
* </p>
*
* @param maxLength the maxLength to set
*/
void setMaxLength(int maxLength) {
if (maxLength < 0) maxLength = 0;
this.maxLength = maxLength;
}
/**
* <p>
* Sets the number of spaces to indent each successive level in the
* hierarchy. Use 0 for no extra indenting.
* </p>
*
* @param indent the indent to set
*/
void setIndent(int indent) {
this.indent = indent;
}
void flush() throws IOException {
out.flush();
}
abstract boolean needsEscaping(char c);
/**
* <p>
* Used to track the current status of xml:space.
* This is false by default, unless an xml:space="preserve"
* attribute is in-scope. When such an attribute is in-scope,
* white space is not adjusted even if indenting and/or
* a maximum line length has been requested.
* </p>
*
*
* @return true if an <code>xml:space="true"</code> attribute
* is in-scope
*/
boolean isPreserveSpace() {
return preserveSpace;
}
/**
* @param preserveSpace whether to preserve all white space
*/
void setPreserveSpace(boolean preserveSpace) {
this.preserveSpace = preserveSpace;
}
/**
* @return the current column number
*/
int getColumnNumber() {
return this.column;
}
/**
* <p>
* If true, this property indicates serialization will
* perform Unicode normalization on all data using normalization
* form C (NFC). Performing Unicode normalization
* does change the document's infoset.
* The default is false; do not normalize.
* </p>
*
* <p>
* This feature has not yet been benchmarked or optimized.
* It may result in substantially slower code.
* </p>
*
* @param normalize true if normalization is performed;
* false if it isn't.
*/
void setNFC(boolean normalize) {
this.normalize = normalize;
}
/**
* <p>
* If true, this property indicates serialization will
* perform Unicode normalization on all data using normalization
* form C (NFC). The default is false; do not normalize.
* </p>
*
* @return true if this serialization performs Unicode
* normalization; false if it doesn't.
*/
boolean getNFC() {
return this.normalize;
}
}
|