BE THE CODER - javax/mail/internet/MimeUtility.java


/*

 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.

 *

 * Copyright 1997-2009 Sun Microsystems, Inc. All rights reserved.

 *

 * The contents of this file are subject to the terms of either the GNU

 * General Public License Version 2 only ("GPL") or the Common Development

 * and Distribution License("CDDL") (collectively, the "License").  You

 * may not use this file except in compliance with the License. You can obtain

 * a copy of the License at https://glassfish.dev.java.net/public/CDDL+GPL.html

 * or glassfish/bootstrap/legal/LICENSE.txt.  See the License for the specific

 * language governing permissions and limitations under the License.

 *

 * When distributing the software, include this License Header Notice in each

 * file and include the License file at glassfish/bootstrap/legal/LICENSE.txt.

 * Sun designates this particular file as subject to the "Classpath" exception

 * as provided by Sun in the GPL Version 2 section of the License file that

 * accompanied this code.  If applicable, add the following below the License

 * Header, with the fields enclosed by brackets [] replaced by your own

 * identifying information: "Portions Copyrighted [year]

 * [name of copyright owner]"

 *

 * Contributor(s):

 *

 * If you wish your version of this file to be governed by only the CDDL or

 * only the GPL Version 2, indicate your decision by adding "[Contributor]

 * elects to include this software in this distribution under the [CDDL or GPL

 * Version 2] license."  If you don't indicate a single choice of license, a

 * recipient has the option to distribute your version of this file under

 * either the CDDL, the GPL Version 2 or to extend the choice of license to

 * its licensees as provided above.  However, if you add GPL Version 2 code

 * and therefore, elected the GPL Version 2 license, then the option applies

 * only if the new code is made subject to such option by the copyright

 * holder.

 */



package javax.mail.internet;



import javax.mail.MessagingException;

import javax.activation.*;

import java.util.*;

import java.io.*;

import com.sun.mail.util.*;



/**

 * This is a utility class that provides various MIME related

 * functionality. <p>

 *

 * There are a set of methods to encode and decode MIME headers as 

 * per RFC 2047.  Note that, in general, these methods are

 * <strong>not</strong> needed when using methods such as

 * <code>setSubject</code> and <code>setRecipients</code>; JavaMail

 * will automatically encode and decode data when using these "higher

 * level" methods.  The methods below are only needed when manipulating

 * raw MIME headers using <code>setHeader</code> and <code>getHeader</code>

 * methods.  A brief description on handling such headers is given below: <p>

 *

 * RFC 822 mail headers <strong>must</strong> contain only US-ASCII

 * characters. Headers that contain non US-ASCII characters must be

 * encoded so that they contain only US-ASCII characters. Basically,

 * this process involves using either BASE64 or QP to encode certain

 * characters. RFC 2047 describes this in detail. <p>

 *

 * In Java, Strings contain (16 bit) Unicode characters. ASCII is a

 * subset of Unicode (and occupies the range 0 - 127). A String

 * that contains only ASCII characters is already mail-safe. If the

 * String contains non US-ASCII characters, it must be encoded. An

 * additional complexity in this step is that since Unicode is not

 * yet a widely used charset, one might want to first charset-encode

 * the String into another charset and then do the transfer-encoding.

 * <p>

 * Note that to get the actual bytes of a mail-safe String (say,

 * for sending over SMTP), one must do 

 * <p><blockquote><pre>

 *

 *  byte[] bytes = string.getBytes("iso-8859-1");  

 *

 * </pre></blockquote><p>

 * 

 * The <code>setHeader</code> and <code>addHeader</code> methods

 * on MimeMessage and MimeBodyPart assume that the given header values

 * are Unicode strings that contain only US-ASCII characters. Hence

 * the callers of those methods must ensure that the values they pass

 * do not contain non US-ASCII characters. The methods in this class 

 * help do this. <p>

 *

 * The <code>getHeader</code> family of methods on MimeMessage and

 * MimeBodyPart return the raw header value. These might be encoded

 * as per RFC 2047, and if so, must be decoded into Unicode Strings.

 * The methods in this class help to do this. <p>

 *

 * Several System properties control strict conformance to the MIME

 * spec.  Note that these are not session properties but must be set

 * globally as System properties. <p>

 *

 * The <code>mail.mime.decodetext.strict</code> property controls

 * decoding of MIME encoded words.  The MIME spec requires that encoded

 * words start at the beginning of a whitespace separated word.  Some

 * mailers incorrectly include encoded words in the middle of a word.

 * If the <code>mail.mime.decodetext.strict</code> System property is

 * set to <code>"false"</code>, an attempt will be made to decode these

 * illegal encoded words. The default is true. <p>

 *

 * The <code>mail.mime.encodeeol.strict</code> property controls the

 * choice of Content-Transfer-Encoding for MIME parts that are not of

 * type "text".  Often such parts will contain textual data for which

 * an encoding that allows normal end of line conventions is appropriate.

 * In rare cases, such a part will appear to contain entirely textual

 * data, but will require an encoding that preserves CR and LF characters

 * without change.  If the <code>mail.mime.encodeeol.strict</code>

 * System property is set to <code>"true"</code>, such an encoding will

 * be used when necessary.  The default is false. <p>

 *

 * In addition, the <code>mail.mime.charset</code> System property can

 * be used to specify the default MIME charset to use for encoded words

 * and text parts that don't otherwise specify a charset.  Normally, the

 * default MIME charset is derived from the default Java charset, as

 * specified in the <code>file.encoding</code> System property.  Most

 * applications will have no need to explicitly set the default MIME

 * charset.  In cases where the default MIME charset to be used for

 * mail messages is different than the charset used for files stored on

 * the system, this property should be set. <p>

 *

 * The current implementation also supports the following System property.

 * <p>

 * The <code>mail.mime.ignoreunknownencoding</code> property controls

 * whether unknown values in the <code>Content-Transfer-Encoding</code>

 * header, as passed to the <code>decode</code> method, cause an exception.

 * If set to <code>"true"</code>, unknown values are ignored and 8bit

 * encoding is assumed.  Otherwise, unknown values cause a MessagingException

 * to be thrown.

 *

 * @author  John Mani

 * @author  Bill Shannon

 */



public class MimeUtility {



    // This class cannot be instantiated

    private MimeUtility() { }



    // Passed as the second argument to checkAscii() by
    // getEncoding(DataSource).
    // NOTE(review): presumably means "examine all of the data" -- confirm
    // against checkAscii(), which is not visible in this part of the file.
    public static final int ALL = -1;



    // "mail.mime.decodetext.strict" (default true).  When false,
    // decodeText() and decodeWord() additionally call decodeInnerWords()
    // to look for encoded words embedded inside other text.
    private static boolean decodeStrict =

  PropUtil.getBooleanSystemProperty("mail.mime.decodetext.strict", true);

    // "mail.mime.encodeeol.strict" (default false).  Handed to the
    // AsciiOutputStream that getEncoding(DataHandler) uses to scan
    // content whose type is not "text/*".
    private static boolean encodeEolStrict =

  PropUtil.getBooleanSystemProperty("mail.mime.encodeeol.strict", false);

    // "mail.mime.ignoreunknownencoding" (default false).  When true,
    // decode() returns the stream unchanged for an unrecognized encoding
    // instead of throwing a MessagingException.
    private static boolean ignoreUnknownEncoding =

  PropUtil.getBooleanSystemProperty(

      "mail.mime.ignoreunknownencoding", false);

    /*

     * The following two properties allow disabling the fold()

     * and unfold() methods and reverting to the previous behavior.

     * They should never need to be changed and are here only because

     * of my paranoid concern with compatibility.

     */

    // "mail.mime.foldencodedwords" (default false).  When true, doEncode()
    // separates consecutive encoded words with "\r\n " instead of a
    // single space.
    private static boolean foldEncodedWords =

  PropUtil.getBooleanSystemProperty("mail.mime.foldencodedwords", false);

    // "mail.mime.foldtext" (default true).  Not read by any code visible
    // in this part of the file.
    private static boolean foldText =

  PropUtil.getBooleanSystemProperty("mail.mime.foldtext", true);





    /**

     * Get the content-transfer-encoding that should be applied

     * to the input stream of this datasource, to make it mailsafe. <p>

     *

     * The algorithm used here is: <br>

     * <ul>

     * <li>

     * If the primary type of this datasource is "text" and if all

     * the bytes in its input stream are US-ASCII, then the encoding

     * is "7bit". If more than half of the bytes are non-US-ASCII, then

     * the encoding is "base64". If less than half of the bytes are

     * non-US-ASCII, then the encoding is "quoted-printable".

     * <li>

     * If the primary type of this datasource is not "text", then if

     * all the bytes of its input stream are US-ASCII, the encoding

     * is "7bit". If there is even one non-US-ASCII character, the

     * encoding is "base64".

     * </ul>

     *

     * @param  ds  DataSource

     * @return    the encoding. This is either "7bit",

     *      "quoted-printable" or "base64"

     */ 

    public static String getEncoding(DataSource ds) {

  ContentType cType = null;

  InputStream is = null;

  String encoding = null;



  try {

      cType = new ContentType(ds.getContentType());

      is = ds.getInputStream();

  } catch (Exception ex) {

      return "base64"; // what else ?!

  }



  boolean isText = cType.match("text/*");

  // if not text, stop processing when we see non-ASCII

  int i = checkAscii(is, ALL, !isText);

  switch (i) {

  case ALL_ASCII:

      encoding = "7bit"; // all ascii

      break;

  case MOSTLY_ASCII:

      encoding = "quoted-printable"; // mostly ascii

      break;

  default:

      encoding = "base64"; // mostly binary

      break;

  }



  // Close the input stream

  try {

      is.close();

  } catch (IOException ioex) { }



  return encoding;

    }



    /**

     * Same as <code>getEncoding(DataSource)</code> except that instead

     * of reading the data from an <code>InputStream</code> it uses the

     * <code>writeTo</code> method to examine the data.  This is more

     * efficient in the common case of a <code>DataHandler</code>

     * created with an object and a MIME type (for example, a

     * "text/plain" String) because all the I/O is done in this

     * thread.  In the case requiring an <code>InputStream</code> the

     * <code>DataHandler</code> uses a thread, a pair of pipe streams,

     * and the <code>writeTo</code> method to produce the data. <p>

     *

     * @since  JavaMail 1.2

     */

    public static String getEncoding(DataHandler dh) {

  ContentType cType = null;

  String encoding = null;



  /*

   * Try to pick the most efficient means of determining the

   * encoding.  If this DataHandler was created using a DataSource,

   * the getEncoding(DataSource) method is typically faster.  If

   * the DataHandler was created with an object, this method is

   * much faster.  To distinguish the two cases, we use a heuristic.

   * A DataHandler created with an object will always have a null name.

   * A DataHandler created with a DataSource will usually have a

   * non-null name.

   *

   * XXX - This is actually quite a disgusting hack, but it makes

   *   a common case run over twice as fast.

   */

  if (dh.getName() != null)

      return getEncoding(dh.getDataSource());



  try {

      cType = new ContentType(dh.getContentType());

  } catch (Exception ex) {

      return "base64"; // what else ?!

  }



  if (cType.match("text/*")) {

      // Check all of the available bytes

      AsciiOutputStream aos = new AsciiOutputStream(false, false);

      try {

    dh.writeTo(aos);

      } catch (IOException ex) {

        // ignore it, can't happen

      }

      switch (aos.getAscii()) {

      case ALL_ASCII:

    encoding = "7bit"; // all ascii

    break;

      case MOSTLY_ASCII:

    encoding = "quoted-printable"; // mostly ascii

    break;

      default:

    encoding = "base64"; // mostly binary

    break;

      }

  } else { // not "text"

      // Check all of available bytes, break out if we find

      // at least one non-US-ASCII character

      AsciiOutputStream aos =

      new AsciiOutputStream(true, encodeEolStrict);

      try {

    dh.writeTo(aos);

      } catch (IOException ex) { }  // ignore it

      if (aos.getAscii() == ALL_ASCII) // all ascii

    encoding = "7bit";

      else // found atleast one non-ascii character, use b64 

    encoding = "base64";

  }



  return encoding;

    }



    /**

     * Decode the given input stream. The Input stream returned is

     * the decoded input stream. All the encodings defined in RFC 2045

     * are supported here. They include "base64", "quoted-printable",

     * "7bit", "8bit", and "binary". In addition, "uuencode" is also

     * supported. <p>

     *

     * In the current implementation, if the

     * <code>mail.mime.ignoreunknownencoding</code> system property is set to

     * <code>"true"</code>, unknown encoding values are ignored and the

     * original InputStream is returned.

     *

     * @param  is    input stream

     * @param  encoding  the encoding of the stream.

     * @return      decoded input stream.

     * @exception MessagingException  if the encoding is unknown

     */

    public static InputStream decode(InputStream is, String encoding)

    throws MessagingException {

  if (encoding.equalsIgnoreCase("base64"))

      return new BASE64DecoderStream(is);

  else if (encoding.equalsIgnoreCase("quoted-printable"))

      return new QPDecoderStream(is);

  else if (encoding.equalsIgnoreCase("uuencode") ||

     encoding.equalsIgnoreCase("x-uuencode") ||

     encoding.equalsIgnoreCase("x-uue"))

      return new UUDecoderStream(is);

  else if (encoding.equalsIgnoreCase("binary") ||

     encoding.equalsIgnoreCase("7bit") ||

     encoding.equalsIgnoreCase("8bit"))

      return is;

  else {

      if (!ignoreUnknownEncoding)

    throw new MessagingException("Unknown encoding: " + encoding);

      return is;

  }

    }



    /**

     * Wrap an encoder around the given output stream. 

     * All the encodings defined in RFC 2045 are supported here. 

     * They include "base64", "quoted-printable", "7bit", "8bit" and

     * "binary". In addition, "uuencode" is also supported.

     *

     * @param  os    output stream

     * @param  encoding  the encoding of the stream. 

     * @return      output stream that applies the

     *        specified encoding.

     * @exception MessagingException  if the encoding is unknown

     */

    public static OutputStream encode(OutputStream os, String encoding)

    throws MessagingException {

        if (encoding == null)

      return os;

  else if (encoding.equalsIgnoreCase("base64"))

      return new BASE64EncoderStream(os);

  else if (encoding.equalsIgnoreCase("quoted-printable"))

      return new QPEncoderStream(os);

  else if (encoding.equalsIgnoreCase("uuencode") ||

     encoding.equalsIgnoreCase("x-uuencode") ||

     encoding.equalsIgnoreCase("x-uue"))

      return new UUEncoderStream(os);

  else if (encoding.equalsIgnoreCase("binary") ||

     encoding.equalsIgnoreCase("7bit") ||

     encoding.equalsIgnoreCase("8bit"))

      return os;

  else

      throw new MessagingException("Unknown encoding: " +encoding);

    }



    /**

     * Wrap an encoder around the given output stream.

     * All the encodings defined in RFC 2045 are supported here.

     * They include "base64", "quoted-printable", "7bit", "8bit" and

     * "binary". In addition, "uuencode" is also supported.

     * The <code>filename</code> parameter is used with the "uuencode"

     * encoding and is included in the encoded output.

     *

     * @param   os              output stream

     * @param   encoding        the encoding of the stream.

     * @param   filename        name for the file being encoded (only used

     *                          with uuencode)

     * @return                  output stream that applies the

     *                          specified encoding.

     * @since                   JavaMail 1.2

     */

    public static OutputStream encode(OutputStream os, String encoding,

                                      String filename)

                throws MessagingException {

        if (encoding == null)

            return os;

        else if (encoding.equalsIgnoreCase("base64"))

            return new BASE64EncoderStream(os);

        else if (encoding.equalsIgnoreCase("quoted-printable"))

            return new QPEncoderStream(os);

        else if (encoding.equalsIgnoreCase("uuencode") ||

                 encoding.equalsIgnoreCase("x-uuencode") ||

                 encoding.equalsIgnoreCase("x-uue"))

            return new UUEncoderStream(os, filename);

        else if (encoding.equalsIgnoreCase("binary") ||

                 encoding.equalsIgnoreCase("7bit") ||

                 encoding.equalsIgnoreCase("8bit"))

            return os;

        else

            throw new MessagingException("Unknown encoding: " +encoding);

    }



    /**

     * Encode a RFC 822 "text" token into mail-safe form as per

     * RFC 2047. <p>

     *

     * The given Unicode string is examined for non US-ASCII

     * characters. If the string contains only US-ASCII characters,

     * it is returned as-is.  If the string contains non US-ASCII

     * characters, it is first character-encoded using the platform's

     * default charset, then transfer-encoded using either the B or 

     * Q encoding. The resulting bytes are then returned as a Unicode 

     * string containing only ASCII  characters. <p>

     *

     * Note that this method should be used to encode only 

     * "unstructured" RFC 822 headers. <p>

     *

     * Example of usage:

     * <p><blockquote><pre>

     *

     *  MimePart part = ...

     *  String rawvalue = "FooBar Mailer, Japanese version 1.1"

     *  try {

     *    // If we know for sure that rawvalue contains only US-ASCII 

     *    // characters, we can skip the encoding part

     *    part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));

     *  } catch (UnsupportedEncodingException e) {

     *    // encoding failure

     *  } catch (MessagingException me) {

     *   // setHeader() failure

     *  }

     *

     * </pre></blockquote><p>

     * 

     * @param  text  Unicode string

     * @return  Unicode string containing only US-ASCII characters

     * @exception UnsupportedEncodingException if the encoding fails

     */

    public static String encodeText(String text)

      throws UnsupportedEncodingException {

  return encodeText(text, null, null);

    }



    /**

     * Encode a RFC 822 "text" token into mail-safe form as per

     * RFC 2047. <p>

     *

     * The given Unicode string is examined for non US-ASCII

     * characters. If the string contains only US-ASCII characters,

     * it is returned as-is.  If the string contains non US-ASCII

     * characters, it is first character-encoded using the specified

     * charset, then transfer-encoded using either the B or Q encoding.

     * The resulting bytes are then returned as a Unicode string 

     * containing only ASCII characters. <p>

     *

     * Note that this method should be used to encode only 

     * "unstructured" RFC 822 headers. 

     * 

     * @param  text  the header value

     * @param  charset  the charset. If this parameter is null, the

     *    platform's default charset is used.

     * @param  encoding the encoding to be used. Currently supported

     *    values are "B" and "Q". If this parameter is null, then

     *    the "Q" encoding is used if most of characters to be

     *    encoded are in the ASCII charset, otherwise "B" encoding

     *    is used.

     * @return  Unicode string containing only US-ASCII characters

     */

    public static String encodeText(String text, String charset,

            String encoding)

      throws UnsupportedEncodingException {

  return encodeWord(text, charset, encoding, false);

    }



    /**

     * Decode "unstructured" headers, that is, headers that are defined

     * as '*text' as per RFC 822. <p>

     *

     * The string is decoded using the algorithm specified in

     * RFC 2047, Section 6.1. If the charset-conversion fails

     * for any sequence, an UnsupportedEncodingException is thrown.

     * If the String is not an RFC 2047 style encoded header, it is

     * returned as-is <p>

     *

     * Example of usage:

     * <p><blockquote><pre>

     *

     *  MimePart part = ...

     *  String rawvalue = null;

     *  String  value = null;

     *  try {

     *    if ((rawvalue = part.getHeader("X-mailer")[0]) != null)

     *      value = MimeUtility.decodeText(rawvalue);

     *  } catch (UnsupportedEncodingException e) {

     *      // Don't care

     *      value = rawvalue;

     *  } catch (MessagingException me) { }

     *

     *  return value;

     *

     * </pre></blockquote><p>

     *

     * @param  etext  the possibly encoded value

     * @exception       UnsupportedEncodingException if the charset

     *      conversion failed.

     */

    public static String decodeText(String etext)

    throws UnsupportedEncodingException {

  /*

   * We look for sequences separated by "linear-white-space".

   * (as per RFC 2047, Section 6.1)

   * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL.

   */

  String lwsp = " \t\n\r";

  StringTokenizer st;



  /*

   * First, lets do a quick run thru the string and check

   * whether the sequence "=?"  exists at all. If none exists,

   * we know there are no encoded-words in here and we can just

   * return the string as-is, without suffering thru the later 

   * decoding logic. 

   * This handles the most common case of unencoded headers 

   * efficiently.

   */

  if (etext.indexOf("=?") == -1)

      return etext;



  // Encoded words found. Start decoding ...



  st = new StringTokenizer(etext, lwsp, true);

  StringBuffer sb = new StringBuffer();  // decode buffer

  StringBuffer wsb = new StringBuffer(); // white space buffer

  boolean prevWasEncoded = false;



  while (st.hasMoreTokens()) {

      char c;

      String s = st.nextToken();

      // If whitespace, append it to the whitespace buffer

      if (((c = s.charAt(0)) == ' ') || (c == '\t') ||

    (c == '\r') || (c == '\n'))

    wsb.append(c);

      else {

    // Check if token is an 'encoded-word' ..

    String word;

    try {

        word = decodeWord(s);

        // Yes, this IS an 'encoded-word'.

        if (!prevWasEncoded && wsb.length() > 0) {

      // if the previous word was also encoded, we

      // should ignore the collected whitespace. Else

      // we include the whitespace as well.

      sb.append(wsb);

        }

        prevWasEncoded = true;

    } catch (ParseException pex) {

        // This is NOT an 'encoded-word'.

        word = s;

        // possibly decode inner encoded words

        if (!decodeStrict) {

      String dword = decodeInnerWords(word);

      if (dword != word) {

          // if a different String object was returned,

          // decoding was done.

          if (prevWasEncoded && word.startsWith("=?")) {

        // encoded followed by encoded,

        // throw away whitespace between

          } else {

        // include collected whitespace ..

        if (wsb.length() > 0)

            sb.append(wsb);

          }

          // did original end with encoded?

          prevWasEncoded = word.endsWith("?=");

          word = dword;

      } else {

          // include collected whitespace ..

          if (wsb.length() > 0)

        sb.append(wsb);

          prevWasEncoded = false;

      }

        } else {

      // include collected whitespace ..

      if (wsb.length() > 0)

          sb.append(wsb);

      prevWasEncoded = false;

        }

    }

    sb.append(word); // append the actual word

    wsb.setLength(0); // reset wsb for reuse

      }

  }

  sb.append(wsb);    // append trailing whitespace

  return sb.toString();

    }



    /**

     * Encode a RFC 822 "word" token into mail-safe form as per

     * RFC 2047. <p>

     *

     * The given Unicode string is examined for non US-ASCII

     * characters. If the string contains only US-ASCII characters,

     * it is returned as-is.  If the string contains non US-ASCII

     * characters, it is first character-encoded using the platform's

     * default charset, then transfer-encoded using either the B or 

     * Q encoding. The resulting bytes are then returned as a Unicode 

     * string containing only ASCII  characters. <p>

     * 

     * This method is meant to be used when creating RFC 822 "phrases".

     * The InternetAddress class, for example, uses this to encode

     * its 'phrase' component.

     *

     * @param  word  Unicode string

     * @return  Unicode string containing only US-ASCII

     *    characters.

     * @exception UnsupportedEncodingException if the encoding fails

     */

    public static String encodeWord(String word) 

      throws UnsupportedEncodingException {

  return encodeWord(word, null, null);

    }



    /**

     * Encode a RFC 822 "word" token into mail-safe form as per

     * RFC 2047. <p>

     *

     * The given Unicode string is examined for non US-ASCII

     * characters. If the string contains only US-ASCII characters,

     * it is returned as-is.  If the string contains non US-ASCII

     * characters, it is first character-encoded using the specified

     * charset, then transfer-encoded using either the B or Q encoding.

     * The resulting bytes are then returned as a Unicode string 

     * containing only ASCII characters. <p>

     * 

     * @param  word  Unicode string

     * @param  charset  the MIME charset

     * @param  encoding the encoding to be used. Currently supported

     *    values are "B" and "Q". If this parameter is null, then

     *    the "Q" encoding is used if most of characters to be

     *    encoded are in the ASCII charset, otherwise "B" encoding

     *    is used.

     * @return  Unicode string containing only US-ASCII characters

     * @exception UnsupportedEncodingException if the encoding fails

     */

    public static String encodeWord(String word, String charset, 

            String encoding)

          throws UnsupportedEncodingException {

  return encodeWord(word, charset, encoding, true);

    }



    /*

     * Encode the given string. The parameter 'encodingWord' should

     * be true if a RFC 822 "word" token is being encoded and false if a

     * RFC 822 "text" token is being encoded. This is because the 

     * "Q" encoding defined in RFC 2047 has more restrictions when

     * encoding "word" tokens. (Sigh)

     */ 

    private static String encodeWord(String string, String charset,

             String encoding, boolean encodingWord)

      throws UnsupportedEncodingException {



  // If 'string' contains only US-ASCII characters, just

  // return it.

  int ascii = checkAscii(string);

  if (ascii == ALL_ASCII)

      return string;



  // Else, apply the specified charset conversion.

  String jcharset;

  if (charset == null) { // use default charset

      jcharset = getDefaultJavaCharset(); // the java charset

      charset = getDefaultMIMECharset(); // the MIME equivalent

  } else // MIME charset -> java charset

      jcharset = javaCharset(charset);



  // If no transfer-encoding is specified, figure one out.

  if (encoding == null) {

      if (ascii != MOSTLY_NONASCII)

    encoding = "Q";

      else

    encoding = "B";

  }



  boolean b64;

  if (encoding.equalsIgnoreCase("B")) 

      b64 = true;

  else if (encoding.equalsIgnoreCase("Q"))

      b64 = false;

  else

      throw new UnsupportedEncodingException(

      "Unknown transfer encoding: " + encoding);



  StringBuffer outb = new StringBuffer(); // the output buffer

  doEncode(string, b64, jcharset, 

     // As per RFC 2047, size of an encoded string should not

     // exceed 75 bytes.

     // 7 = size of "=?", '?', 'B'/'Q', '?', "?="

     75 - 7 - charset.length(), // the available space

     "=?" + charset + "?" + encoding + "?", // prefix

     true, encodingWord, outb);



  return outb.toString();

    }



    private static void doEncode(String string, boolean b64, 

    String jcharset, int avail, String prefix, 

    boolean first, boolean encodingWord, StringBuffer buf) 

      throws UnsupportedEncodingException {



  // First find out what the length of the encoded version of

  // 'string' would be.

  byte[] bytes = string.getBytes(jcharset);

  int len;

  if (b64) // "B" encoding

      len = BEncoderStream.encodedLength(bytes);

  else // "Q"

      len = QEncoderStream.encodedLength(bytes, encodingWord);

  

  int size;

  if ((len > avail) && ((size = string.length()) > 1)) { 

      // If the length is greater than 'avail', split 'string'

      // into two and recurse.

      doEncode(string.substring(0, size/2), b64, jcharset, 

         avail, prefix, first, encodingWord, buf);

      doEncode(string.substring(size/2, size), b64, jcharset,

         avail, prefix, false, encodingWord, buf);

  } else {

      // length <= than 'avail'. Encode the given string

      ByteArrayOutputStream os = new ByteArrayOutputStream();

      OutputStream eos; // the encoder

      if (b64) // "B" encoding

    eos = new BEncoderStream(os);

      else // "Q" encoding

    eos = new QEncoderStream(os, encodingWord);

      

      try { // do the encoding

    eos.write(bytes);

    eos.close();

      } catch (IOException ioex) { }



      byte[] encodedBytes = os.toByteArray(); // the encoded stuff

      // Now write out the encoded (all ASCII) bytes into our

      // StringBuffer

      if (!first) // not the first line of this sequence

    if (foldEncodedWords)

        buf.append("\r\n "); // start a continuation line

    else

        buf.append(" "); // line will be folded later



      buf.append(prefix);

      for (int i = 0; i < encodedBytes.length; i++)

    buf.append((char)encodedBytes[i]);

      buf.append("?="); // terminate the current sequence

  }

    }



    /**

     * The string is parsed using the rules in RFC 2047 and RFC 2231 for

     * parsing an "encoded-word".  If the parse fails, a ParseException is 

     * thrown. Otherwise, it is transfer-decoded, and then 

     * charset-converted into Unicode. If the charset-conversion

     * fails, an UnsupportedEncodingException is thrown.<p>

     *

     * @param  eword  the encoded value

     * @exception       ParseException if the string is not an

     *      encoded-word as per RFC 2047 and RFC 2231.

     * @exception       UnsupportedEncodingException if the charset

     *      conversion failed.

     */

    public static String decodeWord(String eword)

    throws ParseException, UnsupportedEncodingException {



  if (!eword.startsWith("=?")) // not an encoded word

      throw new ParseException(

    "encoded word does not start with \"=?\": " + eword);

  

  // get charset

  int start = 2; int pos; 

  if ((pos = eword.indexOf('?', start)) == -1)

      throw new ParseException(

    "encoded word does not include charset: " + eword);

  String charset = eword.substring(start, pos);

  int lpos = charset.indexOf('*');  // RFC 2231 language specified?

  if (lpos >= 0)        // yes, throw it away

      charset = charset.substring(0, lpos);

  charset = javaCharset(charset);



  // get encoding

  start = pos+1;

  if ((pos = eword.indexOf('?', start)) == -1)

      throw new ParseException(

    "encoded word does not include encoding: " + eword);

  String encoding = eword.substring(start, pos);



  // get encoded-sequence

  start = pos+1;

  if ((pos = eword.indexOf("?=", start)) == -1)

      throw new ParseException(

    "encoded word does not end with \"?=\": " + eword);

  /*

   * XXX - should include this, but leaving it out for compatibility...

   *

  if (decodeStrict && pos != eword.length() - 2)

      throw new ParseException(

    "encoded word does not end with \"?=\": " + eword););

   */

  String word = eword.substring(start, pos);



  try {

      String decodedWord;

      if (word.length() > 0) {

    // Extract the bytes from word

    ByteArrayInputStream bis = 

        new ByteArrayInputStream(ASCIIUtility.getBytes(word));



    // Get the appropriate decoder

    InputStream is;

    if (encoding.equalsIgnoreCase("B")) 

        is = new BASE64DecoderStream(bis);

    else if (encoding.equalsIgnoreCase("Q"))

        is = new QDecoderStream(bis);

    else

        throw new UnsupportedEncodingException(

            "unknown encoding: " + encoding);



    // For b64 & q, size of decoded word <= size of word. So

    // the decoded bytes must fit into the 'bytes' array. This

    // is certainly more efficient than writing bytes into a

    // ByteArrayOutputStream and then pulling out the byte[]

    // from it.

    int count = bis.available();

    byte[] bytes = new byte[count];

    // count is set to the actual number of decoded bytes 

    count = is.read(bytes, 0, count);



    // Finally, convert the decoded bytes into a String using

    // the specified charset

    decodedWord = count <= 0 ? "" :

        new String(bytes, 0, count, charset);

      } else {

    // no characters to decode, return empty string

    decodedWord = "";

      }

      if (pos + 2 < eword.length()) {

    // there's still more text in the string

    String rest = eword.substring(pos + 2);

    if (!decodeStrict)

        rest = decodeInnerWords(rest);

    decodedWord += rest;

      }

      return decodedWord;

  } catch (UnsupportedEncodingException uex) {

      // explicitly catch and rethrow this exception, otherwise

      // the below IOException catch will swallow this up!

      throw uex;

  } catch (IOException ioex) {

      // Shouldn't happen.

      throw new ParseException(ioex.toString());

  } catch (IllegalArgumentException iex) {

      /* An unknown charset of the form ISO-XXX-XXX, will cause

       * the JDK to throw an IllegalArgumentException ... Since the

       * JDK will attempt to create a classname using this string,

       * but valid classnames must not contain the character '-',

       * and this results in an IllegalArgumentException, rather than

       * the expected UnsupportedEncodingException. Yikes

       */

      throw new UnsupportedEncodingException(charset);

  }

    }



    /**

     * Look for encoded words within a word.  The MIME spec doesn't

     * allow this, but many broken mailers, especially Japanese mailers,

     * produce such incorrect encodings.

     */

    private static String decodeInnerWords(String word)

        throws UnsupportedEncodingException {

  int start = 0, i;

  StringBuffer buf = new StringBuffer();

  while ((i = word.indexOf("=?", start)) >= 0) {

      buf.append(word.substring(start, i));

      // find first '?' after opening '=?' - end of charset

      int end = word.indexOf('?', i + 2);

      if (end < 0)

    break;

      // find next '?' after that - end of encoding

      end = word.indexOf('?', end + 1);

      if (end < 0)

    break;

      // find terminating '?='

      end = word.indexOf("?=", end + 1);

      if (end < 0)

    break;

      String s = word.substring(i, end + 2);

      try {

    s = decodeWord(s);

      } catch (ParseException pex) {

    // ignore it, just use the original string

      }

      buf.append(s);

      start = end + 2;

  }

  if (start == 0)

      return word;

  if (start < word.length())

      buf.append(word.substring(start));

  return buf.toString();

    }



    /**

     * A utility method to quote a word, if the word contains any

     * characters from the specified 'specials' list.<p>

     *

     * The <code>HeaderTokenizer</code> class defines two special

     * sets of delimiters - MIME and RFC 822. <p>

     *

     * This method is typically used during the generation of 

     * RFC 822 and MIME header fields.

     *

     * @param  word  word to be quoted

     * @param  specials the set of special characters

     * @return    the possibly quoted word

     * @see  javax.mail.internet.HeaderTokenizer#MIME

     * @see  javax.mail.internet.HeaderTokenizer#RFC822

     */

    public static String quote(String word, String specials) {

  int len = word.length();

  if (len == 0)

      return "\"\"";  // an empty string is handled specially



  /*

   * Look for any "bad" characters, Escape and

   *  quote the entire string if necessary.

   */

  boolean needQuoting = false;

  for (int i = 0; i < len; i++) {

      char c = word.charAt(i);

      if (c == '"' || c == '\\' || c == '\r' || c == '\n') {

    // need to escape them and then quote the whole string

    StringBuffer sb = new StringBuffer(len + 3);

    sb.append('"');

    sb.append(word.substring(0, i));

    int lastc = 0;

    for (int j = i; j < len; j++) {

        char cc = word.charAt(j);

        if ((cc == '"') || (cc == '\\') || 

      (cc == '\r') || (cc == '\n'))

      if (cc == '\n' && lastc == '\r')

          ;  // do nothing, CR was already escaped

      else

          sb.append('\\');  // Escape the character

        sb.append(cc);

        lastc = cc;

    }

    sb.append('"');

    return sb.toString();

      } else if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0)

    // These characters cause the string to be quoted

    needQuoting = true;

  }



  if (needQuoting) {

      StringBuffer sb = new StringBuffer(len + 2);

      sb.append('"').append(word).append('"');

      return sb.toString();

  } else 

      return word;

    }



    /**

     * Fold a string at linear whitespace so that each line is no longer

     * than 76 characters, if possible.  If there are more than 76

     * non-whitespace characters consecutively, the string is folded at

     * the first whitespace after that sequence.  The parameter

     * <code>used</code> indicates how many characters have been used in

     * the current line; it is usually the length of the header name. <p>

     *

     * Note that line breaks in the string aren't escaped; they probably

     * should be.

     *

     * @param  used  characters used in line so far

     * @param  s  the string to fold

     * @return    the folded string

     * @since    JavaMail 1.4

     */

    public static String fold(int used, String s) {

  if (!foldText)

      return s;



  int end;

  char c;

  // Strip trailing spaces and newlines

  for (end = s.length() - 1; end >= 0; end--) {

      c = s.charAt(end);

      if (c != ' ' && c != '\t' && c != '\r' && c != '\n')

    break;

  }

  if (end != s.length() - 1)

      s = s.substring(0, end + 1);



  // if the string fits now, just return it

  if (used + s.length() <= 76)

      return s;



  // have to actually fold the string

  StringBuffer sb = new StringBuffer(s.length() + 4);

  char lastc = 0;

  while (used + s.length() > 76) {

      int lastspace = -1;

      for (int i = 0; i < s.length(); i++) {

    if (lastspace != -1 && used + i > 76)

        break;

    c = s.charAt(i);

    if (c == ' ' || c == '\t')

        if (!(lastc == ' ' || lastc == '\t'))

      lastspace = i;

    lastc = c;

      }

      if (lastspace == -1) {

    // no space, use the whole thing

    sb.append(s);

    s = "";

    used = 0;

    break;

      }

      sb.append(s.substring(0, lastspace));

      sb.append("\r\n");

      lastc = s.charAt(lastspace);

      sb.append(lastc);

      s = s.substring(lastspace + 1);

      used = 1;

  }

  sb.append(s);

  return sb.toString();

    }



    /**

     * Unfold a folded header.  Any line breaks that aren't escaped and

     * are followed by whitespace are removed.

     *

     * @param  s  the string to unfold

     * @return    the unfolded string

     * @since    JavaMail 1.4

     */

    public static String unfold(String s) {

  if (!foldText)

      return s;



  StringBuffer sb = null;

  int i;

  while ((i = indexOfAny(s, "\r\n")) >= 0) {

      int start = i;

      int l = s.length();

      i++;    // skip CR or NL

      if (i < l && s.charAt(i - 1) == '\r' && s.charAt(i) == '\n')

    i++;  // skip LF

      if (start == 0 || s.charAt(start - 1) != '\\') {

    char c;

    // if next line starts with whitespace, skip all of it

    // XXX - always has to be true?

    if (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) {

        i++;  // skip whitespace

        while (i < l && ((c = s.charAt(i)) == ' ' || c == '\t'))

      i++;

        if (sb == null)

      sb = new StringBuffer(s.length());

        if (start != 0) {

      sb.append(s.substring(0, start));

      sb.append(' ');

        }

        s = s.substring(i);

        continue;

    }

    // it's not a continuation line, just leave it in

    if (sb == null)

        sb = new StringBuffer(s.length());

    sb.append(s.substring(0, i));

    s = s.substring(i);

      } else {

    // there's a backslash at "start - 1"

    // strip it out, but leave in the line break

    if (sb == null)

        sb = new StringBuffer(s.length());

    sb.append(s.substring(0, start - 1));

    sb.append(s.substring(start, i));

    s = s.substring(i);

      }

  }

  if (sb != null) {

      sb.append(s);

      return sb.toString();

  } else

      return s;

    }



    /**

     * Return the first index of any of the characters in "any" in "s",

     * or -1 if none are found.

     *

     * This should be a method on String.

     */

    private static int indexOfAny(String s, String any) {

  return indexOfAny(s, any, 0);

    }



    private static int indexOfAny(String s, String any, int start) {

  try {

      int len = s.length();

      for (int i = start; i < len; i++) {

    if (any.indexOf(s.charAt(i)) >= 0)

        return i;

      }

      return -1;

  } catch (StringIndexOutOfBoundsException e) {

      return -1;

  }

    }



    /**

     * Convert a MIME charset name into a valid Java charset name. <p>

     *

     * @param charset  the MIME charset name

     * @return  the Java charset equivalent. If a suitable mapping is

     *    not available, the passed in charset is itself returned.

     */

    public static String javaCharset(String charset) {

  if (mime2java == null || charset == null)

      // no mapping table, or charset parameter is null

      return charset;



  String alias =

      (String)mime2java.get(charset.toLowerCase(Locale.ENGLISH));

  return alias == null ? charset : alias;

    }



    /**

     * Convert a java charset into its MIME charset name. <p>

     *

     * Note that a future version of JDK (post 1.2) might provide

     * this functionality, in which case, we may deprecate this

     * method then.

     *

     * @param   charset    the JDK charset

     * @return        the MIME/IANA equivalent. If a mapping

     *      is not possible, the passed in charset itself

     *      is returned.

     * @since    JavaMail 1.1

     */

    public static String mimeCharset(String charset) {

  if (java2mime == null || charset == null) 

      // no mapping table or charset param is null

      return charset;



  String alias =

      (String)java2mime.get(charset.toLowerCase(Locale.ENGLISH));

  return alias == null ? charset : alias;

    }



    // Cached result of getDefaultJavaCharset(); null until first computed.
    private static String defaultJavaCharset;

    // Cached result of getDefaultMIMECharset(); null until first computed.
    private static String defaultMIMECharset;



    /**

     * Get the default charset corresponding to the system's current 

     * default locale.  If the System property <code>mail.mime.charset</code>

     * is set, a system charset corresponding to this MIME charset will be

     * returned. <p>

     * 

     * @return  the default charset of the system's default locale, 

     *     as a Java charset. (NOT a MIME charset)

     * @since  JavaMail 1.1

     */

    public static String getDefaultJavaCharset() {

  if (defaultJavaCharset == null) {

      /*

       * If mail.mime.charset is set, it controls the default

       * Java charset as well.

       */

      String mimecs = null;

      try {

    mimecs = System.getProperty("mail.mime.charset");

      } catch (SecurityException ex) { }  // ignore it

      if (mimecs != null && mimecs.length() > 0) {

    defaultJavaCharset = javaCharset(mimecs);

    return defaultJavaCharset;

      }



      try {

    defaultJavaCharset = System.getProperty("file.encoding", 

              "8859_1");

      } catch (SecurityException sex) {

    

    class NullInputStream extends InputStream {

        public int read() {

      return 0;

        }

    }

    InputStreamReader reader = 

      new InputStreamReader(new NullInputStream());

    defaultJavaCharset = reader.getEncoding();

    if (defaultJavaCharset == null)

        defaultJavaCharset = "8859_1";

      }

  }



  return defaultJavaCharset;

    }



    /*

     * Get the default MIME charset for this locale.

     */

    static String getDefaultMIMECharset() {

  if (defaultMIMECharset == null) {

      try {

    defaultMIMECharset = System.getProperty("mail.mime.charset");

      } catch (SecurityException ex) { }  // ignore it

  }

  if (defaultMIMECharset == null)

      defaultMIMECharset = mimeCharset(getDefaultJavaCharset());

  return defaultMIMECharset;

    }



    // Tables to map MIME charset names to Java names and vice versa.
    // Both are created and filled by the static initializer below; keys
    // are lower-case charset names (lookups lower-case the name first).

    // XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset

    // MIME charset name -> Java charset name
    private static Hashtable mime2java;

    // Java charset name -> MIME charset name
    private static Hashtable java2mime;



    static {

  java2mime = new Hashtable(40);

  mime2java = new Hashtable(10);



  try {

      // Use this class's classloader to load the mapping file

      // XXX - we should use SecuritySupport, but it's in another package

      InputStream is = 

        javax.mail.internet.MimeUtility.class.getResourceAsStream(

        "/META-INF/javamail.charset.map");



      if (is != null) {

    try {

        is = new LineInputStream(is);



        // Load the JDK-to-MIME charset mapping table

        loadMappings((LineInputStream)is, java2mime);



        // Load the MIME-to-JDK charset mapping table

        loadMappings((LineInputStream)is, mime2java);

    } finally {

        try {

      is.close();

        } catch (Exception cex) {

      // ignore

        }

    }

      }

  } catch (Exception ex) { }



  // If we didn't load the tables, e.g., because we didn't have

  // permission, load them manually.  The entries here should be

  // the same as the default javamail.charset.map.

  if (java2mime.isEmpty()) {

      java2mime.put("8859_1", "ISO-8859-1");

      java2mime.put("iso8859_1", "ISO-8859-1");

      java2mime.put("iso8859-1", "ISO-8859-1");



      java2mime.put("8859_2", "ISO-8859-2");

      java2mime.put("iso8859_2", "ISO-8859-2");

      java2mime.put("iso8859-2", "ISO-8859-2");



      java2mime.put("8859_3", "ISO-8859-3");

      java2mime.put("iso8859_3", "ISO-8859-3");

      java2mime.put("iso8859-3", "ISO-8859-3");



      java2mime.put("8859_4", "ISO-8859-4");

      java2mime.put("iso8859_4", "ISO-8859-4");

      java2mime.put("iso8859-4", "ISO-8859-4");



      java2mime.put("8859_5", "ISO-8859-5");

      java2mime.put("iso8859_5", "ISO-8859-5");

      java2mime.put("iso8859-5", "ISO-8859-5");



      java2mime.put("8859_6", "ISO-8859-6");

      java2mime.put("iso8859_6", "ISO-8859-6");

      java2mime.put("iso8859-6", "ISO-8859-6");



      java2mime.put("8859_7", "ISO-8859-7");

      java2mime.put("iso8859_7", "ISO-8859-7");

      java2mime.put("iso8859-7", "ISO-8859-7");



      java2mime.put("8859_8", "ISO-8859-8");

      java2mime.put("iso8859_8", "ISO-8859-8");

      java2mime.put("iso8859-8", "ISO-8859-8");



      java2mime.put("8859_9", "ISO-8859-9");

      java2mime.put("iso8859_9", "ISO-8859-9");

      java2mime.put("iso8859-9", "ISO-8859-9");



      java2mime.put("sjis", "Shift_JIS");

      java2mime.put("jis", "ISO-2022-JP");

      java2mime.put("iso2022jp", "ISO-2022-JP");

      java2mime.put("euc_jp", "euc-jp");

      java2mime.put("koi8_r", "koi8-r");

      java2mime.put("euc_cn", "euc-cn");

      java2mime.put("euc_tw", "euc-tw");

      java2mime.put("euc_kr", "euc-kr");

  }

  if (mime2java.isEmpty()) {

      mime2java.put("iso-2022-cn", "ISO2022CN");

      mime2java.put("iso-2022-kr", "ISO2022KR");

      mime2java.put("utf-8", "UTF8");

      mime2java.put("utf8", "UTF8");

      mime2java.put("ja_jp.iso2022-7", "ISO2022JP");

      mime2java.put("ja_jp.eucjp", "EUCJIS");

      mime2java.put("euc-kr", "KSC5601");

      mime2java.put("euckr", "KSC5601");

      mime2java.put("us-ascii", "ISO-8859-1");

      mime2java.put("x-us-ascii", "ISO-8859-1");

  }

    }



    private static void loadMappings(LineInputStream is, Hashtable table) {

  String currLine;



  while (true) {

      try {

    currLine = is.readLine();

      } catch (IOException ioex) {

    break; // error in reading, stop

      }



      if (currLine == null) // end of file, stop

    break;

      if (currLine.startsWith("--") && currLine.endsWith("--"))

    // end of this table

    break;  



      // ignore empty lines and comments

      if (currLine.trim().length() == 0 || currLine.startsWith("#"))

    continue;

      

      // A valid entry is of the form <key><separator><value>

      // where, <separator> := SPACE | HT. Parse this

      StringTokenizer tk = new StringTokenizer(currLine, " \t");

      try {

    String key = tk.nextToken();

    String value = tk.nextToken();

    table.put(key.toLowerCase(Locale.ENGLISH), value);

      } catch (NoSuchElementException nex) { }

  }

    }



    // Classification codes returned by the checkAscii methods below
    // (and by AsciiOutputStream.getAscii()).

    // no non-ASCII characters were seen
    static final int ALL_ASCII     = 1;

    // ASCII characters outnumber the non-ASCII ones
    static final int MOSTLY_ASCII   = 2;

    // everything else
    static final int MOSTLY_NONASCII   = 3;



    /** 

     * Check if the given string contains non US-ASCII characters.

     * @param  s  string

     * @return    ALL_ASCII if all characters in the string 

     *      belong to the US-ASCII charset. MOSTLY_ASCII

     *      if more than half of the available characters

     *      are US-ASCII characters. Else MOSTLY_NONASCII.

     */

    static int checkAscii(String s) {

  int ascii = 0, non_ascii = 0;

  int l = s.length();



  for (int i = 0; i < l; i++) {

      if (nonascii((int)s.charAt(i))) // non-ascii

    non_ascii++;

      else

    ascii++;

  }



  if (non_ascii == 0)

      return ALL_ASCII;

  if (ascii > non_ascii)

      return MOSTLY_ASCII;



  return MOSTLY_NONASCII;

    }



    /** 

     * Check if the given byte array contains non US-ASCII characters.

     * @param  b  byte array

     * @return    ALL_ASCII if all bytes in the array

     *      belong to the US-ASCII charset. MOSTLY_ASCII

     *      if more than half of the available characters

     *      are US-ASCII characters. Else MOSTLY_NONASCII.

     *

     * XXX - this method is no longer used

     */

    static int checkAscii(byte[] b) {

  int ascii = 0, non_ascii = 0;



  for (int i=0; i < b.length; i++) {

      // The '&' operator automatically causes b[i] to be promoted

      // to an int, and we mask out the higher bytes in the int 

      // so that the resulting value is not a negative integer.

      if (nonascii(b[i] & 0xff)) // non-ascii

    non_ascii++;

      else

    ascii++;

  }

  

  if (non_ascii == 0)

      return ALL_ASCII;

  if (ascii > non_ascii)

      return MOSTLY_ASCII;

  

  return MOSTLY_NONASCII;

    }



    /** 

     * Check if the given input stream contains non US-ASCII characters.

     * Up to <code>max</code> bytes are checked. If <code>max</code> is

     * set to <code>ALL</code>, then all the bytes available in this

     * input stream are checked. If <code>breakOnNonAscii</code> is true

     * the check terminates when the first non-US-ASCII character is

     * found and MOSTLY_NONASCII is returned. Else, the check continues

     * till <code>max</code> bytes or till the end of stream.

     *

     * @param  is  the input stream

     * @param  max  maximum bytes to check for. The special value

     *      ALL indicates that all the bytes in this input

     *      stream must be checked.

     * @param  breakOnNonAscii if <code>true</code>, then terminate the

     *      check when the first non-US-ASCII character

     *      is found.

     * @return    ALL_ASCII if all characters in the string 

     *      belong to the US-ASCII charset. MOSTLY_ASCII

     *      if more than half of the available characters

     *      are US-ASCII characters. Else MOSTLY_NONASCII.

     */

    static int checkAscii(InputStream is, int max, boolean breakOnNonAscii) {

  int ascii = 0, non_ascii = 0;

  int len;

  int block = 4096;

  int linelen = 0;

  boolean longLine = false, badEOL = false;

  boolean checkEOL = encodeEolStrict && breakOnNonAscii;

  byte buf[] = null;

  if (max != 0) {

      block = (max == ALL) ? 4096 : Math.min(max, 4096);

      buf = new byte[block]; 

  }

  while (max != 0) {

      try {

    if ((len = is.read(buf, 0, block)) == -1)

        break;

    int lastb = 0;

    for (int i = 0; i < len; i++) {

            // The '&' operator automatically causes b[i] to 

        // be promoted to an int, and we mask out the higher

        // bytes in the int so that the resulting value is 

        // not a negative integer.

        int b = buf[i] & 0xff;

        if (checkEOL &&

          ((lastb == '\r' && b != '\n') ||

          (lastb != '\r' && b == '\n')))

      badEOL = true;

        if (b == '\r' || b == '\n')

      linelen = 0;

        else {

      linelen++;

      if (linelen > 998)  // 1000 - CRLF

          longLine = true;

        }

        if (nonascii(b)) {  // non-ascii

            if (breakOnNonAscii) // we are done

          return MOSTLY_NONASCII;

            else

          non_ascii++;

        } else

            ascii++;

        lastb = b;

    }

      } catch (IOException ioex) {

    break;

      }

      if (max != ALL)

    max -= len;

  }



  if (max == 0 && breakOnNonAscii)

      // We have been told to break on the first non-ascii character.

      // We haven't got any non-ascii character yet, but then we

      // have not checked all of the available bytes either. So we

      // cannot say for sure that this input stream is ALL_ASCII,

      // and hence we must play safe and return MOSTLY_NONASCII



      return MOSTLY_NONASCII;



  if (non_ascii == 0) { // no non-us-ascii characters so far

      // If we're looking at non-text data, and we saw CR without LF

      // or vice versa, consider this mostly non-ASCII so that it

      // will be base64 encoded (since the quoted-printable encoder

      // doesn't encode this case properly).

      if (badEOL)

    return MOSTLY_NONASCII;

      // if we've seen a long line, we degrade to mostly ascii

      else if (longLine)

    return MOSTLY_ASCII;

      else

    return ALL_ASCII;

  }

  if (ascii > non_ascii) // mostly ascii

      return MOSTLY_ASCII;

  return MOSTLY_NONASCII;

    }



    static final boolean nonascii(int b) {

  return b >= 0177 || (b < 040 && b != '\r' && b != '\n' && b != '\t');

    }

}



/**

 * An OutputStream that determines whether the data written to

 * it is all ASCII, mostly ASCII, or mostly non-ASCII.

 */

class AsciiOutputStream extends OutputStream {

    private boolean breakOnNonAscii;

    private int ascii = 0, non_ascii = 0;

    private int linelen = 0;

    private boolean longLine = false;

    private boolean badEOL = false;

    private boolean checkEOL = false;

    private int lastb = 0;

    private int ret = 0;



    public AsciiOutputStream(boolean breakOnNonAscii, boolean encodeEolStrict) {

  this.breakOnNonAscii = breakOnNonAscii;

  checkEOL = encodeEolStrict && breakOnNonAscii;

    }



    public void write(int b) throws IOException {

  check(b);

    }



    public void write(byte b[]) throws IOException {

  write(b, 0, b.length);

    }



    public void write(byte b[], int off, int len) throws IOException {

  len += off;

  for (int i = off; i < len ; i++)

      check(b[i]);

    }



    private final void check(int b) throws IOException {

  b &= 0xff;

  if (checkEOL &&

    ((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n')))

      badEOL = true;

  if (b == '\r' || b == '\n')

      linelen = 0;

  else {

      linelen++;

      if (linelen > 998)  // 1000 - CRLF

    longLine = true;

  }

  if (MimeUtility.nonascii(b)) { // non-ascii

      non_ascii++;

      if (breakOnNonAscii) {  // we are done

    ret = MimeUtility.MOSTLY_NONASCII;

    throw new EOFException();

      }

  } else

      ascii++;

  lastb = b;

    }



    /**

     * Return ASCII-ness of data stream.

     */

    public int getAscii() {

  if (ret != 0)

      return ret;

  // If we're looking at non-text data, and we saw CR without LF

  // or vice versa, consider this mostly non-ASCII so that it

  // will be base64 encoded (since the quoted-printable encoder

  // doesn't encode this case properly).

  if (badEOL)

      return MimeUtility.MOSTLY_NONASCII;

  else if (non_ascii == 0) { // no non-us-ascii characters so far

      // if we've seen a long line, we degrade to mostly ascii

      if (longLine)

    return MimeUtility.MOSTLY_ASCII;

      else

    return MimeUtility.ALL_ASCII;

  }

  if (ascii > non_ascii) // mostly ascii

      return MimeUtility.MOSTLY_ASCII;

  return MimeUtility.MOSTLY_NONASCII;

    }

}
Open Source Repository
Home	/mail/mail-1.4.3 \| Repository Home
Open Source Repository