// BE THE CODER - jxl/read/biff/SSTRecord.java


/*********************************************************************

*

*      Copyright (C) 2002 Andrew Khan

*

* This library is free software; you can redistribute it and/or

* modify it under the terms of the GNU Lesser General Public

* License as published by the Free Software Foundation; either

* version 2.1 of the License, or (at your option) any later version.

*

* This library is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

* Lesser General Public License for more details.

*

* You should have received a copy of the GNU Lesser General Public

* License along with this library; if not, write to the Free Software

* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

***************************************************************************/



package jxl.read.biff;



import jxl.common.Assert;



import jxl.WorkbookSettings;

import jxl.biff.IntegerHelper;

import jxl.biff.RecordData;

import jxl.biff.StringHelper;



/**

 * Holds all the strings in the shared string table

 */

class SSTRecord extends RecordData

{

  /**
   * The total number of strings in this table, as read from the first
   * four bytes of the record data
   */
  private int totalStrings;
  /**
   * The number of unique strings, as read from bytes 4 to 7 of the record
   * data.  This is also the size of the strings array
   */
  private int uniqueStrings;
  /**
   * The shared strings, stored in the order in which they were read from
   * the record data
   */
  private String[] strings;
  /**
   * The array of continuation breaks.  Each entry is the offset, within
   * the concatenated record data, at which the corresponding continuation
   * record begins
   */
  private int[] continuationBreaks;



  /**
   * A holder for a byte array.  It lets the string reading methods hand
   * back a byte array through a parameter, and replace that array with a
   * new one where necessary
   */
  private static class ByteArrayHolder
  {
    /**
     * the held byte array
     */
    public byte[] bytes;
  }



  /**
   * A holder for a boolean.  It lets the string reading methods receive a
   * flag through a parameter and pass a changed value back to the caller
   */
  private static class BooleanHolder
  {
    /**
     * the held boolean value
     */
    public boolean value;
  }



  /**

   * Constructs this object from the raw data

   *

   * @param t the raw data

   * @param continuations the continuations

   * @param ws the workbook settings

   */

  public SSTRecord(Record t, Record[] continuations, WorkbookSettings ws)

  {

    super(t);



    // If a continue record appears in the middle of

    // a string, then the encoding character is repeated



    // Concatenate everything into one big bugger of a byte array

    int totalRecordLength = 0;



    for (int i = 0; i < continuations.length; i++)

    {

      totalRecordLength += continuations[i].getLength();

    }

    totalRecordLength += getRecord().getLength();



    byte[] data = new byte[totalRecordLength];



    // First the original data gets put in

    int pos = 0;

    System.arraycopy(getRecord().getData(), 0,

                     data, 0, getRecord().getLength());

    pos += getRecord().getLength();



    // Now copy in everything else.

    continuationBreaks = new int[continuations.length];

    Record r = null;

    for (int i = 0; i < continuations.length; i++)

    {

      r = continuations[i];

      System.arraycopy(r.getData(), 0,

                       data, pos,

                       r.getLength());

      continuationBreaks[i] = pos;

      pos += r.getLength();

    }



    totalStrings = IntegerHelper.getInt(data[0], data[1],

                                        data[2], data[3]);

    uniqueStrings = IntegerHelper.getInt(data[4], data[5],

                                         data[6], data[7]);



    strings = new String[uniqueStrings];

    readStrings(data, 8, ws);

  }



  /**

   * Reads in all the strings from the raw data

   *

   * @param data the raw data

   * @param offset the offset

   * @param ws the workbook settings

   */

  private void readStrings(byte[] data, int offset, WorkbookSettings ws)

  {

    int pos = offset;

    int numChars;

    byte optionFlags;

    String s = null;

    boolean asciiEncoding = false;

    boolean richString = false;

    boolean extendedString = false;

    int formattingRuns = 0;

    int extendedRunLength = 0;



    for (int i = 0; i < uniqueStrings; i++)

    {

      // Read in the number of characters

      numChars = IntegerHelper.getInt(data[pos], data[pos + 1]);

      pos += 2;

      optionFlags = data[pos];

      pos++;



      // See if it is an extended string

      extendedString = ((optionFlags & 0x04) != 0);



      // See if string contains formatting information

      richString = ((optionFlags & 0x08) != 0);



      if (richString)

      {

        // Read in the crun

        formattingRuns = IntegerHelper.getInt(data[pos], data[pos + 1]);

        pos += 2;

      }



      if (extendedString)

      {

        // Read in cchExtRst

        extendedRunLength = IntegerHelper.getInt

          (data[pos], data[pos + 1], data[pos + 2], data[pos + 3]);

        pos += 4;

      }



      // See if string is ASCII (compressed) or unicode

      asciiEncoding = ((optionFlags & 0x01) == 0);



      ByteArrayHolder bah = new ByteArrayHolder();

      BooleanHolder   bh = new BooleanHolder();

      bh.value = asciiEncoding;

      pos += getChars(data, bah, pos, bh, numChars);

      asciiEncoding = bh.value;



      if (asciiEncoding)

      {

        s = StringHelper.getString(bah.bytes, numChars, 0, ws);

      }

      else

      {

        s = StringHelper.getUnicodeString(bah.bytes, numChars, 0);

      }



      strings[i] = s;



      // For rich strings, skip over the formatting runs

      if (richString)

      {

        pos += 4 * formattingRuns;

      }



      // For extended strings, skip over the extended string data

      if (extendedString)

      {

        pos += extendedRunLength;

      }



      if (pos > data.length)

      {

        Assert.verify(false, "pos exceeds record length");

      }

    }

  }



  /**

   * Gets the chars in the ascii array, taking into account continuation

   * breaks

   *

   * @param source the original source

   * @param bah holder for the new byte array

   * @param pos the current position in the source

   * @param ascii holder for a return ascii flag

   * @param numChars the number of chars in the string

   * @return the number of bytes read from the source

   */

  private int getChars(byte[] source,

                       ByteArrayHolder bah,

                       int pos,

                       BooleanHolder ascii,

                       int numChars)

  {

    int i = 0;

    boolean spansBreak = false;



    if (ascii.value)

    {

      bah.bytes = new byte[numChars];

    }

    else

    {

      bah.bytes = new byte[numChars * 2];

    }



    while (i < continuationBreaks.length && !spansBreak)

    {

      spansBreak = pos <= continuationBreaks[i] &&

                   (pos + bah.bytes.length > continuationBreaks[i]);



      if (!spansBreak)

      {

        i++;

      }

    }



    // If it doesn't span a break simply do an array copy into the

    // destination array and finish

    if (!spansBreak)

    {

      System.arraycopy(source, pos, bah.bytes, 0, bah.bytes.length);

      return bah.bytes.length;

    }



    // Copy the portion before the break pos into the array

    int breakpos = continuationBreaks[i];

    System.arraycopy(source, pos, bah.bytes, 0, breakpos - pos);



    int bytesRead = breakpos - pos;

    int charsRead;

    if (ascii.value)

    {

      charsRead = bytesRead;

    }

    else

    {

      charsRead = bytesRead / 2;

    }



    bytesRead += getContinuedString(source,

                                    bah,

                                    bytesRead,

                                    i,

                                    ascii,

                                    numChars - charsRead);

    return bytesRead;

  }



  /**

   * Gets the rest of the string after a continuation break

   *

   * @param source the original bytes

   * @param bah the holder for the new bytes

   * @param destPos the des pos

   * @param contBreakIndex the index of the continuation break

   * @param ascii the ascii flag holder

   * @param charsLeft the number of chars left in the array

   * @return the number of bytes read in the continued string

   */

  private int getContinuedString(byte[] source,

                                 ByteArrayHolder bah,

                                 int destPos,

                                 int contBreakIndex,

                                 BooleanHolder ascii,

                                 int charsLeft)

  {

    int breakpos = continuationBreaks[contBreakIndex];

    int bytesRead = 0;



    while (charsLeft > 0)

    {

      Assert.verify(contBreakIndex < continuationBreaks.length,

                    "continuation break index");



      if (ascii.value && source[breakpos] == 0)

      {

        // The string is consistently ascii throughout



        int length = contBreakIndex == continuationBreaks.length - 1 ?

          charsLeft :

          Math.min

            (charsLeft,

             continuationBreaks[contBreakIndex + 1] - breakpos - 1);



        System.arraycopy(source,

                         breakpos + 1,

                         bah.bytes,

                         destPos,

                         length);

        destPos   += length;

        bytesRead += length + 1;

        charsLeft -= length;

        ascii.value = true;

      }

      else if (!ascii.value && source[breakpos] != 0)

      {

        // The string is Unicode throughout



        int length = contBreakIndex == continuationBreaks.length - 1 ?

          charsLeft * 2 :

          Math.min

            (charsLeft * 2,

             continuationBreaks[contBreakIndex + 1] - breakpos - 1);



        // It looks like the string continues as Unicode too.  That's handy

        System.arraycopy(source,

                         breakpos + 1,

                         bah.bytes,

                         destPos,

                         length);



        destPos   += length;

        bytesRead += length + 1;

        charsLeft -= length / 2;

        ascii.value = false;

      }

      else if (!ascii.value && source[breakpos] == 0)

      {

        // Bummer - the string starts off as Unicode, but after the

        // continuation it is in straightforward ASCII encoding

        int chars = contBreakIndex == continuationBreaks.length - 1 ?

          charsLeft:

          Math.min

            (charsLeft,

             continuationBreaks[contBreakIndex + 1] - breakpos - 1);



        for (int j = 0; j < chars; j++)

        {

          bah.bytes[destPos] = source[breakpos + j + 1];

          destPos += 2;

        }



        bytesRead += chars + 1;

        charsLeft -= chars;

        ascii.value = false;

      }

      else

      {

        // Double Bummer - the string starts off as ASCII, but after the

        // continuation it is in Unicode.  This impacts the allocated array



        // Reallocate what we have of the byte array so that it is all

        // Unicode

        byte[] oldBytes = bah.bytes;

        bah.bytes = new byte[destPos * 2 + charsLeft * 2];

        for (int j = 0; j < destPos; j++)

        {

          bah.bytes[j * 2] = oldBytes[j];

        }



        destPos = destPos * 2;



        int length = contBreakIndex == continuationBreaks.length - 1 ?

          charsLeft * 2 :

          Math.min

            (charsLeft * 2,

             continuationBreaks[contBreakIndex + 1] - breakpos - 1);



        System.arraycopy(source,

                         breakpos + 1,

                         bah.bytes,

                         destPos,

                         length);



        destPos   += length;

        bytesRead += length + 1;

        charsLeft -= length / 2;

        ascii.value = false;

      }



      contBreakIndex++;



      if (contBreakIndex < continuationBreaks.length)

      {

        breakpos = continuationBreaks[contBreakIndex];

      }

    }



    return bytesRead;

  }



  /**

   * Gets the string at the specified position

   *

   * @param index the index of the string to return

   * @return the strings

   */

  public String getString(int index)

  {

    Assert.verify(index < uniqueStrings);

    return strings[index];

  }

}
// Open Source Repository
// Home	/excel/jxl-2.6.12 | Repository Home
// Open Source Repository