BE THE CODER - org/apache/commons/lang3/text/StrTokenizer.java


/*

 * Licensed to the Apache Software Foundation (ASF) under one or more

 * contributor license agreements.  See the NOTICE file distributed with

 * this work for additional information regarding copyright ownership.

 * The ASF licenses this file to You under the Apache License, Version 2.0

 * (the "License"); you may not use this file except in compliance with

 * the License.  You may obtain a copy of the License at

 *

 *      http://www.apache.org/licenses/LICENSE-2.0

 *

 * Unless required by applicable law or agreed to in writing, software

 * distributed under the License is distributed on an "AS IS" BASIS,

 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 * See the License for the specific language governing permissions and

 * limitations under the License.

 */

package org.apache.commons.lang3.text;



import java.util.ArrayList;

import java.util.Collections;

import java.util.List;

import java.util.ListIterator;

import java.util.NoSuchElementException;



import org.apache.commons.lang3.ArrayUtils;



/**

 * Tokenizes a string based on delimiters (separators)

 * and supporting quoting and ignored character concepts.

 * <p>

 * This class can split a String into many smaller strings. It aims

 * to do a similar job to {@link java.util.StringTokenizer StringTokenizer},

 * however it offers much more control and flexibility including implementing

 * the <code>ListIterator</code> interface. By default, it is set up

 * like <code>StringTokenizer</code>.

 * <p>

 * The input String is split into a number of <i>tokens</i>.

 * Each token is separated from the next String by a <i>delimiter</i>.

 * One or more delimiter characters must be specified.

 * <p>

 * Each token may be surrounded by quotes.

 * The <i>quote</i> matcher specifies the quote character(s).

 * A quote may be escaped within a quoted section by duplicating itself.

 * <p>

 * Between each token and the delimiter are potentially characters that need trimming.

 * The <i>trimmer</i> matcher specifies these characters.

 * One usage might be to trim whitespace characters.

 * <p>

 * At any point outside the quotes there might potentially be invalid characters.

 * The <i>ignored</i> matcher specifies these characters to be removed.

 * One usage might be to remove new line characters.

 * <p>

 * Empty tokens may be removed or returned as null.

 * <pre>

 * "a,b,c"         - Three tokens "a","b","c"   (comma delimiter)

 * " a, b , c "    - Three tokens "a","b","c"   (default CSV processing trims whitespace)

 * "a, ", b ,", c" - Three tokens "a, " , " b ", ", c" (quoted text untouched)

 * </pre>

 * <p>

 *

 * This tokenizer has the following properties and options:

 *

 * <table>

 *  <tr>

 *   <th>Property</th><th>Type</th><th>Default</th>

 *  </tr>

 *  <tr>

 *   <td>delim</td><td>CharSetMatcher</td><td>{ \t\n\r\f}</td>

 *  </tr>

 *  <tr>

 *   <td>quote</td><td>NoneMatcher</td><td>{}</td>

 *  </tr>

 *  <tr>

 *   <td>ignore</td><td>NoneMatcher</td><td>{}</td>

 *  </tr>

 *  <tr>

 *   <td>emptyTokenAsNull</td><td>boolean</td><td>false</td>

 *  </tr>

 *  <tr>

 *   <td>ignoreEmptyTokens</td><td>boolean</td><td>true</td>

 *  </tr>

 * </table>

 *

 * @since 2.2

 * @version $Id: StrTokenizer.java 1153241 2011-08-02 18:49:52Z ggregory $

 */

public class StrTokenizer implements ListIterator<String>, Cloneable {



    private static final StrTokenizer CSV_TOKENIZER_PROTOTYPE;

    private static final StrTokenizer TSV_TOKENIZER_PROTOTYPE;

    static {

        CSV_TOKENIZER_PROTOTYPE = new StrTokenizer();

        CSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StrMatcher.commaMatcher());

        CSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StrMatcher.doubleQuoteMatcher());

        CSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StrMatcher.noneMatcher());

        CSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StrMatcher.trimMatcher());

        CSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false);

        CSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false);



        TSV_TOKENIZER_PROTOTYPE = new StrTokenizer();

        TSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StrMatcher.tabMatcher());

        TSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StrMatcher.doubleQuoteMatcher());

        TSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StrMatcher.noneMatcher());

        TSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StrMatcher.trimMatcher());

        TSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false);

        TSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false);

    }



    /** The text to work on. */

    private char chars[];

    /** The parsed tokens */

    private String tokens[];

    /** The current iteration position */

    private int tokenPos;



    /** The delimiter matcher */

    private StrMatcher delimMatcher = StrMatcher.splitMatcher();

    /** The quote matcher */

    private StrMatcher quoteMatcher = StrMatcher.noneMatcher();

    /** The ignored matcher */

    private StrMatcher ignoredMatcher = StrMatcher.noneMatcher();

    /** The trimmer matcher */

    private StrMatcher trimmerMatcher = StrMatcher.noneMatcher();



    /** Whether to return empty tokens as null */

    private boolean emptyAsNull = false;

    /** Whether to ignore empty tokens */

    private boolean ignoreEmptyTokens = true;



    //-----------------------------------------------------------------------



    /**

     * Returns a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>.

     * 

     * @return a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>.

     */

    private static StrTokenizer getCSVClone() {

        return (StrTokenizer) CSV_TOKENIZER_PROTOTYPE.clone();

    }



    /**

     * Gets a new tokenizer instance which parses Comma Separated Value strings

     * initializing it with the given input.  The default for CSV processing

     * will be trim whitespace from both ends (which can be overridden with

     * the setTrimmer method).

     * <p>

     * You must call a "reset" method to set the string which you want to parse.

     * @return a new tokenizer instance which parses Comma Separated Value strings

     */

    public static StrTokenizer getCSVInstance() {

        return getCSVClone();

    }



    /**

     * Gets a new tokenizer instance which parses Comma Separated Value strings

     * initializing it with the given input.  The default for CSV processing

     * will be trim whitespace from both ends (which can be overridden with

     * the setTrimmer method).

     *

     * @param input  the text to parse

     * @return a new tokenizer instance which parses Comma Separated Value strings

     */

    public static StrTokenizer getCSVInstance(String input) {

        StrTokenizer tok = getCSVClone();

        tok.reset(input);

        return tok;

    }



    /**

     * Gets a new tokenizer instance which parses Comma Separated Value strings

     * initializing it with the given input.  The default for CSV processing

     * will be trim whitespace from both ends (which can be overridden with

     * the setTrimmer method).

     *

     * @param input  the text to parse

     * @return a new tokenizer instance which parses Comma Separated Value strings

     */

    public static StrTokenizer getCSVInstance(char[] input) {

        StrTokenizer tok = getCSVClone();

        tok.reset(input);

        return tok;

    }



    /**

     * Returns a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>.

     * 

     * @return a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>.

     */

    private static StrTokenizer getTSVClone() {

        return (StrTokenizer) TSV_TOKENIZER_PROTOTYPE.clone();

    }





    /**

     * Gets a new tokenizer instance which parses Tab Separated Value strings.

     * The default for CSV processing will be trim whitespace from both ends

     * (which can be overridden with the setTrimmer method).

     * <p>

     * You must call a "reset" method to set the string which you want to parse.

     * @return a new tokenizer instance which parses Tab Separated Value strings.

     */

    public static StrTokenizer getTSVInstance() {

        return getTSVClone();

    }



    /**

     * Gets a new tokenizer instance which parses Tab Separated Value strings.

     * The default for CSV processing will be trim whitespace from both ends

     * (which can be overridden with the setTrimmer method).

     * @param input  the string to parse

     * @return a new tokenizer instance which parses Tab Separated Value strings.

     */

    public static StrTokenizer getTSVInstance(String input) {

        StrTokenizer tok = getTSVClone();

        tok.reset(input);

        return tok;

    }



    /**

     * Gets a new tokenizer instance which parses Tab Separated Value strings.

     * The default for CSV processing will be trim whitespace from both ends

     * (which can be overridden with the setTrimmer method).

     * @param input  the string to parse

     * @return a new tokenizer instance which parses Tab Separated Value strings.

     */

    public static StrTokenizer getTSVInstance(char[] input) {

        StrTokenizer tok = getTSVClone();

        tok.reset(input);

        return tok;

    }



    //-----------------------------------------------------------------------

    /**

     * Constructs a tokenizer splitting on space, tab, newline and formfeed

     * as per StringTokenizer, but with no text to tokenize.

     * <p>

     * This constructor is normally used with {@link #reset(String)}.

     */

    public StrTokenizer() {

        super();

        this.chars = null;

    }



    /**

     * Constructs a tokenizer splitting on space, tab, newline and formfeed

     * as per StringTokenizer.

     *

     * @param input  the string which is to be parsed

     */

    public StrTokenizer(String input) {

        super();

        if (input != null) {

            chars = input.toCharArray();

        } else {

            chars = null;

        }

    }



    /**

     * Constructs a tokenizer splitting on the specified delimiter character.

     *

     * @param input  the string which is to be parsed

     * @param delim  the field delimiter character

     */

    public StrTokenizer(String input, char delim) {

        this(input);

        setDelimiterChar(delim);

    }



    /**

     * Constructs a tokenizer splitting on the specified delimiter string.

     *

     * @param input  the string which is to be parsed

     * @param delim  the field delimiter string

     */

    public StrTokenizer(String input, String delim) {

        this(input);

        setDelimiterString(delim);

    }



    /**

     * Constructs a tokenizer splitting using the specified delimiter matcher.

     *

     * @param input  the string which is to be parsed

     * @param delim  the field delimiter matcher

     */

    public StrTokenizer(String input, StrMatcher delim) {

        this(input);

        setDelimiterMatcher(delim);

    }



    /**

     * Constructs a tokenizer splitting on the specified delimiter character

     * and handling quotes using the specified quote character.

     *

     * @param input  the string which is to be parsed

     * @param delim  the field delimiter character

     * @param quote  the field quoted string character

     */

    public StrTokenizer(String input, char delim, char quote) {

        this(input, delim);

        setQuoteChar(quote);

    }



    /**

     * Constructs a tokenizer splitting using the specified delimiter matcher

     * and handling quotes using the specified quote matcher.

     *

     * @param input  the string which is to be parsed

     * @param delim  the field delimiter matcher

     * @param quote  the field quoted string matcher

     */

    public StrTokenizer(String input, StrMatcher delim, StrMatcher quote) {

        this(input, delim);

        setQuoteMatcher(quote);

    }



    /**

     * Constructs a tokenizer splitting on space, tab, newline and formfeed

     * as per StringTokenizer.

     *

     * @param input  the string which is to be parsed, not cloned

     */

    public StrTokenizer(char[] input) {

        super();

        this.chars = ArrayUtils.clone(input);

    }



    /**

     * Constructs a tokenizer splitting on the specified character.

     *

     * @param input  the string which is to be parsed, not cloned

     * @param delim the field delimiter character

     */

    public StrTokenizer(char[] input, char delim) {

        this(input);

        setDelimiterChar(delim);

    }



    /**

     * Constructs a tokenizer splitting on the specified string.

     *

     * @param input  the string which is to be parsed, not cloned

     * @param delim the field delimiter string

     */

    public StrTokenizer(char[] input, String delim) {

        this(input);

        setDelimiterString(delim);

    }



    /**

     * Constructs a tokenizer splitting using the specified delimiter matcher.

     *

     * @param input  the string which is to be parsed, not cloned

     * @param delim  the field delimiter matcher

     */

    public StrTokenizer(char[] input, StrMatcher delim) {

        this(input);

        setDelimiterMatcher(delim);

    }



    /**

     * Constructs a tokenizer splitting on the specified delimiter character

     * and handling quotes using the specified quote character.

     *

     * @param input  the string which is to be parsed, not cloned

     * @param delim  the field delimiter character

     * @param quote  the field quoted string character

     */

    public StrTokenizer(char[] input, char delim, char quote) {

        this(input, delim);

        setQuoteChar(quote);

    }



    /**

     * Constructs a tokenizer splitting using the specified delimiter matcher

     * and handling quotes using the specified quote matcher.

     *

     * @param input  the string which is to be parsed, not cloned

     * @param delim  the field delimiter character

     * @param quote  the field quoted string character

     */

    public StrTokenizer(char[] input, StrMatcher delim, StrMatcher quote) {

        this(input, delim);

        setQuoteMatcher(quote);

    }



    // API

    //-----------------------------------------------------------------------

    /**

     * Gets the number of tokens found in the String.

     *

     * @return the number of matched tokens

     */

    public int size() {

        checkTokenized();

        return tokens.length;

    }



    /**

     * Gets the next token from the String.

     * Equivalent to {@link #next()} except it returns null rather than

     * throwing {@link NoSuchElementException} when no tokens remain.

     *

     * @return the next sequential token, or null when no more tokens are found

     */

    public String nextToken() {

        if (hasNext()) {

            return tokens[tokenPos++];

        }

        return null;

    }



    /**

     * Gets the previous token from the String.

     *

     * @return the previous sequential token, or null when no more tokens are found

     */

    public String previousToken() {

        if (hasPrevious()) {

            return tokens[--tokenPos];

        }

        return null;

    }



    /**

     * Gets a copy of the full token list as an independent modifiable array.

     *

     * @return the tokens as a String array

     */

    public String[] getTokenArray() {

        checkTokenized();

        return tokens.clone();

    }



    /**

     * Gets a copy of the full token list as an independent modifiable list.

     *

     * @return the tokens as a String array

     */

    public List<String> getTokenList() {

        checkTokenized();

        List<String> list = new ArrayList<String>(tokens.length);

        for (String element : tokens) {

            list.add(element);

        }

        return list;

    }



    /**

     * Resets this tokenizer, forgetting all parsing and iteration already completed.

     * <p>

     * This method allows the same tokenizer to be reused for the same String.

     *

     * @return this, to enable chaining

     */

    public StrTokenizer reset() {

        tokenPos = 0;

        tokens = null;

        return this;

    }



    /**

     * Reset this tokenizer, giving it a new input string to parse.

     * In this manner you can re-use a tokenizer with the same settings

     * on multiple input lines.

     *

     * @param input  the new string to tokenize, null sets no text to parse

     * @return this, to enable chaining

     */

    public StrTokenizer reset(String input) {

        reset();

        if (input != null) {

            this.chars = input.toCharArray();

        } else {

            this.chars = null;

        }

        return this;

    }



    /**

     * Reset this tokenizer, giving it a new input string to parse.

     * In this manner you can re-use a tokenizer with the same settings

     * on multiple input lines.

     *

     * @param input  the new character array to tokenize, not cloned, null sets no text to parse

     * @return this, to enable chaining

     */

    public StrTokenizer reset(char[] input) {

        reset();

        this.chars = ArrayUtils.clone(input);

        return this;

    }



    // ListIterator

    //-----------------------------------------------------------------------

    /**

     * Checks whether there are any more tokens.

     *

     * @return true if there are more tokens

     */

    public boolean hasNext() {

        checkTokenized();

        return tokenPos < tokens.length;

    }



    /**

     * Gets the next token.

     *

     * @return the next String token

     * @throws NoSuchElementException if there are no more elements

     */

    public String next() {

        if (hasNext()) {

            return tokens[tokenPos++];

        }

        throw new NoSuchElementException();

    }



    /**

     * Gets the index of the next token to return.

     *

     * @return the next token index

     */

    public int nextIndex() {

        return tokenPos;

    }



    /**

     * Checks whether there are any previous tokens that can be iterated to.

     *

     * @return true if there are previous tokens

     */

    public boolean hasPrevious() {

        checkTokenized();

        return tokenPos > 0;

    }



    /**

     * Gets the token previous to the last returned token.

     *

     * @return the previous token

     */

    public String previous() {

        if (hasPrevious()) {

            return tokens[--tokenPos];

        }

        throw new NoSuchElementException();

    }



    /**

     * Gets the index of the previous token.

     *

     * @return the previous token index

     */

    public int previousIndex() {

        return tokenPos - 1;

    }



    /**

     * Unsupported ListIterator operation.

     *

     * @throws UnsupportedOperationException always

     */

    public void remove() {

        throw new UnsupportedOperationException("remove() is unsupported");

    }



    /**

     * Unsupported ListIterator operation.

     * @param obj this parameter ignored.

     * @throws UnsupportedOperationException always

     */

    public void set(String obj) {

        throw new UnsupportedOperationException("set() is unsupported");

    }



    /**

     * Unsupported ListIterator operation.

     * @param obj this parameter ignored.

     * @throws UnsupportedOperationException always

     */

    public void add(String obj) {

        throw new UnsupportedOperationException("add() is unsupported");

    }



    // Implementation

    //-----------------------------------------------------------------------

    /**

     * Checks if tokenization has been done, and if not then do it.

     */

    private void checkTokenized() {

        if (tokens == null) {

            if (chars == null) {

                // still call tokenize as subclass may do some work

                List<String> split = tokenize(null, 0, 0);

                tokens = split.toArray(new String[split.size()]);

            } else {

                List<String> split = tokenize(chars, 0, chars.length);

                tokens = split.toArray(new String[split.size()]);

            }

        }

    }



    /**

     * Internal method to performs the tokenization.

     * <p>

     * Most users of this class do not need to call this method. This method

     * will be called automatically by other (public) methods when required.

     * <p>

     * This method exists to allow subclasses to add code before or after the

     * tokenization. For example, a subclass could alter the character array,

     * offset or count to be parsed, or call the tokenizer multiple times on

     * multiple strings. It is also be possible to filter the results.

     * <p>

     * <code>StrTokenizer</code> will always pass a zero offset and a count

     * equal to the length of the array to this method, however a subclass

     * may pass other values, or even an entirely different array.

     * 

     * @param chars  the character array being tokenized, may be null

     * @param offset  the start position within the character array, must be valid

     * @param count  the number of characters to tokenize, must be valid

     * @return the modifiable list of String tokens, unmodifiable if null array or zero count

     */

    protected List<String> tokenize(char[] chars, int offset, int count) {

        if (chars == null || count == 0) {

            return Collections.emptyList();

        }

        StrBuilder buf = new StrBuilder();

        List<String> tokens = new ArrayList<String>();

        int pos = offset;

        

        // loop around the entire buffer

        while (pos >= 0 && pos < count) {

            // find next token

            pos = readNextToken(chars, pos, count, buf, tokens);

            

            // handle case where end of string is a delimiter

            if (pos >= count) {

                addToken(tokens, "");

            }

        }

        return tokens;

    }



    /**

     * Adds a token to a list, paying attention to the parameters we've set.

     *

     * @param list  the list to add to

     * @param tok  the token to add

     */

    private void addToken(List<String> list, String tok) {

        if (tok == null || tok.length() == 0) {

            if (isIgnoreEmptyTokens()) {

                return;

            }

            if (isEmptyTokenAsNull()) {

                tok = null;

            }

        }

        list.add(tok);

    }



    /**

     * Reads character by character through the String to get the next token.

     *

     * @param chars  the character array being tokenized

     * @param start  the first character of field

     * @param len  the length of the character array being tokenized

     * @param workArea  a temporary work area

     * @param tokens  the list of parsed tokens

     * @return the starting position of the next field (the character

     *  immediately after the delimiter), or -1 if end of string found

     */

    private int readNextToken(char[] chars, int start, int len, StrBuilder workArea, List<String> tokens) {

        // skip all leading whitespace, unless it is the

        // field delimiter or the quote character

        while (start < len) {

            int removeLen = Math.max(

                    getIgnoredMatcher().isMatch(chars, start, start, len),

                    getTrimmerMatcher().isMatch(chars, start, start, len));

            if (removeLen == 0 ||

                getDelimiterMatcher().isMatch(chars, start, start, len) > 0 ||

                getQuoteMatcher().isMatch(chars, start, start, len) > 0) {

                break;

            }

            start += removeLen;

        }

        

        // handle reaching end

        if (start >= len) {

            addToken(tokens, "");

            return -1;

        }

        

        // handle empty token

        int delimLen = getDelimiterMatcher().isMatch(chars, start, start, len);

        if (delimLen > 0) {

            addToken(tokens, "");

            return start + delimLen;

        }

        

        // handle found token

        int quoteLen = getQuoteMatcher().isMatch(chars, start, start, len);

        if (quoteLen > 0) {

            return readWithQuotes(chars, start + quoteLen, len, workArea, tokens, start, quoteLen);

        }

        return readWithQuotes(chars, start, len, workArea, tokens, 0, 0);

    }



    /**
     * Reads a possibly quoted string token and adds it to the token list.
     * <p>
     * The token text is assembled in the work area. Reading alternates
     * between quoting mode, where every character other than the quote is
     * copied as-is, and non-quoting mode, where delimiters end the token,
     * ignored characters are dropped and trimmable characters are only kept
     * when a non-trimmable character follows them.
     *
     * @param chars  the character array being tokenized
     * @param start  the first character of field
     * @param len  the end position (exclusive) of the characters being tokenized
     * @param workArea  a temporary work area
     * @param tokens  the list of parsed tokens
     * @param quoteStart  the start position of the matched quote, 0 if no quoting
     * @param quoteLen  the length of the matched quote, 0 if no quoting
     * @return the starting position of the next field (the character
     *  immediately after the delimiter), or -1 if end of string found
     */
    private int readWithQuotes(char[] chars, int start, int len, StrBuilder workArea, 
                               List<String> tokens, int quoteStart, int quoteLen) {
        // Loop until we've found the end of the quoted
        // string or the end of the input
        workArea.clear();
        int pos = start;
        boolean quoting = (quoteLen > 0);
        // length of the work area up to the last character that must be kept;
        // anything appended after it is trailing trimmable text that is cut off
        int trimStart = 0;
        
        while (pos < len) {
            // quoting mode can occur several times throughout a string
            // we must switch between quoting and non-quoting until we
            // encounter a non-quoted delimiter, or end of string
            if (quoting) {
                // In quoting mode
                
                // If we've found a quote character, see if it's
                // followed by a second quote.  If so, then we need
                // to actually put the quote character into the token
                // rather than end the token.
                if (isQuote(chars, pos, len, quoteStart, quoteLen)) {
                    if (isQuote(chars, pos + quoteLen, len, quoteStart, quoteLen)) {
                        // matched pair of quotes, thus an escaped quote
                        workArea.append(chars, pos, quoteLen);
                        pos += (quoteLen * 2);
                        // the escaped quote is real content, so it is protected from trimming
                        trimStart = workArea.size();
                        continue;
                    }
                    
                    // end of quoting
                    quoting = false;
                    pos += quoteLen;
                    continue;
                }
                
                // copy regular character from inside quotes
                workArea.append(chars[pos++]);
                // quoted characters are never trimmed
                trimStart = workArea.size();
                
            } else {
                // Not in quoting mode
                
                // check for delimiter, and thus end of token
                int delimLen = getDelimiterMatcher().isMatch(chars, pos, start, len);
                if (delimLen > 0) {
                    // return condition when end of token found
                    // substring(0, trimStart) drops any trailing trimmable characters
                    addToken(tokens, workArea.substring(0, trimStart));
                    return pos + delimLen;
                }
                
                // check for quote, and thus back into quoting mode
                // (only possible when this token began with a quote, as quoteLen is 0 otherwise)
                if (quoteLen > 0 && isQuote(chars, pos, len, quoteStart, quoteLen)) {
                    quoting = true;
                    pos += quoteLen;
                    continue;
                }
                
                // check for ignored (outside quotes), and ignore
                int ignoredLen = getIgnoredMatcher().isMatch(chars, pos, start, len);
                if (ignoredLen > 0) {
                    pos += ignoredLen;
                    continue;
                }
                
                // check for trimmed character
                // don't yet know if its at the end, so copy to workArea
                // use trimStart to keep track of trim at the end
                int trimmedLen = getTrimmerMatcher().isMatch(chars, pos, start, len);
                if (trimmedLen > 0) {
                    // trimStart is deliberately left unchanged here
                    workArea.append(chars, pos, trimmedLen);
                    pos += trimmedLen;
                    continue;
                }
                
                // copy regular character from outside quotes
                workArea.append(chars[pos++]);
                // a kept character makes any trimmable text before it part of the token
                trimStart = workArea.size();
            }
        }
        
        // return condition when end of string found
        addToken(tokens, workArea.substring(0, trimStart));
        return -1;
    }



    /**

     * Checks if the characters at the index specified match the quote

     * already matched in readNextToken().

     *

     * @param chars  the character array being tokenized

     * @param pos  the position to check for a quote

     * @param len  the length of the character array being tokenized

     * @param quoteStart  the start position of the matched quote, 0 if no quoting

     * @param quoteLen  the length of the matched quote, 0 if no quoting

     * @return true if a quote is matched

     */

    private boolean isQuote(char[] chars, int pos, int len, int quoteStart, int quoteLen) {

        for (int i = 0; i < quoteLen; i++) {

            if ((pos + i) >= len || chars[pos + i] != chars[quoteStart + i]) {

                return false;

            }

        }

        return true;

    }



    // Delimiter

    //-----------------------------------------------------------------------

    /**

     * Gets the field delimiter matcher.

     *

     * @return the delimiter matcher in use

     */

    public StrMatcher getDelimiterMatcher() {

        return this.delimMatcher;

    }



    /**

     * Sets the field delimiter matcher.

     * <p>

     * The delimitier is used to separate one token from another.

     *

     * @param delim  the delimiter matcher to use

     * @return this, to enable chaining

     */

    public StrTokenizer setDelimiterMatcher(StrMatcher delim) {

        if (delim == null) {

            this.delimMatcher = StrMatcher.noneMatcher();

        } else {

            this.delimMatcher = delim;

        }

        return this;

    }



    /**

     * Sets the field delimiter character.

     *

     * @param delim  the delimiter character to use

     * @return this, to enable chaining

     */

    public StrTokenizer setDelimiterChar(char delim) {

        return setDelimiterMatcher(StrMatcher.charMatcher(delim));

    }



    /**

     * Sets the field delimiter string.

     *

     * @param delim  the delimiter string to use

     * @return this, to enable chaining

     */

    public StrTokenizer setDelimiterString(String delim) {

        return setDelimiterMatcher(StrMatcher.stringMatcher(delim));

    }



    // Quote

    //-----------------------------------------------------------------------

    /**

     * Gets the quote matcher currently in use.

     * <p>

     * The quote character is used to wrap data between the tokens.

     * This enables delimiters to be entered as data.

     * The default value is '"' (double quote).

     *

     * @return the quote matcher in use

     */

    public StrMatcher getQuoteMatcher() {

        return quoteMatcher;

    }



    /**

     * Set the quote matcher to use.

     * <p>

     * The quote character is used to wrap data between the tokens.

     * This enables delimiters to be entered as data.

     *

     * @param quote  the quote matcher to use, null ignored

     * @return this, to enable chaining

     */

    public StrTokenizer setQuoteMatcher(StrMatcher quote) {

        if (quote != null) {

            this.quoteMatcher = quote;

        }

        return this;

    }



    /**

     * Sets the quote character to use.

     * <p>

     * The quote character is used to wrap data between the tokens.

     * This enables delimiters to be entered as data.

     *

     * @param quote  the quote character to use

     * @return this, to enable chaining

     */

    public StrTokenizer setQuoteChar(char quote) {

        return setQuoteMatcher(StrMatcher.charMatcher(quote));

    }



    // Ignored

    //-----------------------------------------------------------------------

    /**

     * Gets the ignored character matcher.

     * <p>

     * These characters are ignored when parsing the String, unless they are

     * within a quoted region.

     * The default value is not to ignore anything.

     *

     * @return the ignored matcher in use

     */

    public StrMatcher getIgnoredMatcher() {

        return ignoredMatcher;

    }



    /**

     * Sets the matcher for characters to ignore.

     * <p>

     * These characters are ignored when parsing the String, unless they are

     * within a quoted region.

     *

     * @param ignored  the ignored matcher to use, null ignored

     * @return this, to enable chaining

     */

    public StrTokenizer setIgnoredMatcher(StrMatcher ignored) {

        if (ignored != null) {

            this.ignoredMatcher = ignored;

        }

        return this;

    }



    /**

     * Sets the character to ignore.

     * <p>

     * This character is ignored when parsing the String, unless it is

     * within a quoted region.

     *

     * @param ignored  the ignored character to use

     * @return this, to enable chaining

     */

    public StrTokenizer setIgnoredChar(char ignored) {

        return setIgnoredMatcher(StrMatcher.charMatcher(ignored));

    }



    // Trimmer

    //-----------------------------------------------------------------------

    /**

     * Gets the trimmer character matcher.

     * <p>

     * These characters are trimmed off on each side of the delimiter

     * until the token or quote is found.

     * The default value is not to trim anything.

     *

     * @return the trimmer matcher in use

     */

    public StrMatcher getTrimmerMatcher() {

        return trimmerMatcher;

    }



    /**

     * Sets the matcher for characters to trim.

     * <p>

     * These characters are trimmed off on each side of the delimiter

     * until the token or quote is found.

     *

     * @param trimmer  the trimmer matcher to use, null ignored

     * @return this, to enable chaining

     */

    public StrTokenizer setTrimmerMatcher(StrMatcher trimmer) {

        if (trimmer != null) {

            this.trimmerMatcher = trimmer;

        }

        return this;

    }



    //-----------------------------------------------------------------------

    /**

     * Gets whether the tokenizer currently returns empty tokens as null.

     * The default for this property is false.

     *

     * @return true if empty tokens are returned as null

     */

    public boolean isEmptyTokenAsNull() {

        return this.emptyAsNull;

    }



    /**

     * Sets whether the tokenizer should return empty tokens as null.

     * The default for this property is false.

     *

     * @param emptyAsNull  whether empty tokens are returned as null

     * @return this, to enable chaining

     */

    public StrTokenizer setEmptyTokenAsNull(boolean emptyAsNull) {

        // Record the flag on this instance; the parameter shadows the field,
        // hence the explicit "this." qualifier.
        this.emptyAsNull = emptyAsNull;

        // Return the same instance so configuration calls can be chained.
        return this;

    }



    //-----------------------------------------------------------------------

    /**

     * Gets whether the tokenizer currently ignores empty tokens.

     * The default for this property is true.

     *

     * @return true if empty tokens are not returned

     */

    public boolean isIgnoreEmptyTokens() {

        return ignoreEmptyTokens;

    }



    /**

     * Sets whether the tokenizer should ignore and not return empty tokens.

     * The default for this property is true.

     *

     * @param ignoreEmptyTokens  whether empty tokens are not returned

     * @return this, to enable chaining

     */

    public StrTokenizer setIgnoreEmptyTokens(boolean ignoreEmptyTokens) {

        // Record the flag on this instance; the parameter shadows the field,
        // hence the explicit "this." qualifier.
        this.ignoreEmptyTokens = ignoreEmptyTokens;

        // Return the same instance so configuration calls can be chained.
        return this;

    }



    //-----------------------------------------------------------------------

    /**

     * Gets the String content that the tokenizer is parsing.

     *

     * @return the string content being parsed

     */

    public String getContent() {

        if (chars == null) {

            return null;

        }

        return new String(chars);

    }



    //-----------------------------------------------------------------------

    /**

     * Creates a new instance of this Tokenizer. The new instance is reset so

     * that it will be at the start of the token list.

     * If a {@link CloneNotSupportedException} is caught, return <code>null</code>.

     * 

     * @return a new instance of this Tokenizer which has been reset.

     */

    @Override

    public Object clone() {

        try {

            return cloneReset();

        } catch (CloneNotSupportedException ex) {

            return null;

        }

    }



    /**

     * Creates a new instance of this Tokenizer. The new instance is reset so that

     * it will be at the start of the token list.

     * 

     * @return a new instance of this Tokenizer which has been reset.

     * @throws CloneNotSupportedException if there is a problem cloning

     */

    Object cloneReset() throws CloneNotSupportedException {

        // this method exists to enable 100% test coverage

        StrTokenizer cloned = (StrTokenizer) super.clone();

        if (cloned.chars != null) {

            cloned.chars = cloned.chars.clone();

        }

        cloned.reset();

        return cloned;

    }



    //-----------------------------------------------------------------------

    /**

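     * Returns a string describing this tokenizer: the text "StrTokenizer"
     * followed by the token list once tokens are available, or
     * "StrTokenizer[not tokenized yet]" before that.

     *

     * @return a description of the tokenizer's current tokens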

     */

    @Override

    public String toString() {

        if (tokens == null) {

            return "StrTokenizer[not tokenized yet]";

        }

        return "StrTokenizer" + getTokenList();

    }



}
Open Source Repository
Home	/commons-lang/commons-lang3-3.0.1 \| Repository Home
Open Source Repository