package org.lsst.ccs.command;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Converts an input command line into a series of {@link Token}s.
 * <p>
 * The tokenizer is a small state machine that understands:
 * <ul>
 * <li>words separated by whitespace;</li>
 * <li>double- or single-quoted strings, in which a doubled quote character
 * stands for one literal quote;</li>
 * <li>bracketed groups {@code [ ... ]}, which may nest and are kept as a
 * single token including the brackets;</li>
 * <li>comments, which start at a {@code #} and run to the end of the input;</li>
 * <li>short options ({@code -abc}, emitted as one token per character) and
 * long options ({@code --name}), both emitted without their leading dashes.</li>
 * </ul>
 *
 * @author tonyj
 */
public class StringTokenizer {

    /** Matches a single dash followed by at least one ASCII letter. */
    private static final Pattern SHORT_OPTION_PATTERN = Pattern.compile("^-[a-zA-Z]+.*");
    /** Matches a double dash followed by at least one ASCII letter. */
    private static final Pattern OPTION_PATTERN = Pattern.compile("^--[a-zA-Z]+.*");

    /**
     * A single token together with the position in the input at which it
     * was found.
     * <p>
     * Do we really need the location? It was used by cliche to be able to
     * provide feedback on where in the command line an illegal token was
     * found, but we do not currently use this functionality.
     */
    public static class Token implements Serializable {

        private static final long serialVersionUID = 999886576830939492L;
        private final int location;
        private final String string;
        private final State state;

        /** Creates a plain word token. */
        Token(int location, String string) {
            this(location, string, State.WORD);
        }

        /**
         * Creates a token.
         *
         * @param location index in the input associated with the token
         * @param string the token text (dashes already stripped for options)
         * @param state the tokenizer state the token was produced in
         */
        Token(int location, String string, State state) {
            this.location = location;
            this.string = string;
            this.state = state;
        }

        /**
         * @return the index in the input associated with this token
         */
        public int getLocation() {
            return location;
        }

        /**
         * @return the text of this token
         */
        public String getString() {
            return string;
        }

        /**
         * @return {@code true} if this token is a long or a short option
         */
        public boolean isOption() {
            return state == State.OPTION || state == State.SHORT_OPTION;
        }

        /**
         * @return {@code true} if this token is a short (single dash) option
         */
        public boolean isShortOption() {
            return state == State.SHORT_OPTION;
        }
    }

    /** States of the tokenizer; some of them are also used to tag tokens. */
    private enum State {
        WHITESPACE, WORD, STRINGDQ, STRINGSQ, COMMENT, SQUARE_BRAKET, OPTION, SHORT_OPTION
    }

    /**
     * Splits a command line into tokens.
     * <p>
     * Option detection is applied to every word that is terminated inside
     * the loop (by whitespace, by a {@code #}, or by being completed by the
     * last character of the input). A token that is still pending once the
     * input is exhausted (a one-character word, a quoted string closed by
     * the very last character, or an unterminated quoted string) is emitted
     * as is, except for a lone {@code -} which becomes an empty short option.
     * A bracketed group that is never closed is discarded.
     *
     * @param input the command line, may be {@code null}
     * @return the tokens in input order; empty (never {@code null}) if the
     * input is {@code null} or contains no tokens
     */
    public static List<Token> tokenize(final String input) {
        List<Token> result = new ArrayList<>();
        if (input == null) {
            return result;
        }

        final int length = input.length();
        State state = State.WHITESPACE;
        int tokenIndex = -1;
        StringBuilder token = new StringBuilder();
        int nBrackets = 0;

        for (int i = 0; i < length; i++) {
            final char ch = input.charAt(i);
            final boolean isLastCharacter = i == length - 1;
            switch (state) {
                case WHITESPACE: {
                    if (Character.isWhitespace(ch)) {
                        // keep state
                    } else if (ch == '#') {
                        state = State.COMMENT;
                    } else if (ch == '"') {
                        // the quote itself is not part of the token
                        state = State.STRINGDQ;
                        tokenIndex = i;
                    } else if (ch == '\'') {
                        state = State.STRINGSQ;
                        tokenIndex = i;
                    } else if (ch == '[') {
                        state = State.SQUARE_BRAKET;
                        nBrackets++;
                        token.append(ch);
                        tokenIndex = i;
                    } else {
                        // any other character starts a word
                        state = State.WORD;
                        tokenIndex = i;
                        token.append(ch);
                    }
                    break;
                }

                case WORD: {
                    final boolean terminator = Character.isWhitespace(ch) || ch == '#';
                    if (terminator || isLastCharacter) {
                        if (!terminator) {
                            // the last character of the input still belongs to the word
                            token.append(ch);
                        }
                        addWordToken(result, tokenIndex, token.toString());
                        token.setLength(0);
                        state = Character.isWhitespace(ch) ? State.WHITESPACE : State.COMMENT;
                    } else {
                        // for now we do allow special chars in words
                        token.append(ch);
                    }
                    break;
                }

                case STRINGDQ:
                case STRINGSQ: {
                    final char quote = (state == State.STRINGDQ) ? '"' : '\'';
                    if (ch != quote) {
                        token.append(ch);
                    } else if (!isLastCharacter && input.charAt(i + 1) == quote) {
                        // a doubled quote is an escaped literal quote; keep state
                        token.append(quote);
                        i++;
                    } else {
                        // closing quote: continue as a word so that directly
                        // following characters are glued to the same token
                        state = State.WORD;
                    }
                    break;
                }

                case SQUARE_BRAKET: {
                    if (ch == '[') {
                        nBrackets++;
                    } else if (ch == ']') {
                        nBrackets--;
                    }
                    token.append(ch);
                    if (nBrackets == 0) {
                        // outermost bracket closed: the group is complete
                        result.add(new Token(tokenIndex, token.toString()));
                        token.setLength(0);
                        state = State.WHITESPACE;
                    }
                    break;
                }

                case COMMENT:
                    // eat ch
                    break;

                default:
                    assert false : "Unknown state in StringTokenizer state machine";
                    break;
            }
        }

        // Flush whatever is still pending at the end of the input.
        String pending = token.toString();
        if (pending.equals("-")) {
            // Get rid of the leading dash.
            result.add(new Token(tokenIndex + 1, "", State.SHORT_OPTION));
        } else if (state == State.WORD || state == State.STRINGDQ || state == State.STRINGSQ) {
            // WORD with an empty buffer can only mean that an empty quoted
            // string ("" or '') was closed by the very last character; it is
            // a real (empty) argument and must not be dropped.
            result.add(new Token(tokenIndex, pending, state));
        }

        return result;
    }

    /**
     * Classifies a completed word and appends the resulting token(s).
     *
     * @param result the list the token(s) are appended to
     * @param tokenIndex index in the input at which the word started
     * @param value the text of the word
     */
    private static void addWordToken(List<Token> result, int tokenIndex, String value) {
        if (isShortOption(value)) {
            if (value.equals("-")) {
                // Get rid of the leading dash.
                result.add(new Token(tokenIndex + 1, "", State.SHORT_OPTION));
            } else {
                // One token per character after the leading dash.
                for (int j = 1; j < value.length(); j++) {
                    result.add(new Token(tokenIndex + j, Character.toString(value.charAt(j)), State.SHORT_OPTION));
                }
            }
        } else if (isOption(value)) {
            // Get rid of the leading double dashes ("--" alone gives "").
            result.add(new Token(tokenIndex + 2, value.substring(2), State.OPTION));
        } else {
            result.add(new Token(tokenIndex, value, State.WORD));
        }
    }

    /**
     * Tests whether a word is a short option: either a lone {@code -} or a
     * single dash followed by at least one ASCII letter.
     *
     * @param input the word to test, may be {@code null}
     * @return {@code true} if the word is a short option, {@code false}
     * otherwise (including for {@code null})
     */
    public static boolean isShortOption(String input) {
        return input != null
                && (input.equals("-") || SHORT_OPTION_PATTERN.matcher(input).matches());
    }

    /**
     * Tests whether a word is a long option: either a lone {@code --} or a
     * double dash followed by at least one ASCII letter.
     *
     * @param input the word to test, may be {@code null}
     * @return {@code true} if the word is a long option, {@code false}
     * otherwise (including for {@code null})
     */
    public static boolean isOption(String input) {
        return input != null
                && (input.equals("--") || OPTION_PATTERN.matcher(input).matches());
    }

}
