View Javadoc

1   package org.lsst.ccs.command;
2   
3   import java.io.Serializable;
4   import java.util.ArrayList;
5   import java.util.List;
6   
/**
 * Splits a command string into a list of tokens.
 * <p>
 * Tokens are separated by whitespace. A token that begins with a single or
 * double quote may contain whitespace and {@code #}; inside such a token a
 * doubled quote character stands for one literal quote. An unquoted
 * {@code #} starts a comment that swallows the remainder of the input.
 *
 * @author tonyj
 */
class StringTokenizer {

    /**
     * The text of one token together with the index in the input at which the
     * token began (for a quoted token, the index of its opening quote).
     * <p>
     * Open question from the original author: do we really need this? It was
     * used by cliche to be able to provide feedback on where in the command
     * line an illegal token was found, but we do not currently use this
     * functionality.
     */
    static class Token implements Serializable {

        private int location;
        private String string;

        Token(int location, String string) {
            this.location = location;
            this.string = string;
        }

        /**
         * @return index in the input string at which this token started
         */
        int getLocation() {
            return location;
        }

        /**
         * @return the token text, with enclosing quotes removed and doubled
         * quotes collapsed
         */
        String getString() {
            return string;
        }
    }

    /** Scanner modes of the tokenizer state machine. */
    private enum State {

        /** Between tokens; nothing is being accumulated. */
        GAP,
        /** Accumulating an unquoted word (also entered after a closing quote). */
        WORD,
        /** Inside a double-quoted section. */
        DOUBLE_QUOTED,
        /** Inside a single-quoted section. */
        SINGLE_QUOTED,
        /** After an unquoted '#'; every remaining character is discarded. */
        COMMENT
    }

    /**
     * Breaks the given input into tokens.
     * <ul>
     * <li>Whitespace between tokens is skipped.</li>
     * <li>A token starting with {@code "} or {@code '} runs to the matching
     * closing quote; two consecutive quote characters inside it produce one
     * literal quote. Text immediately following the closing quote is appended
     * to the same token.</li>
     * <li>Quote characters met in the middle of an unquoted word are kept as
     * ordinary characters.</li>
     * <li>A {@code #} outside quotes ends the current token and discards the
     * rest of the input, line breaks included.</li>
     * <li>A quoted section that is never closed still yields a token holding
     * whatever was collected.</li>
     * </ul>
     *
     * @param input the command text to split; may be {@code null}
     * @return the tokens in order of appearance; an empty, modifiable list
     * when {@code input} is {@code null} or contains no tokens
     */
    public static List<Token> tokenize(final String input) {
        final List<Token> tokens = new ArrayList<>();
        if (input == null) {
            return tokens;
        }

        final int length = input.length();
        final StringBuilder pending = new StringBuilder();
        State mode = State.GAP;
        int start = -1; // index at which the token being built began

        int pos = 0;
        while (pos < length) {
            final char c = input.charAt(pos);
            switch (mode) {
                case GAP:
                    if (c == '#') {
                        mode = State.COMMENT;
                    } else if (!Character.isWhitespace(c)) {
                        // Any other non-blank character opens a new token.
                        start = pos;
                        if (c == '"') {
                            mode = State.DOUBLE_QUOTED;
                        } else if (c == '\'') {
                            mode = State.SINGLE_QUOTED;
                        } else {
                            mode = State.WORD;
                            pending.append(c);
                        }
                    }
                    break;

                case WORD:
                    if (c == '#' || Character.isWhitespace(c)) {
                        // Both terminators emit the word; they differ only in
                        // the mode that follows.
                        tokens.add(new Token(start, pending.toString()));
                        pending.setLength(0);
                        mode = (c == '#') ? State.COMMENT : State.GAP;
                    } else {
                        // Special characters, quotes included, are allowed
                        // inside words.
                        pending.append(c);
                    }
                    break;

                case DOUBLE_QUOTED:
                case SINGLE_QUOTED: {
                    final char quote = (mode == State.DOUBLE_QUOTED) ? '"' : '\'';
                    if (c != quote) {
                        pending.append(c);
                    } else if (pos + 1 < length && input.charAt(pos + 1) == quote) {
                        // Doubled quote: keep one and step over its twin.
                        pending.append(quote);
                        pos++;
                    } else {
                        // Closing quote: carry on as a word so that adjacent
                        // text joins the same token.
                        mode = State.WORD;
                    }
                    break;
                }

                case COMMENT:
                    // Discard the character.
                    break;

                default:
                    assert false : "Unknown state in StringTokenizer state machine";
                    break;
            }
            pos++;
        }

        // Emit a token still being built when the input ran out, including
        // one whose quote was never closed.
        if (mode != State.GAP && mode != State.COMMENT) {
            tokens.add(new Token(start, pending.toString()));
        }

        return tokens;
    }
}