1 package org.lsst.ccs.command;
2
3 import java.io.Serializable;
4 import java.util.ArrayList;
5 import java.util.List;
6
7
8
9
10
/**
 * Splits an input string into tokens with a small character-driven state machine.
 *
 * <p>Rules implemented by {@link #tokenize(String)}:
 * <ul>
 *   <li>Tokens are separated by whitespace (as defined by
 *       {@link Character#isWhitespace(char)}).</li>
 *   <li>A token that begins with {@code "} or {@code '} is a quoted string; inside it
 *       whitespace and {@code #} are ordinary characters, and the quote character
 *       itself is written by doubling it.</li>
 *   <li>After the closing quote the token is not finished yet: any characters that
 *       follow directly, up to the next whitespace or {@code #}, are appended to it.</li>
 *   <li>A {@code #} outside a quoted string starts a comment that extends to the end
 *       of the input.</li>
 * </ul>
 */
class StringTokenizer {

    /**
     * A single token: its text plus the index in the input at which it started.
     * For a quoted token the index is that of the opening quote.
     */
    static class Token implements Serializable {

        private int location;
        private String string;

        /**
         * @param location index of the token's first character in the input
         * @param string   token text with quoting already removed
         */
        Token(int location, String string) {
            this.location = location;
            this.string = string;
        }

        /** @return index in the input at which this token started */
        int getLocation() {
            return location;
        }

        /** @return the text of this token */
        String getString() {
            return string;
        }
    }

    /** Scanner states: between tokens, in a bare word, in a quoted string, or in a comment. */
    private enum State {
        WHITESPACE, WORD, STRINGDQ, STRINGSQ, COMMENT
    }

    /**
     * Breaks {@code input} into tokens.
     *
     * @param input the text to split; may be {@code null}
     * @return the tokens in order of appearance; an empty list when {@code input} is
     *         {@code null} or holds no tokens. A quoted string that is still open at
     *         the end of the input is returned with whatever was collected so far.
     */
    public static List<Token> tokenize(final String input) {
        final List<Token> tokens = new ArrayList<>();
        if (input == null) {
            return tokens;
        }

        final int length = input.length();
        final StringBuilder pending = new StringBuilder();
        State mode = State.WHITESPACE;
        int start = -1;

        int pos = 0;
        while (pos < length) {
            final char c = input.charAt(pos);
            switch (mode) {
                case WHITESPACE:
                    if (!Character.isWhitespace(c)) {
                        if (c == '#') {
                            mode = State.COMMENT;
                        } else {
                            // Every other character opens a token at this position.
                            start = pos;
                            if (c == '"') {
                                mode = State.STRINGDQ;
                            } else if (c == '\'') {
                                mode = State.STRINGSQ;
                            } else {
                                mode = State.WORD;
                                pending.append(c);
                            }
                        }
                    }
                    break;

                case WORD:
                    if (c == '#' || Character.isWhitespace(c)) {
                        emit(tokens, start, pending);
                        mode = (c == '#') ? State.COMMENT : State.WHITESPACE;
                    } else {
                        // Quote characters are not special here; they are kept literally.
                        pending.append(c);
                    }
                    break;

                case STRINGDQ:
                case STRINGSQ: {
                    final char quote = (mode == State.STRINGDQ) ? '"' : '\'';
                    if (c != quote) {
                        pending.append(c);
                    } else if (pos + 1 < length && input.charAt(pos + 1) == quote) {
                        // Doubled quote: keep one and step over its twin.
                        pending.append(quote);
                        pos++;
                    } else {
                        // Closing quote: carry on as a word so adjacent text joins the token.
                        mode = State.WORD;
                    }
                    break;
                }

                case COMMENT:
                    // Everything up to the end of the input is discarded.
                    break;

                default:
                    assert false : "Unknown state in StringTokenizer state machine";
                    break;
            }
            pos++;
        }

        // A token still being collected when the input ran out is kept as well.
        if (mode != State.WHITESPACE && mode != State.COMMENT) {
            emit(tokens, start, pending);
        }

        return tokens;
    }

    /** Appends the buffered text as a token starting at {@code start} and clears the buffer. */
    private static void emit(List<Token> tokens, int start, StringBuilder pending) {
        tokens.add(new Token(start, pending.toString()));
        pending.setLength(0);
    }
}