// file:    Tokenizer.java
// author:  Robert Keller
// purpose: Input Tokenizer for Poly package

package Poly;

import java.io.*;
import java.lang.*;

/**
 * Tokenizer provides a tokenizer of the class Poly.
 *
 * In addition to tokenizing for values of specific types, an entire
 * Poly can be read by one method call, assuming the input is in the
 * form of an S expression or an R expression.
 *
 * In an S expression, the elements of a List are shown in parentheses
 * with a space between each element.  (The elements can themselves be
 * Lists.)
 *
 * In an R expression, the elements are shown in square brackets, with
 * an optional comma separator between them.
 */
public class Tokenizer extends StreamTokenizer {

    /** Shared object returned by the reading methods on end-of-file. */
    public final static EOF eof = new EOF();

    /** The long value of the current token; meaningful when ttype is TT_LONG. */
    public long lval;

    /** Token type indicating that the current token parsed as a long. */
    public final static int TT_LONG = -4;

    /**
     * Constructs a tokenizer reading from the given input stream.
     *
     * The inherited syntax table is cleared and rebuilt so that characters
     * 33..255 form words, characters 0..' ' are whitespace, and the five
     * characters ( ) [ , ] are each returned as single-character tokens.
     * Number recognition is done in nextToken() rather than by the parent.
     *
     * @param in the stream to tokenize
     */
    public Tokenizer(InputStream in) {
        super(in);

        resetSyntax();              // start from an empty syntax table
        wordChars(33, 255);
        whitespaceChars(0, ' ');
        ordinaryChar('(');          // punctuation comes back as its own token
        ordinaryChar(')');
        ordinaryChar('[');
        ordinaryChar(',');
        ordinaryChar(']');
    }

    /**
     * Reports an exception on System.err.  Can be overridden to handle
     * exceptions differently; every reading method of this class routes
     * its failures through here.
     *
     * @param e the exception to report
     */
    public void exceptionHandler(Exception e) {
        System.err.print("Poly.Tokenizer caught ");
        if (e instanceof IOException) {
            System.err.println("IOException " + e);
        } else if (e instanceof eofException) {
            System.err.println("eofException " + e);
        } else if (e instanceof NumberFormatException) {
            System.err.println("NumberFormatException " + e);
        } else {
            System.err.println("exception " + e);
        }
    }

    /**
     * Gets the next token and classifies word tokens as numbers where
     * possible.
     *
     * A word that parses as a long sets lval and nval and yields TT_LONG;
     * otherwise a word that parses as a double sets nval and yields
     * TT_NUMBER; otherwise it stays TT_WORD.  In all three cases sval
     * keeps the original text.
     *
     * @return TT_LONG, TT_NUMBER, TT_WORD, TT_EOL, TT_EOF, or the character
     *         code of a single-character token.  An IOException is passed
     *         to exceptionHandler and reported to the caller as TT_EOF.
     */
    @Override
    public int nextToken() {
        int token;
        try {
            token = super.nextToken();
        } catch (IOException e) {
            exceptionHandler(e);
            return TT_EOF;
        }

        // Only word tokens that are not a lone "-" are candidates for
        // numeric conversion; everything else is passed on as-is.
        if (token == TT_WORD && !sval.equals("-")) {
            try {
                lval = Long.parseLong(sval);
                nval = (double) lval;
                ttype = TT_LONG;
            } catch (NumberFormatException notLong) {
                try {
                    nval = Double.parseDouble(sval);
                    ttype = TT_NUMBER;
                } catch (NumberFormatException notDouble) {
                    // neither long nor double: ttype is already TT_WORD
                }
            }
        }
        return ttype;
    }

    /**
     * Like nextToken(), but skips end-of-line tokens.
     *
     * @return the type of the first token that is not TT_EOL
     */
    public int nextFullToken() {
        int token;
        do {
            token = nextToken();
        } while (token == TT_EOL);
        return token;
    }

    /**
     * Gets the next token and interprets it as a double.
     *
     * @return the value of a TT_NUMBER or TT_LONG token; 0 if end-of-file
     *         or a non-numeric token was found (after the problem has been
     *         passed to exceptionHandler)
     */
    public double nextDouble() {
        try {
            switch (nextFullToken()) {
                case TT_EOF:
                    throw new eofException();
                case TT_NUMBER:
                case TT_LONG:
                    return nval;
                default:
                    throw new NumberFormatException("Double requested");
            }
        } catch (Exception e) {
            exceptionHandler(e);
        }
        return 0;
    }

    /**
     * Gets the next token and interprets it as a long.
     *
     * @return the value of a TT_LONG token; 0 if end-of-file or any other
     *         kind of token was found (after the problem has been passed
     *         to exceptionHandler)
     */
    public long nextLong() {
        try {
            switch (nextFullToken()) {
                case TT_EOF:
                    throw new eofException();
                case TT_LONG:
                    return lval;
                default:
                    throw new NumberFormatException("Long requested");
            }
        } catch (Exception e) {
            exceptionHandler(e);
        }
        return 0;
    }

    /**
     * Gets the next token and returns its text.
     *
     * @return sval of the next token; the empty string on end-of-file
     *         (after an eofException has been passed to exceptionHandler)
     */
    public String nextString() {
        try {
            switch (nextFullToken()) {
                case TT_EOF:
                    throw new eofException();
                default:
                    return sval;
            }
        } catch (Exception e) {
            exceptionHandler(e);
        }
        return "";
    }

    /**
     * Gets the next token and returns it as an atom.
     *
     * @return a Long for TT_LONG, a Double for TT_NUMBER, a String copy of
     *         sval otherwise, or eof on end-of-file
     */
    public Object nextAtom() {
        try {
            switch (nextFullToken()) {
                case TT_EOF:
                    return eof;
                case TT_LONG:
                    return Long.valueOf(lval);
                case TT_NUMBER:
                    return Double.valueOf(nval);
                default:
                    // NOTE(review): for a punctuation token sval is
                    // presumably null, so this copy fails and the failure is
                    // reported through exceptionHandler, yielding eof.
                    return new String(sval);
            }
        } catch (Exception e) {
            exceptionHandler(e);
        }
        return eof;
    }

    /**
     * Gets the next S expression from the input stream.
     *
     * An opening parenthesis starts a list that is read up to its matching
     * closing parenthesis.  A stray ')' yields List.nil, and the R-expression
     * punctuation [ ] , is returned as Character atoms.
     *
     * @return the expression read, or an object of class EOF on end-of-file
     */
    public Object nextSexp() {
        try {
            int c = nextFullToken();
            switch (c) {
                case TT_EOF:
                    return eof;
                case '(':
                    return getRestSexp();
                case ')':
                    return List.nil;
                case '[':
                    return Character.valueOf('[');
                case ']':
                    return Character.valueOf(']');
                case ',':
                    return Character.valueOf(',');
                case TT_LONG:
                    return Long.valueOf(lval);
                case TT_NUMBER:
                    return Double.valueOf(nval);
                default:
                    return new String(sval);
            }
        } catch (Exception e) {
            exceptionHandler(e);
        }
        return eof;
    }

    /**
     * Reads the remaining elements of an S expression list whose opening
     * parenthesis has already been consumed.
     *
     * @return the list of remaining elements; List.nil at the closing
     *         parenthesis, or after a failure (including end-of-file inside
     *         the list) has been passed to exceptionHandler
     */
    List getRestSexp() {
        try {
            int c = nextFullToken();
            switch (c) {
                case TT_EOF:
                    throw new eofException();
                case ')':
                    return List.nil;
                case '(':
                    // first call reads the nested list, second the rest of this one
                    return List.cons(getRestSexp(), getRestSexp());
                case '[':
                    return List.cons(Character.valueOf('['), getRestSexp());
                case ']':
                    return List.cons(Character.valueOf(']'), getRestSexp());
                case ',':
                    return List.cons(Character.valueOf(','), getRestSexp());
                case TT_LONG:
                    return List.cons(Long.valueOf(lval), getRestSexp());
                case TT_NUMBER:
                    return List.cons(Double.valueOf(nval), getRestSexp());
                default:
                    return List.cons(new String(sval), getRestSexp());
            }
        } catch (Exception e) {
            exceptionHandler(e);
        }
        return List.nil;
    }

    /**
     * Gets the next R expression from the input stream.
     *
     * An opening square bracket starts a list that is read up to its
     * matching closing bracket.  A stray ']' or ',' yields List.nil, and
     * parentheses are returned as Character atoms.
     *
     * @return the expression read, or an object of class EOF on end-of-file
     */
    public Object nextRexp() {
        try {
            int c = nextFullToken();
            switch (c) {
                case TT_EOF:
                    return eof;
                case '[':
                    return getRestRexp();
                case ']':
                    return List.nil;
                case ',':
                    // NOTE(review): a top-level comma produces an empty list,
                    // whereas inside a list commas are skipped - confirm intended.
                    return List.nil;
                case '(':
                    return Character.valueOf('(');
                case ')':
                    return Character.valueOf(')');
                case TT_LONG:
                    return Long.valueOf(lval);
                case TT_NUMBER:
                    return Double.valueOf(nval);
                default:
                    return new String(sval);
            }
        } catch (Exception e) {
            exceptionHandler(e);
        }
        return eof;
    }

    /**
     * Reads the remaining elements of an R expression list whose opening
     * bracket has already been consumed.  Commas between elements are
     * ignored.
     *
     * @return the list of remaining elements; List.nil at the closing
     *         bracket, or after a failure (including end-of-file inside
     *         the list) has been passed to exceptionHandler
     */
    List getRestRexp() {
        try {
            int c = nextFullToken();
            switch (c) {
                case TT_EOF:
                    throw new eofException();
                case ']':
                    return List.nil;
                case '[':
                    // first call reads the nested list, second the rest of this one
                    return List.cons(getRestRexp(), getRestRexp());
                case ',':
                    return getRestRexp();   // ignore ','
                case '(':
                    return List.cons(Character.valueOf('('), getRestRexp());
                case ')':
                    return List.cons(Character.valueOf(')'), getRestRexp());
                case TT_LONG:
                    return List.cons(Long.valueOf(lval), getRestRexp());
                case TT_NUMBER:
                    return List.cons(Double.valueOf(nval), getRestRexp());
                default:
                    return List.cons(new String(sval), getRestRexp());
            }
        } catch (Exception e) {
            // report the failure the same way getRestSexp does instead of
            // dropping it silently
            exceptionHandler(e);
        }
        return List.nil;
    }

    /**
     * Test method: loops through standard input, printing out each type of
     * token.  Terminates on end-of-file.
     *
     * Note: This does not test S- or R-expressions.  There is another file
     * for that: testList.java.
     *
     * @param args ignored
     */
    static public void main(String[] args) {
        // Create a tokenizer and set parameters
        Tokenizer input = new Tokenizer(System.in);
        input.eolIsSignificant(true);

        System.out.println("ready");

        for (;;) {
            // Case labels use the simple constant names: a constant reached
            // through the variable (input.TT_EOF) is not a constant expression.
            switch (input.nextToken()) {
                case TT_EOF:
                    System.out.println("EOF");
                    return;                 // end-of-file; quit

                case TT_EOL:
                    System.out.println("EOL");
                    break;

                case TT_WORD:
                    System.out.print("word: ");
                    System.out.println(input.sval);
                    break;

                case TT_NUMBER:
                    System.out.print("double: ");
                    System.out.println(input.nval);
                    System.out.print("as a string: ");
                    System.out.println(input.sval);
                    break;

                case TT_LONG:
                    System.out.print("long: ");
                    System.out.println(input.lval);
                    System.out.print("as a double: ");
                    System.out.println(input.nval);
                    System.out.print("as a string: ");
                    System.out.println(input.sval);
                    break;

                default:
                    System.out.print("other: ");
                    System.out.println(input.sval);
                    break;
            }
        }
    }
}