// file: Tokenizer.java // author: Robert Keller // purpose: Input Tokenizer for polya package package polya; import java.io.*; import java.lang.*; /** * Tokenizer provides a tokenizer of the package polya. *
* In addition to tokenizing for values of specific types, an entire * Poly can be read by one method call, assuming the input is in the * for of an S expression or an R expression. *
* In an S expression, the elements of a Polylist are shown in parentheses * with a space between each element. (The elements can themselves be * Polylists.) *
* In an R expression, the elements are shown in square brackets, with * an optional comma separator between them. */ public class Tokenizer extends StreamTokenizer { public final static EOF eof = new EOF(); public long lval; // the long value, if used public final static int TT_LONG = -4; // indicates a long token value /** construct tokenizer from input stream */ public Tokenizer(InputStream I) { this(new InputStreamReader(I)); } /** construct tokenizer from Reader */ public Tokenizer(Reader R) { super(new BufferedReader(R)); // initialize resetSyntax(); // These call parent methods slashSlashComments(true); slashStarComments(true); eolIsSignificant(false); ordinaryChar('['); ordinaryChar(']'); ordinaryChar(','); } /** * exceptionHandler can be over-ridden to handle IO exceptions */ public void exceptionHandler( Exception e ) { System.err.print("polya.Tokenizer caught "); if( e instanceof IOException ) { System.err.println("IOException " + e); } else if( e instanceof eofException ) { System.err.println("eofException " + e); } else if( e instanceof NumberFormatException ) { System.err.println("NumberFormatException " + e); } else { System.err.println("exception " + e); } } boolean isWhiteSpace(int c) { switch( c ) { case ' ': case '\t': case '\n': case '\r': case '\f': return true; default: return false; } } // hand-implemented get/putBack system, since the one in StreamTokenizer // doesn't work int buff; boolean valid = false; void get() { if( valid ) { valid = false; ttype = buff; return; } try { super.nextToken(); } catch( IOException e ) { exceptionHandler(e); ttype = TT_EOF; } } void putBack(int c) { valid = true; buff = c; } public void getSkippingWhiteSpace() { get(); while( isWhiteSpace(ttype) ) { get(); } } /** * get token, indicating TT_LONG, TT_NUMBER, TT_WORD, or TT_EOF */ public int nextToken() { getSkippingWhiteSpace(); switch( ttype ) // parens by themselves are tokens { case '[': case ']': case ',': case '(': case ')': case TT_EOF: return ttype; } StringBuffer buff = new StringBuffer(); buff.append((char)ttype); get(); out: while( ttype != TT_EOF && !isWhiteSpace(ttype) ) { switch( ttype ) { case '[': case ']': case ',': case '(': case ')': break out; default: buff.append((char)ttype); } get(); } putBack(ttype); sval = buff.toString(); try // try token as a long (TT_LONG) { Long as_long = new Long(sval); lval = as_long.longValue(); ttype = TT_LONG; return ttype; } catch( NumberFormatException e ) { // exception converting to long try // try token as double (TT_NUMBER) { Double as_double = new Double(sval); nval = as_double.doubleValue(); ttype = TT_NUMBER; return ttype; } catch( NumberFormatException f ) // exception converting to double { // default to word (TT_WORD) ttype = TT_WORD; return ttype; } } } /** * get next S expression from input stream * returns an object of class EOF on end-of-file */ public Object nextSexp() { try { int c = nextToken(); switch( c ) { case TT_EOF: return eof; case ')': { return Polylist.nil; } case '(': { return getRestSexp(); } case '[': { return "["; } case ']': { return "]"; } case ',': { return ","; } case TT_LONG: return new Long(lval); case TT_NUMBER: return new Double(nval); default: return new String(sval); } } catch( Exception e ) { exceptionHandler(e); } return eof; } Polylist getRestSexp() { try { int c = nextToken(); switch( c ) { case TT_EOF: return Polylist.nil; case ')': return Polylist.nil; case '(': { return Polylist.cons(getRestSexp(), getRestSexp()); } case '[': { return Polylist.cons("[", getRestSexp()); } case ']': { return Polylist.cons("]", getRestSexp()); } case TT_LONG: return Polylist.cons(new Long(lval), getRestSexp()); case TT_NUMBER: return Polylist.cons(new Double(nval), getRestSexp()); default: return Polylist.cons(new String(sval), getRestSexp()); } } catch( Exception e ) { exceptionHandler(e); } return Polylist.nil; } /** get next R expression from input stream */ public Object nextRexp() { int c = 0; try { c = nextToken(); } catch( Exception e ) { exceptionHandler(e); } switch( c ) { case '[': { return getRestRexp(); } case TT_EOF: // shouldn't happen { return eof; } case ']': { return Polylist.nil; } case ',': { return Polylist.nil; } case '(': return new String("("); case ')': return new String(")"); case TT_LONG: return new Long(lval); case TT_NUMBER: return new Double(nval); default: return sval; } } Polylist getRestRexp() { int c = 0; try { c = nextToken(); } catch( Exception e ) { exceptionHandler(e); } switch( c ) { case TT_EOF: return Polylist.nil; // shouldn't happen case ']': return Polylist.nil; case '[': { return Polylist.cons(getRestRexp(), getRestRexp()); } case ',': return getRestRexp(); case '(': return Polylist.cons(new String("("), getRestRexp()); case ')': return Polylist.cons(new String(")"), getRestRexp()); case TT_LONG: return Polylist.cons(new Long(lval), getRestRexp()); case TT_NUMBER: return Polylist.cons(new Double(nval), getRestRexp()); default: return Polylist.cons(sval, getRestRexp()); } } /** * Test method: * loops through input, printing out each type of token. * Terminates on end-of-filek, with test of S expressions, then R * expressions. */ static public void main(String[] args) { // Create a tokenizer and set parameters Tokenizer input = new Tokenizer(System.in); input.eolIsSignificant(true); System.out.println("ready"); outer: for( ;; ) { switch( input.nextToken() ) { case Tokenizer.TT_EOF: System.out.println("EOF"); break outer; // end-of-file; quit case Tokenizer.TT_EOL: System.out.println("EOL"); break; case Tokenizer.TT_WORD: System.out.print("word: "); System.out.println(input.sval); break; case Tokenizer.TT_NUMBER: System.out.print("double: "); System.out.println(input.nval); System.out.print("as a string: "); System.out.println(input.sval); break; case Tokenizer.TT_LONG: System.out.print("long: "); System.out.println(input.lval); System.out.print("as a double: "); System.out.println(input.nval); System.out.print("as a string: "); System.out.println(input.sval); break; default: System.out.print("other: "); System.out.println(input.sval); break; } } } }