/*************************************************************************/
/*                                                                       */
/*                Centre for Speech Technology Research                  */
/*                     University of Edinburgh, UK                       */
/*                         Copyright (c) 1996                            */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission to use, copy, modify, distribute this software and its    */
/*  documentation for research, educational and individual use only, is  */
/*  hereby granted without fee, subject to the following conditions:     */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*  This software may not be used for commercial purposes without        */
/*  specific prior written permission from the authors.                  */
/*                                                                       */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*                     Author :  Alan W Black                            */
/*                     Date   :  April 1996                              */
/*-----------------------------------------------------------------------*/
/*                    Token/Tokenizer class                              */
/*                                                                       */
/*=======================================================================*/

#ifndef __EST_TOKEN_H__
#define __EST_TOKEN_H__

#include <stdio.h>
#include "EST_String.h"
#include "EST_common.h"

// Default character-class strings, gathered here for easy reference
// (original note: "I can never really remember this so we'll define it
// here").  These are declarations only; the values are defined elsewhere.
// NOTE(review): presumably the defaults behind the
// EST_TokenStream::set_WhiteSpaceChars() family of setters -- confirm
// against the definitions.
extern const EST_String EST_Token_Default_WhiteSpaceChars;
extern const EST_String EST_Token_Default_SingleCharSymbols;
extern const EST_String EST_Token_Default_PunctuationSymbols;
extern const EST_String EST_Token_Default_PrePunctuationSymbols;

// An EST_Token is like a string but contains a word, punctuation, etc.
// It also tells you what its preceding whitespace was like.

// Holds one token: the token text (pname) together with the whitespace,
// pre-punctuation and punctuation strings recorded alongside it, and the
// row / column / file position it was tagged with.  Every read-only
// member is const so it can be used through a const EST_Token &.
class EST_Token {
  private:
    EST_String space;      // whitespace string, see whitespace()
    EST_String prepunc;    // pre-punctuation string, see prepunctuation()
    EST_String pname;      // the token text, see string()
    EST_String punc;       // punctuation string, see punctuation()
    int linenum;           // row, see row()
    int linepos;           // column, see col()
    int p_filepos;         // file position, see filepos()
  public:
    // Empty token with all position fields zeroed.
    EST_Token() {init();}
    // Token whose text is p.  Taken by const reference so the argument
    // is not copied an extra time on the way in.
    EST_Token(const EST_String &p) {init(); pname = p; }
    // Resets only the three position fields to 0; the string fields are
    // left untouched.
    void init() {linenum=linepos=p_filepos=0;}

    // Setters, each available for EST_String and for C strings.
    void set_token(const EST_String &p) { pname = p; }
    void set_whitespace(const EST_String &p) { space = p; }
    void set_punctuation(const EST_String &p) { punc = p; }
    void set_prepunctuation(const EST_String &p) { prepunc = p; }
    void set_token(const char *p) { pname = p; }
    void set_whitespace(const char *p) { space = p; }
    void set_punctuation(const char *p) { punc = p; }
    void set_prepunctuation(const char *p) { prepunc = p; }
    void set_row(int r) { linenum = r; }
    void set_col(int c) { linepos = c; }
    void set_filepos(int c) { p_filepos = c; }

    // The token text, and copies of it passed through downcase()/upcase().
    const EST_String &string() const { return pname; }
    EST_String lstring() const { return downcase(pname); }
    EST_String ustring() const { return upcase(pname); }
    // Implicit conversion yields a copy of the token text.
    operator EST_String() const { return string(); }

    // Position accessors.
    int row(void) const { return linenum; }
    int col(void) const { return linepos; }
    int filepos(void) const { return p_filepos; }

    // Accessors for the strings stored alongside the token text.
    const EST_String &whitespace() const { return space; }
    const EST_String &punctuation() const { return punc; }
    const EST_String &prepunctuation() const { return prepunc; }
    
    friend ostream& operator<<(ostream& s, const EST_Token &p);

    // Assignment operators are defined outside this header.
    EST_Token & operator =(const EST_Token &a);
    EST_Token & operator =(const EST_String &a);

    // Comparisons look at the token text only; whitespace, punctuation
    // and position are ignored.  Results are int (non-zero means true).
    int operator ==(const EST_String &a) const { return (pname == a); }
    int operator !=(const EST_String &a) const { return (pname != a); }
    // a must be a valid C string: it is handed straight to strcmp().
    int operator ==(const char *a) const { return (strcmp(pname,a)==0); }
    int operator !=(const char *a) const { return (strcmp(pname,a)!=0); }
};

// Tag stored in EST_TokenStream::type.  Enumerators keep their implicit
// values 0..4 in declaration order.
enum EST_tokenstream_type {
    tst_none,
    tst_file,
    tst_pipe,
    tst_string,
    tst_istream
};

// Tokenizer that hands out EST_Token objects from an input source (the
// kind of source is recorded in `type`).  Only the small accessors are
// inline here; everything else is defined outside this header.
// Objects of this class cannot be copied or assigned -- pass them by
// reference.
class EST_TokenStream{
 private:
    EST_tokenstream_type type;
    // Character classes, settable through the set_*() members below.
    EST_String WhiteSpaceChars;
    EST_String SingleCharSymbols;
    EST_String PunctuationSymbols;
    EST_String PrePunctuationSymbols;
    // Input handles / buffer.
    // NOTE(review): presumably only the one matching `type` is in use at
    // any time -- confirm in the implementation.
    FILE *fp;
    istream *is;
    int fd;
    char *buffer;
    int buffer_length;
    int pos;
    // Returned by linenum().
    // NOTE(review): named linepos but exposed as the line number -- confirm.
    int linepos;
    int p_filepos;         // returned by filepos()
    int getch(void);
    EST_TokenStream &getch(char &C);
    int peeked_charp;
    int peeked_char;       // ungot character 
    int peekch(void);
    int peeked_tokp;       // set to TRUE by peek()
    int eof_flag;
    // Quoting state, set together by set_quotes().
    int quotes;
    char quote;
    char escape;
    EST_Token current_tok; // the token returned by peek()
    void default_values(void);
    /* local buffers to save reallocating */
    int tok_wspacelen;
    char *tok_wspace;
    int tok_stufflen;
    char *tok_stuff;
    int tok_prepuncslen;
    char *tok_prepuncs;
    bool close_at_end;
    // Copying open file descriptors is not something I want to do
    // and I'm not convinced you want to either, so the following
    // cause a compiler error if you try to do it. Pass by reference
    // instead.
    // Both are declared private and deliberately left undefined.  The
    // assignment operator is needed as well as the copy constructor:
    // otherwise the compiler supplies a public one that copies the raw
    // FILE * / char * members member-by-member.
    EST_TokenStream(EST_TokenStream &s);
    EST_TokenStream &operator =(const EST_TokenStream &s);
  public:
    EST_TokenStream();
    ~EST_TokenStream();

    // Attach the stream to an input source.
    // NOTE(review): the int results are presumably status codes -- confirm
    // the convention against the definitions.
    int open(const EST_String &filename);
    int open(FILE *ofp, bool close_when_finished);
    int open(istream &newis);
    int open_string(const EST_String &newbuffer);

    // Replace the character classes used by this stream.
    void set_WhiteSpaceChars(const EST_String &ws) { WhiteSpaceChars = ws; }
    void set_SingleCharSymbols(const EST_String &sc) { SingleCharSymbols = sc; }
    void set_PunctuationSymbols(const EST_String &ps)
        { PunctuationSymbols = ps; }
    void set_PrePunctuationSymbols(const EST_String &ps)
        { PrePunctuationSymbols = ps; }

    // Switches quoted mode on, with q as the quote character and e as the
    // escape character.  There is no inline counterpart to switch it off.
    void set_quotes(char q, char e) { quotes = TRUE; quote = q; escape = e; }
    // Non-zero once set_quotes() has been called.
    int quoted_mode(void) const { return quotes; }
    int linenum(void) const { return linepos; }

    // Token retrieval (defined elsewhere).
    EST_TokenStream &get(EST_Token &t);
    EST_Token &get();
    EST_Token get_upto(const EST_String &s);
    EST_Token get_upto_eoln(void);
    // Returns current_tok without consuming it: calls get() first unless
    // a token has already been peeked, then marks the token as peeked.
    // NOTE(review): relies on get() leaving its result in current_tok and
    // honouring peeked_tokp -- confirm in the implementation.
    EST_Token &peek(void)
    {
        if (!peeked_tokp)
            get();
        peeked_tokp = TRUE;
        return current_tok;
    }
    int eof();
    int eoln();
    int filepos(void) const { return p_filepos; }
    void close(void);
    int restart(void);

    EST_TokenStream & operator >>(EST_Token &p);
    EST_TokenStream & operator >>(EST_String &p);
    friend ostream& operator<<(ostream& s, EST_TokenStream &p);
};

// Declaration only; the definition lives elsewhere.
// Defaults: quote is a double-quote character, escape is a backslash,
// and force is 0.
// NOTE(review): judging by the name, this builds a quoted copy of s using
// the given quote and escape strings, with a non-zero force presumably
// quoting even when s would not otherwise need it -- confirm against the
// definition.
EST_String quote_string(const EST_String &s,
			const EST_String &quote = "\"", 
			const EST_String &escape = "\\", 
			int force=0);

#endif // __EST_TOKEN_H__
