/*
    
                                                                       
                   Project:  HTML 3.0 Browser                            
                    Branch:  Parser                                      
                      File:  HTML_LEX.C                                  
               Description:  Lexical analysis routine                    
                    Author:  D.Bergelson                                 
                      Date:  06.03.96                                    
                     Ready:  [?]                                         
                                                                       
       */

#include <io.h>
#include <ctype.h>
#include <string.h>
#include <alloc.h>

#include "parser\htm_cnst.h"
#include "parser\htm_glob.h"
#include "parser\htm_tokn.h"
#include "parser\htm_err.h"
#include "parse.h"
#pragma hdrstop

/* Semantic value of the current token; yylex() stores a heap-allocated */
/* copy of the token text in yylval.pchar before it returns.            */
extern YYSTYPE yylval;
/* Lexer states: text, preformatted text, HTML KEYWORD (tag name),      */
/* attribute KEYWORD, attribute value, inside a quoted attribute value. */
enum {TEXT=0, PRE, KEYWORD, ATTR, AVALUE, IN_QUOTE};
/* Printable state names, indexed by the enum above (trace output). */
char *states [ 6] = {"TEXT", "PRE", "KEYWORD", "ATTR", "AVALUE", "IN_QUOTE"};
/* Printable token-type names; yylex() indexes this with (tktyp - 257). */
/* NOTE(review): this assumes the token codes in parse.h start at 257   */
/* and follow exactly this order - confirm against the grammar.         */
char *tktypes[10] = {"_OpenTag", "_CloseTag", "_EndTag", "_C_KEYWORD",
                     "_S_KEYWORD", "_A_KEYWORD", "_V_KEYWORD", "_NUM",
                     "_IDENTIFIER", "_QUOTED_ATTR"};

/* in_close / in_open: set by nexttok() when "</" or "<" is recognised, */
/* both cleared again when the closing ">" is returned.                 */
int in_close   = FALSE;
int in_open    = FALSE;
/* Maintained by yylex(): TRUE while a P element is considered open. */
int opened_par = FALSE;
/* Current lexer state, and the state restored when a tag ends. */
int state     = TEXT;
int old_state = TEXT;
/* Not referenced anywhere in this file. */
int cUKSZ = 0;

/* Replay buffer used by yylex(): when USE_BUFFER is TRUE, tokens are   */
/* taken from lex_buff[lex_buff_size - 1] downwards instead of being    */
/* read from the input.                                                 */
int USE_BUFFER = FALSE;
int lex_buff_size = 0;
typedef struct {
   int  tktyp;
   char tkval[NMSZ];
} tbuff;
tbuff lex_buff[5];

int  c;       /* current input character; always one character ahead  */
int  lineno;  /* current line number, advanced by nlproc()             */
long charno;  /* character counter, reset to 0 by nlproc()             */
long f_size;  /* only referenced from commented-out code in lexinit()  */
int  icm;     /* set to 0 by lexinit(); not otherwise used in this file */
                    /* !!!!!! */
char comment[CMSZ]; /* not referenced in this file. NOTE(review): the original (non-English) remark here was lost to an encoding error */
                    /* !!!!!! */

/* One row of a keyword table searched by bsearch(). */
typedef struct {
    char name[TKSZ];    /* keyword text; compared with strcmp() */
    int  kw_token;      /* token type nexttok() returns for this keyword */
    int  in_paragraph;  /* read by yylex() for keyword_table rows only: non-zero means the tag may stay inside an open P */
} kw_table;

/********** functions declarations ************/
/* Full prototypes (named parameters, explicit void) so that the       */
/* compiler checks every call; "()" leaves the arguments unspecified.  */
void fixfile(FILE *filetofix, char *fname);
int  nextchar(FILE *so, FILE *li);
void nlproc (FILE *li);
int  bsearch (char *word, kw_table *word_table, int tbsize);
int  nexttok (char *val);
void lexinit(void);
int  yylex(void);
/**********************************************/

/*
 * Possible KEYWORDS - directives (HTML tag names).
 *
 * Columns: tag name, token type returned by nexttok(), and in_paragraph
 * (read by yylex(): TRUE means the tag may appear inside an open P,
 * FALSE makes yylex() close the paragraph first).
 *
 * The rows MUST stay sorted in strcmp() order: bsearch() does a binary
 * search over this table. Names are upper case because nexttok() applies
 * toupper() to tag names before the lookup.
 * NOTE(review): 71 rows are listed; KWSZ is defined elsewhere - confirm
 * it is exactly 71, otherwise trailing zero-filled rows break the search.
 */
kw_table keyword_table[KWSZ] = {
    {"A",           _C_KEYWORD, TRUE  },
    {"ADDRESS",     _C_KEYWORD, TRUE  },
    {"AREA",        _S_KEYWORD, TRUE  },
    {"B",           _C_KEYWORD, TRUE  },
    {"BASE",        _S_KEYWORD, FALSE },
    {"BASEFONT",    _C_KEYWORD, TRUE  },
    {"BIG",         _C_KEYWORD, TRUE  },
    {"BLINK",       _C_KEYWORD, TRUE  },
    {"BLOCKQUOTE",  _C_KEYWORD, TRUE  },
    {"BODY",        _C_KEYWORD, FALSE }, /* _BODY */
    {"BR",          _S_KEYWORD, TRUE  },
    {"CAPTION",     _C_KEYWORD, FALSE },
    {"CENTER",      _C_KEYWORD, TRUE  },
    {"CITE",        _C_KEYWORD, TRUE  },
    {"CODE",        _C_KEYWORD, TRUE  },
    {"DD",          _S_KEYWORD, FALSE },
    {"DFN",         _C_KEYWORD, TRUE  },
    {"DIR",         _C_KEYWORD, FALSE },
    {"DIV",         _C_KEYWORD, TRUE  },
    {"DL",          _C_KEYWORD, FALSE },
    {"DT",          _S_KEYWORD, FALSE },
    {"EM",          _C_KEYWORD, TRUE  },
    {"EMBED",       _S_KEYWORD, TRUE  },
    {"FONT",        _C_KEYWORD, TRUE  },
    {"FORM",        _C_KEYWORD, FALSE },
    {"FRAME",       _S_KEYWORD, FALSE },
    {"FRAMESET",    _C_KEYWORD, FALSE }, /* _FRAMESET  */
    {"H1",          _C_KEYWORD, TRUE  },
    {"H2",          _C_KEYWORD, TRUE  },
    {"H3",          _C_KEYWORD, TRUE  },
    {"H4",          _C_KEYWORD, TRUE  },
    {"H5",          _C_KEYWORD, TRUE  },
    {"H6",          _C_KEYWORD, TRUE  },
    {"HEAD",        _C_KEYWORD, FALSE }, /* _HEAD */
    {"HR",          _S_KEYWORD, TRUE  },
    {"HTML",        _C_KEYWORD, FALSE }, /* _HTML */
    {"I",           _C_KEYWORD, TRUE  },
    {"IMG",         _S_KEYWORD, TRUE  },
    {"INPUT",       _S_KEYWORD, FALSE },
    {"ISINDEX",     _S_KEYWORD, FALSE },
    {"KBD",         _C_KEYWORD, TRUE  },
    {"LI",          _S_KEYWORD, FALSE },
    {"LINK",        _S_KEYWORD, FALSE },
    {"MAP",         _C_KEYWORD, TRUE  },
    {"MENU",        _C_KEYWORD, FALSE },
    {"META",        _S_KEYWORD, TRUE  },
    {"NEXTID",      _S_KEYWORD, FALSE },
    {"NOBR",        _C_KEYWORD, TRUE  },
    {"NOFRAMES",    _C_KEYWORD, FALSE },
    {"OL",          _C_KEYWORD, FALSE },
    {"OPTION",      _S_KEYWORD, FALSE },
    {"P",           _C_KEYWORD, FALSE }, /* NB!!! a new P closes the currently open P (see yylex) */
    {"PRE",         _C_KEYWORD, FALSE },
    {"S",           _C_KEYWORD, TRUE  },
    {"SAMP",        _C_KEYWORD, TRUE  },
    {"SELECT",      _C_KEYWORD, FALSE },
    {"SMALL",       _C_KEYWORD, TRUE  },
    {"STRONG",      _C_KEYWORD, TRUE  },
    {"SUB",         _C_KEYWORD, TRUE  },
    {"SUP",         _C_KEYWORD, TRUE  },
    {"TABLE",       _C_KEYWORD, FALSE },
    {"TD",          _C_KEYWORD, FALSE },
    {"TEXTAREA",    _C_KEYWORD, FALSE },
    {"TH",          _C_KEYWORD, FALSE },
    {"TITLE",       _C_KEYWORD, FALSE },
    {"TR",          _C_KEYWORD, FALSE },
    {"TT",          _C_KEYWORD, TRUE  },
    {"U",           _C_KEYWORD, TRUE  },
    {"UL",          _C_KEYWORD, FALSE },
    {"VAR",         _C_KEYWORD, TRUE  },
    {"WBR",         _S_KEYWORD, TRUE  }
};

/*
 * Possible KEYWORDS - attributes (attribute names).
 *
 * Rows MUST stay sorted in strcmp() order (binary search in bsearch()).
 * Only name and token type are given; in_paragraph is left to the
 * default zero-initialisation and is not read for this table.
 * NOTE(review): 51 rows are listed; ATSZ is defined elsewhere - confirm
 * it is exactly 51.
 * NOTE(review): nexttok() collects attribute names with isalnum() only,
 * so "HTTP-EQUIV" is looked up as "HTTP" and this row can never match.
 */
kw_table attr_table[ATSZ] = {
    {"ALIGN",       _A_KEYWORD },
    {"ALINK",       _A_KEYWORD },
    {"ALT",         _A_KEYWORD },
    {"BACKGROUND",  _A_KEYWORD },
    {"BGCOLOR",     _A_KEYWORD },
    {"BORDER",      _A_KEYWORD },
    {"CELLPADDING", _A_KEYWORD },
    {"CELLSPACING", _A_KEYWORD },
    {"CHECKED",     _A_KEYWORD },
    {"CLEAR",       _A_KEYWORD },
    {"COLOR",       _A_KEYWORD },
    {"COLS",        _A_KEYWORD },
    {"COLSPAN",     _A_KEYWORD },
    {"CONTENT",     _A_KEYWORD },
    {"COORDS",      _A_KEYWORD },
    {"ENCTYPE",     _A_KEYWORD },
    {"HEIGHT",      _A_KEYWORD },
    {"HREF",        _A_KEYWORD },
    {"HSPACE",      _A_KEYWORD },
    {"HTTP-EQUIV",  _A_KEYWORD },
    {"ISMAP",       _A_KEYWORD },
    {"LINK",        _A_KEYWORD },
    {"LOWSRC",      _A_KEYWORD },
    {"MARGINHEIGHT",_A_KEYWORD },
    {"MARGINWIDTH", _A_KEYWORD },
    {"MAXLENGTH",   _A_KEYWORD },
    {"MULTIPLE",    _A_KEYWORD },
    {"NAME",        _A_KEYWORD },
    {"NORESIZE",    _A_KEYWORD },
    {"NOSHADE",     _A_KEYWORD },
    {"NOWRAP",      _A_KEYWORD },
    {"PROMPT",      _A_KEYWORD },
    {"REL",         _A_KEYWORD },
    {"REV",         _A_KEYWORD },
    {"ROWS",        _A_KEYWORD },
    {"ROWSPAN",     _A_KEYWORD },
    {"SCROLLING",   _A_KEYWORD },
    {"SELECTED",    _A_KEYWORD },
    {"SHAPE",       _A_KEYWORD },
    {"SIZE",        _A_KEYWORD },
    {"SRC",         _A_KEYWORD },
    {"TARGET",      _A_KEYWORD },
    {"TEXT",        _A_KEYWORD },
    {"TYPE",        _A_KEYWORD },
    {"URL",         _A_KEYWORD },
    {"USEMAP",      _A_KEYWORD },
    {"VALIGN",      _A_KEYWORD },
    {"VLINK",       _A_KEYWORD },
    {"VSPACE",      _A_KEYWORD },
    {"WIDTH",       _A_KEYWORD },
    {"WRAP",        _A_KEYWORD }
};

/*
 * Possible KEYWORDS - attribute's values.
 *
 * Rows MUST stay sorted in strcmp() order (binary search in bsearch());
 * '_' sorts after the upper-case letters, so the "_..." rows come last.
 * NOTE(review): 31 rows are listed; AVSZ is defined elsewhere - confirm
 * it is exactly 31.
 * NOTE(review): nexttok() upper-cases unquoted values and returns a
 * leading '_' as a one-character token, so the four lower-case "_..."
 * rows can never be matched by the lookup in this file.
 */
kw_table aval_table[AVSZ] = {
    {"ABSBOTTOM",   _V_KEYWORD },
    {"ABSMIDDLE",   _V_KEYWORD },
    {"ALL",         _V_KEYWORD },
    {"BASELINE",    _V_KEYWORD },
    {"BOTTOM",      _V_KEYWORD },
    {"CENTER",      _V_KEYWORD },
    {"CHECKBOX",    _V_KEYWORD },
    {"CIRCLE",      _V_KEYWORD },
    {"DISC",        _V_KEYWORD },
    {"HIDDEN",      _V_KEYWORD },
    {"IMAGE",       _V_KEYWORD },
    {"JUSTIFY",     _V_KEYWORD },
    {"LEFT",        _V_KEYWORD },
    {"MIDDLE",      _V_KEYWORD },
    {"NOHREF",      _V_KEYWORD },
    {"OFF",         _V_KEYWORD },
    {"PASSWORD",    _V_KEYWORD },
    {"PHYSICAL",    _V_KEYWORD },
    {"RADIO",       _V_KEYWORD },
    {"RESET",       _V_KEYWORD },
    {"RIGHT",       _V_KEYWORD },
    {"SQUARE",      _V_KEYWORD },
    {"SUBMIT",      _V_KEYWORD },
    {"TEXT",        _V_KEYWORD },
    {"TEXTTOP",     _V_KEYWORD },
    {"TOP",         _V_KEYWORD },
    {"VIRTUAL",     _V_KEYWORD },
    {"_blank",      _V_KEYWORD },
    {"_parent",     _V_KEYWORD },
    {"_self",       _V_KEYWORD },
    {"_top",        _V_KEYWORD }
};

/*
 * Function definitions
 */

/*
 * Commit everything written to a listing/trace stream so far by closing
 * the file and reopening it in append mode.
 *
 *   filetofix -- stream to commit; NULL is ignored
 *   fname     -- path the stream was opened on; when NULL or empty the
 *                stream is only flushed
 *
 * The reopen is done with freopen() so that it happens on the SAME
 * stream object. The previous fclose()/fopen() pair stored the new
 * handle in the local parameter only, which left every caller holding a
 * pointer to a closed stream.
 *
 * If the reopen fails the stream is closed and must not be used again;
 * that cannot be reported through this interface.
 */
void fixfile(FILE *filetofix, char* fname)
{
   if (filetofix == NULL) {
      return;
   }

   if (fname == NULL || fname[0] == '\0') {
      fflush(filetofix);
      return;
   }

   (void) freopen(fname, "a", filetofix);
}

int nextchar(FILE *so, FILE *li)

/*
 * Reads the next character from the input stream `so`, echoes it to the
 * listing stream `li` and advances the global character counter.
 *
 * Returns the character read, or EOF at end of input. EOF is not echoed:
 * putc(EOF, ...) would write a stray 0xFF byte into the listing.
 */

{
  int ch;

  ch = getc(so);
  if (ch != EOF) {
    putc(ch, li);
  }
  charno ++;

  return(ch);
}

void nlproc( FILE *li )

/*
 * End-of-line processing for the listing stream `li`.
 *
 * When an error is pending (errflag set) the flag is cleared and a
 * marker line is written: "ERROR/:", errpos blanks, then '^' and the
 * value of errtoken. After that the line counter is advanced, the
 * character counter is reset, the number of the new line is written as
 * a prefix and the listing is committed with fixfile().
 */

{
  int pad = 0;

  if (errflag) {
    errflag = FALSE;
    fprintf(li, "ERROR/:");
    while (pad < errpos) {
      fprintf(li, " ");
      pad++;
    }
    fprintf(li, "^ %d\n", errtoken);
  }

  charno = 0;
  lineno += 1;
  fprintf(li,"%4d  ", lineno);
  fixfile(li, lifn);
}

/*
 * Binary search for `word` in a keyword table.
 *
 *   word       -- NUL-terminated string to look up (compared with strcmp,
 *                 i.e. case-sensitively)
 *   word_table -- table of `tbsize` rows, sorted by name in strcmp order
 *   tbsize     -- number of rows in the table
 *
 * Returns the index of the matching row, or -1 when there is none.
 *
 * NOTE: this name clashes with bsearch() from <stdlib.h>; that header
 * must not be included in this file.
 */

int bsearch( char word[], kw_table word_table[], int tbsize)
{
  int lo = 0;
  int hi = tbsize - 1;

  while (lo <= hi) {
    int mid = (lo + hi) / 2;
    int cmp = strcmp(word, word_table[mid].name);

    if (cmp == 0) {
      return (mid);
    }
    if (cmp > 0) {
      lo = mid + 1;
    } else {
      hi = mid - 1;
    }
  }

  return (-1);
}

int nexttok(char *val)

/*
 * Main function of the module.
 *   Variables: c1    -- next symbol (one char lookahead)
 *        tokenvalue  -- a string containing current token value
 *        tokentype   -- int var defining current token's type
 */

{
  register int i, i2;
  char tokenvalue[NMSZ];
  int c1, tokentype, tokenend;

  i = 0;

  while (isspace(c)) /* c is always one char ahead */ {
      if (c == '\n') nlproc(listfp);
      if (state == PRE) {
        if (debug_lex) { fprintf(tracefp, "LEX(%3d): state = %s. in_close=%s,in_open=%s. Parsing white spaces.\n", lineno, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
        tokenvalue[0] = c; tokenvalue[1] = '\0';
        tokentype = _IDENTIFIER;
        c = nextchar(infp, listfp); charno++;
        strcpy(val, tokenvalue);  return(tokentype);
      }
      c = nextchar(infp, listfp); charno++;
  }

  if (c == EOF) {
     if (debug_lex) { fprintf(tracefp, "LEX(%3d): state = %s. in_close=%s,in_open=%s.EOF found.\n", lineno, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE");  fixfile(tracefp, trfn);}
     strcpy(val, "");
     return(NULL);
  }

         /*  STATE == TEXT || STATE == PRE  */

  if ( ( (state == TEXT) || (state == PRE) ) && (c == '<') ) {
     ungetc(c1 = getc(infp), infp);
     old_state = state; state = KEYWORD;
     if (c1 == '\/') {
        c = nextchar(infp, listfp); charno++;
        c = nextchar(infp, listfp); charno++;
        tokenvalue[0] = '<'; tokenvalue[1] = '\/'; tokenvalue[2] = '\0';
        tokentype = _CloseTag; in_close = TRUE; in_open = FALSE;
        if (debug_lex) { fprintf(tracefp, "LEX(%3d): '_CloseTag' found. state = %s. in_close=%s,in_open=%s.\n", lineno, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE");  fixfile(tracefp, trfn);}
        strcpy(val, tokenvalue); return(tokentype);
     } else {
        c = nextchar(infp, listfp); charno++;
        tokenvalue[0] = '<';  tokenvalue[1] = '\0';
        tokentype = _OpenTag;  in_close = FALSE; in_open = TRUE;
        if (debug_lex) {fprintf(tracefp, "LEX(%3d): '_OpenTag' found. state = %s. in_close=%s,in_open=%s.\n", lineno, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE");  fixfile(tracefp, trfn);}
        strcpy(val, tokenvalue); return(tokentype);
     }
  }

  if ( (state == TEXT) || (state == PRE) ) {
     while ( ( !isspace(c) ) && ( c!='<') && (i < NMSZ-1) && c!=EOF) {
       tokenvalue[i++] = c;
       c = nextchar(infp, listfp); charno++;
     }
     tokenvalue[i++] = '\0';
     tokentype = _IDENTIFIER;
     if (debug_lex) {fprintf(tracefp, "LEX(%3d): Identifier '%s' found. state = %s. in_close=%s,in_open=%s.\n", lineno, tokenvalue, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
     strcpy(val, tokenvalue);  return(tokentype);
  }

                /*  STATE == KEYWORD  */

  if (state == KEYWORD) {

     if (c == '\!') { /*  Comments! */
        while ((c != '>') && ( c != EOF )) {
           c = nextchar(infp, listfp); charno++;
        }
        state = ATTR;
        if (debug_lex) {fprintf(tracefp, "LEX(%3d): Comments tag found (value ignored). state = %s. in_close=%s,in_open=%s.\n", lineno, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
        tokenvalue[0] = '\0'; tokentype = _IDENTIFIER;
        strcpy(val, tokenvalue); return(tokentype);
     }

     while (isalnum(c)) {
        tokenvalue[i++] = toupper(c);
        c = nextchar(infp, listfp); charno++;
     }
     tokenvalue[i++] = '\0';

     if ((i = bsearch(tokenvalue, keyword_table, KWSZ)) >= 0) {
        tokentype = keyword_table[i].kw_token;
        state = ATTR;
        if ( strcmp("PRE", tokenvalue) == 0 ) {
           if (in_close) old_state = TEXT;
           else          old_state = PRE;
        }
        if (debug_lex) {fprintf(tracefp, "LEX(%3d): Keyword '%s' found. state = %s. in_close=%s,in_open=%s.\n", lineno, tokenvalue, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
        strcpy(val, tokenvalue); return(tokentype);
     } else {
        /* Skip unknown KEYWORD - with all its attributes! */
        while ( ( c != '>' ) && ( c != EOF ) ) {
           c = nextchar(infp, listfp); charno++;
        }
        state = ATTR;
        if (debug_lex) {fprintf(tracefp, "LEX(%3d): Unknown keyword '%s' found. state = %s. in_close=%s,in_open=%s.\n", lineno, tokenvalue, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
        tokenvalue[0] = '\0'; tokentype = _IDENTIFIER;
        strcpy(val, tokenvalue); return(tokentype);
     }
  } /* end if for (state == KEYWORD) */

              /*  STATE == ATTR  */

  if (state == ATTR) {

     if ( c == '>' ) {    /* No more attributes! */
        c = nextchar(infp, listfp); charno++;
        tokenvalue[0] = '\>';    tokenvalue[1] = '\0';
        tokentype = _EndTag;
        state = old_state; in_open = in_close = FALSE;
        if (debug_lex) {fprintf(tracefp, "LEX(%3d): '_EndTag' found. state = %s. in_close=%s,in_open=%s.\n", lineno, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
        strcpy(val, tokenvalue); return(tokentype);
     }
/************* - that meant it's the end of quoted avalue
       else if (c == '\"') {
        c = nextchar(infp, listfp); charno++;
        state = ATTR;
        tokenvalue[0] = '\"';   tokenvalue[1] = '\0';
        tokentype = c;
        if (debug_lex) {fprintf(tracefp, "LEX(%3d): '\"' found. state = %s. in_close=%s,in_open=%s.\n", lineno, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
        strcpy(val, tokenvalue); return(tokentype);
     }
*************/
       else {
        while (isalnum(c)) {
           tokenvalue[i++] = toupper(c);
           c = nextchar(infp, listfp); charno++;
        }
        tokenvalue[i++] = '\0';

        if ((i = bsearch(tokenvalue, attr_table, ATSZ)) >= 0) {
            tokentype = attr_table[i].kw_token;
            state = AVALUE;
            if (debug_lex) {fprintf(tracefp, "LEX(%3d): Attribute keyword '%s' found. state = %s. in_close=%s,in_open=%s.\n", lineno, tokenvalue, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
            strcpy(val, tokenvalue); return(tokentype);
        } else { /* Unknown attribute. Actually, it's much more easier      */
                 /* just ignoring it in YACC then trying to skip it here... */
            tokentype = _IDENTIFIER;
            state = AVALUE;
            if (debug_lex) {fprintf(tracefp, "LEX(%3d): Unknown attribute keyword '%s' found. state = %s. in_close=%s,in_open=%s.\n", lineno, tokenvalue, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
            strcpy(val, tokenvalue); return(tokentype);
        }

     }
  } /* end if for (state == ATTR) */

             /*  STATE == AVALUE  */

  if (state == AVALUE) {
     if ( c == '>' ) {
        tokenvalue[0] = '\>'; tokenvalue[1] = '\0';
        tokentype = _EndTag;
        state = old_state; in_open = in_close = FALSE;
        c = nextchar(infp, listfp); charno++;
        if (debug_lex) {fprintf(tracefp, "LEX(%3d): '_EndTag' found. state = %s. in_close=%s,in_open=%s.\n", lineno, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
        strcpy(val, tokenvalue); return(tokentype);
     } else if ( c == '\"' ) {
        tokenvalue[0] = '\"'; tokenvalue[1] = '\0';
        tokentype = c;
        state = IN_QUOTE; c = nextchar(infp, listfp); charno++;
        if (debug_lex) {fprintf(tracefp, "LEX(%3d): '\"' found. state = %s. in_close=%s,in_open=%s.\n", lineno, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
        strcpy(val, tokenvalue); return(tokentype);
     } else if ( !isalnum(c) ) {  /* i.e. '=', '+', '-' - all but '"' */
        tokenvalue[0] = c;  tokenvalue[1] = '\0';
        tokentype = c;  c = nextchar(infp, listfp); charno++;
        /* state = ATTR; - if (it was realy a GLUCK) { delete it!!! } */
        if (debug_lex) {fprintf(tracefp, "LEX(%3d): '%s' found. state = %s. in_close=%s,in_open=%s.\n", lineno, tokenvalue, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
        strcpy(val, tokenvalue); return(tokentype);
     } else if ( isdigit(c) ) {
        while ( ( !isspace(c) ) && ( c != '\>') ) {
           tokenvalue[i++] = toupper(c);
           c = nextchar(infp, listfp); charno++;
        }
        tokenvalue[i++] = '\0';
        tokentype = _NUM;
        state = ATTR;
        if (debug_lex) {fprintf(tracefp, "LEX(%3d): Numeric avalue '%s' found. state = %s. in_close=%s,in_open=%s.\n", lineno, tokenvalue, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
        strcpy(val, tokenvalue); return(tokentype);
     } else if ( isalpha(c) ) {
        ungetc(c1 = getc(infp), infp);
        if (!isalpha(c)) {    /* f.e. TYPE = i in Ordered List */
           tokenvalue[0] = c;  tokenvalue[1] = '\0';
           tokentype = _V_KEYWORD;
           if (debug_lex) {fprintf(tracefp, "LEX(%3d): Attribute value '%s' found. state = %s. in_close=%s,in_open=%s.\n", lineno, tokenvalue, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
           strcpy(val, tokenvalue); return(tokentype);
        }
        while ( isalnum(c) ) {
           tokenvalue[i++] = toupper(c);
           c = nextchar(infp, listfp); charno++;
        }
        tokenvalue[i++] = '\0';
        state = ATTR;
        if ((i = bsearch(tokenvalue, aval_table, AVSZ)) >= 0) {
           tokentype = aval_table[i].kw_token;
           if (debug_lex) {fprintf(tracefp, "LEX(%3d): Attribute value keyword '%s' found. state = %s. in_close=%s,in_open=%s.\n", lineno, tokenvalue, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
        } else {
           tokentype = _IDENTIFIER;
           if (debug_lex) {fprintf(tracefp, "LEX(%3d): Unknown attribute value keyword '%s' found. state = %s. in_close=%s,in_open=%s.\n", lineno, tokenvalue, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
        }
        strcpy(val, tokenvalue); return(tokentype);
     }
  } /* end if for (state == ATTR) */


             /*  STATE == IN_QUOTE  */

  if (state == IN_QUOTE) {
    if ( c == '\"' ) {
       c = nextchar(infp, listfp); charno++;
       state = ATTR;
       tokenvalue[0] = '\"';   tokenvalue[1] = '\0';
       tokentype = '\"';
       if (debug_lex) {fprintf(tracefp, "LEX(%3d): '\"' found. state = %s. in_close=%s,in_open=%s.\n", lineno, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
       strcpy(val, tokenvalue); return(tokentype);
    }
/******************
    if ( c == '\#' ) tokentype = _COLOR_PALETTE;
    else             tokentype = _IDENTIFIER;
******************/
    tokentype = _QUOTED_ATTR;  /* maybe URL, maybe rain, maybe snow... :) */
    /* state = IN_QUOTE; */
    while ( c != '\"' ) {
       tokenvalue[i++] = c;
       c = nextchar(infp, listfp);  charno++;
    }
    tokenvalue[i++] = '\0';
    /* c = nextchar(infp, listfp); Skip the closing quotation mark */
    if (debug_lex) {fprintf(tracefp, "LEX(%3d): Quoted attribute value '%s' found. state = %s. in_close=%s,in_open=%s.\n", lineno,
                           tokenvalue, states[state], in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE"); fixfile(tracefp, trfn);}
    strcpy(val, tokenvalue); return(tokentype);

  } /* end if for (state == IN_QUOTE) */

  return 0; /*    ... */
}

/*
 * 樠
 */

void lexinit()
{
  int i;

  icm      = 0;
  lineno   = 1;
  charno   = 1;


/*****************************
  f_size = filelength(fileno(infp));
  if (f_size == 0) f_size = 1;
  if (debug_lex) printf("\n\nFileSize: %6ld.\n\n", f_size);
*****************************/

  fprintf(listfp,"%4d  ", lineno); fixfile(listfp, lifn);
  c = nextchar(infp, listfp);

}

/*
 *   yyparse()
 */

int yylex()
{
  int  tktyp;
  char tkval[NMSZ];

  if (!USE_BUFFER) {
     tktyp = nexttok(tkval);
     switch (tktyp) {
        case _OpenTag: {
           if (debug_yacc) {fprintf(tracefp, "LEX(%3d): yylex looks one token ahead.\n", lineno); fixfile(tracefp, trfn);}
           tktyp = nexttok(tkval);
           if (opened_par) {
              if (tktyp == _C_KEYWORD || tktyp == _S_KEYWORD) {
                 if (!keyword_table[bsearch(tkval, keyword_table, KWSZ)].in_paragraph) {
                    lex_buff[0].tktyp = tktyp     ; strcpy(lex_buff[0].tkval, tkval);
                    lex_buff[1].tktyp = _OpenTag  ; strcpy(lex_buff[1].tkval, "<");
                    lex_buff[2].tktyp = _EndTag   ; strcpy(lex_buff[2].tkval, ">");
                    lex_buff[3].tktyp = _C_KEYWORD; strcpy(lex_buff[3].tkval, "P");
                    lex_buff[4].tktyp = _CloseTag ; strcpy(lex_buff[4].tkval, "</");
                    lex_buff_size = 5; USE_BUFFER = TRUE;
                    opened_par = (strcmp(tkval, "P")==0);
                    if (debug_lex) {fprintf(tracefp, "LEX(%3d): yylex() fills buffer by scheme \"No strange within 'P'!\" (case _OpenTag).\n", lineno); fixfile(tracefp, trfn);}
                 } else {
                    lex_buff[0].tktyp = tktyp     ; strcpy(lex_buff[0].tkval, tkval);
                    lex_buff[1].tktyp = _OpenTag  ; strcpy(lex_buff[1].tkval, "<");
                    lex_buff_size = 2; USE_BUFFER = TRUE;
                    if (debug_lex) {fprintf(tracefp, "LEX(%3d): yylex() fills buffer by scheme \"Allowed within 'P'!\" (case _OpenTag).\n", lineno); fixfile(tracefp, trfn);}
                 }
              }
           } else { // i.e. opened_par == FALSE
              opened_par = (strcmp(tkval, "P")==0);
              lex_buff[0].tktyp = tktyp     ; strcpy(lex_buff[0].tkval, tkval);
              lex_buff[1].tktyp = _OpenTag  ; strcpy(lex_buff[1].tkval, "<");
              lex_buff_size = 2; USE_BUFFER = TRUE;
              if (debug_lex) {fprintf(tracefp, "LEX(%3d): yylex() fills buffer by scheme \"NO ALARM!\" (case _OpenTag).\n", lineno); fixfile(tracefp, trfn);}
           }
           break;
        }

        case _CloseTag: {
           if (debug_yacc) {fprintf(tracefp, "LEX(%3d): yylex looks one token ahead.\n", lineno); fixfile(tracefp, trfn);}
           tktyp = nexttok(tkval);
           if (opened_par) {
              if (keyword_table[bsearch(tkval, keyword_table, KWSZ)].in_paragraph) {
                 lex_buff[0].tktyp = tktyp     ; strcpy(lex_buff[0].tkval, tkval);
                 lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[1].tkval, "<");
                 lex_buff_size = 2; USE_BUFFER = TRUE;
                 if (debug_lex) {fprintf(tracefp, "LEX(%3d): yylex() fills buffer by scheme \"Allowed within 'P'!\" (case _CloseTag).\n", lineno); fixfile(tracefp, trfn);}
              } else {
                 if (strcmp(tkval, "P")==0) {
                    lex_buff[0].tktyp = tktyp     ; strcpy(lex_buff[0].tkval, tkval);
                    lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[1].tkval, "</");
                    lex_buff_size = 2; USE_BUFFER = TRUE;
                    opened_par = FALSE;
                    if (debug_lex) {fprintf(tracefp, "LEX(%3d): yylex() fills buffer by scheme \"Normal closing 'P'!\" (case _CloseTag).\n", lineno); fixfile(tracefp, trfn);}
                 } else {
                    lex_buff[0].tktyp = tktyp     ; strcpy(lex_buff[0].tkval, tkval);
                    lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[1].tkval, "</");
                    lex_buff[2].tktyp = _EndTag   ; strcpy(lex_buff[2].tkval, ">");
                    lex_buff[3].tktyp = _C_KEYWORD; strcpy(lex_buff[3].tkval, "P");
                    lex_buff[4].tktyp = _CloseTag ; strcpy(lex_buff[4].tkval, "</");
                    lex_buff_size = 5; USE_BUFFER = TRUE;
                    opened_par = FALSE;
                    if (debug_lex) {fprintf(tracefp, "LEX(%3d): yylex() fills buffer by scheme \"Forced closing 'P'!\" (case _CloseTag).\n", lineno); fixfile(tracefp, trfn);}
                 }
              }
           } else {
              lex_buff[0].tktyp = tktyp     ; strcpy(lex_buff[0].tkval, tkval);
              lex_buff[1].tktyp = _CloseTag ; strcpy(lex_buff[1].tkval, "</");
              lex_buff_size = 2; USE_BUFFER = TRUE;
              if (debug_lex) {fprintf(tracefp, "LEX(%3d): yylex() fills buffer by scheme \"NO ALARM!\" (case _CloseTag).\n", lineno); fixfile(tracefp, trfn);}
           }
           
           break;
        }

        case _IDENTIFIER: {
           if ( !(in_open || in_close) && (!opened_par)) {
              lex_buff[0].tktyp = _IDENTIFIER; strcpy(lex_buff[0].tkval, tkval);
              lex_buff[1].tktyp = _EndTag    ; strcpy(lex_buff[1].tkval, ">");
              lex_buff[2].tktyp = _C_KEYWORD ; strcpy(lex_buff[2].tkval, "P");
              lex_buff[3].tktyp = _OpenTag   ; strcpy(lex_buff[3].tkval, "<");
              lex_buff_size = 4; USE_BUFFER = TRUE;
              opened_par = TRUE;
              if (debug_lex) {fprintf(tracefp, "LEX(%3d): yylex() fills buffer by scheme \"Identifier found! 'P'!\" (case _OpenTag).\n", lineno); fixfile(tracefp, trfn);}
           }
           break;
        }

        case 0: { // EOF
           if (opened_par) {
              lex_buff[0].tktyp = tktyp      ; strcpy(lex_buff[0].tkval, tkval);
              lex_buff[1].tktyp = _EndTag    ; strcpy(lex_buff[1].tkval, ">");
              lex_buff[2].tktyp = _C_KEYWORD ; strcpy(lex_buff[2].tkval, "P");
              lex_buff[3].tktyp = _CloseTag  ; strcpy(lex_buff[3].tkval, "</");
              lex_buff_size = 4; USE_BUFFER = TRUE;
              opened_par = TRUE;
              if (debug_lex) {fprintf(tracefp, "LEX(%3d): yylex() fills buffer by scheme \"EOF\" (case _OpenTag).\n", lineno); fixfile(tracefp, trfn);}
           }
        }

     }
  }

  if (USE_BUFFER) {
            tktyp = lex_buff[--lex_buff_size].tktyp;
     strcpy(tkval,  lex_buff[  lex_buff_size].tkval);
     if (debug_lex) {fprintf(tracefp, "LEX(%3d): yylex() uses buffer: tktyp = %4d, tkval = {%s}. opened_par=%s, in_close=%s, in_open=%s.\n",
                            lineno, tktyp, tkval, opened_par?"TRUE":"FALSE", in_close?"TRUE":"FALSE", in_open?"TRUE":"FALSE");fixfile(tracefp, trfn);}
     if (lex_buff_size == 0) USE_BUFFER = FALSE;
  }

  yylval.pchar = (char *) malloc(1 + strlen(tkval));
  strcpy(yylval.pchar, tkval);
  if (debug_lex)  {fprintf(tracefp, "LEX(%3d): LEX returns to yyparse: tktype = %4d (%s), tkval = {%s}\n", lineno, tktyp,
                                tktyp>256?tktypes[tktyp-257]:tkval, tkval); fixfile(tracefp, trfn);}
  return(tktyp);

}

/******** To run only lexical analysis use the following yylex():  *****
  do tktyp = nexttok(tkval); while (tktyp>0); - !!!!!!!!!!!!!
***********************************************************************/

            /************** END of HTM_LEX.C **************/

