%{
/****************************************************************************

                    SCANNER for  'C C O U N T'  PROGRAM

                         L E X  SPECIFICATION FILE

*****************************************************************************

  Description:

  'ccount.l' is the lex specification file of 'ccount' program that 
  supplies results of the structure of a C source file. Before a C source 
  file can be parsed, there must be a lexical analysis, that is, a module 
  that recognizes tokens like keywords, identifiers, type names, constants, 
  operators etc. 
  Such a module is specified by this file and then created as C source code 
  by the command 'lex ccount.l' (--> lex.yy.c). Finally 'lex.yy.c' is 
  included in 'y.tab.c', the module of the syntactic analysis.
  
  See also 'README.1'!

*****************************************************************************

  Author: Joerg Lawrenz, Universitaet Karlsruhe
  Date:   12/1/93

  Portions Copyright (c) 1989, 1990 James  A.  Roskind

*****************************************************************************

I.  How does 'ccount.l' recognize an unknown type name (defined by user)?

  When an identifier is read, 'ccount.l' compares the context with four 
  selected rules by looking at the following words. If one rule can be 
  applied, the variable 'isaTYPE' is set to 1. Then 'ccount.l' checks
  if the type name is a keyword (e.g. 'int') by rejecting the rule.
  If the list of keywords has been passed without any action, 'ccount.l' 
  comes to the single 'identifier' pattern. There the token 'TYPEDEFname' 
  is returned if 'isaTYPE' is 1 or if the identifier has been found in the
  hash table without prefix. If necessary the type name is entered in the
  hash table.
  
  Examples for the four rules:        

  1.  mytype a;

  2.  mytype **a;
      mytype (*f)();
      mytype (*a)[];

  3.  mytype) a,          casting
      mytype) a)
      mytype) a}

  4.  mytype *,           pointer type as parameter
      mytype **)

  Up to now there are no conflicts between these rules and other C structures.

*****************************************************************************

II.  Further details:

  a) Rule 5:

  Another rule (5) comparing the identifier context recognizes constructions 
  to declare function prototypes in ANSI as well as Kernighan/Ritchie style 
  like
        extern void error A((char* msg, int level));

  by using the state variable 'flag':

              extern   void error A     (  ( char* msg, int level ) );
  flag     0         1 2    3     4 -1 -2 -2 ....................-2 0
  ignore                          |     |                           |
                                 
  --------------------------------------------------------------------------

  b) Identifier:

  If 'ccount.l' considers an identifier, it also checks if the identifier
  is the name of a macro in declaration part (token 'MACRODEF' will be
  returned in this case) or if the identifier has to be ignored. For this,
  'ccount.l' adds the according prefix ('&' or '!') to the identifier and 
  looks for this word in the hash table.

  --------------------------------------------------------------------------

  c) Handling of '#if'-preprocessor directives (see also 'README.1', 2.2):

  If 'ccount.l' reads a '#if'-preprocessor directive which it finds also
  in the hash table, the state variable 'pp_comment' and the count variable
  'if_count' are set to 1. The line number is indicated and saved in
  'BeginLine'. Then 'ReadPPline()' counts the directive line and reads the 
  following lines up to the first '#' by treating them as 'preprocessor 
  comment'. 'pp_comment' is set to 2. Now 'ccount.l' looks for the rest 
  of the directive. If it is the according 'endif', 'else' or 'elif' 
  ('if_count' == 1 ?), 'pp_comment' is set to 0 and 'ccount.l' returns 
  to the normal read mode. Otherwise or if the directive itself is part of 
  PP comment, 'ReadPPline' is called again treating all up to the next '#' 
  as PP comment, and so on.


**********************    DECLARATIONS     *********************************/

/* The following global variables and functions are already defined 
   in 'y.tab.c':  */

extern int lines;        /* newlines seen so far; messages print lines+1 */
extern long bytes;       /* bytes of tokens, white space and directive lines */
extern long comments;    /* bytes counted by ReadComment() */
extern long Pcomments;   /* bytes counted as 'PP comment' by ReadPPline() */

/* Per-category statistics.  The identifier and symbol counters are
   updated by CountSymbol(), the bytes_ppdirectives counter by
   ReadPPline(). */
extern int num_identifier;       
extern int num_symbols;          
extern int num_ppdirectives;     
extern long bytes_identifier;
extern long bytes_symbols;
extern long bytes_ppdirectives;

extern char lexem[];             /* receives a copy of yytext in RETURN_VAL */
extern char last_identifier[];   /* receives a copy of yytext in IDENTIFIER_RETURN */

extern BOOL DeclFlag;            /* read by Rule 2; not modified in this file */
extern BOOL ElabTypeFlag;        /* set to 1 here after 'enum', 'struct', 'union';
                                    while set, Rules 1-4 do not mark a type name */
extern BOOL Debug;               /* if set, rules and tokens are traced on stderr */

extern void henter();          /* enters a word in the hash table */
extern BOOL hfind();           /* finds a word in the hash table */

/***************************************************************************/
  
BOOL isaTYPE = 0;    /* state variable, see above I. */

int flag = 0;        /* state variable, see above II.a */

int if_count = 0;    /* count variable, see above II.c */
int BeginLine = 0;   /* line store variable, see above II.c */
int pp_comment = 0;  /* state variable, see above II.c */


/*******************************  DEFINES  ***********************************/

/* Note on usage: apart from WHITE_RETURN, every macro below expands to a
   complete brace block.  The identifier rule invokes them without a
   trailing ';' directly in front of 'else', so their shape must not be
   changed (e.g. to do { } while (0)) without adapting the callers. */

/* WHITE_RETURN: no token is returned and the argument is unused; the
   matched text is only added to the byte count.  The expansion already
   ends in ';'. */
#define WHITE_RETURN(x)        bytes += yyleng;   /* just count bytes */

/* PA_KEYWORD_RETURN: advances the Rule 5 state before returning the
   keyword: flag 1 becomes 2, any other positive flag is reset to 0
   (the 'else' pairs with the inner 'if (flag == 1)'). */
#define PA_KEYWORD_RETURN(x) { if (flag > 0)\
                                 if (flag == 1) flag++; else flag = 0;\
                               CountSymbol(x);\
                               RETURN_VAL(x) }  /* standard C PArser Keyword */

/* IDENTIFIER_RETURN: remembers the matched text in 'last_identifier',
   counts the token and returns it. */
#define IDENTIFIER_RETURN(x) { strcpy(last_identifier, yytext);\
                               CountSymbol(x);\
                               RETURN_VAL(x) }

/* ASCIIOP_RETURN: the argument is unused; the token value is the
   character code of the first matched character. */
#define ASCIIOP_RETURN(x)    { CountSymbol((int)*yytext);\
                               RETURN_VAL((int)*yytext) }
                                              /* a single character operator */
#define NAMEDOP_RETURN(x)    { CountSymbol(x);\
                               RETURN_VAL(x) }
                                        /* a multichar operator, with a name */

#define NUMERICAL_RETURN(x)  { CountSymbol(x);\
                               RETURN_VAL(x) }     /* some sort of constant */
                          
#define LITERAL_RETURN(x)    { CountSymbol(x);\
                               RETURN_VAL(x) }           /* a string literal */

/* RETURN_VAL: common tail of all token macros.  Clears 'isaTYPE', adds
   the token length to 'bytes', copies the token text to 'lexem', traces
   text and value on stderr when 'Debug' is set, and returns x from the
   scanner function. */
#define RETURN_VAL(x)        { isaTYPE = 0;\
                               bytes += yyleng;\
                               strcpy(lexem, yytext);\
                               if (Debug)\
                                 fprintf(stderr," %s %d ",yytext,x);\
                               return(x); }

/****************************************************************************/

%}

identifier [a-zA-Z_$][0-9a-zA-Z_$]*
 /* Named patterns used by the rules section.  Comment lines in this    */
 /* section start with a blank so that lex copies them as C text.       */

exponent_part ([eE][-+]?[0-9]+)
fractional_constant (([0-9]*"."[0-9]+)|([0-9]+"."))
floating_constant (({fractional_constant}{exponent_part}?)|([0-9]+{exponent_part}))[FfLl]?

integer_suffix_opt (([uU]?[lL]?)|([lL][uU]))
decimal_constant [1-9][0-9]*{integer_suffix_opt}
octal_constant "0"[0-7]*{integer_suffix_opt}
hex_constant "0"[xX][0-9a-fA-F]+{integer_suffix_opt}*

simple_escape [abfnrtv'"?\\]
octal_escape  [0-7]{1,3}
hex_escape "x"[0-9a-fA-F]+

 /* A backslash followed by a newline is accepted as an escape too, so  */
 /* that literals continued on the next line are matched as one token.  */
escape_sequence [\\]({simple_escape}|{octal_escape}|{hex_escape}|"\n")
c_char ([^'\\\n]|{escape_sequence})
 /* As in c_char, a backslash may only occur as the start of an escape  */
 /* sequence.  If it were also allowed as an ordinary character, the    */
 /* longest match for  "\\" ... "x"  on one line would read  \"  as an  */
 /* escaped quote and swallow everything up to the next quote.          */
s_char ([^"\\\n]|{escape_sequence})


h_tab [\011]
form_feed [\014]
v_tab [\013]
c_return [\015]

horizontal_white ([ ]|{h_tab})
B ({horizontal_white}|"\n")

%p 6000    /* number of positions (DEFAULT 2000) */

%%


{horizontal_white}+                      {/* blanks and tabs: counted as bytes, no token returned */
                                          WHITE_RETURN(' ');}
({v_tab}|{c_return}|{form_feed})+        {/* other white space: counted as bytes, no token returned */
                                          WHITE_RETURN(' ');}

({horizontal_white}|{v_tab}|{c_return}|{form_feed})*"\n"         
                               {/* end of line, including white space in front of it:
                                   one more line, bytes counted, no token returned */
                                lines++;
                                WHITE_RETURN('\n');
		        	}


{identifier}/{B}+{identifier}                     /* Rule 1 */
                               {
                                /* Context 'name name'.  The rule only records its
                                   verdict in 'isaTYPE' and then REJECTs, so the text
                                   is matched again by a later rule (keyword or
                                   identifier), which returns the token.  The verdict
                                   is suppressed after struct/union/enum
                                   (ElabTypeFlag) and while flag > 1 (see Rule 5). */
                                if (Debug)
                                   fprintf(stderr," RULE1>> ");
                                isaTYPE = !ElabTypeFlag && flag <= 1;
                                REJECT;
                                }

{identifier}{B}*"*"([\*\(]|{B})*{identifier}      /* Rule 2 */
                               {             
                                /* Context 'name * ... name'.  Marks a type name
                                   only if DeclFlag is set and ElabTypeFlag is not;
                                   then REJECTs like Rule 1. */
                                if (Debug)
                                   fprintf(stderr," RULE2>> ");
                                isaTYPE = DeclFlag && !ElabTypeFlag;
                                REJECT;
                                }

{identifier}{B}*")"{B}*{identifier}{B}*[,\)\}]    /* Rule 3 */
                               {             
                                /* Context 'name) name' followed by ',' ')' or '}'
                                   (cast).  Marks a type name unless ElabTypeFlag
                                   is set; then REJECTs. */
                                if (Debug)
                                   fprintf(stderr," RULE3>> ");
                                isaTYPE = !ElabTypeFlag;
                                REJECT;
                                } 

{identifier}{B}*"*"("*"|{B})*[,\)]                /* Rule 4 */
                               {
                                /* Context 'name *' followed by ',' or ')' (pointer
                                   type as parameter).  Marks a type name unless
                                   ElabTypeFlag is set; then REJECTs. */
                                if (Debug)
                                   fprintf(stderr," RULE4>> ");
                                isaTYPE = !ElabTypeFlag;
                                REJECT;
                                } 

{identifier}{B}+"*"*{B}*"(*"?{identifier}")"?{B}+{identifier}{B}*"("{B}*"(" 
                               {                  /* Rule 5 */
                                /* Context 'type name MACRO((' (prototype wrapped in
                                   a macro, see II.a).  Starts the 'flag' state
                                   machine at 1 and REJECTs; the keyword and
                                   identifier rules advance it from there. */
                                if (Debug)
                                   fprintf(stderr," RULE5>> ");
                                flag = 1;
                                REJECT;                                 
                                }


auto                {/* C keywords.  Each one returns its parser token through
                        PA_KEYWORD_RETURN, which also updates the Rule 5 state
                        variable 'flag'. */
                     PA_KEYWORD_RETURN(AUTO);}
break               {PA_KEYWORD_RETURN(BREAK);}
case                {PA_KEYWORD_RETURN(CASE);}
char                {PA_KEYWORD_RETURN(CHAR);}
const               {PA_KEYWORD_RETURN(CONST);}
continue            {PA_KEYWORD_RETURN(CONTINUE);}
default             {PA_KEYWORD_RETURN(DEFAULT);}
do                  {PA_KEYWORD_RETURN(DO);}
double              {PA_KEYWORD_RETURN(DOUBLE);}
else                {PA_KEYWORD_RETURN(ELSE);}
enum                {/* the following name is a tag, not a type name:
                        Rules 1-4 test ElabTypeFlag */
                     ElabTypeFlag = 1; 
                     PA_KEYWORD_RETURN(ENUM);}
extern              {PA_KEYWORD_RETURN(EXTERN);}
float               {PA_KEYWORD_RETURN(FLOAT);}
for                 {PA_KEYWORD_RETURN(FOR);}
goto                {PA_KEYWORD_RETURN(GOTO);}
if                  {PA_KEYWORD_RETURN(IF);}
int                 {PA_KEYWORD_RETURN(INT);}
long                {PA_KEYWORD_RETURN(LONG);}
register            {PA_KEYWORD_RETURN(REGISTER);}
return              {PA_KEYWORD_RETURN(RETURN);}
short               {PA_KEYWORD_RETURN(SHORT);}
signed              {PA_KEYWORD_RETURN(SIGNED);}
sizeof              {PA_KEYWORD_RETURN(SIZEOF);}
static              {PA_KEYWORD_RETURN(STATIC);}
struct              {/* see 'enum' */
                     ElabTypeFlag = 1; 
                     PA_KEYWORD_RETURN(STRUCT);}
switch              {PA_KEYWORD_RETURN(SWITCH);}
typedef             {PA_KEYWORD_RETURN(TYPEDEF);}
union               {/* see 'enum' */
                     ElabTypeFlag = 1; 
                     PA_KEYWORD_RETURN(UNION);}
unsigned            {PA_KEYWORD_RETURN(UNSIGNED);}
void                {PA_KEYWORD_RETURN(VOID);}
volatile            {PA_KEYWORD_RETURN(VOLATILE);}
while               {PA_KEYWORD_RETURN(WHILE);}


{identifier}        {
                     /* Classifies an identifier that is not a keyword:
                          - "!name" in the hash table, or flag == 4 (macro name
                            of a Rule 5 prototype): counted, but no token is
                            returned;
                          - "&name" in the hash table: token MACRODEF;
                          - name itself in the hash table, or marked by Rules
                            1-4 (isaTYPE): token TYPEDEFname, entering the name
                            in the hash table if it is new;
                          - otherwise: token IDENTIFIER. */
                     char str[STR_LEN];
                     if (flag > 0) flag++;
                     /* Build the prefixed lookup key.  The precision bounds
                        the copy to the room left in 'str' (prefix character
                        and terminating NUL), so an over-long identifier can
                        no longer overflow the buffer. */
                     sprintf(str, "!%.*s", (int)(STR_LEN - 2), yytext);
                     if (hfind(str) || flag == 4)
                             {
                              isaTYPE = 0;
                              if (flag == 4) flag = -1;
                              CountSymbol(IDENTIFIER);
                              WHITE_RETURN(IDENTIFIER);
                              }
                        else {
                              /* same key with the macro prefix */
                              str[0] = '&';
                              if (hfind(str))
                                  IDENTIFIER_RETURN(MACRODEF)
                               else {
                                  if (hfind(yytext)) 
                                       isaTYPE = 1;
                                    else 
                                       if (isaTYPE) henter(yytext);

                                  if (isaTYPE) 
                                       IDENTIFIER_RETURN(TYPEDEFname)
                                    else 
                                       IDENTIFIER_RETURN(IDENTIFIER);
                                     }
                              }
                    }


{decimal_constant}  {/* numeric constants: counted as symbols by CountSymbol() */
                     NUMERICAL_RETURN(INTEGERconstant);}
{octal_constant}    {NUMERICAL_RETURN(OCTALconstant);}
{hex_constant}      {NUMERICAL_RETURN(HEXconstant);}
{floating_constant} {NUMERICAL_RETURN(FLOATINGconstant);}


"L"?[']{c_char}+[']     {/* character constant, optionally wide (L'...') */
                         NUMERICAL_RETURN(CHARACTERconstant);}

"L"?["]{s_char}*["]     {/* string literal, optionally wide; newlines inside the
                            matched text are added to 'lines' first */
                         CountLines(yytext);
                         LITERAL_RETURN(STRINGliteral);
                         }

"("                  {/* flag == -1: this is the first '(' after the ignored
                         macro name of a Rule 5 prototype (see II.a).  It is
                         counted but not returned, and flag becomes -2. */
                      if (flag == -1) {
                        flag = -2;
                        CountSymbol('(');
                        WHITE_RETURN(LP);
                       }
                       else
                        ASCIIOP_RETURN(LP);}

")"/{B}*[;\{]        {/* ')' directly in front of ';' or '{'.  With flag == -2
                         it is the ')' closing the ignored '(' above: counted
                         but not returned, and flag is reset to 0. */
                      if (flag == -2) {
                        flag = 0;
                        CountSymbol(')');
                        WHITE_RETURN(RP);
                       }
                       else
                        ASCIIOP_RETURN(RP);}

")"                  {ASCIIOP_RETURN(RP);}
","                  {/* Single-character operators and punctuators.
                         ASCIIOP_RETURN ignores its argument and returns the
                         character code of the matched character. */
                      ASCIIOP_RETURN(COMMA);}
"{"                  {ASCIIOP_RETURN(LC);}
"}"                  {ASCIIOP_RETURN(RC);}
"["                  {ASCIIOP_RETURN(LB);}
"]"                  {ASCIIOP_RETURN(RB);}
"."                  {ASCIIOP_RETURN(DOT);}
"&"                  {ASCIIOP_RETURN(AND);}
"*"                  {ASCIIOP_RETURN(STAR);}
"+"                  {ASCIIOP_RETURN(PLUS);}
"-"                  {ASCIIOP_RETURN(MINUS);}
"~"                  {ASCIIOP_RETURN(NEGATE);}
"!"                  {ASCIIOP_RETURN(NOT);}
"/"                  {ASCIIOP_RETURN(DIV);}
"%"                  {ASCIIOP_RETURN(MOD);}
"<"                  {ASCIIOP_RETURN(LT);}
">"                  {ASCIIOP_RETURN(GT);}
"^"                  {ASCIIOP_RETURN(XOR);}
"|"                  {ASCIIOP_RETURN(PIPE);}
"?"                  {ASCIIOP_RETURN(QUESTION);}
":"                  {ASCIIOP_RETURN(COLON);}
";"                  {ASCIIOP_RETURN(SEMICOLON);}
"="                  {ASCIIOP_RETURN(ASSIGN);}
"->"                 {/* Multi-character operators return their named token. */
                      NAMEDOP_RETURN(ARROW);}
"++"                 {NAMEDOP_RETURN(ICR);}
"--"                 {NAMEDOP_RETURN(DECR);}
"<<"                 {NAMEDOP_RETURN(LS);}
">>"                 {NAMEDOP_RETURN(RS);}
"<="                 {NAMEDOP_RETURN(LE);}
">="                 {NAMEDOP_RETURN(GE);}
"=="                 {NAMEDOP_RETURN(EQ);}
"!="                 {NAMEDOP_RETURN(NE);}
"&&"                 {NAMEDOP_RETURN(ANDAND);}
"||"                 {NAMEDOP_RETURN(OROR);}
"*="                 {NAMEDOP_RETURN(MULTassign);}
"/="                 {NAMEDOP_RETURN(DIVassign);}
"%="                 {NAMEDOP_RETURN(MODassign);}
"+="                 {NAMEDOP_RETURN(PLUSassign);}
"-="                 {NAMEDOP_RETURN(MINUSassign);}
"<<="                {NAMEDOP_RETURN(LSassign);}
">>="                {NAMEDOP_RETURN(RSassign);}
"&="                 {NAMEDOP_RETURN(ANDassign);}
"^="                 {NAMEDOP_RETURN(ERassign);}
"|="                 {NAMEDOP_RETURN(ORassign);}
"..."                {NAMEDOP_RETURN(ELLIPSIS);}


"/*"                 {/* start of a comment: the rest is consumed and counted
                         by ReadComment(); no token is returned */
                      ReadComment();}                          

"#"{B}*("if"|"ifdef"|"ifndef").*    {
                      /* A conditional directive whose matched text is found in
                         the hash table opens a 'PP comment' (see II.c): its
                         line number is printed and remembered in BeginLine.
                         While a PP comment is active, every further '#if...'
                         raises the nesting counter if_count. */
                      if (!pp_comment && hfind(yytext)) {
                              pp_comment = 1;
                              fprintf(stderr,"Line %d: %s\n",lines+1,yytext);
                              BeginLine = lines+1;
                           }
                      if (pp_comment) if_count++;
                      /* give everything after '#' back to the input, so that
                         ReadPPline() reads and counts it */
                      yyless(1);        /* yytext = "#" */
                      ReadPPline();
                      }

"#"/{B}*"endif"      {/* The '#endif' that brings if_count back to 0 ends the
                         PP comment; its length in lines is printed. */
                      if (pp_comment && --if_count == 0) {
                              pp_comment = 0;
                              fprintf(stderr,"Length: %d\n",lines-BeginLine);
                           }
                      ReadPPline();
                      }

"#"/{B}*("else"|"elif")              {
                      /* '#else' / '#elif' on the outermost level of a PP
                         comment (if_count == 1) ends the PP comment as well. */
                      if (if_count == 1)  {
                              pp_comment = 0;
                              if_count--;
                              fprintf(stderr,"Length: %d\n",lines-BeginLine);
                           }
                      ReadPPline();
                      }

"#"                  {/* any other '#': handled entirely by ReadPPline() */
                      ReadPPline();}


.                    /* Handling of unknown character */
                     {
                      /* Reports the current line on stderr and returns the
                         value 0 through RETURN_VAL.
                         NOTE(review): a result of 0 from the scanner is
                         conventionally 'end of input' for a yacc parser, so
                         this presumably stops the parse - confirm this is
                         intended. */
                      fprintf(stderr, "\nline %d: lexical error",lines+1);
                      RETURN_VAL(0);
                      }

%%

/****************************   FUNCTIONS   ********************************

  CountSymbol(x)     -->  increases the respective number of identifiers
                          or symbols and the respective number of bytes
                          for the token x
  ReadComment()      -->  reads comment
  ReadPPline()       -->  reads preprocessor directives or PP comment
                          see above II.c
  CountLines(s)      -->  counts the number of lines in the string s

***************************************************************************/


CountSymbol (x)
int x;
{
  /* Books the token that was just matched (its length is yyleng) in one
     of two statistics: identifier-like tokens, or all other symbols. */
  if (x == IDENTIFIER || x == TYPEDEFname || x == MACRODEF)
    {
      num_identifier += 1;
      bytes_identifier += yyleng;
    }
  else
    {
      num_symbols += 1;
      bytes_symbols += yyleng;
    }
}

/***************************************************************************/

ReadComment()
{
  /* Consumes the rest of a comment whose opening delimiter has already
     been read by the caller, up to and including the closing delimiter.
     Every character read is added to 'comments' and every newline to
     'lines'.

     The function is iterative: a '*' that does not close the comment is
     simply followed by the next loop pass, so banner lines made of stars
     no longer cost one stack frame each.  It also stops at end of input
     instead of looping forever on an unterminated comment. */

  int c;            /* int, so that an end-of-input value stays distinct */
  int closed = 0;   /* set once the closing delimiter has been read */

  while (!closed) {
      c = input();
      if (c == 0 || c == EOF)     /* lex returns 0, some flex versions EOF */
          break;

      comments++;
      if (c == '\n')
          lines++;
      else if (c == '*') {
          c = input();
          if (c == '/') {
              /* presumably the two bytes of the opening delimiter, which
                 the caller did not count, plus this closing '/' */
              comments += 3;
              closed = 1;
          }
          else if (c == 0 || c == EOF)
              break;
          else
              unput(c);           /* re-read and count it in the next pass */
      }
  }

  if (!closed)
      fprintf(stderr, "\nline %d: unterminated comment", lines+1);
}

/***************************************************************************/

ReadPPline()
{
  char c;
  int ende = 0;

  if (pp_comment < 2) {      /* '#' is the beginning of a '#if'-PP directive *
     num_ppdirectives++;     /* that is no PP comment. */
     bytes_ppdirectives++;
     bytes++;         
     while (!ende)    
        switch (input())
         {
         case  '/': if ((c = input()) == '*')
                          ReadComment();
                    else {unput(c);
                          bytes_ppdirectives++; 
                          bytes++;}
                    break;
         
         case '\\': if ((c = input()) == '\n')
                         {lines++;
                          bytes_ppdirectives++;
                          bytes++;}
                    else  unput(c);
                    bytes_ppdirectives++;
                    bytes++;
                    break;

         case '\n': ende = 1;
                    lines++;
         default  : bytes_ppdirectives++;
                    bytes++;
                    break;
         }
  } else Pcomments++;               /* '#' is part of PP comment */

  if (pp_comment) {                 /* read PP comment up to the first '#' */
    while ((c = input()) != '#') {
           if (c == '\n') lines++;
           Pcomments++;
        }
    unput(c);
    pp_comment = 2;
   }
}

/***************************************************************************/

CountLines (string)
char *string;
{
  /* Adds the number of newline characters in the NUL-terminated text
     'string' to the global line counter. */
  char *pos;

  for (pos = string; *pos != '\0'; pos++)
    {
      if (*pos == '\n')
        lines++;
    }
}
