CONCISENESS CONTEST CLARIFICATION - new tokount.c 
Author Message
 CONCISENESS CONTEST CLARIFICATION - new tokount.c

I accidentally posted a version of tokount.c with a slight flaw.
This new version does not count any #include lines you care
to use.  They are free.

Also, you are allowed (read: encouraged) to use comments,
whitespace, and descriptive variable names since they don't
cost anything.  So there's no need to submit any obfuscated
code :-)

--
Mark Schnitzius

University of Central Florida

----------clip here--------------

/* K&R version, 1388 tokens excluding whitespace */

/****************************************************************************/
/*                                                                          */
/*       Program: tokount.c                                                 */
/*                                                                          */

/*                                                                          */
/*       Date:    April 15th 1993.                                          */
/*                                                                          */
/*       Purpose: Counts tokens in C programs, based on ANSI C Standard     */
/*                                                                          */
/*       Note:    This program is to be used only on correct program texts  */
/*                                                                          */
/****************************************************************************/

#include <ctype.h>                      /* isalnum isdigit isspace          */
/* #include <errno.h>                   /* errno                            */
extern int errno;
#include <stdio.h>                      /* EOF ferror FILE fopen fputs getc */
                                        /* perror printf stderr ungetc      */
/* #include <stdlib.h>                  /* EXIT_FAILURE EXIT_SUCCESS        */
#include <string.h>                     /* strchr strcmp                    */

/****************************************************************************/

#define TOKEN_MAX  512          /* most characters stored for any one token */

typedef int E_CHAR;             /* holds every character value as well as EOF */

typedef enum {FALSE=0, TRUE=1} bool;

/* Token classes.  The order matters: print_token() only counts classes     */
/* that compare less than END_OF_FILE, and the numeric value is echoed      */
/* when tokens are printed.                                                 */
typedef enum
  {
    HEADER,
    IDENTIFIER,
    NUMBER,
    CHARACTER,
    STRING,
    OPERATOR,
    PUNCTUATOR,
    OTHER,
    UNKNOWN,
    END_OF_FILE,
    WHITE_VERTICAL,
    WHITE_HORIZONTAL
  }
TOKEN_CLASS;

E_CHAR current_char;            /* character most recently returned by next_char() */
E_CHAR look_ahead='\n';         /* character next_char() will return next;  */
                                /* the input is primed with a newline       */

FILE *program_file;             /* source file being scanned (opened in main) */

char token_chars[TOKEN_MAX+1];  /* text of the current token, NUL terminated */

int token_count;                /* running total printed by main()          */
int token_length;               /* number of characters held in token_chars */

bool header_flag;               /* TRUE while main() expects an #include operand */
bool echo_tokens;               /* TRUE when the -p option was given        */

TOKEN_CLASS current_token;      /* class of the token last read by print_token() */

/****************************************************************************/
E_CHAR next_char(/*void*/)  /* a function for ANSI C Translation Phases 1+2 */
/****************************************************************************/

/* Advances the input by one logical character.                             */
/*                                                                          */
/* The previous look_ahead becomes current_char, is appended to token_chars */
/* (silently dropped once TOKEN_MAX characters are stored) and is returned. */
/* A new look_ahead is then read from program_file with trigraphs replaced  */
/* and backslash-newline pairs removed.                                     */
/*                                                                          */
/* Two characters are buffered (look_ahead and the static next_one); both   */
/* start as '\n', so the first two characters returned are newlines.        */

{
    static E_CHAR next_one='\n';                /* internal character cache */

    current_char=look_ahead;                    /* copy external lookaheads */

    if(token_length<TOKEN_MAX)
      {
        token_chars[token_length++]=current_char;           /* assemble val */
        token_chars[token_length]='\0';
      }

    for(;;)                        /* repeats only after a spliced line end */
      {
        look_ahead=next_one;
        next_one=getc(program_file);                        /* get next raw */

        if(look_ahead=='?' && next_one=='?')    /* ANSI trigraph 2.1.1.2(1) */
          {
            /* The literal holds the nine replacement characters followed  */
            /* by the nine trigraph selectors, so a match in the second    */
            /* half sits exactly 9 positions after its replacement.        */
            int next_two=getc(program_file);    /* look ahead one more char */
            char *translated=NULL;

            /* strchr() also matches the terminating NUL, which would turn */
            /* "??" followed by a NUL byte into '#'; never search for it.  */
            if(next_two!=EOF && next_two!='\0')
                translated=strchr("#[\\]^{|}~=(/)'<!>-"+9, next_two);

            if(translated)                      /* test trigraph recognised */
              {
                look_ahead=*(translated-9);     /* copy char to replace ??x */
                next_one=getc(program_file);    /* read following character */
              }
            else ungetc(next_two, program_file);   /* otherwise put ch back */
          }

        if(look_ahead!='\\' || next_one!='\n') return(current_char);  /* OK */

        next_one=getc(program_file);            /* ANSI splicing 2.1.1.2(2) */
      }
}

/****************************************************************************/
TOKEN_CLASS next_token(/*void*/) /* function for ANSI C Translation Phase 3 */
/****************************************************************************/

#define Digits     '0': case '1': case '2': case '3': case '4': case '5':\
   case '6': case '7': case '8': case '9'
#define Double     '#': case '+': case '&': case '|'
#define Letter     'A': case 'B': case 'C': case 'D': case 'E': case 'F':\
 case 'G': case 'H': case 'I': case 'J': case 'K': case 'M': case 'N':\
 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':\
 case 'V': case 'W': case 'X': case 'Y': case 'Z': case 'a': case 'b':\
 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i':\
 case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p':\
 case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w':\
 case 'x': case 'y': case 'z': case '_'

#define Puncts     '(': case ')': case ',': case ';': case '[': case ']':\
 case '{': case '}'
#define Quotes     '"': case'\''
#define Simple     ':': case '?': case '~'
#define Single     '!': case '%': case '*': case'=' : case'^'
#define WhiteH     ' ': case'\t'
#define WhiteV    '\n': case'\v': case'\f'

{token_length=0; switch(next_char())

 case '.':   {if(!isdigit(look_ahead))                    /* period/ellipsis */
                {while(look_ahead=='.') next_char();     /* grab any number */
               case Puncts:     return(PUNCTUATOR);}                    /* arbitrary class */

            case Digits: while(isalnum(look_ahead) || look_ahead=='.' || look_ahead=='_')
                 if((next_char()=='e' || current_char=='E') /* opt exponent */
                 && ( look_ahead=='+' || look_ahead=='-')) next_char();
             return(NUMBER);                             /* unusual syntax! */

            case 'L':    if(look_ahead=='"' || look_ahead=='\'') next_char();  /* L"... */
             else                                        /* reserved words, */
             case Letter:    {while(isalnum(look_ahead) || look_ahead=='_') next_char();
                 return(IDENTIFIER);                     /* all identifiers */

                            case '<':        if(header_flag) current_char='>';       /* flag set in Ph4 */
                 else
                 case '>':           {if(look_ahead==current_char) next_char(); /* >> and << */
                     if(look_ahead=='=') next_char();    /*  >= >>= <= <<=  */
                     return(OPERATOR);}}                 /* compare & shift */

            case Quotes:{E_CHAR quote_char=current_char;             /* "_" '_' and <_> */
             do if(look_ahead=='\\') {next_char(); next_char();}  /* \n etc */
             while(next_char()!=quote_char && current_char!=EOF); /* match? */
             return(header_flag? HEADER: quote_char=='"'? STRING: CHARACTER);}

            case '/':    if(look_ahead=='*')                         /* start a comment */
              {next_char();                            /* kill current /, */
                 while(next_char()!='*' || look_ahead!='/')  /* and first * */
                     if(current_char==EOF) break;        /* don't lock loop */
                 next_char();    token_chars[0]=' ';     /* replace comment */
                 token_length=1; token_chars[1]='\0';    /* by single space */
             case WhiteH:     return(WHITE_HORIZONTAL);               /* may consolidate */
             case WhiteV:     return(WHITE_VERTICAL);                 /* in later phases */

             case '-':        if(look_ahead=='>') {next_char(); return(OPERATOR);} /* -> */
             case Double:     if(look_ahead==current_char){next_char(); return(OPERATOR);}}
            case Single: if(look_ahead=='=') next_char();            /* finds op= forms */
            case Simple: return(OPERATOR);                           /* deal with ++ += */

            case Others: return(OTHER);                              /* few unused char */
            case EOF:    return(END_OF_FILE);                        /* want EOF repeat */
            default:     return(UNKNOWN);}}                          /* is non printing */

/****************************************************************************/
TOKEN_CLASS print_token(/*void*/)          /* application specific function */
/****************************************************************************/

{current_token=next_token();    /* alter function to print selected info */

    if(current_token<END_OF_FILE)

      {switch (current_token)
         {
         case HEADER:
            token_count-=2;
            if (echo_tokens)
              printf("    #include %s shouldn't have counted; subtracting\n",
                   token_chars);
            break;
          case STRING:  
            if (echo_tokens)
                    printf("%5d-%d: %d [%s]\n", token_count+1, token_count+token_length,
                                current_token, token_chars);
                token_count += token_length;
                break;
          default:
                token_count++;
            if (echo_tokens)
                    printf("%5d: %d [%s]\n", token_count, current_token, token_chars);
          }
     }

    return(current_token);}

/****************************************************************************/
/* int main(int argc, char *argv[])/* program expects file-name as argument */
main(argc, argv) int argc; char *argv[];
/****************************************************************************/

{if((argc<2)||(argc>3)||((argc==3)&&(strcmp(argv[1],"-p"))))

   {fputs("Usage: tokount [-p] file.c\n",stderr); return(/*EXIT_FAILURE*/1);}

    echo_tokens=(argc==3);                     /* Print tokens if -p option */

    if(!(program_file=fopen(argv[argc-1], "r"))) {perror(argv[argc-1]); return(errno);}

        /*** do not alter this loop, put changes in print_token ***/

    do {while(print_token()==WHITE_VERTICAL||current_token==WHITE_HORIZONTAL);
        if(strcmp(token_chars, "#"))       continue;
        while(print_token()==WHITE_HORIZONTAL);
        if(strcmp(token_chars, "include")) continue;   /* double negative!! */
        header_flag=TRUE;
        while(print_token()==WHITE_HORIZONTAL);
        header_flag=FALSE;}
    while(current_token!=END_OF_FILE);

    if(ferror(program_file)) {perror(argv[argc-1]); return(errno);}

    printf("%d\n", token_count);

    return(/*EXIT_SUCCESS*/0);}

/********************************* the end **********************************/



Sun, 08 Oct 1995 22:42:18 GMT  
 
 [ 1 post ] 

 Relevant Pages 

1. CONCISENESS CONTEST ROUND IV -- tokount.c (new)

2. Conciseness Contest Round 5 -- tokount.c (the token counter)

3. CONCISENESS CONTEST ROUND 3 -- tokount.c

4. Conciseness Contest Round 2: tokount.c (token counter)

5. Programming Contest: Contest Clarification

6. CONCISENESS CONTEST ROUND V Winner #2

7. CONCISENESS CONTEST ROUND V Winner #2 (commented)

8. CONCISENESS CONTEST ROUND V Winner #1

9. CONCISENESS CONTEST ROUND V Results

10. Conciseness Contest Round 5 -- The Problem

11. Conciseness Contest Round 5 -- Announcement and Rules

12. CONCISENESS CONTEST ROUND 4 - the final word

 

 
Powered by phpBB® Forum Software