[This is about all I'm going to accept on this thread, and is about as
far as code is allowed to drift. -mod]
Quote:
>I don't know about a tool to specifically convert C++ to C comment styles
>but I do know a quick way to do it using an editor such as emacs which
>supports macros. Basically, record a macro which finds the string "//",
>deletes it and inserts "/*" executes the goto end of line command and
>inserts "*/".
This is just s://.*$:/* & */:
in ed or sed. The whole thing is probably a one-liner in PERL.
However, you should be aware that it has some problems:
(1) If // appears in a string or comment it will be trashed.
char *URL = "http://www.foo.bar.com/~zoo/snark.html";
will be trashed to
char *URL = "http:/* www.foo.bar.com/~zoo/snark.html"; */
/* Here we implement the MATLAB // operator */
will be trashed to
/* Here we implement the MATLAB /* operator */ */
(2) If /* appears in a // comment things will go wrong.
// Delete all files matching the pattern ./*/.core
will be trashed to
/* Delete all files matching the pattern ./*/.core */
which ends too early.
There really is _no_ substitute for a tool which understands C lexing.
Here's a lexer for ANSI C.  To quote (part of) the note that came with it:
"[...] me from outside HP (via Vick Khera of CMU), and I have sort of fixed it
up to conform to the latest draft of the standard (Dec 88). I'm not
sure if I missed out on anything.."
You will need to modify it to recognise // comments, which is as easy
as adding a line
"//" { cpp_comment(); }
and writing a suitable cpp_comment() function that
- writes /*
- copies characters to the end of the line,
inserting a ' ' before and after each '/'
- writes */
I leave this as an exercise for the reader.
---------------- scan.l ----------------
O [0-7]
D [0-9]
L [a-zA-Z_]
H [a-fA-F0-9]
E [Ee][+-]?{D}+
FS [fFlL]
IS ([uU][lL]?|[lL][uU]?)
%{
/*
 * Pattern abbreviations defined just before this block and used by the
 * rules section:
 *   O   one octal digit
 *   D   one decimal digit
 *   L   one letter or underscore (identifier character)
 *   H   one hexadecimal digit
 *   E   exponent part: e/E, optional sign, one or more decimal digits
 *   FS  floating suffix: one of f F l L
 *   IS  integer suffix: u/U optionally followed by l/L, or l/L optionally
 *       followed by u/U
 */
#include <stdio.h>
/* Fallback value used only when <stdio.h> does not provide FILENAME_MAX. */
#ifndef FILENAME_MAX
#define FILENAME_MAX 1024
#endif
/* Name of the current source file; line_number() stores the quoted file
   name of a '#' line directive here. */
char yyfilename[FILENAME_MAX+1];
/* NOTE(review): scanaux.h is not visible here; it presumably declares
   input_echo and lookup_tdname(), which this file uses -- confirm. */
#include "scanaux.h"
/* NOTE(review): y.tab.h presumably supplies the token codes (AUTO, CONSTANT,
   IDENTIFIER, ...) returned by the rules -- confirm. */
#include "y.tab.h"
/* Remove any existing input macro (presumably the one lex generates) so that
   the input() function defined later in this file is the one called. */
#undef input
extern int input ();
%}
%%
	/* Scanner rules for ANSI C.  Each rule returns a token code to the   */
	/* caller, except that '#' lines, comments, white space and any       */
	/* otherwise unmatched character are consumed without returning.      */
	/* Keywords are listed before the identifier rule, and multi-         */
	/* character operators before their single-character prefixes.        */
"#" { /* '#' directive line: consumed by line_number() */ line_number(); }
"/*" { /* comment opener: comment() consumes through the closing delimiter */ comment(); }
"auto" { return AUTO; }
"break" { return BREAK; }
"case" { return CASE; }
"char" { return CHAR; }
"const" { return CONST; }
"continue" { return CONTINUE; }
"default" { return DEFAULT; }
"do" { return DO; }
"double" { return DOUBLE; }
"else" { return ELSE; }
"enum" { return ENUM; }
"extern" { return EXTERN; }
"float" { return FLOAT; }
"for" { return FOR; }
"goto" { return GOTO; }
"if" { return IF; }
"int" { return INT; }
"long" { return LONG; }
"register" { return REGISTER; }
"return" { return RETURN; }
"short" { return SHORT; }
"signed" { return SIGNED; }
"sizeof" { return SIZEOF; }
"static" { return STATIC; }
"struct" { return STRUCT; }
"switch" { return SWITCH; }
"typedef" { return TYPEDEF; }
"union" { return UNION; }
"unsigned" { return UNSIGNED; }
"void" { return VOID; }
"volatile" { return VOLATILE; }
"while" { return WHILE; }
{L}({L}|{D})* { /* identifier: check_type() picks TYPE_NAME or IDENTIFIER */ return check_type(); }
0[xX]{H}+{IS}? { return CONSTANT; /* hexadecimal constant */}
0{O}*{IS}? { return CONSTANT; /* octal constant */}
[1-9]{D}*{IS}? { return CONSTANT; /* decimal constant */}
{D}+{E}{FS}? { return CONSTANT; /* floating constant: digits and exponent, no point */}
{D}*"."{D}+({E})?{FS}? { return CONSTANT; /* floating constant: digits required after the point */}
{D}+"."{D}*({E})?{FS}? { return CONSTANT; /* floating constant: digits required before the point */}
'(\\.|[^\\'])+' { return CONSTANT; /* L'c' NOT handled */}
\"(\\.|[^\\"])*\" { return STRING_LITERAL; /* L"s" NOT handled */}
"..." { return ELLIPSIS; }
">>=" { return RIGHT_ASSIGN; }
"<<=" { return LEFT_ASSIGN; }
"+=" { return ADD_ASSIGN; }
"-=" { return SUB_ASSIGN; }
"*=" { return MUL_ASSIGN; }
"/=" { return DIV_ASSIGN; }
"%=" { return MOD_ASSIGN; }
"&=" { return AND_ASSIGN; }
"^=" { return XOR_ASSIGN; }
"|=" { return OR_ASSIGN; }
">>" { return RIGHT_OP; }
"<<" { return LEFT_OP; }
"++" { return INC_OP; }
"--" { return DEC_OP; }
"->" { return PTR_OP; }
"&&" { return AND_OP; }
"||" { return OR_OP; }
"<=" { return LE_OP; }
">=" { return GE_OP; }
"==" { return EQ_OP; }
"!=" { return NE_OP; }
";" { return ';'; }
"{" { return '{'; }
"}" { return '}'; }
"," { return ','; }
":" { return ':'; }
"=" { return '='; }
"(" { return '('; }
")" { return ')'; }
"[" { return '['; }
"]" { return ']'; }
"." { return '.'; }
"&" { return '&'; }
"!" { return '!'; }
"~" { return '~'; }
"-" { return '-'; }
"+" { return '+'; }
"*" { return '*'; }
"/" { return '/'; }
"%" { return '%'; }
"<" { return '<'; }
">" { return '>'; }
"^" { return '^'; }
"|" { return '|'; }
"?" { return '?'; }
[ \t\v\n\f] { /* white space: discarded */ }
. { /* ignore bad characters */ }
%%
/* Column position within the current input line.  input() resets it to 0
   after a newline, advances it to the next multiple of 8 after a tab, and
   adds 1 for every other character. */
int yycolumn = 0;

/*
 * yywrap - end-of-input hook required by lex.
 *
 * Always returns 1, which by lex convention means there is no further
 * input file to continue with.
 *
 * The return type is spelled out: implicit int was removed from the
 * language in C99 and is rejected by current compilers.
 */
int yywrap(void)
{
    return 1;
}
/*
 * inpeek - report the next input character without consuming it.
 *
 * Returns the character that the next call to input() would deliver,
 * or 0 when yyin is at end of file.
 */
int inpeek()
{
    int next;

    /* A non-empty push-back stack takes priority: its top is next. */
    if (yysptr > yysbuf)
        return yysptr[-1];

    /* Otherwise read one character from the stream and put it back. */
    next = getc(yyin);
    if (next != EOF) {
        ungetc(next, yyin);
        return next;
    }
    return 0;
}
/*
 * input - deliver the next input character and keep position counters.
 *
 * Pushed-back characters are returned first; otherwise one character is
 * read from yyin and, when input_echo is set, echoed with output().
 * Every delivered character updates yylineno and yycolumn.
 *
 * Returns the character, or 0 at end of file.
 */
int input()
{
    if (yysptr <= yysbuf) {
        /* nothing pushed back: fetch a fresh character */
        yytchar = getc(yyin);
        if (yytchar == EOF) return 0;
        if (input_echo) output(yytchar);
    } else {
        /* pop the most recently pushed-back character */
        yytchar = *--yysptr;
    }

    /* maintain the line and column counters */
    switch (yytchar) {
    case '\n':
        yylineno++;
        yycolumn = 0;
        break;
    case '\t':
        /* advance to the next multiple of 8 */
        yycolumn += 8 - yycolumn%8;
        break;
    default:
        yycolumn++;
        break;
    }
    return yytchar;
}
/*
 * comment - consume the remainder of a C comment.
 *
 * Called from the scanner rule that matches the comment opener, so the
 * opener has already been read.  Characters are consumed with input()
 * up to and including the closing star-slash pair.
 *
 * If the input ends first (input() returns 0), a diagnostic giving the
 * line and column recorded on entry is written to stderr and the
 * program exits with EXIT_FAILURE.
 *
 * Returns 0; callers ignore the value.  The return type stays int so the
 * definition remains compatible with an implicit declaration at the call
 * site, but it is now written out (implicit int was removed in C99) and
 * the function no longer falls off its end without a value.
 */
int comment(void)
{
    int c;
    int oldline = yylineno, oldcol = yycolumn;

    /* Stop on end of input, or on a '*' that is directly followed by '/'. */
    while ((c = input()) != 0 && (c != '*' || inpeek() != '/')) {}
    if (c == 0) {
        fprintf(stderr,
            "Unterminated /*comment began at column %d of line %d\n",
            oldcol, oldline);
        exit(EXIT_FAILURE);
    }
    /* consume the '/' that inpeek() saw */
    (void) input();
    return 0;
}
/* NOTE(review): READWHILE is not used anywhere in the visible file; it is
   kept unchanged in case something else depends on it. */
#define READWHILE(cond) while(cond) c = input();

/*
 * line_number - consume a '#' line and pick up line/file information.
 *
 * Called from the scanner rule that matches '#', so that character has
 * already been read.  The rest of the line is read with input():
 *
 *   - If the first non-blank character is 'i' or 'p' (ident, pragma),
 *     the line is skipped.
 *   - If it is a digit, the decimal number is taken as a line number; when
 *     positive, yylineno is set to one less, so that the newline ending
 *     the directive brings it to the stated value.  An optional file name
 *     in double quotes is copied to yyfilename.
 *   - Anything else on the line is skipped.
 *
 * A file name whose closing quote is missing before end of input produces
 * a "bad directive" message on stderr and exits with EXIT_FAILURE.
 *
 * Fixes relative to the previous version:
 *   - the skip-to-newline loops stop at end of input (input() returning 0)
 *     instead of looping forever on a final line with no newline;
 *   - a file name longer than yyfilename is truncated instead of being
 *     written past the end of the array;
 *   - the character is held in an int, matching input()'s return type;
 *   - the return type is explicit and a value is always returned.
 *
 * Returns 0; callers ignore the value.
 */
int line_number(void)
{
    int c;
    int oldline = yylineno;

    /* skip spaces and tabs */
    do c = input(); while (c == ' ' || c == '\t');

    if (c == 'i' /*ident*/ || c == 'p' /*pragma*/) {
        /* discard the rest of the line, but give up at end of input */
        while (c != '\n' && c != 0) c = input();
        return 0;
    }

    /* check for a line number */
    if (c >= '0' && c <= '9') {
        int line_num = 0;

        while (c >= '0' && c <= '9') {
            line_num = line_num * 10 + (c - '0');
            c = input();
        }
        if (line_num > 0) yylineno = line_num - 1;

        /* skip spaces and tabs */
        while (c == ' ' || c == '\t') c = input();

        /* check for a file name in double quotes */
        if (c == '"') {
            char *yf = yyfilename;
            /* last slot is reserved for the terminating '\0' */
            char *yf_last = yyfilename + sizeof yyfilename - 1;

            while ((c = input()) != 0 && c != '"') {
                /* keep reading to the closing quote, but store only what fits */
                if (yf < yf_last) *yf++ = (char) c;
            }
            *yf = '\0';
            if (c == 0) {
                fprintf(stderr, "bad directive at line %d\n", oldline);
                exit(EXIT_FAILURE);
            }
            /* skip any remaining spaces and tabs */
            do c = input(); while (c == ' ' || c == '\t');
        }
    }

    /* The ANSI standard admits no characters after the file name */
    /* GCC stuffs other numbers there, so play safe */
    while (c != '\n' && c != 0) c = input();
    return 0;
}
/*
 * check_type - classify the identifier just matched by the scanner.
 *
 * Passes yytext and yyleng to lookup_tdname(); a nonzero result yields
 * TYPE_NAME, otherwise IDENTIFIER.
 */
int check_type()
{
    if (lookup_tdname(yytext, yyleng)) {
        return TYPE_NAME;
    }
    return IDENTIFIER;
}
---------------- end of file ----------------
--
Australian citizen since 14 August 1996. *Now* I can vote the xxxs out!
Richard A. O'Keefe; http://www.cs.rmit.edu.au/~ok; RMIT Comp.Sci.