Here's how you could do it:
/*
 * Self-test fixtures. The scanner in main() is meant to be run on this very
 * file, so the declarations and comments below exist only as tricky input:
 *   - c, d, e : character literals holding an escaped double quote, an
 *               escaped single quote and an octal escape;
 *   - s       : a string containing a C comment look-alike and escaped quotes;
 *   - t       : a string containing C++ comment look-alikes and escapes;
 *   - p       : an empty string followed by an empty C++ comment;
 *   - dd      : a multi-character constant that spells a C comment opener;
 *   - the six bare comments: very short and star-heavy C comments.
 * Keep them byte-for-byte. Any comment added or edited here (this one
 * included) shows up in what the program prints for this file, so previously
 * recorded sample output will no longer match exactly.
 */
/* recognize '...' otherwise see " as start of string: */
int c='\"', d='\'', e = '\012'; // comment line 3
/* recognize "..." otherwise see comments here: */
char s[] = "abc/*not a comment*/efg\"ZZ\'";
char t[] = "ABC//not a comment//EFG\x012\/\/";
char *p = ""; //
int dd = '/*'; // comment line 13
/*/*/
/**/
/*Z*/
/***/
/****/
/**A**/
#include <stdio.h>
#include <stdlib.h>
/*
 * Scan the C source file named by argv[1] and print every comment and every
 * string or character literal found in it, each wrapped in a pair of tag
 * lines (<C comment>, <C++ comment>, <String literal>, <Char literal>).
 *
 * The scanner is a small state machine driven by the current character plus
 * one character of lookahead. Delimiters (comment markers and quotes) are not
 * echoed; everything between them is echoed verbatim, including backslash
 * escapes inside literals.
 *
 * Returns EXIT_SUCCESS after a complete scan, and EXIT_FAILURE when the
 * argument count is wrong, the file cannot be opened, or a read error occurs.
 * A warning is written to stderr if the input ends inside an unterminated
 * C comment or literal.
 *
 * Not handled: backslash-newline line splicing and trigraphs.
 *
 * NOTE: sample output recorded for the original revision of this program no
 * longer matches exactly, because the comments and string literals of this
 * revision are themselves reported when the program is run on its own source.
 */
int main(int argc, char** argv)
{
    enum {
        INITIAL,     /* ordinary code, outside any comment or literal */
        CCOMMENT1,   /* at the star that completes a C comment opener */
        CCOMMENT2,   /* inside the body of a C comment */
        CCOMMENT3,   /* at the slash that completes a C comment closer */
        CPPCOMMENT1, /* at the second slash of a C++ comment opener */
        CPPCOMMENT2, /* inside the body of a C++ comment */
        STRING1,     /* inside a string literal */
        STRING2,     /* at the character following a backslash in a string */
        CHAR1,       /* inside a character literal */
        CHAR2,       /* at the character following a backslash in a char literal */
    } state = INITIAL;
    FILE *f;
    int cur;
    int status = EXIT_SUCCESS;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <source-file>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "scanner");
        return EXIT_FAILURE;
    }

    /* Plain text mode; only the standard mode strings are portable. */
    f = fopen(argv[1], "r");
    if (f == NULL) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    /*
     * Every character, including the very last one, is processed exactly
     * once as cur; on the last iteration the lookahead is simply EOF, which
     * never compares equal to a real character.
     */
    cur = fgetc(f);
    while (cur != EOF) {
        int next = fgetc(f);

        switch (state) {
        case INITIAL:
            if (cur == '/' && next == '*') {
                state = CCOMMENT1;
                printf("<C comment>\n");
            } else if (cur == '/' && next == '/') {
                state = CPPCOMMENT1;
                printf("<C++ comment>\n");
            } else if (cur == '"') {
                state = STRING1;
                printf("<String literal>\n");
            } else if (cur == '\'') {
                state = CHAR1;
                printf("<Char literal>\n");
            }
            break;

        case CCOMMENT1:
            /* Swallow the star of the opener so that it cannot be paired
               with a following slash and mistaken for a closer. */
            state = CCOMMENT2;
            break;

        case CPPCOMMENT1:
            /* Swallow the second slash of the opener. */
            state = CPPCOMMENT2;
            break;

        case CCOMMENT2:
            if (cur == '*' && next == '/') {
                state = CCOMMENT3;
                printf("\n</C comment>\n");
            } else {
                putchar(cur);
            }
            break;

        case CCOMMENT3:
            /* Swallow the slash of the closer. */
            state = INITIAL;
            break;

        case CPPCOMMENT2:
            if (cur == '\n') {
                state = INITIAL;
                printf("\n</C++ comment>\n");
            } else {
                putchar(cur);
            }
            break;

        case STRING1:
            if (cur == '"') {
                state = INITIAL;
                printf("\n</String literal>\n");
            } else {
                if (cur == '\\') {
                    state = STRING2;
                }
                putchar(cur);
            }
            break;

        case STRING2:
            /* Escaped character: echo it without interpreting it, so an
               escaped quote does not end the literal. */
            state = STRING1;
            putchar(cur);
            break;

        case CHAR1:
            if (cur == '\'') {
                state = INITIAL;
                printf("\n</Char literal>\n");
            } else {
                if (cur == '\\') {
                    state = CHAR2;
                }
                putchar(cur);
            }
            break;

        case CHAR2:
            /* Escaped character: echo it without interpreting it. */
            state = CHAR1;
            putchar(cur);
            break;
        }

        cur = next;
    }

    if (ferror(f)) {
        perror(argv[1]);
        status = EXIT_FAILURE;
    } else if (state == CPPCOMMENT2) {
        /* A C++ comment on a final line without a newline ends at EOF. */
        printf("\n</C++ comment>\n");
    } else if (state != INITIAL) {
        fprintf(stderr,
                "warning: %s ends inside an unterminated comment or literal\n",
                argv[1]);
    }

    fclose(f);
    return status;
}
Output of this program when run on its own source code:
<C comment>
recognize '...' otherwise see " as start of string:
</C comment>
<Char literal>
\"
</Char literal>
<Char literal>
\'
</Char literal>
<Char literal>
\012
</Char literal>
<C++ comment>
comment line 3
</C++ comment>
<C comment>
recognize "..." otherwise see comments here:
</C comment>
<String literal>
abc/*not a comment*/efg\"ZZ\'
</String literal>
<String literal>
ABC//not a comment//EFG\x012\/\/
</String literal>
<String literal>
</String literal>
<C++ comment>
</C++ comment>
<Char literal>
/*
</Char literal>
<C++ comment>
comment line 13
</C++ comment>
<C comment>
/
</C comment>
<C comment>
</C comment>
<C comment>
Z
</C comment>
<C comment>
*
</C comment>
<C comment>
**
</C comment>
<C comment>
*A*
</C comment>
<String literal>
rt
</String literal>
<Char literal>
/
</Char literal>
<Char literal>
*
</Char literal>
<String literal>
<C comment>\n
</String literal>
<Char literal>
/
</Char literal>
<Char literal>
/
</Char literal>
<String literal>
<C++ comment>\n
</String literal>
<Char literal>
"
</Char literal>
<String literal>
<String literal>\n
</String literal>
<Char literal>
\'
</Char literal>
<String literal>
<Char literal>\n
</String literal>
<C comment>
skip * in /* and 2nd / in //
</C comment>
<Char literal>
*
</Char literal>
<Char literal>
/
</Char literal>
<String literal>
\n</C comment>\n
</String literal>
<String literal>
%c
</String literal>
<C++ comment>
skip / in */
</C++ comment>
<Char literal>
\n
</Char literal>
<String literal>
\n</C++ comment>\n
</String literal>
<String literal>
%c
</String literal>
<Char literal>
"
</Char literal>
<String literal>
\n</String literal>\n
</String literal>
<Char literal>
\\
</Char literal>
<String literal>
%c
</String literal>
<String literal>
%c
</String literal>
<C++ comment>
skip escaped character
</C++ comment>
<String literal>
%c
</String literal>
<Char literal>
\'
</Char literal>
<String literal>
\n</Char literal>\n
</String literal>
<Char literal>
\\
</Char literal>
<String literal>
%c
</String literal>
<String literal>
%c
</String literal>
<C++ comment>
skip escaped character
</C++ comment>
<String literal>
%c
</String literal>