2007年11月28日星期三

lex/yacc系列:在lex中处理多个输入文件

使用lex的文件尾处理程序,可以让程序处理多个输入文件。

当yylex()到达文件结尾时，它会调用yywrap()，该函数返回0或1。如果返回1，表示程序已经完成，没有更多的输入；如果返回0，词法分析程序就假设yywrap()已经打开了另一个文件并让yyin指向它，然后继续从yyin读取数据。默认的yywrap()总是返回1。通过提供自己的yywrap()版本，可以让程序依次读取命令行传入的所有文件，每次读取一个。

下面是可以处理多个文件的单词计数程序的完整源代码:

%{
/* Character, word and line counts for the input currently being scanned;
   incremented by the scanner rules and reset between files by yywrap(). */
unsigned long charCount = 0, wordCount = 0, lineCount = 0;

/* Remove any macro named yywrap so that the yywrap() function defined in the
   user-code section is the one that gets used.
   NOTE(review): presumably needed because some lex implementations supply
   yywrap as a macro -- confirm for the lex in use. */
#undef yywrap
%}

word [^ \t\n]+
eol \n
 /* word: one or more characters other than space, tab and newline; eol: a single newline. */

%%
{word} { /* one more word; every character of it is counted */ ++wordCount; charCount += yyleng; }
{eol} { /* a newline is one character and ends a line */ ++charCount; ++lineCount; }
. ++charCount; /* any other single character (the patterns above leave only space and tab) */

%%
/* Input file names: main() points this at argv + 1, and yywrap() walks it
   until it reaches a null entry. */
char **fileList;

/* Number of file arguments (set by main()) and the index of the next entry
   of fileList to try; fileList[currentFile - 1] is used as the name of the
   file being reported. */
unsigned int nFiles;
unsigned int currentFile = 0;

/* Grand totals of characters, words and lines over the files already
   reported; printed on the final "total" line. */
unsigned long totalCC = 0, totalWC = 0, totalLC = 0;

/*
 * Entry point of the word-count program.
 *
 * - No file argument: yyin is left untouched, so the scanner reads its
 *   default input, and only the three counts are printed.
 * - One file argument: that file is opened here and only the three counts
 *   are printed.
 * - Several file arguments: yywrap() opens them one after another; a line
 *   is printed per file, followed by a "total" line.
 *
 * Returns 0 on success and 1 when no input file could be opened.
 */
int
main(int argc, char *argv[])
{
 /* argv[argc] is a null pointer, so the list stays null-terminated even
    when argc is 0 and there is no program name to skip. */
 fileList = (argc > 0) ? argv + 1 : argv;
 nFiles = (argc > 1) ? (unsigned)(argc - 1) : 0;

 if (nFiles == 1) {
  FILE *file;

  /* Mark the only file as consumed so that yywrap() finds nothing more
     to open when the scanner reaches end of file. */
  currentFile = 1;
  file = fopen(argv[1], "r");
  if (!file) {
   fprintf(stderr, "could not open file %s\n", argv[1]);
   return 1;
  }
  yyin = file;
 }

 /*
  * Several files: let yywrap() open the first readable one. A non-zero
  * result means none of them could be opened (yywrap() has already
  * reported each failure), so stop here instead of letting yylex() read
  * its default input and report those counts under the last file name.
  */
 if (nFiles > 1 && yywrap())
  return 1;

 yylex();

 if (nFiles > 1) {
  /* yywrap() reports every file except the last one scanned; report
     that one here and then the grand totals. */
  printf("%8lu %8lu %8lu %s\n", lineCount, wordCount, charCount,
         fileList[currentFile - 1]);
  totalCC += charCount;
  totalWC += wordCount;
  totalLC += lineCount;
  printf("%8lu %8lu %8lu total\n", totalLC, totalWC, totalCC);
 } else {
  printf("%8lu %8lu %8lu\n", lineCount, wordCount, charCount);
 }

 return 0;
}

/*
 * End-of-input hook: called by yylex() when yyin is exhausted, and once by
 * main() to open the first of several files.
 *
 * Tries the remaining entries of fileList in order, reporting every entry
 * that cannot be opened on stderr. Only when a readable file is found are
 * the counts of the file just finished printed and added to the totals,
 * the counters reset, the old yyin closed and yyin switched to the new
 * file. When nothing more can be opened, the counters and currentFile are
 * left alone so that the caller still reports the file that was really
 * scanned last, rather than an all-zero line for a file that never opened.
 *
 * Returns 0 when yyin now refers to a newly opened file, 1 when there is no
 * more input.
 */
int
yywrap(void)
{
 unsigned next = currentFile;
 FILE *file = NULL;

 /* Look for the next entry that can actually be opened. */
 while (!file && fileList[next]) {
  file = fopen(fileList[next], "r");
  if (!file)
   fprintf(stderr, "could not open file %s\n", fileList[next]);
  ++next;
 }

 if (!file) {
  /* Nothing has been opened at all yet: still move past the entries
     that were tried, so that code indexing fileList[currentFile - 1]
     stays inside the list. */
  if (currentFile == 0)
   currentFile = next;
  return 1;
 }

 /* A previous file was being scanned in multi-file mode: report it and
    start the counters afresh for the new one. */
 if ((currentFile != 0) && (nFiles > 1)) {
  printf("%8lu %8lu %8lu %s\n", lineCount, wordCount, charCount,
         fileList[currentFile - 1]);
  totalCC += charCount;
  totalWC += wordCount;
  totalLC += lineCount;
  charCount = wordCount = lineCount = 0;
  fclose(yyin);
 }

 yyin = file;
 currentFile = next;
 return 0;
}

1 条评论:

匿名 说...

谢谢博主,写的很好