/* main.c  1992 dec 07  [gh]
+-----------------------------------------------------------------------------
| Abstract:
|    General purpose filter and file cleaning program.  It is named after an
|    excellent Norwegian detergent.
|
| Authorship:
|    Copyright (c) 1987-1995 Gisle Hannemyr.
|    This program is free software;  you can redistribute it and/or modify
|    it under the terms of the GNU General Public License, as published by
|    the Free Software Foundation. See the file "copying.txt" for details.
|
|    If you intend to distribute changed versions of this module, please add
|    a comment below describing what you have changed/added and why, and email
|    the file to me.  I maintain the module, and shall appreciate copies of
|    bug fixes and improved versions.
|    My email address is: <gisle@hannemyr.no>.
|
| Acknowledgments:
|    NeXT codes courtesy of Bjorn Asle Valde (valde@bergen.siodata.no)
|    SYS V.2 rename courtesy of Robert Andersson (ra@isncr.is.se)
|    VMS rename courtesy of Bjorn Larsen.
|    Thanks to Inge Arnesen for finding & fixing a bug, (and to Nils-Eivind
|    Naas for bringing it to my attention).
|
| History:
|    2.8 11 Aug 95 [gh] Fixed '%' inserted for leading space in ver. 2.7.
|    2.7 10 Aug 95 [gh] Converted it to ANSI C.
|    2.6 25 Aug 93 [gh] Made it handle CR sp sp sp NL properly
|    2.5  2 Jun 92 [gh] Made it easier to locate -g stuff.
|    2.4 13 Feb 92 [gh] Fixed bug in -l option parsing.
|    2.3 12 Feb 92 [gh] Added n switch to -l option.
|    2.2  6 Jan 92 [gh] Added -l option
|    2.1 29 Dec 89 [gh] Fixed pipe bug, added -v option, misc. speedups
|    2.0 22 Jan 89 [gh] Made it a filter, environment lookup, fixed pathbug.
|    1.6  7 Nov 88 [gh] Added ANSI interpretation.
|    1.5  6 Aug 88 [gh] Hacked it to not clobber original date.
|    1.4  7 Jul 88 [gh] Added general purpose conversion table.
|    1.3 13 Nov 87 [gh] Fixed find first so it works on true blue too.
|    1.2 11 Nov 87 [gh] Compensated for Turbo-C bug (isspace > 128 is bogus)
|    1.1 31 Aug 87 [gh] Added VMS.
|    1.0 30 Aug 87 [gh] Wrote it.
|
| Portability:
|    So far, PEP has been tested under MS-DOS and Unix.
|    The implementation dependencies are:
|      * How the compiler identifies itself and the operating system.
|      * How microcomputer compilers simulate the UNIX end-of-line terminator.
|      * How operating systems expand command line wildcards (e.g. Unix does).
|
|    These dependencies are implemented using IFDEFs in pep.h. You should make
|    sure that exactly one of the following symbols (macro names) is defined:
|     * __MSDOS__  -- For MS-DOS and derivatives
|     * __UNIX__   -- For BSD UNIX (SYS V.3 and generic too?)
|     * __VMS__    -- For VMS
|
|    In addition, the following symbols may be twiddled if desired:
|     * STRICMP    -- Define this if linker complains about missing "stricmp"
|     * SYSV2      -- For SYS V.2 UNIX    (if no "rename" in standard lib)
|     * __TURBOC__ -- For Borlands TURBOC (undefine it if you want PEP to
|                     change the date on the files it filters).
|     * VMSV1      -- For VAX C V.1.x VMS (if no "rename" in standard lib)
|
|    Most compilers already predefine a macro that identifies the target oper-
|    ating system.  Unfortunately, different vendors use slightly different
|    symbols.  Please add to the section headlined  "canonize predefined
|    macroes" (below) if your compiler requires it.
|
|    Note: PEP makes some assumptions about standard headers.  See the file
|          "header.txt" for details.
|
|    Btw. -- these are the compilers I have used to make pep:
|     * MS-DOS,    Microsoft C ver. 5.1
|                  Borland C++ ver. 4.0
|     * Unix,      gcc 2.95.2
|
| Environment:
|    PEP -- should point to directory with conversion tables.
|
| Bugs:
|    * I do not recompile PEP on all operating systems at each release.
|      Software rot may cause a particular version to need some tweaking.
|      Please mail me if you experience this -- but please read the enclosed
|      file "header.txt" first.
+---------------------------------------------------------------------------*/


/*---( Includes )-----------------------------------------------------------*/

#define  MAIN

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "pep.h"
#ifdef __VMS__
#include <types.h>
#include <stat.h>
#else
#include <sys/types.h>
#include <sys/stat.h>
#endif
#ifdef __MSDOS__
#include <dos.h>
#include "bdmg.h"
#include <fcntl.h>
#include <io.h>
#endif


/*---( defines )------------------------------------------------------------*/

#define VERSION     "2.8"               /* Version shown in the copyright line. */
#define TEMPFIL     "PEP.TMP"           /* Temporary output file used with -o;  */
                                        /* it is renamed over the input file.   */


/*---( constants )----------------------------------------------------------*/

/*
| The "about" text printed by the -a option.  The about and usage texts are
| kept as two separate strings because some compilers can't handle long
| string literals.
*/
static char about[] =
   "\n"
   "Pep is a program that converts almost anything into plain text files.\n\n"
   "This program is free software;  you can redistribute it and/or modify\n"
   "it under the terms of the GNU General Public License, as published by\n"
   "the Free Software Foundation. See the file \"copying.txt\" for details.\n\n"
   "Please report bugs!  Maintainer's email address is:\n"
   "   <gisle@hannemyr.no>.\n\n"
   "Updates will be posted on the following web sites:\n"
   "   http://folk.uio.no/gisle/enjoy/pep.html\n\n";
/* Alternative URL, currently not printed: http://www.hannemyr.com/enjoy/pep.html */

/*
| Quick option summary.  main() prints it for -h and for any option letter
| it does not recognize.
*/
static char usage[] =
   "   Usage: pep [options] [filename ...]\n"
   "   Valid options:\n"
   "\t-a          -- about pep\n"
   "\t-b          -- remove non ASCII-codes\n"
   "\t-c[size]    -- compress spaces to tabs\n"
   "\t-d+/-       -- convert to/from DEC 8 bit charset\n"
   "\t-e[guard]   -- interprete ANSI escape sequences\n"
   "\t-g[<file>]  -- get conversion table from file\n"
   "\t-h          -- print this quick summary\n"
   "\t-i+/-       -- convert to/from IBM-PC charset\n"
   "\t-k+/-       -- convert to/from \"Kman\" charset\n"
   "\t-l[n][size] -- split into lines of max. size\n"
   "\t-m+/-       -- convert to/from Macintosh charset\n"
   "\t-o[b]       -- write output to named files\n"
   "\t-p          -- display transformations and pause\n"
   "\t-s[size]    -- extract strings\n"
   "\t-t[size]    -- expand tabstops\n"
   "\t-u<term>    -- use special line terminator\n"
   "\t-v          -- terminate only paragraphs\n"
   "\t-w+/-       -- convert to/from WS document mode\n"
   "\t-x          -- expand non printing chars\n"
   "\t-z          -- zero parity bit\n";


/*---( variables )----------------------------------------------------------*/

static int  cright = TRUE;                  /* TRUE until the copyright line has been shown. */
static int  guardl = 0;                     /* ANSI overwrite guard level (-e); main accepts 0..2. */

long LCount = 0L;                           /* Global line count            */
int  LineXx =  0;                           /* Horizontal position on line. */
int  ITabSz =  8;                           /* Input  tabulator size (-t).  */
int  OTabSz =  8;                           /* Output tabulator size (-c).  */
int  StrSiz =  4;                           /* Min. length of extracted strings (-s). */
int  LinSiz =  0;                           /* Max line length (-l); 0 = no splitting. */
/* Line terminator: a character code, or -1 for CRLF, or -2 for none.       */
#if __UNIX__ || __VMS__
int  EndOLn = '\n';                         /* Under UNIX, default is LF;   */
#else
int  EndOLn = -1;                           /* else, the default is CRLF.   */
#endif

/* Option flags.  Most are bumped by the option letter that names them.     */
static  int  backup = FALSE;                /* Keep backup copy (-ob).      */
        int  bflagb = FALSE;                /* Binary wash.                 */
        int  cflagc = FALSE;                /* Compress                     */
        int  dflagd = FALSE;                /* DEC character set.           */
static  int  eflage = FALSE;                /* ANSI escape sequences.       */
        int  gflagg = FALSE;                /* General fold table           */
        int  iflagi = FALSE;                /* IBM character set.           */
        int  kflagk = FALSE;                /* Kman character set.          */
        int  lflagl = TRUE;                 /* Have blank line after par.; cleared by -ln. */
        int  mflagm = FALSE;                /* MAC character set.           */
#ifdef __VMS__
static  int  oflago = TRUE;                 /* VMS has no pipes.            */
#else
static  int  oflago = FALSE;                /* Write output on files.       */
#endif
static  int  pflagp = FALSE;                /* Pause.                       */
        int  sflags = FALSE;                /* String extraction.           */
        int  tflagt = FALSE;                /* Tab expansion                */
static  int  uflagu = FALSE;                /* Use special line terminator. */
        int  vflagv = FALSE;                /* Terminate only paragraphs.   */
        int  wflag0 = FALSE;                /* From WS doc. mode to 7-bit.  */
        int  wflag1 = FALSE;                /* From 7-bit to WS doc. mode.  */
        int  xflagx = FALSE;                /* Expand non printing chars (-x). */
        int  zflagz = FALSE;                /* Zero parity bit (-z, also set by -w-). */


/*---( housekeeping )-------------------------------------------------------*/


/*
| Abs: Write character code ii on stderr in readable form.
| Des: Codes outside 0..255 are shown as "<BOGUS>", 127 as "DEL" and 255 as
|      "M-del".  Any other code >= 128 gets an "M-" prefix and is then shown
|      as the code 128 below it; a code below space is shown as '^' followed
|      by the character 64 positions higher (so 1 becomes "^A").
*/
static void fputctl(int ii)
{
   int code = ii;

   if (code < 0 || code > 255) {
      fputs("<BOGUS>", stderr);
      return;
   }

   switch (code) {
      case 255: fputs("M-del", stderr); return;
      case 127: fputs("DEL",   stderr); return;
      default : break;
   } /* switch */

   if (code & 0x80) {               /* Meta: strip the top bit.            */
      fputs("M-", stderr);
      code &= 0x7f;
   }
   if (code < ' ') {                /* Control: map onto '@', 'A', ...      */
      putc('^', stderr);
      code += '@';
   }
   putc(code, stderr);
} /* fputctl */


/*
| Abs: Display the version and copyright line on stderr.
| Des: The platform name in parentheses is put together from whichever of
|      __MSDOS__, __UNIX__ and __VMS__ are defined at compile time.
| Sef: Sets the cright flag FALSE so that mess() does not display it again.
*/
static void showcright(void)
{
   /* Adjacent literals are concatenated by the compiler. */
   static const char platform[] = ""
#ifdef __MSDOS__
      "MSDOS"
#endif
#ifdef __UNIX__
      "Unix"
#endif
#ifdef __VMS__
      "VMS"
#endif
      ;

   cright = FALSE;
   fprintf(stderr,"pep  ver. %s(%s); Copyright (c) 1995 Gisle Hannemyr\n",VERSION, platform);
} /* showcright */


/*
| Abs: Display the error message numbered err on stderr and abort.
| Des: The copyright line is shown first if it has not been shown yet.
|      Numbers outside 1..10 give "unknown error".
| Ret: Never returns; the program exits with ERROR_EXIT.
*/
void mess(int err)
{
   static const char *const reason[] = {
      "unknown error",                /*  0: stands in for any bad number */
      "incompatible options",         /*  1 */
      "missing '+' or '-'",           /*  2 */
      "bad guard digit",              /*  3 */
      "invalid line terminator",      /*  4 */
      "no more room",                 /*  5 */
      "bad conversion table",         /*  6 */
      "no matching files",            /*  7 */
      "cannot pause reading stdin",   /*  8 */
      "sorry, not yet implemented",   /*  9 */
      "line length must be >= 10"     /* 10 */
   };
   int slot = ((err >= 1) && (err <= 10)) ? err : 0;

   if (cright) showcright();
   fprintf(stderr, "pep: %s\n", reason[slot]);
   exit(ERROR_EXIT);
} /* mess */


/*
| Abs: Count one more line and now and then report progress.
| Des: When output goes to files (oflago), the running count is rewritten
|      on stderr at every 64th line.
| Sef: LCount.
*/
void showprogress(void)
{
   ++LCount;
   if (!oflago) return;
   if ((LCount % 64) != 0) return;
   fprintf(stderr,"\r%ld ",LCount);
} /* showprogress */


/*
| Abs: Parse desired line terminator.
| Des: Only the first character of ss selects the terminator, except for
|      the numeric form where the leading digits are converted:
|      r        = CR
|      n        = NL
|      s        = RS  (code 30)
|      #        = CRLF (stored as -1)
|      -        = none (stored as -2)
|      <number> = use this as the terminator
|      Anything else aborts through mess(4), "invalid line terminator".
| Sef: EndOLn.
| Imp: The character is cast to unsigned char before it is handed to
|      isdigit(); passing a negative plain char to a <ctype.h> function is
|      undefined behaviour.
|      NOTE(review): the numeric value is not range checked here.
*/
static void getterm(char *ss)
{
   switch (*ss) {
      case 'n': EndOLn = '\n'; break;
      case 'r': EndOLn = '\r'; break;
      case 's': EndOLn =   30; break;
      case '#': EndOLn =   -1; break;
      case '-': EndOLn =   -2; break;
      default :
         if (isdigit((unsigned char)*ss)) EndOLn = atoi(ss);
         else mess(4);
         break;
   } /* switch */
} /* getterm */


/*
| Abs: Check the '+' or '-' that must follow a toggle option.
| Des: Any other character aborts through mess(2), "missing '+' or '-'".
| Ret: Non-zero if dd is '+' (toggle on), zero if it is '-' (toggle off).
*/
static BOOL swchk(char dd)
{
   BOOL on = (dd == '+');

   if (!on && (dd != '-')) mess(2);
   return(on);
} /* swchk */


/*
| Abs: Check fold direction and set up direction flags.
| Des: dd must be '+' or '-' (checked by swchk).  For '+', folding to an
|      8 bit character set, IFrst = 1, ILast = 0 and ILimit = 91;  for '-'
|      IFrst = 0, ILast = 1 and ILimit = 128.
| Sef: IFrst, ILast, ILimit.
*/
static void folddir(char dd)
{
   if (!swchk(dd)) {
      IFrst  =   0;
      ILast  =   1;
      ILimit = 128;
      return;
   }
   IFrst  =  1;
   ILast  =  0;
   ILimit = 91;
} /* folddir */


/*
| Abs: Show transformations.
| Des: Writes to stderr one line for each transformation selected by the
|      option flags.  With -g the conversion table cname is also read here,
|      through readtable(tabledir,cname,TRUE).
|      NOTE(review): readtable() is defined elsewhere; the TRUE argument
|      presumably makes it report what it reads -- confirm.
|      With -p the user is finally asked to hit RETURN, and the answer is
|      read from stdin.
| Imp: Moved from main() coz MS-C don't like big main functions.
|      The line terminator description and its closing ";" are only written
|      when not converting to WS document mode.  Previously the closing ";"
|      was written in that case too, on a line of its own.
*/
static void showoptions(char *tabledir, char *cname)
{
   int  cc;

   fputs("Transformations:\n",stderr);
   if (!vflagv) fputs(" * stripping all trailing spaces;\n",stderr);

   if (gflagg) {
      fprintf(stderr," * translating using table in file \"%s\";\n",cname);
      readtable(tabledir,cname,TRUE);
   } /* if gflagg */

#ifdef __TURBOC__
   fputs(" * preserving file dates;\n",stderr);
#endif
   if (oflago) fputs(" * output file is input file (not stdout);\n",stderr);
   if (backup) fputs(" * creating .BAK copies of input file;\n",stderr);
   if (bflagb) fprintf(stderr," * %sing non ASCII-codes;\n", xflagx ? "expand" : "remov");
   if (dflagd || iflagi || mflagm) {
      char *machine;

      /* The enclosing test guarantees that one of the three is set. */
      if      (dflagd) machine = "DEC";
      else if (iflagi) machine = "IBM";
      else             machine = "MAC";
      if (IFrst) fprintf(stderr," * Norwegian 7-bit ==> 8-bit %s charset", machine);
      else       fprintf(stderr," * 8-bit %s charset ==> norwegian 7-bit", machine);
      if (kflagk) fputs(",\n   using \\\\ for Norwegian \"OE\";\n",stderr);
      else        fputs(";\n",stderr);
   } /* if (dflagd || iflagi || mflagm) */
   if (sflags) fprintf(stderr," * extracting strings >= %d characters;\n",StrSiz);
   if (LinSiz) {
       fprintf(stderr," * splitting long lines into max. %d characters;\n",LinSiz);
       if (lflagl) fputs(" * separating paragraphs by blank lines;\n",stderr);
   }
   if (cflagc && tflagt) {
      fputs(" * optimizing out spaces from tabulation;\n",stderr);
      if (ITabSz != OTabSz) fprintf(stderr," * repacking tabs, size: %d --> %d;\n",ITabSz,OTabSz);
   } else {
      if (tflagt) fprintf(stderr," * expanding tabs, size: %d;\n",ITabSz);
      if (cflagc) fprintf(stderr," * inserting tabs, size: %d;\n",OTabSz);
   }
   if (eflage) fputs(" * interpreting ANSI escape sequences;\n",stderr);

   if (wflag0) fputs(" * WS doc. mode ==> 7-bit text files;\n",stderr);
   if (wflag1) fputs(" * 7-bit text files ==> WS doc. mode;\n",stderr);
   else {
      if (EndOLn ==  -2 ) fputs(" * removing line terminators",stderr);
      else {
         if (vflagv) fputs(" * terminating paragraphs only with ",stderr);
         else        fputs(" * terminating lines with ",stderr);
         if      (EndOLn ==  -1 ) fputs("CRLF",stderr);
         else if (EndOLn == '\n') fputs("LF",  stderr);
         else if (EndOLn == '\r') fputs("CR",  stderr);
         else if (EndOLn ==  30 ) fputs("RS",  stderr);
         else fputctl(EndOLn);
      } /* if EndOLn != -2 */
      /* Finish the terminator line started just above. */
#ifdef __VMS__
      fputs(" (Stream_LF);\n",stderr);
#else
      fputs(";\n",stderr);
#endif
   } /* if else not to WS */

   if (zflagz) fputs(" * zeroing parity bit;\n",stderr);
   fprintf(stderr," * %sing control characters.\n", xflagx ? "expand" : "remov");
   if (pflagp) {
      fputs("\nHit CTRL-C to abort, RETURN continue. ",stderr);
      cc = getc(stdin);
      if (cc == 3) exit(ERROR_EXIT);   /* Not necessary for cooked microes. */
   } /* if */
} /* showoptions */


/*
| Abs: Do a complete file: filter the stream Fdi onto the stream Fdo.
| Des: eflage non-zero selects doansi(guardl), otherwise doplain() is used.
|      Except on VMS both streams first get a 16 Kbyte buffer; failure to
|      get one aborts through mess(5), "no more room".
| Sef: Zeroes the line count (LCount).
| Imp: setvbuf() may only be used on a stream before any other operation
|      has been performed on it.  When several files are filtered onto
|      stdout, Fdo is the same stream every time, so stdout only gets its
|      buffer on the first call.
*/
static void dofile(int eflage)
{
#ifndef __VMS__
   static int stdoutdone = 0;         /* Has stdout got its big buffer?     */

   /* Speed things up using a bigger I/O buffer. */
   if (setvbuf(Fdi,NULL,_IOFBF,16384)) mess(5); /* No more room */
   if ((Fdo != stdout) || !stdoutdone) {
      if (setvbuf(Fdo,NULL,_IOFBF,16384)) mess(5); /* No more room */
      if (Fdo == stdout) stdoutdone = 1;
   }
#endif
   LCount = 0L;
   if (eflage) doansi(guardl); else doplain();
} /* dofile */


/*---( main )---------------------------------------------------------------*/

/*
| Abs: Program entry: parse the options, then filter stdin or named files.
| Des: Arguments are taken as options for as long as they start with '-'.
|      With no file arguments left, stdin is filtered onto stdout.  Else
|      each named regular file is filtered in turn;  with -o the output is
|      written to TEMPFIL, which then replaces the input file (with -ob the
|      input file is first renamed to a .BAK file).
| Ret: NORML_EXIT.  Fatal errors leave through mess() or exit(ERROR_EXIT).
| Imp: * cname starts out as NULL, as it is handed to showoptions() even
|        when no -g option has set it.
|      * A lone "-" argument no longer moves the argument pointer past the
|        terminating NUL.
|      * Output is flushed before the error check, so a write error in the
|        last buffer is noticed before the input file is replaced.
|      * On I/O error the streams are closed and the tempfile removed.
|      * The .BAK name is made by replacing the extension only if the last
|        '.' belongs to the filename, not to a directory in the path.
*/
int main(int argc, char **argv)
{
   struct stat statbuf;
   unsigned int statype;
   unsigned int statmod;
   char *tabledir;
   char *cname = NULL;       /* Table name (-g), later the current file.    */
   char *nname, *ss;
   int  cc;
   char dd;
#ifdef __MSDOS__
   char *cp;
   struct DIRLIST *first, *last;
#endif
#ifdef __TURBOC__
   struct ftime filtim;      /* Timestamp of the input file.                */
   int hh;
#endif

#ifdef __MSDOS__
   _fmode = O_BINARY;         /* Tell MS-C & Borland C not to expand CRLF.  */
   tabledir = argv[0];        /* Look for tables in startup-directory.      */

   if ((ss = strrchr(tabledir,DIRCHAR)) != NULL) *ss = '\0';
   else tabledir = NULL;
#else
   tabledir = NULL;           /* No such startup convention for other OS's. */
#endif

   inittable();

   argc--; argv++;           /* skip program name  */
   while (argc && (**argv == '-')) {
      (*argv)++;             /* skip initial '-'   */
      cc = **argv;           /* option letter      */
      if (cc != '\0') (*argv)++; /* skip option letter, but stay on the NUL */
      dd = **argv;           /* arg argument       */
      switch (cc) {
         case 'a': showcright(); fputs(about, stderr);
                   exit(NORML_EXIT);
         case 'b': bflagb++; break;
         case 'c': cflagc++; if (**argv != '\0') OTabSz = atoi(*argv); break;
         case 'd': dflagd++; folddir(dd); break;
         case 'e': eflage++; if (**argv != '\0') guardl = atoi(*argv);
                   tflagt++;
                   break;
         case 'g': gflagg++;
                   cname = *argv;
                   if (!*cname) {     /* No table named: list the tables.   */
                        showcright();
                        listtable(tabledir);
                        exit(NORML_EXIT);
                   } /* if */
                   break;
         /*   'h': OK to fall thru' to default */
         case 'i': iflagi++; folddir(dd); break;
         case 'k': kflagk++; folddir(dd); break;
         case 'l': LinSiz = 72;
                   if (**argv != '\0') {
                        if (**argv == 'n') { lflagl = FALSE; (*argv)++; }
                        if (**argv != '\0') LinSiz = atoi(*argv);
                   } /* if */
                   break;
         case 'm': mflagm++; folddir(dd); break;
         case 'o': oflago++; if (dd == 'b') backup++; break;
         case 'p': pflagp++; break;
         case 's': sflags++; if (**argv != '\0') StrSiz = atoi(*argv); break;
         case 't': tflagt++; if (**argv != '\0') ITabSz = atoi(*argv); break;
         case 'u': uflagu++; if (**argv != '\0') getterm(*argv); else mess(4); break;
         case 'v': vflagv++; break;
         case 'w': if (swchk(dd))  tflagt = wflag1 = TRUE;
                   else            zflagz = wflag0 = TRUE;
                   break;
         case 'x': xflagx++; break;
         case 'z': zflagz++; break;
         default : showcright();
                   fputs(usage,stderr);
                   exit(NORML_EXIT);
      } /* switch */
      argc--; argv++;
   } /* while options */

   if (pflagp && !argc) mess(8);
   if (LinSiz && eflage) mess(1);
   if (LinSiz && vflagv) mess(1);
   if ((dflagd + gflagg + iflagi + mflagm + zflagz) > 1) mess(1);
   if ((guardl < 0) || (guardl > 2)) mess(3);
   if (kflagk && !(dflagd + mflagm)) iflagi++;
   if (!uflagu && dflagd && IFrst) EndOLn = '\n'; /* To DEC (Dec uses LF as terminator) */
   if (!uflagu && iflagi && IFrst) EndOLn = -1;   /* To IBM (Uses  CRLF  as terminator) */
   if (!uflagu && mflagm && IFrst) EndOLn = '\r'; /* To Mac (Mac uses CR as terminator) */
   if (LinSiz && (LinSiz < 10)) mess(10);

   Fdi = stdin;              /* Default */
   Fdo = stdout;
#ifdef __MSDOS__
   setmode(fileno(Fdi),_fmode); /* Make sure that even braindamaged  MS-DOS */
   setmode(fileno(Fdo),_fmode); /*    are transparent when redirecting i/o. */
#endif
   if (!argc) { /* Doing standard input */
      if      (pflagp) showoptions(tabledir,cname);
      else if (gflagg) readtable(tabledir,cname,FALSE);
      if      (oflago) mess(7);
      dofile(eflage);
   } else {
      showcright();
      showoptions(tabledir,cname);
#ifdef __MSDOS__
      first = expwildcard(argv);
      if (!first) mess(7);
      /* Inv: first now points to a start of linked list of files. */
      last = first;
      while (last) {
         cname = last->fnam;
         if (oflago) {
            putc('\r',stderr);
            putc('\n',stderr);
#else
      while (argc) {
         cname = *argv;
         if (oflago) {
            putc('\n',stderr);
#endif
            fputs(cname,stderr);
         } /* if oflago */

#ifdef __UNIX__
         if (lstat(cname,&statbuf)) {
#else
         if (stat(cname,&statbuf)) {
#endif
            if (!oflago) fputs(cname,stderr);
            fputs(": can't access\n", stderr);
            goto cont;
         } /* if not stat */
         statmod = statbuf.st_mode;
         statype = statbuf.st_mode &  S_IFMT;

         if (statype != S_IFREG) {
            if (!oflago) fputs(cname,stderr);
            if      (statype == S_IFDIR) fputs(": directory\n", stderr);
#ifdef __UNIX__
            else if (statype == S_IFLNK) fputs(": symbolic link\n", stderr);
#endif
            else                         fputs(": special file\n", stderr);
            goto cont;
         } /* if not a regular file */

         if ((Fdi = fopen(cname,"r")) == NULL) {
            if (!oflago) fputs(cname,stderr);
            fputs(": can't open\n", stderr);
            goto cont;
         }
         if (oflago) if ((Fdo = fopen(TEMPFIL,"w")) == NULL) {
            fputs(": can't create tmpfile\n",stderr); exit(ERROR_EXIT);
         }
#ifdef __TURBOC__
         getftime(fileno(Fdi),&filtim);
#endif
         if (oflago) putc('\n',stderr);
         dofile(eflage);
         if (oflago) fprintf(stderr,"\r   Done. %ld lines written.",LCount);

         /* Push out what is still buffered, so that a failing last write   */
         /* shows up in ferror() before the input file is touched.          */
         fflush(Fdo);
         if (ferror(Fdi) || ferror(Fdo)) {
            fputs("\npep: I/O error, file unchanged\n", stderr);
            fclose(Fdi);
            if (oflago) {             /* Else Fdo is stdout: leave it open. */
               fclose(Fdo);
               unlink(TEMPFIL);       /* Don't leave a broken tempfile.     */
            }
         } else {
            fclose(Fdi);
            if (oflago) {
               fclose(Fdo);
               if (chmod(TEMPFIL,statmod)) fputs("\npep: could not set mode\n",stderr);
#ifdef __TURBOC__
               if ((hh  = open(TEMPFIL,O_RDONLY|O_BINARY)) != -1) {
                  setftime(hh,&filtim);
                  close(hh);
               }
#endif
            } /* if (oflago) */

            cc = 0; /* error flag */
            if (backup) {
               if (!(nname = (char *)malloc(strlen(cname)+5))) mess(5);
               strcpy(nname,cname);
               /* Cut the extension, but only if the last '.' is part of    */
               /* the filename itself and not of a directory in the path.   */
               ss = strrchr(nname,'.');
               if (ss && !strpbrk(ss,"/\\]")) *ss = '\0';
               strcat(nname,".BAK");
               unlink(nname);
#ifdef __UNIX__
               if (strcmp(nname,cname)) {  /* UNIX names are case sensitive */
#else
               if (stricmp(nname,cname)) { /*    other filenames are not.   */
#endif
                  if (rename(cname,nname)) cc++;
               } /* if names differ */
               free(nname);
            } else {
               if (oflago) if (unlink(cname)) cc++;
            } /* if .. else no backup */
            if (oflago) if (rename(TEMPFIL,cname)) cc++;
            if (cc) {
               fprintf(stderr, "\npep: error creating %s, file is %s\n",cname,TEMPFIL);
               exit(ERROR_EXIT);
            } /* if error */
         } /* if .. else close file */
cont:
#ifdef __MSDOS__
         last = last->next;
      } /* while */
   dispwildcard(first);
#else
         argc--; argv++;
      } /* while */
      if (oflago) putc('\n',stderr);
#endif
   } /* if .. else read named files */
   return(NORML_EXIT);
}  /* main */

/* EOF */
