/*
  rmcolumn: remove selected columns from a NoSQL table.

  Copyright (c) 1998,1999,2000,2001,2002,2003 Carlo Strozzi

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

  $Id: rmcolumn.c,v 1.2 2003/09/23 10:11:37 carlo Exp $

*/

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/file.h>
#include <string.h>

/* Default directory of the help file; can be overridden at compile time. */
#ifndef HELPDIR
#define HELPDIR "/usr/local/nosql/help"
#endif
/* File printed (minus lines starting with '#') by the -h/--help option;
   defaults to rmcolumn.txt under HELPDIR. */
#ifndef HELPFILE
#define HELPFILE (HELPDIR "/rmcolumn.txt")
#endif

/* Allocation chunk size: counted in bytes for the character buffers and
   in elements for the offset array. Do not set to less than sizeof(int) */
#define ALLOCSIZ 32

/* size of the column name work buffer: a name may be at most
   MAXCOLNAME-1 (30) characters, plus the trailing zero */
#define MAXCOLNAME 31

/*
 * In-memory table header.
 *
 * Column names are stored one after another, each zero-terminated, in
 * the `names` buffer; `offset` holds the start of every name relative to
 * `names`, and an offset of -1 marks a column that has been removed.
 * `cmd` accumulates the command text handed to the back-end.
 */
typedef struct Header {
  char *names;    /* column names */
  char *cmd;      /* buffer for optional back-end cmd */
  int top;        /* top used location in *names (0-n) */
  int end;        /* last available position in *names */
  int *offset;    /* element offsets in *names */
  int otop;       /* top used location in *offset */
  int oend;       /* last available position in *offset */
  int ctop;       /* top used location in *cmd (0-n) */
  int cend;       /* last available position in *cmd */
} Header;

/* Run-time option switches, one bit each. */
struct {
  unsigned int header : 1,   /* print the header line when set */
               debug : 1;    /* echo the generated command on stderr when set */
} flags;

/* global variables */
static char *progname;		/* program name (argv[0]); set in main(), used as prefix in diagnostics */

/* Function definitions */

/*
 * Unbuffered input routine: fetch a single byte from file descriptor 0
 * with read(), without going through stdio.
 *
 * Returns the byte as an unsigned char value, or EOF when read() does
 * not deliver exactly one byte.
 */
static int getch(void) {
  unsigned char byte;

  if (read(0, &byte, 1) != 1)
    return EOF;

  return byte;
}

/*
 * Print the one-line usage summary on stderr.
 *
 * Returns 1, so the call can be used directly as an exit status.
 */
static int eusage(void) {
  /* the original message ended with a stray, unmatched apostrophe */
  fprintf(stderr, "Usage: %s [options] column-list\n", progname);
  return 1;
}

/*
 * Report on stderr that a column name is wider than the limit, which is
 * printed as MAXCOLNAME-1.
 *
 * Returns 1, so the call can be used directly as an exit status.
 */
static int etoolong(void) {
  const int limit = MAXCOLNAME - 1;

  fprintf(stderr, "%s: max. column name width (%d) exceeded\n", progname, limit);
  return 1;
}

/*
 * Look up a column by name.
 *
 * h    - table header to search
 * name - zero-terminated column name to look for
 *
 * Returns the zero-based index of the matching entry in h->offset, or -1
 * when no live column has that name. Entries with a negative offset
 * (removed columns) never match.
 */
static int colpos(Header *h, char *name) {

  int i;

  for (i = 0; i <= h->otop; i++) {
      /* Check the offset before using it: a removed column has offset
         -1, and h->names + (-1) points before the start of the buffer. */
      if (h->offset[i] >= 0 && !strcmp(name, h->names + h->offset[i]))
        return i;
  }

  return -1;
}

/*
 * Add a new column to the table header.
 *
 * h    - header to extend; it is initialized on the very first call
 * name - zero-terminated column name to append
 *
 * A name that is already present is silently ignored. On allocation
 * failure a message is printed with perror() and the program exits
 * with status 1.
 *
 * The "first call" flag is function-static, so only one Header can be
 * managed through this function in a process.
 *
 * NOTE(review): skipping a duplicate means later columns get an index
 * lower than their field position in the input — confirm that valid
 * tables never carry duplicated column names.
 */
static void addcol(Header *h, char *name) {

  static int init=1;
  size_t len;

  /* init header if first time */

  if (init) {
     h->top = -1;
     h->otop = -1;

     /* end/oend are the last valid index of their buffer, i.e. size-1 */
     h->end = ALLOCSIZ - 1;
     h->oend = ALLOCSIZ - 1;

     if ((h->names = malloc(ALLOCSIZ)) == NULL ||
         (h->offset = malloc(ALLOCSIZ * sizeof *h->offset)) == NULL) {
        perror(progname);
        exit(1);
     }
     h->names[0] = '\0';			/* init string */
     init = 0;
  }

  /* ignore duplicated column names, if any */
  if (colpos(h, name) >= 0) return;

  len = strlen(name);

  /* The new name will occupy positions top+1 .. top+1+len, terminator
     included. Grow in ALLOCSIZ steps until the terminator fits; a loop
     is needed because a name may be longer than one chunk. */
  while ((size_t)(h->top + 1) + len > (size_t)h->end) {
     char *grown = realloc(h->names, (size_t)h->end + 1 + ALLOCSIZ);

     if (grown == NULL) {
        perror(progname);
        exit(1);
     }
     h->names = grown;
     h->end += ALLOCSIZ;
  }

  /* make room for one more offset; the new size must be computed from
     the current capacity, or the array never actually grows */
  if (h->otop >= h->oend) {
     int *grown = realloc(h->offset,
                          ((size_t)h->oend + 1 + ALLOCSIZ) * sizeof *grown);

     if (grown == NULL) {
        perror(progname);
        exit(1);
     }
     h->offset = grown;
     h->oend += ALLOCSIZ;
  }

  /* add offset for the new column, relative to h->names */
  h->offset[++h->otop] = h->top + 1;

  /* add new name (with its terminator) to column names */
  memcpy(h->names + h->top + 1, name, len + 1);
  h->top += (int)len + 1;
}

/*
 * Logically remove the column called `name` from header h.
 *
 * The column is not erased from the names buffer; its entry in
 * h->offset is overwritten with -1 instead. A name that colpos() does
 * not find is ignored.
 */
static void rmcol(Header *h, char *name) {
  int idx = colpos(h, name);

  if (idx < 0)
    return;

  h->offset[idx] = -1;
}

/*
 * Append text to the back-end command buffer h->cmd.
 *
 * h   - header owning the command buffer; the buffer is created on the
 *       very first call
 * cmd - zero-terminated text to append
 * len - number of characters of cmd to append; a value <= 0 or larger
 *       than strlen(cmd) means "all of cmd"
 *
 * Returns the number of characters appended. On allocation failure a
 * message is printed with perror() and the program exits with status 1.
 *
 * The "first call" flag is function-static, so only one command buffer
 * can be managed through this function in a process.
 */
static int addcmd(Header *h, const char *cmd, int len) {

  static int init=1;
  int l;

  /* init h->cmd if first time */

  if (init) {
     h->ctop = -1;
     h->cend = ALLOCSIZ - 1;	/* last valid index, i.e. size-1 */

     if ((h->cmd = malloc(ALLOCSIZ)) == NULL) {
        perror(progname);
        exit(1);
     }
     h->cmd[0] = '\0';			/* init string (mandatory!) */
     init = 0;
  }

  l = (int)strlen(cmd);

  if (len <= 0 || len > l) len = l;		/* set default */

  /* After the append the terminator sits at ctop+1+len. Grow in
     ALLOCSIZ steps until that position is inside the buffer; a loop is
     needed because the text may be longer than one chunk. */
  while (h->ctop + 1 + len > h->cend) {
     char *grown = realloc(h->cmd, (size_t)h->cend + 1 + ALLOCSIZ);

     if (grown == NULL) {
        perror(progname);
        exit(1);
     }
     h->cmd = grown;
     h->cend += ALLOCSIZ;
  }

  /* ctop+1 is where the current terminator lives: append there */
  memcpy(h->cmd + h->ctop + 1, cmd, (size_t)len);
  h->ctop += len;
  h->cmd[h->ctop + 1] = '\0';
  return len;
}

/*
 * Write the header line for the remaining columns to stdout.
 *
 * Nothing is printed when flags.header is clear. Otherwise every column
 * whose offset is non-negative is emitted with a \001 byte in front of
 * its name, columns are separated by TABs, and a newline closes the
 * line if at least one column was printed.
 */
static void printhdr(Header *h) {
  int col;
  int printed = 0;

  if (flags.header) {
    for (col = 0; col <= h->otop; col++) {
      const int off = h->offset[col];

      if (off < 0)
        continue;			/* removed column */

      /* prepend TAB if not first column */
      fputs(printed ? "\t\001" : "\001", stdout);
      fputs(h->names + off, stdout);
      printed++;
    }

    /* add NL if at least one column */
    if (printed)
      fputs("\n", stdout);
  }
}

/*
 * Entry point.
 *
 * Parses the leading options, reads the header line of the table from
 * standard input one byte at a time, marks the columns named on the
 * command line as removed, prints the new header and finally replaces
 * the process with mawk, which prints the surviving fields of every
 * remaining input line.
 *
 * Options:
 *   -x, --debug         print the generated mawk command on stderr
 *   -N, --no-header     do not print the header line
 *   -i, --input FILE    read the table from FILE instead of stdin
 *   -o, --output FILE   write the result to FILE instead of stdout
 *   -h, --help          show the help file and exit
 *
 * Any other argument starting with '-' is accepted and ignored.
 *
 * Exit status: 0 when there is nothing to do (no input, input ending
 * before the first newline, or no columns left); 1 on usage errors,
 * I/O errors, an over-long column name, or when exec fails.
 */
int main(int argc, char *argv[]) {

  int i=0, j=0, min_args=2;
  int c;				/* int, so that EOF differs from every byte value */

  char tmpbuf[MAXCOLNAME] = "";	 	/* local work buffer */

  Header h;				/* set up by the first addcol()/addcmd() calls */

  const char *awk[] = {
      "BEGIN{FS=OFS=\"\\t\";}{print ",
      "}"
  };

  flags.header = 1; 		/* default it to print the header */

  progname = argv[0];

  while (++i < argc && *argv[i] == '-') {

    min_args++;

    if (!strcmp(argv[i], "-x") ||
        !strcmp(argv[i], "--debug")) flags.debug = 1;

    else if (!strcmp(argv[i], "-N") ||
             !strcmp(argv[i], "--no-header")) flags.header = 0;

    else if (!strcmp(argv[i], "-i") ||
             !strcmp(argv[i], "--input")) {

      if (++i >= argc || *argv[i] == '-') exit(eusage());

      min_args++;

      /* freopen() reports failure with a null pointer */
      if (freopen(argv[i], "r", stdin) == NULL) {
         perror(argv[i]);
         exit(1);
      }
    }

    else if (!strcmp(argv[i], "-o") ||
             !strcmp(argv[i], "--output")) {

      if (++i >= argc || *argv[i] == '-') exit(eusage());

      min_args++;

      if (freopen(argv[i], "w", stdout) == NULL) {
         perror(argv[i]);
         exit(1);
      }
    }

    else if (!strcmp(argv[i], "-h") ||
             !strcmp(argv[i], "--help")) {

      execlp("grep","grep","-v","^#",HELPFILE,(char *) 0);
      perror("grep");			/* only reached if exec failed */
      exit(1);
    }
  }

  if (argc < min_args) exit(eusage());

  j = i;					/* index of first column name to remove */
  i = 0;					/* Re-use counter */

  /* The header is read one byte at a time so that nothing beyond its
     newline is consumed: the rest of the input is left for mawk. */
  while ((c = getch()) != EOF) {

     if (c == '\001') continue;			/* ignore SOH chars */

     if (i >= MAXCOLNAME) exit(etoolong());

     if (c != '\t' && c != '\n') {
        tmpbuf[i++] = (char) c;
        continue;
     }

     tmpbuf[i++] = '\0';			/* set terminator */
     addcol(&h, tmpbuf);			/* append to header */
     i = 0;

     if (c == '\n') {

        while (j < argc) rmcol(&h, argv[j++]);	/* handle removal request */

        addcmd(&h, awk[0], 0);			/* init AWK program */

        for (i=j=0; i <= h.otop; i++) {
            if (h.offset[i] >= 0) {	/* skip deleted columns */
               /* prepend comma if not first column; the longest
                  possible text is well within tmpbuf */
               if (j++) sprintf(tmpbuf, ",$(%d)", i+1);
               else sprintf(tmpbuf, "$(%d)", i+1);
               addcmd(&h, tmpbuf, 0);
            }
        }

        addcmd(&h, awk[1], 0);

        if (flags.debug) fprintf(stderr, "mawk %s\n", h.cmd);

        if (!j) exit(0);		/* exit if no columns left */

        printhdr(&h);			/* print header to stdout */

        fflush(NULL);	/* Make sure the header is output */
        execlp("mawk", "mawk", h.cmd, (char *) 0);
        perror("mawk");			/* only reached if exec failed */
        exit(1);
     }
  }

  exit(0);			/* no input, or input ended before a newline */
}

/* EOF */
