V7/usr/src/cmd/refer/inv1.c

Find at most related files,
including files from this version of Unix.

# include "stdio.h"
# include "assert.h"

main(argc, argv)
	char *argv[];
{
/* Make inverted file indexes.  Reads a stream from mkey which
   gives record pointer items and keys.  Generates a set of files
      a. NHASH pointers to file b.
      b. lists of record numbers.
      c. record pointer items.

   These files are named xxx.ia, xxx.ib, xxx.ic (and xxx.id when
   -d is given), where xxx is taken from the first non-option
   argument, or "Index" if there is none.
   If the files exist they are updated (unless -n is given).

   Options, each a separate argument starting with '-':
      -hN   use N as the hash table size (default 256)
      -n    start new index files, do not append
      -a    append to old files (the default)
      -v    print a summary of keys, hashes and docs at the end
      -d    keep keys on file xxx.id
      -p    pipe the keys straight into sort instead of a temp file
      -i F  read input from file F instead of stdin;
      -iu F same, and unlink F once it has been read

   Returns 0 on reaching the end.
*/

FILE *fa, *fb, *fc, *fta, *ftb, *fd;
int nhash = 256;
int appflg = 1;
int keepkey = 0, pipein = 0;
char nma[100], nmb[100], nmc[100], com[100], nmd[100];
char tmpa[20], tmpb[20], tmpc[20];
char *remove = NULL;
int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status;
int rc;
long keys;
int iflong = 0;
char *sortdir;

sortdir = (access("/crp/tmp", 06)==0) ? "/crp/tmp" : "/usr/tmp";
/* argc is checked first: with no arguments argv[1] is a null pointer */
while (argc > 1 && argv[1][0] == '-')
	{
	switch(argv[1][1])
		{
		case 'h': /* size of hash table */
			nhash = atoi (argv[1]+2); break;
		case 'n': /* new, don't append */
			appflg=0; break;
		case 'a': /* append to old file */
			appflg=1; break;
		case 'v': /* verbose output */
			chatty=1; break;
		case 'd': /* keep keys on file .id for check on searching */
			keepkey=1; break;
		case 'p': /* pipe into sort (saves space, costs time)*/
			pipein = 1; break;
		case 'i': /* input is on file, not stdin */
			if (argc < 3)
				{
				/* no file name follows the option */
				err("No input file after %s", argv[1]);
				break;
				}
			/* closing 0 first makes open() hand back descriptor 0 */
			close(0);
			if (open(argv[2], 0) != 0)
				err("Can't read input %s", argv[2]);
			if (argv[1][2]=='u') /* unlink */
				remove = argv[2];
			/* skip the file name as well as the option */
			argc--; argv++;
			break;
		}
	argc--;
	argv++;
	}

/* NOTE(review): the name buffers hold 100 bytes and the copy is
   unbounded, so a very long index name overruns them. */
strcpy (nma, argc >= 2 ? argv[1] : "Index");
strcpy (nmb, nma);
strcpy (nmc, nma);
strcpy (nmd, nma);
strcat (nma, ".ia");
strcat (nmb, ".ib");
strcat (nmc, ".ic");
strcat (nmd, ".id");

sprintf(tmpa, "junk%di", getpid());
if (pipein)
	{
	/* the calls are kept outside _assert so they still happen
	   if the assertion macro is compiled out */
	rc = pipe(fp);
	_assert(rc == 0);
	fr=fp[0]; fw=fp[1];
	if ( (pfork=fork()) == 0)
		{
		/* child: read end of the pipe becomes sort's stdin */
		close(fw);
		close(0);
		rc = dup(fr);
		_assert(rc==0);
		close(fr);
		execl("/bin/sort", "sort", "-T", sortdir, "-o", tmpa, 0);
		execl("/usr/bin/sort", "sort", "-T", sortdir, "-o", tmpa, 0);
		/* reached only if both execs failed */
		_assert(0);
		}
	_assert(pfork!= -1);
	close(fr);
	/* get a stdio stream, then point it at the pipe's write end */
	fta = fopen("/dev/null", "w");
	_assert (fta != NULL);
	close(fta->_file);
	fta->_file = fw;
	}
else /* use tmp file */
	{
	fta = fopen(tmpa, "w");
	_assert (fta != NULL);
	}
fb = 0;
if (appflg )
	{
	if (fb = fopen(nmb, "r"))
		{
		/* old index exists: recopy it to a scratch file for merging */
		sprintf(tmpb, "junk%dj", getpid());
		ftb = fopen(tmpb, "w");
		if (ftb==NULL)
			err("Can't get scratch file %s",tmpb);
		nhash = recopy(ftb, fb, fopen(nma, "r"));
		fclose(ftb);
		}
	else
		appflg=0;	/* nothing to append to */
	}
fc = fopen(nmc,  appflg ? "a" : "w");
if (fc == NULL)
	err("Can't write %s", nmc);
/* fd must be a null pointer, not garbage, when -d was not given */
fd = keepkey ? fopen(nmd, "w") : NULL;
docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
fclose(stdin);
if (remove != NULL)
	unlink(remove);
fclose(fta);
if (pipein)
	{
	pwait = wait(&status);
	/* NOTE(review): printed whether or not -v was given */
	printf("pfork %o pwait %o status %d\n",pfork,pwait,status);
	_assert(pwait==pfork);
	_assert(status==0);
	}
else
	{
	/* sort the temp file in place */
	sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa);
	system(com);
	}

if (appflg)
	{
	/* merge the recopied old index with the newly sorted keys */
	sprintf(tmpc, "junk%dk", getpid());
	sprintf(com, "mv %s %s", tmpa, tmpc);
	system(com);
	sprintf(com, "sort -T %s  -m %s %s -o %s", sortdir,
		tmpb, tmpc, tmpa);
	system(com);
	}
fta = fopen(tmpa, "r");
if (fta == NULL)
	err("Can't read sorted keys %s", tmpa);
fa = fopen(nma, "w");
if (fa == NULL)
	err("Can't write %s", nma);
fb = fopen(nmb, "w");
if (fb == NULL)
	err("Can't write %s", nmb);
whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
fclose(fta);
# ifndef D1
unlink(tmpa);
# endif
if (appflg)
	{
	unlink(tmpb);
	unlink(tmpc);
	}
if (chatty)
	printf ("%ld key occurrences,  %d hashes, %d docs\n",
		keys, hashes, docs);
return(0);
}