V7/usr/src/cmd/join.c
/* join F1 F2 on stuff */
#include <stdio.h>
#define F1 0
#define F2 1
#define NFLD 20 /* max field per line */
#define comp() cmp(ppi[F1][j1],ppi[F2][j2])
FILE *f[2];
char buf[2][BUFSIZ]; /*input lines */
char *ppi[2][NFLD]; /* pointers to fields in lines */
char *s1,*s2;
int j1 = 1; /* join of this field of file 1 */
int j2 = 1; /* join of this field of file 2 */
int olist[2*NFLD]; /* output these fields */
int olistf[2*NFLD]; /* from these files */
int no; /* number of entries in olist */
int sep1 = ' '; /* default field separator */
int sep2 = '\t';
char* null = "";
int unpub1;
int unpub2;
int aflg;
main(argc, argv)
char *argv[];
{
int i;
int n1, n2;
long top2, bot2;
long ftell();
while (argc > 1 && argv[1][0] == '-') {
if (argv[1][1] == '\0')
break;
switch (argv[1][1]) {
case 'a':
switch(argv[1][2]) {
case '1':
aflg |= 1;
break;
case '2':
aflg |= 2;
break;
default:
aflg |= 3;
}
break;
case 'e':
null = argv[2];
argv++;
argc--;
break;
case 't':
sep1 = sep2 = argv[1][2];
break;
case 'o':
for (no = 0; no < 2*NFLD; no++) {
if (argv[2][0] == '1' && argv[2][1] == '.') {
olistf[no] = F1;
olist[no] = atoi(&argv[2][2]);
} else if (argv[2][0] == '2' && argv[2][1] == '.') {
olist[no] = atoi(&argv[2][2]);
olistf[no] = F2;
} else
break;
argc--;
argv++;
}
break;
case 'j':
if (argv[1][2] == '1')
j1 = atoi(argv[2]);
else if (argv[1][2] == '2')
j2 = atoi(argv[2]);
else
j1 = j2 = atoi(argv[2]);
argc--;
argv++;
break;
}
argc--;
argv++;
}
for (i = 0; i < no; i++)
olist[i]--; /* 0 origin */
if (argc != 3)
error("usage: join [-j1 x -j2 y] [-o list] file1 file2");
j1--;
j2--; /* everyone else believes in 0 origin */
s1 = ppi[F1][j1];
s2 = ppi[F2][j2];
if (argv[1][0] == '-')
f[F1] = stdin;
else if ((f[F1] = fopen(argv[1], "r")) == NULL)
error("can't open %s", argv[1]);
if ((f[F2] = fopen(argv[2], "r")) == NULL)
error("can't open %s", argv[2]);
#define get1() n1=input(F1)
#define get2() n2=input(F2)
get1();
bot2 = ftell(f[F2]);
get2();
while(n1>0 && n2>0 || aflg!=0 && n1+n2>0) {
if(n1>0 && n2>0 && comp()>0 || n1==0) {
if(aflg&2) output(0, n2);
bot2 = ftell(f[F2]);
get2();
} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
if(aflg&1) output(n1, 0);
get1();
} else /*(n1>0 && n2>0 && comp()==0)*/ {
while(n2>0 && comp()==0) {
output(n1, n2);
top2 = ftell(f[F2]);
get2();
}
fseek(f[F2], bot2, 0);
get2();
get1();
for(;;) {
if(n1>0 && n2>0 && comp()==0) {
output(n1, n2);
get2();
} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
fseek(f[F2], bot2, 0);
get2();
get1();
} else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
fseek(f[F2], top2, 0);
bot2 = top2;
get2();
break;
}
}
}
}
return(0);
}
input(n) /* get input line and split into fields */
{
register int i, c;
char *bp;
char **pp;
bp = buf[n];
pp = ppi[n];
if (fgets(bp, BUFSIZ, f[n]) == NULL)
return(0);
for (i = 0; ; i++) {
if (sep1 == ' ') /* strip multiples */
while ((c = *bp) == sep1 || c == sep2)
bp++; /* skip blanks */
else
c = *bp;
if (c == '\n' || c == '\0')
break;
*pp++ = bp; /* record beginning */
while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
bp++;
*bp++ = '\0'; /* mark end by overwriting blank */
/* fails badly if string doesn't have \n at end */
}
*pp = 0;
return(i);
}
output(on1, on2) /* print items from olist */
int on1, on2;
{
int i;
char *temp;
if (no <= 0) { /* default case */
printf("%s", on1? ppi[F1][j1]: ppi[F2][j2]);
for (i = 0; i < on1; i++)
if (i != j1)
printf("%c%s", sep1, ppi[F1][i]);
for (i = 0; i < on2; i++)
if (i != j2)
printf("%c%s", sep1, ppi[F2][i]);
printf("\n");
} else {
for (i = 0; i < no; i++) {
temp = ppi[olistf[i]][olist[i]];
if(olistf[i]==F1 && on1<=olist[i] ||
olistf[i]==F2 && on2<=olist[i] ||
*temp==0)
temp = null;
printf("%s", temp);
if (i == no - 1)
printf("\n");
else
printf("%c", sep1);
}
}
}
error(s1, s2, s3, s4, s5)
char *s1;
{
fprintf(stderr, "join: ");
fprintf(stderr, s1, s2, s3, s4, s5);
fprintf(stderr, "\n");
exit(1);
}
cmp(s1, s2)
char *s1, *s2;
{
return(strcmp(s1, s2));
}