lynx2gph

Unnamed repository; edit this file 'description' to name the repository.
git clone https://git.inz.fi/lynx2gph
Log | Files | Refs | README

lynx2gph.c (7774B)


      1 #define _POSIX_C_SOURCE 200809L
      2 #include <stdio.h>
      3 #include <stdlib.h>
      4 #include <string.h>
      5 #include <stdbool.h>
      6 #include <ctype.h>
      7 
      8 #include "config.h"
      9 
     10 struct segment {
     11 	struct segment *next;
     12 	const char *text;
     13 	unsigned detail;
     14 };
     15 
     16 struct line
     17 {
     18 	struct segment *segs;
     19 	struct line *next;
     20 	char *text;
     21 };
     22 
     23 struct lq {
     24 	unsigned *ref_n;
     25 	size_t maxref;
     26 	size_t sz;
     27 	struct line *head;
     28 	struct line *tail;
     29 };
     30 
     31 static inline void *memdup(const void *data, size_t sz)
     32 {
     33 	return memcpy(malloc(sz), data, sz);
     34 }
     35 
     36 #define dup(...) (memdup((__VA_ARGS__), sizeof(*(__VA_ARGS__))))
     37 
     38 struct cb_data {
     39 	struct lq *q;
     40 	struct segment **tail;
     41 };
     42 
     43 static inline bool nolinks(const struct segment *segs)
     44 {
     45 	return !segs || (segs->text && !segs->next);
     46 }
     47 
     48 static inline unsigned onelink(const struct segment *segs)
     49 {
     50 	if (!segs)
     51 		return 0;
     52 
     53 	if (!segs->text) {
     54 		if (nolinks(segs->next))
     55 			return segs->detail;
     56 	} else if (segs->next && !segs->next->text) {
     57 		if (nolinks(segs->next->next))
     58 			return segs->next->detail;
     59 	}
     60 
     61 	return 0;
     62 }
     63 
     64 void seg_add(const char *text, unsigned detail, void *data)
     65 {
     66 	struct cb_data *d = data;
     67 	struct segment *s = dup(&(struct segment){ .text = text, .detail = detail });
     68 
     69 	*d->tail = (*d->tail)->next = s;
     70 
     71 	if (text)
     72 		return;
     73 
     74 	if (detail - 1 >= d->q->sz) {
     75 		size_t nsz = d->q->sz ? d->q->sz * 2 : 32;
     76 		while (detail - 1 >= nsz)
     77 			nsz *= 2;
     78 		d->q->ref_n = realloc(d->q->ref_n, nsz * sizeof(*d->q->ref_n));
     79 		memset(d->q->ref_n + d->q->sz, 0, (nsz - d->q->sz) * sizeof(*d->q->ref_n));
     80 		d->q->sz = nsz;
     81 	}
     82 	if (detail >= d->q->maxref)
     83 		d->q->maxref = detail - 1;
     84 
     85 	d->q->ref_n[detail - 1]++;
     86 }
     87 
     88 unsigned ref_foreach(const char *l, void (*cb)(const char *text, unsigned ref, void *data), void *data)
     89 {
     90 	unsigned rv = 0;
     91 	int curref = -1;
     92 	const char *s;
     93 	const char *i;
     94 
     95 	for (i = l; *i; i++) {
     96 		switch (*i) {
     97 		case '[':
     98 			s = i;
     99 			curref = 0;
    100 			break;
    101 		case '0':
    102 		case '1':
    103 		case '2':
    104 		case '3':
    105 		case '4':
    106 		case '5':
    107 		case '6':
    108 		case '7':
    109 		case '8':
    110 		case '9':
    111 			if (curref >= 0)
    112 				curref = curref * 10 + *i - '0';
    113 			break;
    114 		case ']':
    115 			if (curref < 0)
    116 				break;
    117 			if (s != l)
    118 				cb(l, s - l, data);
    119 			cb(NULL, curref, data);
    120 			l = i + 1;
    121 			rv++;
    122 
    123 			/* fall-through */
    124 		default:
    125 			curref = -1;
    126 		}
    127 	}
    128 
    129 	if (*l)
    130 		cb(l, i - l, data);
    131 
    132 	return rv;
    133 }
    134 
    135 void lq_push(struct lq *q, const char *line)
    136 {
    137 	struct line *l = malloc(sizeof(*l));
    138 
    139 	if (q->tail)
    140 		q->tail->next = l;
    141 	else
    142 		q->head = l;
    143 	q->tail = l;
    144 	l->next = NULL;
    145 	l->text = strdup(line);
    146 	l->segs = NULL;
    147 
    148 	ref_foreach(l->text, seg_add, &(struct cb_data){ .q = q, .tail = (void *)&l });
    149 }
    150 
    151 unsigned refof(const char * const *refs, unsigned ref)
    152 {
    153 	unsigned n = 1;
    154 	while (--ref)
    155 		if (*refs++)
    156 			n++;
    157 	return n;
    158 }
    159 
    160 void printline(struct line *l, const char * const *refs)
    161 {
    162 	struct segment *s;
    163 
    164 	printf("t");
    165 	for (s = l->segs; s; s = s->next) {
    166 		if (s->text)
    167 			printf("%.*s", (int)s->detail, s->text);
    168 		else
    169 			printf("[%u]", refof(refs, s->detail));
    170 	}
    171 	puts("");
    172 }
    173 
    174 void print_text_line(struct line *l)
    175 {
    176 	if (!l->segs)
    177 		printf("t\n");
    178 	else
    179 		printf("t%.*s\n", (int)l->segs->detail, l->segs->text);
    180 }
    181 
    182 bool parse_gopherurl(const char *url, char *hostbuf, char *portbuf, char *selbuf, char *type)
    183 {
    184 	size_t fs;
    185 	const char *s;
    186 
    187 	if (strncmp(url, "gopher", 6))
    188 		return false;
    189 	url += 6;
    190 	if (*url == 's')
    191 		url++;
    192 	if (*url++ != ':' || *url++ != '/' || *url++ != '/')
    193 		return false;
    194 
    195 	fs = strcspn(url, "/");
    196 
    197 	if (*url == '[') {
    198 		s = memchr(url, ']', fs);
    199 		if (!s)
    200 			return false;
    201 		sprintf(hostbuf, "%.*s", (int)(s - url - 1), url + 1);
    202 		if (s[1] == ':')
    203 			sprintf(portbuf, "%.*s", (int)(fs - (s - url) - 2), s + 2);
    204 		else
    205 			strcpy(portbuf, "70");
    206 	} else if ((s = memchr(url, ':', fs))) {
    207 		sprintf(hostbuf, "%.*s", (int)(s - url), url);
    208 		sprintf(portbuf, "%.*s", (int)(fs - (s - url + 1)), s + 1);
    209 	} else {
    210 		sprintf(hostbuf, "%.*s", (int)fs, url);
    211 		strcpy(portbuf, "70");
    212 	}
    213 	url += fs;
    214 	if (*url)
    215 		url++;
    216 
    217 	if (*url)
    218 		*type = *url++;
    219 	else
    220 		*type = '1';
    221 
    222 	strcpy(selbuf, url);
    223 
    224 	return true;
    225 }
    226 
    227 void print_text_only(struct line *l)
    228 {
    229 	struct segment *s;
    230 
    231 	for (s = l->segs; s; s = s->next)
    232 		if (s->text)
    233 			printf("%.*s", (int)s->detail, s->text);
    234 }
    235 
    236 const char *basename(const char *url)
    237 {
    238 	const char *bn = strrchr(url, '/');
    239 	if (bn)
    240 		return bn + 1;
    241 	return url;
    242 }
    243 
    244 const char *extof(const char *fn)
    245 {
    246 	const char *del = strrchr(fn, '.');
    247 	if (del)
    248 		return del + 1;
    249 	return fn;
    250 }
    251 
    252 char guesstype(const char *url, bool full)
    253 {
    254 	const char *ext = extof(basename(url));
    255 	size_t i;
    256 
    257 	if (!*ext) {
    258 		if (full)
    259 			return 'h';
    260 		return '1';
    261 	}
    262 
    263 	for (i = 0; i < sizeof(typemap) / sizeof(*typemap); i++)
    264 		if (!strcmp(ext, typemap[i].name))
    265 			return typemap[i].type;
    266 	return '9';
    267 }
    268 
    269 bool isfullurl(const char *s)
    270 {
    271 	while (isalpha(*s))
    272 		s++;
    273 	return !strncmp(s, "://", 3);
    274 }
    275 
    276 void print_onelink(const char *url, void (*ptxt)(void *txt), void *txt)
    277 {
    278 	char ghost[512];
    279 	char gport[24];
    280 	char stor[512];
    281 	char type;
    282 
    283 	if (parse_gopherurl(url, ghost, gport, stor, &type)) {
    284 		printf("[%c|", type);
    285 		ptxt(txt);
    286 		printf("|%s|%s|%s]\n", stor, ghost, gport);
    287 	} else {
    288 		bool full = isfullurl(url);
    289 		const char *upfx = full ? "URL:" : "";
    290 		type = full ? 'h' : guesstype(url, !!upfx);
    291 		printf("[%c|", type);
    292 		ptxt(txt);
    293 		printf("|%s%s|server|port]\n", upfx, url);
    294 	}
    295 }
    296 
    297 struct prurl {
    298 	unsigned ref;
    299 	const char *url;
    300 };
    301 
    302 void print_ref(void *data)
    303 {
    304 	struct prurl *u = data;
    305 	printf("%4u. %s", u->ref, u->url);
    306 }
    307 
    308 void print_onelink_ref(const char *url, unsigned ref)
    309 {
    310 	print_onelink(url, print_ref, &(struct prurl){ .ref = ref, .url = url });
    311 }
    312 
    313 void print_onelink_line(struct line *l, const char *url)
    314 {
    315 	print_onelink(url, print_text_only, l);
    316 }
    317 
    318 const char *resolve_link(const char *link, const char *base, size_t bnl, size_t protol)
    319 {
    320 	if (!strncmp(link, base, bnl))
    321 		return link + bnl;
    322 	if (!strncmp(link, base, protol))
    323 		return link + protol;
    324 	return link;
    325 }
    326 
    327 void dumprefs(char **refs, size_t n)
    328 {
    329 	unsigned r = 1;
    330 	unsigned i;
    331 
    332 	for (i = 0; i < n; i++)
    333 		if (refs[i])
    334 			break;
    335 	if (i == n)
    336 		return;
    337 
    338 	printf("tReferences:\nt\n");
    339 	for (; i < n; i++)
    340 		if (refs[i])
    341 			print_onelink_ref(refs[i], r++);
    342 }
    343 
    344 bool sa_pfxof(const char * const *pfxs, size_t n, const char *str)
    345 {
    346 	while (n--)
    347 		if (!strncmp(pfxs[n], str, strlen(pfxs[n])))
    348 			return true;
    349 	return false;
    350 }
    351 
    352 int main(int argc, char **argv)
    353 {
    354 	char linebuf[1024];
    355 	char **refs;
    356 	struct lq q = { 0 };
    357 	struct line *l;
    358 	const char *baseurl = argv[1];
    359 	const char *prot = strstr(baseurl, "://");
    360 	const char *bls = strrchr(baseurl, '/');
    361 
    362 	if (!baseurl || !prot || !(prot = strchr(prot + 3, '/')))
    363 		return 1;
    364 
    365 	while (fgets(linebuf, sizeof(linebuf), stdin)) {
    366 		size_t ll = strcspn(linebuf, "\r\n");
    367 
    368 		linebuf[ll] = '\0';
    369 
    370 		if (!strcmp(linebuf, "References"))
    371 			break;
    372 
    373 		lq_push(&q, linebuf);
    374 	}
    375 
    376 	refs = calloc((q.maxref + 1), sizeof(*refs));
    377 	while (fgets(linebuf, sizeof(linebuf), stdin)) {
    378 		size_t ll = strcspn(linebuf, "\r\n");
    379 		int urlpos;
    380 		char dummy[2];
    381 		unsigned ref;
    382 
    383 		linebuf[ll] = '\0';
    384 		if (sscanf(linebuf, " %u%1[.] %n", &ref, dummy, &urlpos) < 2)
    385 			continue;
    386 
    387 		refs[ref - 1] = strdup(resolve_link(linebuf + urlpos, baseurl, bls - baseurl + 1, prot - baseurl));
    388 		if (sa_pfxof(gph_prefixes, sizeof(gph_prefixes) / sizeof(*gph_prefixes), refs[ref - 1])) {
    389 			char *dp = strrchr(refs[ref - 1], '.');
    390 			if (dp && !strncmp(dp + 1, "htm", 3))
    391 				strcpy(dp + 1, "gph");
    392 		}
    393 	}
    394 
    395 	for (l = q.head; l; l = l->next) {
    396 		unsigned oneref = onelink(l->segs);
    397 
    398 		if (nolinks(l->segs)) {
    399 			print_text_line(l);
    400 		} else if (oneref && q.ref_n[oneref - 1] == 1) {
    401 			print_onelink_line(l, refs[oneref - 1]);
    402 			free(refs[oneref - 1]);
    403 			refs[oneref - 1] = NULL;
    404 		} else {
    405 			printline(l, refs);
    406 		}
    407 	}
    408 
    409 	dumprefs(refs, q.maxref);
    410 
    411 	return 0;
    412 }