Initial import - lynx2gph - Unnamed repository; edit this file 'description' to name the repository.

commit de231a555b093caeb0b8bcb2661e462290cf31a3
Author: Santtu Lakkala <inz@inz.fi>
Date:   Sat, 19 Feb 2022 16:55:11 +0200

Initial import

Diffstat:
A Makefile  | 5 +++++
A README  | 6 ++++++
A config.h  | 44 ++++++++++++++++++++++++++++++++++++++++++++
A lynx2gph.c  | 412 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

4 files changed, 467 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
@@ -0,0 +1,5 @@
+# Compiler options: C99 mode with the -W and -Wall warning sets.
+CFLAGS = -std=c99 -W -Wall
+# Base name shared by the source file and the produced binary.
+NAME = lynx2gph
+
+# Build the executable directly from the single source file.
+${NAME}: ${NAME}.c
+	cc $< -o $@ ${CFLAGS}
diff --git a/README b/README
@@ -0,0 +1,6 @@
+lynx2gph
+
+Simple tool to convert lynx -dump output to GPH format (as interpreted by
+geomyidae or tskrtt).
+
+Usage: lynx -dump "$URI" | lynx2gph "$URI"
diff --git a/config.h b/config.h
@@ -0,0 +1,44 @@
+/*
+ * Maps the text after the last '.' of a file name (or the whole base name
+ * when it contains no '.') to a one-character item type. guesstype() in
+ * lynx2gph.c scans this table front to back with strcmp(), so matching is
+ * exact and case-sensitive.
+ */
+static const struct {
+	char type;	/* printed as the first field of a "[type|...]" link line */
+	const char *name;	/* extension or bare file name to match */
+} typemap[] = {
+	{ '0', "txt" },
+	{ '0', "md" },
+	{ '0', "sh" },
+	{ '0', "mk" },
+	{ '0', "c" },
+	{ '0', "h" },
+	{ '0', "pl" },
+	{ '0', "s" },
+	{ '0', "css" },
+	{ '0', "js" },
+	{ '0', "xml" },
+	{ '0', "mbox" },
+	{ '0', "sha256" },
+	{ '0', "patch" },
+	{ '0', "diff" },
+	{ '0', "conf" },
+	{ '0', "vim" },
+	/* Names without a '.' are compared whole, hence the bare file names. */
+	{ '0', "Makefile" },
+	{ '0', "README" },
+	{ '0', "TODO" },
+	{ '0', "cgitrc" },
+	{ '1', "gophermap" },
+	{ '1', "gph" },
+	{ 'h', "html" },
+	{ 'h', "htm" },
+	{ 'I', "gif" },
+	{ 'I', "png" },
+	{ 'I', "webp" },
+	{ 'I', "jpg" },
+	{ 'I', "jpeg" },
+	{ 'I', "bmp" },
+	{ 'I', "xpm" },
+};
+
+/* NOTE(review): presumably the item type for names that match no typemap entry - confirm. */
+static const char default_type = '9';
+
+/*
+ * Link prefixes whose targets get their extension rewritten: main() in
+ * lynx2gph.c replaces a final ".htm..." extension with ".gph" on resolved
+ * links that start with one of these strings.
+ */
+static const char *gph_prefixes[] = {
+	"pages/",
+	"posts/",
+};
diff --git a/lynx2gph.c b/lynx2gph.c
@@ -0,0 +1,412 @@
+#define _POSIX_C_SOURCE 200809L
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <ctype.h>
+
+#include "config.h"
+
+/*
+ * One piece of a parsed input line, kept in a singly linked list.
+ * A segment is either a run of literal text or a "[n]" link reference:
+ *   text != NULL: detail is the byte length of the run starting at text
+ *   text == NULL: detail is the reference number n
+ * seg_add() stores through ->next of a pointer that may really address a
+ * struct line, so next has to remain the first member.
+ */
+struct segment {
+	struct segment *next;	/* following segment, NULL at the end */
+	const char *text;	/* points into the owning line's text; not NUL-terminated per segment */
+	unsigned detail;	/* length or reference number, see above */
+};
+
+/*
+ * One input line of the page body together with its parsed segments.
+ * segs has to remain the first member: lq_push() hands seg_add() a
+ * pointer to the line disguised as a segment so that the first append
+ * lands in segs.
+ */
+struct line
+{
+	struct segment *segs;	/* parsed segments, NULL when the line is empty */
+	struct line *next;	/* next line in the queue */
+	char *text;	/* strdup()ed copy of the line; segment text points into it */
+};
+
+/* Queue of body lines plus bookkeeping about the link references in them. */
+struct lq {
+	unsigned *ref_n;	/* ref_n[n - 1] counts the occurrences of reference n */
+	size_t maxref;	/* maintained by seg_add(); main() sizes and walks its refs array with it */
+	size_t sz;	/* number of elements allocated for ref_n */
+	struct line *head;	/* first line, NULL while the queue is empty */
+	struct line *tail;	/* last line, where lq_push() appends */
+};
+
+/*
+ * Return a freshly malloc()ed copy of the sz bytes at data.
+ * The caller owns the result. Running out of memory is fatal: the
+ * program prints a diagnostic and exits instead of copying into NULL.
+ */
+static inline void *memdup(const void *data, size_t sz)
+{
+	void *copy = malloc(sz);
+
+	if (!copy) {
+		perror("malloc");
+		exit(1);
+	}
+	return memcpy(copy, data, sz);
+}
+
+/* Heap-copy the object that the given pointer expression points to. */
+#define dup(...) (memdup((__VA_ARGS__), sizeof(*(__VA_ARGS__))))
+
+/* State handed to seg_add() through ref_foreach()'s opaque data pointer. */
+struct cb_data {
+	struct lq *q;	/* queue whose reference counters are updated */
+	/*
+	 * Append cursor: address of a pointer to the most recently appended
+	 * segment. lq_push() initially aims it at its struct line pointer.
+	 */
+	struct segment **tail;
+};
+
+/* True when the list is empty or consists of exactly one text segment. */
+static inline bool nolinks(const struct segment *segs)
+{
+	if (segs == NULL)
+		return true;
+	if (segs->text == NULL)
+		return false;
+	return segs->next == NULL;
+}
+
+/*
+ * If the list holds exactly one link reference, optionally preceded
+ * and/or followed by a single text segment, return that reference
+ * number; otherwise return 0.
+ */
+static inline unsigned onelink(const struct segment *segs)
+{
+	const struct segment *link;
+
+	if (segs == NULL)
+		return 0;
+
+	/* Step over one optional leading text segment. */
+	link = segs->text ? segs->next : segs;
+	if (link == NULL || link->text != NULL)
+		return 0;
+
+	return nolinks(link->next) ? link->detail : 0;
+}
+
+/*
+ * ref_foreach() callback: append one segment to the line being built and,
+ * for link references, update the per-reference bookkeeping of the queue.
+ *
+ * text   - start of a text run, or NULL for a "[n]" link reference
+ * detail - byte length of the text run, or the reference number n
+ * data   - struct cb_data * naming the queue and the append cursor
+ *
+ * Running out of memory is fatal.
+ */
+void seg_add(const char *text, unsigned detail, void *data)
+{
+	struct cb_data *d = data;
+	struct lq *q = d->q;
+	struct segment *s = dup(&(struct segment){ .text = text, .detail = detail });
+
+	/*
+	 * Hook s behind whatever the cursor designates, then move the cursor
+	 * to s. For the first segment the cursor designates the struct line
+	 * (see lq_push()), whose leading segs member sits where a segment
+	 * keeps next, so this store sets the head of the list.
+	 */
+	*d->tail = (*d->tail)->next = s;
+
+	if (text)
+		return;
+
+	/* Reference numbers are 1-based; 0 has no counter slot. */
+	if (detail == 0)
+		return;
+
+	if (detail - 1 >= q->sz) {
+		size_t nsz = q->sz ? q->sz * 2 : 32;
+		unsigned *grown;
+
+		while (detail - 1 >= nsz)
+			nsz *= 2;
+		/* Keep the old array reachable until the new one is known good. */
+		grown = realloc(q->ref_n, nsz * sizeof(*grown));
+		if (!grown) {
+			perror("realloc");
+			exit(1);
+		}
+		memset(grown + q->sz, 0, (nsz - q->sz) * sizeof(*grown));
+		q->ref_n = grown;
+		q->sz = nsz;
+	}
+	/*
+	 * Remember the highest reference number seen. It must never shrink,
+	 * and it must cover the last reference, because main() both sizes
+	 * and walks its refs array with this value.
+	 */
+	if (detail > q->maxref)
+		q->maxref = detail;
+
+	q->ref_n[detail - 1]++;
+}
+
+/*
+ * Split the line l into text runs and "[n]" link references and report
+ * each piece, in order, through cb.
+ *
+ * cb is called as cb(start, length, data) for a text run and as
+ * cb(NULL, n, data) for a reference. A bracket pair only counts as a
+ * reference when it encloses nothing but decimal digits, the number is
+ * at least 1 and it fits in an unsigned; "[]", "[0]" and oversized
+ * numbers stay part of the surrounding text.
+ *
+ * Returns the number of references reported.
+ */
+unsigned ref_foreach(const char *l, void (*cb)(const char *text, unsigned ref, void *data), void *data)
+{
+	unsigned rv = 0;
+	unsigned curref = 0;
+	bool inref = false;	/* inside "[digits" that may still become a reference */
+	const char *s = l;	/* position of the most recent '[' */
+	const char *i;
+
+	for (i = l; *i; i++) {
+		if (*i == '[') {
+			s = i;
+			curref = 0;
+			inref = true;
+		} else if (inref && *i >= '0' && *i <= '9') {
+			unsigned digit = (unsigned)(*i - '0');
+
+			/* Give up on numbers that would wrap around. */
+			if (curref > ((unsigned)-1 - digit) / 10)
+				inref = false;
+			else
+				curref = curref * 10 + digit;
+		} else {
+			if (*i == ']' && inref && curref > 0) {
+				/* Flush the text in front of the bracket first. */
+				if (s != l)
+					cb(l, s - l, data);
+				cb(NULL, curref, data);
+				l = i + 1;
+				rv++;
+			}
+			inref = false;
+		}
+	}
+
+	/* Trailing text after the last reference (or the whole line). */
+	if (*l)
+		cb(l, i - l, data);
+
+	return rv;
+}
+
+/*
+ * Append a copy of line to the queue q and parse it into segments.
+ *
+ * The text is duplicated, so the caller may reuse its buffer. Running
+ * out of memory is fatal.
+ */
+void lq_push(struct lq *q, const char *line)
+{
+	struct line *l = malloc(sizeof(*l));
+
+	if (!l || !(l->text = strdup(line))) {
+		perror("lq_push");
+		exit(1);
+	}
+	l->next = NULL;
+	l->segs = NULL;
+
+	if (q->tail)
+		q->tail->next = l;
+	else
+		q->head = l;
+	q->tail = l;
+
+	/*
+	 * The cursor starts out aimed at the local pointer l, so seg_add()
+	 * treats the line itself as the predecessor of the first segment.
+	 * seg_add() overwrites l while appending; l must not be used after
+	 * this call.
+	 */
+	ref_foreach(l->text, seg_add, &(struct cb_data){ .q = q, .tail = (void *)&l });
+}
+
+/*
+ * Translate the original reference number ref into its number in the
+ * emitted reference list: 1 plus the count of non-NULL entries in front
+ * of it in refs (NULL entries are skipped when the list is printed).
+ *
+ * refs must hold at least ref - 1 entries. ref is 1-based; 0 is not a
+ * valid reference and yields 0 instead of scanning past the array.
+ */
+unsigned refof(const char * const *refs, unsigned ref)
+{
+	unsigned n = 1;
+	unsigned i;
+
+	if (ref == 0)
+		return 0;
+
+	for (i = 0; i + 1 < ref; i++)
+		if (refs[i])
+			n++;
+	return n;
+}
+
+/*
+ * Print l as a GPH text line ('t' prefix), writing text segments
+ * verbatim and link references as "[k]" with k renumbered by refof().
+ */
+void printline(struct line *l, const char * const *refs)
+{
+	const struct segment *seg = l->segs;
+
+	putchar('t');
+	while (seg) {
+		if (seg->text)
+			printf("%.*s", (int)seg->detail, seg->text);
+		else
+			printf("[%u]", refof(refs, seg->detail));
+		seg = seg->next;
+	}
+	putchar('\n');
+}
+
+/*
+ * Print a line without links as a GPH text line: a 't' followed by the
+ * first segment's text, or a bare "t" when the line has no segments.
+ */
+void print_text_line(struct line *l)
+{
+	const struct segment *first = l->segs;
+
+	if (first) {
+		printf("t%.*s\n", (int)first->detail, first->text);
+		return;
+	}
+	puts("t");
+}
+
+/* Copy n bytes from src to dst and NUL-terminate the result. */
+static void copy_span(char *dst, const char *src, size_t n)
+{
+	memcpy(dst, src, n);
+	dst[n] = '\0';
+}
+
+/*
+ * Split a gopher:// or gophers:// URL into its parts.
+ *
+ * hostbuf - receives the host; the brackets of a "[...]" host are dropped
+ * portbuf - receives the port text, "70" when the URL names none
+ * selbuf  - receives everything after the item type character
+ * type    - receives the first character of the path, '1' when absent
+ *
+ * Returns false, possibly without touching the outputs, when url is not
+ * a gopher URL or a bracketed host is not closed. No output is bounds
+ * checked: each buffer has to be able to hold a string as long as url.
+ */
+bool parse_gopherurl(const char *url, char *hostbuf, char *portbuf, char *selbuf, char *type)
+{
+	static const char scheme[] = "gopher";
+	const char *auth;	/* start of host[:port] */
+	const char *end;	/* first '/' after it, or the terminator */
+	const char *sep;
+	const char *rest;
+
+	if (strncmp(url, scheme, sizeof(scheme) - 1) != 0)
+		return false;
+	auth = url + sizeof(scheme) - 1;
+	if (*auth == 's')
+		auth++;
+	if (strncmp(auth, "://", 3) != 0)
+		return false;
+	auth += 3;
+
+	end = auth + strcspn(auth, "/");
+
+	if (*auth == '[') {
+		sep = memchr(auth, ']', (size_t)(end - auth));
+		if (sep == NULL)
+			return false;
+		copy_span(hostbuf, auth + 1, (size_t)(sep - auth - 1));
+		if (sep[1] == ':')
+			copy_span(portbuf, sep + 2, (size_t)(end - (sep + 2)));
+		else
+			strcpy(portbuf, "70");
+	} else {
+		sep = memchr(auth, ':', (size_t)(end - auth));
+		if (sep != NULL) {
+			copy_span(hostbuf, auth, (size_t)(sep - auth));
+			copy_span(portbuf, sep + 1, (size_t)(end - (sep + 1)));
+		} else {
+			copy_span(hostbuf, auth, (size_t)(end - auth));
+			strcpy(portbuf, "70");
+		}
+	}
+
+	/* Skip the '/' that ends the authority, then take the type character. */
+	rest = end;
+	if (*rest)
+		rest++;
+	if (*rest)
+		*type = *rest++;
+	else
+		*type = '1';
+
+	strcpy(selbuf, rest);
+
+	return true;
+}
+
+/* Print only the text segments of l, dropping every link reference. */
+void print_text_only(struct line *l)
+{
+	const struct segment *seg = l->segs;
+
+	while (seg) {
+		if (seg->text)
+			printf("%.*s", (int)seg->detail, seg->text);
+		seg = seg->next;
+	}
+}
+
+/*
+ * Return the part of url after its last '/', or url itself when it
+ * contains none. The result points into url.
+ */
+const char *basename(const char *url)
+{
+	const char *slash = strrchr(url, '/');
+
+	return slash ? slash + 1 : url;
+}
+
+/*
+ * Return the part of fn after its last '.', or fn itself when it
+ * contains none. The result points into fn.
+ */
+const char *extof(const char *fn)
+{
+	const char *dot = strrchr(fn, '.');
+
+	return dot ? dot + 1 : fn;
+}
+
+/*
+ * Guess the item type character for url from its file name.
+ *
+ * The text after the last '.' of the base name (or the whole base name
+ * when it has no '.') is looked up in typemap from config.h.
+ *
+ * full - selects the answer when that text is empty, e.g. for a url
+ *        ending in '/': 'h' if true, '1' if false
+ *
+ * Returns the mapped type, or default_type from config.h when nothing
+ * matches.
+ */
+char guesstype(const char *url, bool full)
+{
+	const char *ext = extof(basename(url));
+	size_t i;
+
+	if (!*ext)
+		return full ? 'h' : '1';
+
+	for (i = 0; i < sizeof(typemap) / sizeof(*typemap); i++)
+		if (!strcmp(ext, typemap[i].name))
+			return typemap[i].type;
+
+	/* Use the configurable fallback rather than a hard-coded '9'. */
+	return default_type;
+}
+
+/*
+ * True when s consists of zero or more alphabetic characters followed
+ * directly by "://", i.e. when it looks like an absolute URL.
+ */
+bool isfullurl(const char *s)
+{
+	/* <ctype.h> needs an unsigned char value; plain char may be negative. */
+	while (isalpha((unsigned char)*s))
+		s++;
+	return !strncmp(s, "://", 3);
+}
+
+/*
+ * Print one GPH link line, "[type|text|selector|host|port]", for url.
+ *
+ * url  - link target; must not be NULL
+ * ptxt - callback that prints the link text to stdout
+ * txt  - opaque argument handed to ptxt
+ *
+ * gopher:// and gophers:// URLs are split into selector, host and port.
+ * Anything else keeps the literal "server" and "port" fields: absolute
+ * URLs get type 'h' and a "URL:" selector prefix, relative ones get a
+ * type guessed from their file name.
+ */
+void print_onelink(const char *url, void (*ptxt)(void *txt), void *txt)
+{
+	/*
+	 * parse_gopherurl() writes without bounds checks, but every part it
+	 * produces is a substring of url or the literal "70", so
+	 * strlen(url) + 3 bytes per part are always enough.
+	 */
+	size_t cap = strlen(url) + 3;
+	char *buf = malloc(3 * cap);
+	char *ghost;
+	char *gport;
+	char *stor;
+	char type;
+
+	if (!buf) {
+		perror("malloc");
+		exit(1);
+	}
+	ghost = buf;
+	gport = buf + cap;
+	stor = buf + 2 * cap;
+
+	if (parse_gopherurl(url, ghost, gport, stor, &type)) {
+		printf("[%c|", type);
+		ptxt(txt);
+		printf("|%s|%s|%s]\n", stor, ghost, gport);
+	} else {
+		bool full = isfullurl(url);
+		const char *upfx = full ? "URL:" : "";
+
+		/*
+		 * Only relative links reach guesstype(), so tell it so: a
+		 * relative link without a file name is then typed '1'.
+		 */
+		type = full ? 'h' : guesstype(url, false);
+		printf("[%c|", type);
+		ptxt(txt);
+		printf("|%s%s|server|port]\n", upfx, url);
+	}
+
+	free(buf);
+}
+
+/* Argument bundle for print_ref(): a display number and its URL. */
+struct prurl {
+	unsigned ref;
+	const char *url;
+};
+
+/* Link-text callback: prints "<ref>. <url>", the number padded to 4 columns. */
+void print_ref(void *data)
+{
+	const struct prurl *entry = data;
+
+	printf("%4u. %s", entry->ref, entry->url);
+}
+
+/* Emit url as a link line whose text is the numbered reference itself. */
+void print_onelink_ref(const char *url, unsigned ref)
+{
+	struct prurl entry = { .ref = ref, .url = url };
+
+	print_onelink(url, print_ref, &entry);
+}
+
+/*
+ * Adapter with the exact callback type print_onelink() expects.
+ * print_text_only() takes a struct line *, and calling it through a
+ * void (*)(void *) pointer would be a call through an incompatible
+ * function type.
+ */
+static void print_line_text_cb(void *data)
+{
+	print_text_only(data);
+}
+
+/* Emit the whole line l as a single link to url, using its text as the link text. */
+void print_onelink_line(struct line *l, const char *url)
+{
+	print_onelink(url, print_line_text_cb, l);
+}
+
+/*
+ * Shorten link relative to base where possible.
+ *
+ * bnl    - length of base up to and including its last '/'
+ * protol - length of the scheme-and-host part of base
+ *
+ * A link sharing the first bnl bytes of base loses them; otherwise a
+ * link sharing the first protol bytes loses those; any other link is
+ * returned unchanged. The result points into link.
+ */
+const char *resolve_link(const char *link, const char *base, size_t bnl, size_t protol)
+{
+	const size_t prefix_len[] = { bnl, protol };
+	size_t k;
+
+	for (k = 0; k < sizeof(prefix_len) / sizeof(*prefix_len); k++)
+		if (strncmp(link, base, prefix_len[k]) == 0)
+			return link + prefix_len[k];
+
+	return link;
+}
+
+/*
+ * Print the remaining (non-NULL) entries of refs[0..n-1] as a numbered
+ * list of links under a "References:" heading. Prints nothing at all
+ * when every entry is NULL.
+ */
+void dumprefs(char **refs, size_t n)
+{
+	unsigned shown = 0;
+	size_t idx;
+
+	for (idx = 0; idx < n; idx++) {
+		if (!refs[idx])
+			continue;
+		/* The heading goes out once, just ahead of the first entry. */
+		if (shown == 0)
+			printf("tReferences:\nt\n");
+		print_onelink_ref(refs[idx], ++shown);
+	}
+}
+
+/* True when str starts with at least one of the n strings in pfxs. */
+bool sa_pfxof(const char * const *pfxs, size_t n, const char *str)
+{
+	size_t k;
+
+	for (k = 0; k < n; k++) {
+		const char *pfx = pfxs[k];
+
+		if (strncmp(pfx, str, strlen(pfx)) == 0)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Convert "lynx -dump" output on stdin to GPH on stdout.
+ *
+ * argv[1] is the URL the dump was made from; it must contain "://" and
+ * a '/' after the host part. Links are shortened relative to it.
+ *
+ * The input is read in two phases: body lines up to a line reading
+ * exactly "References", then the numbered reference list. Body lines
+ * whose only link is used nowhere else become link lines themselves;
+ * every reference still unused afterwards is listed at the end.
+ *
+ * Returns 0 on success and 1 on a usage error or when out of memory.
+ */
+int main(int argc, char **argv)
+{
+	char linebuf[1024];
+	char **refs;
+	size_t nrefs;
+	struct lq q = { 0 };
+	struct line *l;
+	const char *baseurl;
+	const char *prot;
+	const char *bls;
+
+	/* Check for the argument before looking into it. */
+	if (argc < 2 || !argv[1]) {
+		fprintf(stderr, "usage: lynx -dump URI | lynx2gph URI\n");
+		return 1;
+	}
+	baseurl = argv[1];
+	prot = strstr(baseurl, "://");
+	if (!prot || !(prot = strchr(prot + 3, '/')))
+		return 1;
+	/* Cannot be NULL: the test above found a '/' in baseurl. */
+	bls = strrchr(baseurl, '/');
+
+	while (fgets(linebuf, sizeof(linebuf), stdin)) {
+		size_t ll = strcspn(linebuf, "\r\n");
+
+		linebuf[ll] = '\0';
+
+		if (!strcmp(linebuf, "References"))
+			break;
+
+		lq_push(&q, linebuf);
+	}
+
+	/* One slot per reference number that can occur in the body. */
+	nrefs = q.maxref + 1;
+	refs = calloc(nrefs, sizeof(*refs));
+	if (!refs) {
+		perror("calloc");
+		return 1;
+	}
+
+	while (fgets(linebuf, sizeof(linebuf), stdin)) {
+		size_t ll = strcspn(linebuf, "\r\n");
+		int urlpos;
+		char dummy[2];
+		unsigned ref;
+
+		linebuf[ll] = '\0';
+		if (sscanf(linebuf, " %u%1[.] %n", &ref, dummy, &urlpos) < 2)
+			continue;
+
+		/* Ignore numbers that have no slot instead of writing past the array. */
+		if (ref == 0 || ref > nrefs)
+			continue;
+
+		/* A repeated number replaces the earlier entry. */
+		free(refs[ref - 1]);
+		refs[ref - 1] = strdup(resolve_link(linebuf + urlpos, baseurl, bls - baseurl + 1, prot - baseurl));
+		if (!refs[ref - 1]) {
+			perror("strdup");
+			return 1;
+		}
+		if (sa_pfxof(gph_prefixes, sizeof(gph_prefixes) / sizeof(*gph_prefixes), refs[ref - 1])) {
+			char *dp = strrchr(refs[ref - 1], '.');
+			/* "gph" is no longer than the "htm..." it replaces, so it fits. */
+			if (dp && !strncmp(dp + 1, "htm", 3))
+				strcpy(dp + 1, "gph");
+		}
+	}
+
+	for (l = q.head; l; l = l->next) {
+		unsigned oneref = onelink(l->segs);
+
+		if (nolinks(l->segs)) {
+			print_text_line(l);
+		} else if (oneref && oneref <= nrefs && refs[oneref - 1] &&
+			   q.ref_n[oneref - 1] == 1) {
+			/* The line is the reference's only use: make the line the link. */
+			print_onelink_line(l, refs[oneref - 1]);
+			free(refs[oneref - 1]);
+			refs[oneref - 1] = NULL;
+		} else {
+			printline(l, (const char * const *)refs);
+		}
+	}
+
+	/* Walk every slot so the highest-numbered reference is listed too. */
+	dumprefs(refs, nrefs);
+
+	return 0;
+}

	lynx2gph Unnamed repository; edit this file 'description' to name the repository.
	git clone https://git.inz.fi/lynx2gph
	Log \| Files \| Refs \| README

A	Makefile	\|	5	+++++
A	README	\|	6	++++++
A	config.h	\|	44	++++++++++++++++++++++++++++++++++++++++++++
A	lynx2gph.c	\|	412	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++