commit de231a555b093caeb0b8bcb2661e462290cf31a3
Author: Santtu Lakkala <inz@inz.fi>
Date: Sat, 19 Feb 2022 16:55:11 +0200
Initial import
Diffstat:
A | Makefile | | | 5 | +++++ |
A | README | | | 6 | ++++++ |
A | config.h | | | 44 | ++++++++++++++++++++++++++++++++++++++++++++ |
A | lynx2gph.c | | | 412 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
4 files changed, 467 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
@@ -0,0 +1,5 @@
+CC = cc
+CFLAGS = -std=c99 -W -Wall
+NAME = lynx2gph
+
+# config.h is #included by the source, so changing it must trigger a rebuild.
+# The source file is named explicitly because $< is only portable in
+# inference rules.
+${NAME}: ${NAME}.c config.h
+	${CC} ${CFLAGS} ${NAME}.c -o $@
diff --git a/README b/README
@@ -0,0 +1,6 @@
+lynx2gph
+
+Simple tool to convert lynx -dump output to GPH format (as interpreted by
+geomyidae or tskrtt).
+
+Usage: lynx -dump "$URI" | lynx2gph "$URI"
diff --git a/config.h b/config.h
@@ -0,0 +1,44 @@
+/*
+ * Maps the extension of a link's file name (or, for names without a dot,
+ * the whole base name) to the gopher item type used when linking to it.
+ * Names are compared exactly, including case.
+ */
+static const struct {
+    char type;         /* gopher item type character */
+    const char *name;  /* extension or bare file name */
+} typemap[] = {
+    /* type '0' entries */
+    { '0', "txt" },
+    { '0', "md" },
+    { '0', "sh" },
+    { '0', "mk" },
+    { '0', "c" },
+    { '0', "h" },
+    { '0', "pl" },
+    { '0', "s" },
+    { '0', "css" },
+    { '0', "js" },
+    { '0', "xml" },
+    { '0', "mbox" },
+    { '0', "sha256" },
+    { '0', "patch" },
+    { '0', "diff" },
+    { '0', "conf" },
+    { '0', "vim" },
+    { '0', "Makefile" },
+    { '0', "README" },
+    { '0', "TODO" },
+    { '0', "cgitrc" },
+
+    /* type '1' entries */
+    { '1', "gophermap" },
+    { '1', "gph" },
+
+    /* type 'h' entries */
+    { 'h', "html" },
+    { 'h', "htm" },
+
+    /* type 'I' entries */
+    { 'I', "gif" },
+    { 'I', "png" },
+    { 'I', "webp" },
+    { 'I', "jpg" },
+    { 'I', "jpeg" },
+    { 'I', "bmp" },
+    { 'I', "xpm" },
+};
+
+/* Fallback gopher item type, presumably for names that match no typemap entry. */
+static const char default_type = '9';
+
+/*
+ * Link prefixes whose ".htm*" targets are rewritten to ".gph" by main().
+ * Both the strings and the table itself are read-only.
+ */
+static const char *const gph_prefixes[] = {
+    "pages/",
+    "posts/",
+};
diff --git a/lynx2gph.c b/lynx2gph.c
@@ -0,0 +1,412 @@
+#define _POSIX_C_SOURCE 200809L
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <ctype.h>
+
+#include "config.h"
+
+/*
+ * One piece of a parsed line: either a run of literal text or a link
+ * reference.
+ *
+ * `next` must stay the first member: seg_add() attaches the first segment
+ * of a line by writing through a struct line pointer viewed as a
+ * struct segment, which lands on struct line's leading `segs` member.
+ */
+struct segment {
+    struct segment *next;  /* following segment of the line, or NULL */
+    const char *text;      /* start of the literal text; NULL for a reference */
+    unsigned detail;       /* text length in bytes, or the reference number */
+};
+
+/*
+ * One input line together with its parsed segments.
+ *
+ * `segs` must stay the first member; seg_add() relies on it sharing the
+ * offset of struct segment's `next`.
+ */
+struct line {
+    struct segment *segs;  /* parsed segments, NULL for an empty line */
+    struct line *next;     /* following line in the queue, or NULL */
+    char *text;            /* heap copy of the line; segments point into it */
+};
+
+/* Queue of parsed lines plus bookkeeping about the references they use. */
+struct lq {
+    unsigned *ref_n;    /* ref_n[r - 1] counts occurrences of reference r */
+    size_t maxref;      /* reference-number bound maintained by seg_add() */
+    size_t sz;          /* number of elements allocated for ref_n */
+    struct line *head;  /* first queued line, or NULL when empty */
+    struct line *tail;  /* last queued line, or NULL when empty */
+};
+
+/*
+ * Return a freshly allocated copy of the `sz` bytes at `data`.
+ *
+ * The program is terminated if memory cannot be obtained, so the result is
+ * never NULL and callers need no error path. The caller owns the copy.
+ */
+static inline void *memdup(const void *data, size_t sz)
+{
+    /* malloc(0) may legitimately return NULL; always request at least 1. */
+    void *copy = malloc(sz ? sz : 1);
+
+    if (!copy) {
+        perror("malloc");
+        exit(1);
+    }
+    return memcpy(copy, data, sz);
+}
+
+/*
+ * Heap-copy the object a pointer expression points to. The argument appears
+ * twice in the expansion, but the second use is inside sizeof and is not
+ * evaluated.
+ */
+#define dup(...) (memdup((__VA_ARGS__), sizeof(*(__VA_ARGS__))))
+
+/* State handed to seg_add() through ref_foreach()'s opaque data pointer. */
+struct cb_data {
+    struct lq *q;           /* queue whose reference counters are updated */
+    struct segment **tail;  /* points at the pointer to the last node appended */
+};
+
+/*
+ * True when the segment list holds no reference: it is either empty or
+ * consists of exactly one text segment.
+ */
+static inline bool nolinks(const struct segment *segs)
+{
+    if (segs == NULL)
+        return true;
+    if (segs->text == NULL)
+        return false;
+    return segs->next == NULL;
+}
+
+/*
+ * If the line is "[text] reference [text]" -- a single reference with at
+ * most one text segment on either side -- return that reference's number.
+ * Return 0 for every other shape, including an empty list.
+ */
+static inline unsigned onelink(const struct segment *segs)
+{
+    const struct segment *link = segs;
+
+    if (!link)
+        return 0;
+
+    /* Step over an optional leading text segment. */
+    if (link->text) {
+        link = link->next;
+        if (!link || link->text)
+            return 0;
+    }
+
+    /* Whatever follows the reference must be link-free. */
+    if (!nolinks(link->next))
+        return 0;
+
+    return link->detail;
+}
+
+/*
+ * ref_foreach() callback: append one segment to the line being parsed and,
+ * for references, update the queue's per-reference statistics.
+ *
+ * text    start of a text run, or NULL when the segment is a reference
+ * detail  length of the text run, or the reference number
+ * data    struct cb_data * describing the queue and the append position
+ *
+ * After the call, q->maxref is the highest reference number seen so far and
+ * q->ref_n[detail - 1] counts how often that reference occurred. The
+ * program is terminated if memory runs out.
+ */
+void seg_add(const char *text, unsigned detail, void *data)
+{
+    struct cb_data *d = data;
+    struct lq *q = d->q;
+    struct segment *s = dup(&(struct segment){ .text = text, .detail = detail });
+
+    /*
+     * Link the new node behind the current tail and advance the tail. For
+     * the first segment the "tail" is the struct line itself viewed as a
+     * segment, so the write to ->next lands in line->segs.
+     */
+    *d->tail = (*d->tail)->next = s;
+
+    /* Text needs no bookkeeping; reference 0 has no slot (detail - 1 wraps). */
+    if (text || detail == 0)
+        return;
+
+    if (detail - 1 >= q->sz) {
+        size_t nsz = q->sz ? q->sz * 2 : 32;
+        unsigned *grown;
+
+        while (detail - 1 >= nsz)
+            nsz *= 2;
+        /* Keep the old array reachable until the new one is known good. */
+        grown = realloc(q->ref_n, nsz * sizeof(*grown));
+        if (!grown) {
+            perror("realloc");
+            exit(1);
+        }
+        memset(grown + q->sz, 0, (nsz - q->sz) * sizeof(*grown));
+        q->ref_n = grown;
+        q->sz = nsz;
+    }
+
+    /* Only ever grows, so out-of-order references cannot shrink it. */
+    if (detail > q->maxref)
+        q->maxref = detail;
+
+    q->ref_n[detail - 1]++;
+}
+
+/*
+ * Split a line into text runs and "[N]" references and report each piece,
+ * in order, through `cb`.
+ *
+ * l     NUL-terminated line to scan
+ * cb    called as cb(start, length, data) for a text run and as
+ *       cb(NULL, N, data) for a reference
+ * data  passed through to cb unchanged
+ *
+ * Only a bracketed, non-zero decimal number counts as a reference; "[]",
+ * "[0]", "[x1]" and over-long digit runs are left as ordinary text.
+ *
+ * Returns the number of references found.
+ */
+unsigned ref_foreach(const char *l, void (*cb)(const char *text, unsigned ref, void *data), void *data)
+{
+    /* Keeps curref * 10 + 9 within the guaranteed range of long. */
+    enum { REF_LIMIT = 100000000 };
+    unsigned rv = 0;
+    long curref = -1;   /* -1 while not inside "[digits" */
+    const char *s = l;  /* position of the most recent '[' */
+    const char *i;
+
+    for (i = l; *i; i++) {
+        switch (*i) {
+        case '[':
+            s = i;
+            curref = 0;
+            break;
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+            if (curref >= REF_LIMIT)
+                curref = -1;  /* too long to be a reference */
+            else if (curref >= 0)
+                curref = curref * 10 + (*i - '0');
+            break;
+        case ']':
+            /* Zero means "no digits" or a literal 0: not a reference. */
+            if (curref > 0) {
+                if (s != l)
+                    cb(l, s - l, data);
+                cb(NULL, (unsigned)curref, data);
+                l = i + 1;
+                rv++;
+            }
+            curref = -1;
+            break;
+        default:
+            curref = -1;
+        }
+    }
+
+    /* Trailing text after the last reference (or the whole line). */
+    if (*l)
+        cb(l, i - l, data);
+
+    return rv;
+}
+
+/*
+ * Append a copy of `line` to the queue and parse it into segments.
+ *
+ * The queue owns the copy; the segments created by seg_add() point into it.
+ * The program is terminated if memory runs out.
+ */
+void lq_push(struct lq *q, const char *line)
+{
+    struct line *l = malloc(sizeof(*l));
+    char *copy = strdup(line);
+
+    if (!l || !copy) {
+        perror("lq_push");
+        exit(1);
+    }
+
+    l->segs = NULL;
+    l->next = NULL;
+    l->text = copy;
+
+    if (q->tail)
+        q->tail->next = l;
+    else
+        q->head = l;
+    q->tail = l;
+
+    /*
+     * The append position starts out as the line itself, viewed as a
+     * segment: seg_add()'s first write to ->next therefore sets l->segs.
+     * seg_add() overwrites the local `l` as it advances, so `l` must not be
+     * used after this call.
+     */
+    ref_foreach(l->text, seg_add, &(struct cb_data){ .q = q, .tail = (void *)&l });
+}
+
+/*
+ * Translate an input reference number into the number it gets in the
+ * output, where references whose slot in `refs` is NULL are skipped.
+ *
+ * refs  reference table; refs[r - 1] belongs to input reference r
+ * ref   1-based input reference number; refs must have at least ref - 1
+ *       entries
+ *
+ * Returns 1 plus the count of non-NULL entries before `ref`, or 0 when
+ * `ref` is 0 (which is not a valid reference).
+ */
+unsigned refof(const char * const *refs, unsigned ref)
+{
+    unsigned n = 1;
+    unsigned i;
+
+    /* Without this guard the loop bound below would wrap around. */
+    if (ref == 0)
+        return 0;
+
+    for (i = 0; i < ref - 1; i++)
+        if (refs[i])
+            n++;
+    return n;
+}
+
+/*
+ * Print a line as a text line (leading 't'), writing each reference as
+ * "[N]" where N is its renumbered value according to refof().
+ */
+void printline(struct line *l, const char * const *refs)
+{
+    const struct segment *seg = l->segs;
+
+    printf("t");
+    while (seg) {
+        if (!seg->text)
+            printf("[%u]", refof(refs, seg->detail));
+        else
+            printf("%.*s", (int)seg->detail, seg->text);
+        seg = seg->next;
+    }
+    printf("\n");
+}
+
+/*
+ * Print a link-free line as a text line. Only the first segment is
+ * written, so the line is expected to satisfy nolinks().
+ */
+void print_text_line(struct line *l)
+{
+    const struct segment *first = l->segs;
+
+    if (first) {
+        printf("t%.*s\n", (int)first->detail, first->text);
+        return;
+    }
+    printf("t\n");
+}
+
+/*
+ * Capacities parse_gopherurl() assumes for its output buffers. They mirror
+ * the arrays print_onelink() passes in.
+ */
+enum {
+    GOPHER_HOST_CAP = 512,
+    GOPHER_PORT_CAP = 24,
+    GOPHER_SEL_CAP = 512
+};
+
+/* Copy `len` bytes of `src` into `dst` and NUL-terminate the result. */
+static void gopher_copy(char *dst, const char *src, size_t len)
+{
+    memcpy(dst, src, len);
+    dst[len] = '\0';
+}
+
+/*
+ * Split a "gopher://" or "gophers://" URL into its parts.
+ *
+ * url      URL to parse
+ * hostbuf  receives the host (brackets of an IPv6 literal removed);
+ *          must hold GOPHER_HOST_CAP bytes
+ * portbuf  receives the port, "70" when absent or empty;
+ *          must hold GOPHER_PORT_CAP bytes
+ * selbuf   receives everything after the item type character;
+ *          must hold GOPHER_SEL_CAP bytes
+ * type     receives the item type, '1' when the URL has no path
+ *
+ * Returns false -- leaving all outputs untouched -- when the URL is not a
+ * gopher URL, is malformed, or has a component that does not fit its
+ * buffer.
+ */
+bool parse_gopherurl(const char *url, char *hostbuf, char *portbuf, char *selbuf, char *type)
+{
+    const char *host;
+    const char *port = "70";
+    const char *s;
+    size_t hostlen;
+    size_t portlen = 2;
+    size_t fs;
+    char t = '1';
+
+    if (strncmp(url, "gopher", 6))
+        return false;
+    url += 6;
+    if (*url == 's')
+        url++;
+    if (strncmp(url, "://", 3))
+        return false;
+    url += 3;
+
+    /* The authority ends at the first '/' or at the end of the string. */
+    fs = strcspn(url, "/");
+
+    if (*url == '[') {
+        /* Bracketed IPv6 literal, optionally followed by ":port". */
+        s = memchr(url, ']', fs);
+        if (!s)
+            return false;
+        host = url + 1;
+        hostlen = (size_t)(s - url) - 1;
+        if (s[1] == ':') {
+            port = s + 2;
+            portlen = fs - (size_t)(s - url) - 2;
+        }
+    } else if ((s = memchr(url, ':', fs))) {
+        host = url;
+        hostlen = (size_t)(s - url);
+        port = s + 1;
+        portlen = fs - hostlen - 1;
+    } else {
+        host = url;
+        hostlen = fs;
+    }
+
+    /* "host:" with nothing after the colon means the default port. */
+    if (portlen == 0) {
+        port = "70";
+        portlen = 2;
+    }
+
+    url += fs;
+    if (*url)
+        url++;      /* skip the '/' that ended the authority */
+    if (*url)
+        t = *url++; /* first path character is the item type */
+
+    /* Validate every length before writing so failure has no side effects. */
+    if (hostlen >= GOPHER_HOST_CAP || portlen >= GOPHER_PORT_CAP ||
+        strlen(url) >= GOPHER_SEL_CAP)
+        return false;
+
+    gopher_copy(hostbuf, host, hostlen);
+    gopher_copy(portbuf, port, portlen);
+    gopher_copy(selbuf, url, strlen(url));
+    *type = t;
+
+    return true;
+}
+
+/* Print the text segments of a line back to back, skipping references. */
+void print_text_only(struct line *l)
+{
+    const struct segment *seg = l->segs;
+
+    while (seg) {
+        if (seg->text)
+            printf("%.*s", (int)seg->detail, seg->text);
+        seg = seg->next;
+    }
+}
+
+/*
+ * Return the part of `url` after its last '/', or `url` itself when it
+ * contains no slash. The result points into `url`.
+ */
+const char *basename(const char *url)
+{
+    const char *slash = strrchr(url, '/');
+
+    return slash ? slash + 1 : url;
+}
+
+/*
+ * Return the part of `fn` after its last '.', or `fn` itself when it
+ * contains no dot. The result points into `fn`.
+ */
+const char *extof(const char *fn)
+{
+    const char *dot = strrchr(fn, '.');
+
+    return dot ? dot + 1 : fn;
+}
+
+/*
+ * Guess the gopher item type for a link from its file name.
+ *
+ * url   link target
+ * full  selects the type for names with an empty extension (for example
+ *       a target ending in '/'): 'h' when true, '1' when false
+ *
+ * Otherwise the extension -- or the whole base name when it has no dot --
+ * is looked up in typemap; names not listed there get default_type.
+ */
+char guesstype(const char *url, bool full)
+{
+    const char *ext = extof(basename(url));
+    size_t i;
+
+    if (!*ext)
+        return full ? 'h' : '1';
+
+    for (i = 0; i < sizeof(typemap) / sizeof(*typemap); i++)
+        if (!strcmp(ext, typemap[i].name))
+            return typemap[i].type;
+
+    /* Use the configurable fallback instead of repeating its value here. */
+    return default_type;
+}
+
+/*
+ * True when `s` starts with "scheme://", where the scheme is a letter
+ * followed by any number of letters, digits, '+', '-' or '.'.
+ */
+bool isfullurl(const char *s)
+{
+    /*
+     * <ctype.h> functions require a value representable as unsigned char;
+     * a plain char may be negative for bytes >= 0x80.
+     */
+    if (!isalpha((unsigned char)*s))
+        return false;
+
+    for (s++; isalnum((unsigned char)*s) || *s == '+' || *s == '-' || *s == '.'; s++)
+        ;
+
+    return !strncmp(s, "://", 3);
+}
+
+/*
+ * Print one link line of the form "[type|text|selector|host|port]".
+ *
+ * url   link target
+ * ptxt  called with `txt` to print the link's display text
+ * txt   opaque argument for ptxt
+ *
+ * gopher:// URLs are split into selector, host and port. Any other target
+ * is written with the literal placeholders "server" and "port": full URLs
+ * get type 'h' and a "URL:" selector prefix, relative targets get a type
+ * guessed from their file name.
+ */
+void print_onelink(const char *url, void (*ptxt)(void *txt), void *txt)
+{
+    char ghost[512];
+    char gport[24];
+    char stor[512];
+    char type;
+
+    if (parse_gopherurl(url, ghost, gport, stor, &type)) {
+        printf("[%c|", type);
+        ptxt(txt);
+        printf("|%s|%s|%s]\n", stor, ghost, gport);
+    } else {
+        bool full = isfullurl(url);
+        const char *upfx = full ? "URL:" : "";
+
+        /*
+         * Pass the real flag: the prefix pointer is never NULL, so testing
+         * it classified every extension-less relative target as 'h'
+         * instead of '1'.
+         */
+        type = full ? 'h' : guesstype(url, full);
+        printf("[%c|", type);
+        ptxt(txt);
+        printf("|%s%s|server|port]\n", upfx, url);
+    }
+}
+
+/* Argument bundle for print_ref(): one entry of the reference list. */
+struct prurl {
+    unsigned ref;     /* number printed in front of the URL */
+    const char *url;  /* link target shown as the entry's text */
+};
+
+/*
+ * Text callback for print_onelink(): print "<number>. <url>" for the
+ * struct prurl that `data` points to, with the number right-aligned in
+ * four columns.
+ */
+void print_ref(void *data)
+{
+    const struct prurl *entry = data;
+
+    printf("%4u. %s", entry->ref, entry->url);
+}
+
+/* Print a reference-list entry: a link to `url` labelled "<ref>. <url>". */
+void print_onelink_ref(const char *url, unsigned ref)
+{
+    struct prurl entry = { .ref = ref, .url = url };
+
+    print_onelink(url, print_ref, &entry);
+}
+
+/*
+ * Adapter that gives print_text_only() the void * signature that
+ * print_onelink() calls through. Calling a function through a pointer of
+ * an incompatible type is undefined behaviour.
+ */
+static void print_line_text_cb(void *txt)
+{
+    print_text_only(txt);
+}
+
+/* Print line `l` as a single link to `url`, using the line's text as label. */
+void print_onelink_line(struct line *l, const char *url)
+{
+    print_onelink(url, print_line_text_cb, l);
+}
+
+/*
+ * Shorten `link` relative to the page it was found on.
+ *
+ * link    absolute link target
+ * base    URL of the page
+ * bnl     length of base up to and including its last '/'
+ * protol  length of base's "scheme://authority" part
+ *
+ * Links below the page's directory lose that directory prefix; other links
+ * on the same authority are reduced to their path. Anything else is
+ * returned unchanged. The result points into `link`.
+ */
+const char *resolve_link(const char *link, const char *base, size_t bnl, size_t protol)
+{
+    if (!strncmp(link, base, bnl))
+        return link + bnl;
+
+    /*
+     * The authority must end right after the matched prefix; otherwise
+     * "http://a.com.evil/" or "http://a.com:8080/" would count as the same
+     * site as "http://a.com".
+     */
+    if (!strncmp(link, base, protol) &&
+        (link[protol] == '/' || link[protol] == '\0'))
+        return link + protol;
+
+    return link;
+}
+
+/*
+ * Print the remaining references as a numbered list of links under a
+ * "References:" heading. NULL slots are skipped and do not consume a
+ * number; nothing at all is printed when every slot is NULL.
+ */
+void dumprefs(char **refs, size_t n)
+{
+    unsigned shown = 0;
+    size_t idx;
+
+    for (idx = 0; idx < n; idx++) {
+        if (!refs[idx])
+            continue;
+        /* Emit the heading lazily, just before the first entry. */
+        if (!shown)
+            printf("tReferences:\nt\n");
+        print_onelink_ref(refs[idx], ++shown);
+    }
+}
+
+/* True when any of the `n` strings in `pfxs` is a prefix of `str`. */
+bool sa_pfxof(const char * const *pfxs, size_t n, const char *str)
+{
+    size_t k;
+
+    for (k = 0; k < n; k++) {
+        size_t plen = strlen(pfxs[k]);
+
+        if (strncmp(pfxs[k], str, plen) == 0)
+            return true;
+    }
+    return false;
+}
+
+/*
+ * Convert "lynx -dump" output on stdin to GPH on stdout.
+ *
+ * argv[1] is the URL the dump was made from; it must contain "://" and a
+ * '/' after the authority. Input up to a line reading "References" is the
+ * page text; the numbered lines after it are the link targets.
+ *
+ * Lines holding exactly one link that is used only once are turned into
+ * link lines; the remaining links are renumbered and listed at the end.
+ *
+ * Returns 0 on success, 1 on a usage error or when memory runs out.
+ */
+int main(int argc, char **argv)
+{
+    char linebuf[1024];
+    char **refs;
+    struct lq q = { 0 };
+    struct line *l;
+    const char *baseurl;
+    const char *prot;
+    const char *bls;
+
+    /* Validate the argument before anything dereferences it. */
+    if (argc < 2 || !(prot = strstr(argv[1], "://")) ||
+        !(prot = strchr(prot + 3, '/'))) {
+        fprintf(stderr, "usage: lynx -dump URI | %s URI\n",
+            argc > 0 ? argv[0] : "lynx2gph");
+        return 1;
+    }
+    baseurl = argv[1];
+    bls = strrchr(baseurl, '/');  /* cannot fail: prot points at a '/' */
+
+    /* Pass 1: queue the page text up to the reference list. */
+    while (fgets(linebuf, sizeof(linebuf), stdin)) {
+        size_t ll = strcspn(linebuf, "\r\n");
+
+        linebuf[ll] = '\0';
+
+        if (!strcmp(linebuf, "References"))
+            break;
+
+        lq_push(&q, linebuf);
+    }
+
+    refs = calloc(q.maxref + 1, sizeof(*refs));
+    if (!refs) {
+        perror("calloc");
+        return 1;
+    }
+
+    /* Pass 2: read "  N. target" lines into refs[N - 1]. */
+    while (fgets(linebuf, sizeof(linebuf), stdin)) {
+        size_t ll = strcspn(linebuf, "\r\n");
+        int urlpos = -1;
+        char dummy[2];
+        unsigned ref;
+        char *link;
+
+        linebuf[ll] = '\0';
+        if (sscanf(linebuf, " %u%1[.] %n", &ref, dummy, &urlpos) < 2 || urlpos < 0)
+            continue;
+
+        /*
+         * Entries the text never refers to (number 0, or numbers beyond
+         * those seen in pass 1) have no slot in refs; storing them would
+         * write outside the array.
+         */
+        if (ref == 0 || ref - 1 > q.maxref)
+            continue;
+
+        link = strdup(resolve_link(linebuf + urlpos, baseurl, bls - baseurl + 1, prot - baseurl));
+        if (!link) {
+            perror("strdup");
+            return 1;
+        }
+        free(refs[ref - 1]);  /* a repeated number replaces the old entry */
+        refs[ref - 1] = link;
+
+        /* Point links into the configured sections at their .gph version. */
+        if (sa_pfxof(gph_prefixes, sizeof(gph_prefixes) / sizeof(*gph_prefixes), link)) {
+            char *dp = strrchr(link, '.');
+            if (dp && !strncmp(dp + 1, "htm", 3))
+                strcpy(dp + 1, "gph");
+        }
+    }
+
+    /* Pass 3: emit the page. */
+    for (l = q.head; l; l = l->next) {
+        unsigned oneref = onelink(l->segs);
+
+        if (nolinks(l->segs)) {
+            print_text_line(l);
+        } else if (oneref && oneref - 1 <= q.maxref && refs[oneref - 1] &&
+                   q.ref_n[oneref - 1] == 1) {
+            /* Consumed inline: drop it so it is not listed or numbered. */
+            print_onelink_line(l, refs[oneref - 1]);
+            free(refs[oneref - 1]);
+            refs[oneref - 1] = NULL;
+        } else {
+            printline(l, (const char * const *)refs);
+        }
+    }
+
+    /* Cover the whole table so the highest reference is listed as well. */
+    dumprefs(refs, q.maxref + 1);
+
+    return 0;
+}