lynx2gph

Unnamed repository; edit this file 'description' to name the repository.
git clone https://git.inz.fi/lynx2gph
Log | Files | Refs | README

commit de231a555b093caeb0b8bcb2661e462290cf31a3
Author: Santtu Lakkala <inz@inz.fi>
Date:   Sat, 19 Feb 2022 16:55:11 +0200

Initial import

Diffstat:
A Makefile   |   5 +++++
A README     |   6 ++++++
A config.h   |  44 ++++++++++++++++++++++++++++++++++++++++++++
A lynx2gph.c | 533 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 588 insertions(+), 0 deletions(-)

diff --git a/Makefile b/Makefile
@@ -0,0 +1,5 @@
+CFLAGS = -std=c99 -W -Wall
+NAME = lynx2gph
+
+${NAME}: ${NAME}.c config.h
+	${CC} ${CFLAGS} ${NAME}.c -o $@
diff --git a/README b/README
@@ -0,0 +1,6 @@
+lynx2gph
+
+Simple tool to convert lynx -dump output to GPH format (as interpreted by
+geomyidae or tskrtt).
+
+Usage: lynx -dump "$URI" | lynx2gph "$URI"
diff --git a/config.h b/config.h
@@ -0,0 +1,44 @@
+static const struct {
+	char type;
+	const char *name;
+} typemap[] = {
+	{ '0', "txt" },
+	{ '0', "md" },
+	{ '0', "sh" },
+	{ '0', "mk" },
+	{ '0', "c" },
+	{ '0', "h" },
+	{ '0', "pl" },
+	{ '0', "s" },
+	{ '0', "css" },
+	{ '0', "js" },
+	{ '0', "xml" },
+	{ '0', "mbox" },
+	{ '0', "sha256" },
+	{ '0', "patch" },
+	{ '0', "diff" },
+	{ '0', "conf" },
+	{ '0', "vim" },
+	{ '0', "Makefile" },
+	{ '0', "README" },
+	{ '0', "TODO" },
+	{ '0', "cgitrc" },
+	{ '1', "gophermap" },
+	{ '1', "gph" },
+	{ 'h', "html" },
+	{ 'h', "htm" },
+	{ 'I', "gif" },
+	{ 'I', "png" },
+	{ 'I', "webp" },
+	{ 'I', "jpg" },
+	{ 'I', "jpeg" },
+	{ 'I', "bmp" },
+	{ 'I', "xpm" },
+};
+
+static const char default_type = '9';
+
+static const char *gph_prefixes[] = {
+	"pages/",
+	"posts/",
+};
diff --git a/lynx2gph.c b/lynx2gph.c
@@ -0,0 +1,533 @@
+#define _POSIX_C_SOURCE 200809L
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "config.h"
+
+/* Largest "[N]" accepted as a link reference; anything else stays text. */
+#define MAX_REF 99999u
+
+/*
+ * One piece of a line: literal text (text != NULL, detail = byte length)
+ * or a link marker (text == NULL, detail = reference number).
+ */
+struct segment {
+	struct segment *next;
+	const char *text;
+	unsigned detail;
+};
+
+/* One body line, its segments and the URL inlined into it, if any. */
+struct line {
+	struct segment *segs;
+	struct line *next;
+	char *text;
+	char *link;
+};
+
+/* Queue of body lines plus per-reference citation counts. */
+struct lq {
+	unsigned *ref_n;	/* ref_n[N - 1]: how often [N] is cited */
+	size_t maxref;		/* highest reference number cited */
+	size_t sz;		/* entries allocated in ref_n */
+	struct line *head;
+	struct line *tail;
+};
+
+/* State passed to seg_add() through ref_foreach(). */
+struct cb_data {
+	struct lq *q;
+	struct segment **tail;	/* slot the next segment is linked into */
+};
+
+/* Components of a parsed gopher:// URL. */
+struct gopherurl {
+	char host[512];
+	char port[24];
+	char sel[1024];
+	char type;
+};
+
+/* Arguments for print_ref(). */
+struct prurl {
+	unsigned ref;
+	const char *url;
+};
+
+/* Return p, or exit with a diagnostic if an allocation failed. */
+static void *nonnull(void *p)
+{
+	if (!p) {
+		fputs("lynx2gph: out of memory\n", stderr);
+		exit(1);
+	}
+	return p;
+}
+
+/* True if the segment list is empty or a single text segment. */
+static inline bool nolinks(const struct segment *segs)
+{
+	return !segs || (segs->text && !segs->next);
+}
+
+/* Return the reference number if the line holds exactly one link, else 0. */
+static inline unsigned onelink(const struct segment *segs)
+{
+	if (!segs)
+		return 0;
+
+	if (!segs->text) {
+		if (nolinks(segs->next))
+			return segs->detail;
+	} else if (segs->next && !segs->next->text) {
+		if (nolinks(segs->next->next))
+			return segs->next->detail;
+	}
+
+	return 0;
+}
+
+/*
+ * ref_foreach() callback: append a segment to the line being built and,
+ * for link markers, update the citation statistics in the queue.
+ */
+static void seg_add(const char *text, unsigned detail, void *data)
+{
+	struct cb_data *d = data;
+	struct lq *q = d->q;
+	struct segment *s = nonnull(malloc(sizeof(*s)));
+
+	s->next = NULL;
+	s->text = text;
+	s->detail = detail;
+	*d->tail = s;
+	d->tail = &s->next;
+
+	if (text)
+		return;
+
+	/* ref_foreach() only reports 1 <= detail <= MAX_REF. */
+	if (detail > q->sz) {
+		size_t nsz = q->sz ? q->sz : 32;
+
+		while (detail > nsz)
+			nsz *= 2;
+		q->ref_n = nonnull(realloc(q->ref_n, nsz * sizeof(*q->ref_n)));
+		memset(q->ref_n + q->sz, 0, (nsz - q->sz) * sizeof(*q->ref_n));
+		q->sz = nsz;
+	}
+	if (detail > q->maxref)
+		q->maxref = detail;
+
+	q->ref_n[detail - 1]++;
+}
+
+/*
+ * Split l at "[N]" markers, calling cb(text, length, data) for text runs
+ * and cb(NULL, N, data) for markers.  Brackets that are empty, contain
+ * non-digits, or hold a number outside 1..MAX_REF are left in the text.
+ * Returns the number of markers found.
+ */
+static unsigned ref_foreach(const char *l, void (*cb)(const char *text, unsigned ref, void *data), void *data)
+{
+	unsigned rv = 0;
+	long curref = -1;	/* < 0 while not inside "[digits" */
+	const char *s = l;	/* the '[' that opened curref */
+	const char *i;
+
+	for (i = l; *i; i++) {
+		if (*i == '[') {
+			s = i;
+			curref = 0;
+		} else if (curref >= 0 && isdigit((unsigned char)*i)) {
+			curref = curref * 10 + (*i - '0');
+			if (curref > (long)MAX_REF)
+				curref = -1;
+		} else {
+			if (*i == ']' && curref > 0) {
+				if (s != l)
+					cb(l, (unsigned)(s - l), data);
+				cb(NULL, (unsigned)curref, data);
+				l = i + 1;
+				rv++;
+			}
+			curref = -1;
+		}
+	}
+
+	if (*l)
+		cb(l, (unsigned)(i - l), data);
+
+	return rv;
+}
+
+/* Append a copy of line to the queue, split into segments. */
+static void lq_push(struct lq *q, const char *line)
+{
+	struct line *l = nonnull(malloc(sizeof(*l)));
+	struct cb_data d;
+
+	l->segs = NULL;
+	l->next = NULL;
+	l->link = NULL;
+	l->text = nonnull(strdup(line));
+
+	if (q->tail)
+		q->tail->next = l;
+	else
+		q->head = l;
+	q->tail = l;
+
+	d.q = q;
+	d.tail = &l->segs;
+	ref_foreach(l->text, seg_add, &d);
+}
+
+/*
+ * Output number of reference ref: one more than the count of references
+ * before it that still have a URL, i.e. the number dumprefs() gives it.
+ */
+static unsigned refof(char * const *refs, unsigned ref)
+{
+	unsigned n = 1;
+	unsigned i;
+
+	for (i = 1; i < ref; i++)
+		if (refs[i - 1])
+			n++;
+	return n;
+}
+
+/* Print a line with several links as text with renumbered "[N]" markers. */
+static void printline(const struct line *l, char * const *refs)
+{
+	const struct segment *s;
+
+	printf("t");
+	for (s = l->segs; s; s = s->next) {
+		if (s->text)
+			printf("%.*s", (int)s->detail, s->text);
+		else
+			printf("[%u]", refof(refs, s->detail));
+	}
+	puts("");
+}
+
+/* Print a line without links as a "t"-prefixed text line. */
+static void print_text_line(const struct line *l)
+{
+	if (!l->segs)
+		printf("t\n");
+	else
+		printf("t%.*s\n", (int)l->segs->detail, l->segs->text);
+}
+
+/* Copy len bytes of src into dst as a string; false if they do not fit. */
+static bool copy_span(char *dst, size_t dstsz, const char *src, size_t len)
+{
+	if (len >= dstsz)
+		return false;
+	memcpy(dst, src, len);
+	dst[len] = '\0';
+	return true;
+}
+
+/*
+ * Split gopher[s]://host[:port]/<type><selector> into *g.  The port
+ * defaults to "70" and the type to '1'.  Returns false if url is not a
+ * gopher URL or a component does not fit its buffer.
+ */
+static bool parse_gopherurl(const char *url, struct gopherurl *g)
+{
+	const char *host;
+	const char *port = "70";
+	const char *s;
+	size_t hostlen;
+	size_t portlen = 2;
+	size_t fs;
+
+	if (strncmp(url, "gopher", 6))
+		return false;
+	url += 6;
+	if (*url == 's')
+		url++;
+	if (strncmp(url, "://", 3))
+		return false;
+	url += 3;
+
+	fs = strcspn(url, "/");
+
+	if (*url == '[') {
+		/* bracketed IPv6 literal, optionally followed by ":port" */
+		s = memchr(url, ']', fs);
+		if (!s)
+			return false;
+		host = url + 1;
+		hostlen = (size_t)(s - url) - 1;
+		if (s[1] == ':') {
+			port = s + 2;
+			portlen = fs - (size_t)(s - url) - 2;
+		}
+	} else if ((s = memchr(url, ':', fs))) {
+		host = url;
+		hostlen = (size_t)(s - url);
+		port = s + 1;
+		portlen = fs - hostlen - 1;
+	} else {
+		host = url;
+		hostlen = fs;
+	}
+
+	if (!copy_span(g->host, sizeof(g->host), host, hostlen) ||
+	    !copy_span(g->port, sizeof(g->port), port, portlen))
+		return false;
+
+	url += fs;
+	if (*url)
+		url++;
+
+	g->type = *url ? *url++ : '1';
+
+	return copy_span(g->sel, sizeof(g->sel), url, strlen(url));
+}
+
+/* Print only the text segments of a struct line, without a newline. */
+static void print_text_only(void *data)
+{
+	const struct line *l = data;
+	const struct segment *s;
+
+	for (s = l->segs; s; s = s->next)
+		if (s->text)
+			printf("%.*s", (int)s->detail, s->text);
+}
+
+/* Return the part of url after its last '/', or url itself. */
+static const char *url_basename(const char *url)
+{
+	const char *bn = strrchr(url, '/');
+
+	return bn ? bn + 1 : url;
+}
+
+/* Return the part of fn after its last '.', or fn itself. */
+static const char *extof(const char *fn)
+{
+	const char *del = strrchr(fn, '.');
+
+	return del ? del + 1 : fn;
+}
+
+/*
+ * Guess the gopher item type of url from typemap.  With an empty
+ * extension the result is 'h' if full is set and '1' otherwise; an
+ * unknown extension gives default_type.
+ */
+static char guesstype(const char *url, bool full)
+{
+	const char *ext = extof(url_basename(url));
+	size_t i;
+
+	if (!*ext)
+		return full ? 'h' : '1';
+
+	for (i = 0; i < sizeof(typemap) / sizeof(*typemap); i++)
+		if (!strcmp(ext, typemap[i].name))
+			return typemap[i].type;
+	return default_type;
+}
+
+/* True if s starts with "<letters>://". */
+static bool isfullurl(const char *s)
+{
+	while (isalpha((unsigned char)*s))
+		s++;
+	return !strncmp(s, "://", 3);
+}
+
+/*
+ * Print a gph link line for url; ptxt(txt) prints the display text.
+ * gopher:// URLs are split into selector, host and port, other absolute
+ * URLs become "URL:" links of type 'h', and relative links keep their path.
+ */
+static void print_onelink(const char *url, void (*ptxt)(void *txt), void *txt)
+{
+	struct gopherurl g;
+
+	if (parse_gopherurl(url, &g)) {
+		printf("[%c|", g.type);
+		ptxt(txt);
+		printf("|%s|%s|%s]\n", g.sel, g.host, g.port);
+	} else {
+		bool full = isfullurl(url);
+
+		/* relative links are typed by extension; one ending in '/' gets '1' */
+		printf("[%c|", full ? 'h' : guesstype(url, false));
+		ptxt(txt);
+		printf("|%s%s|server|port]\n", full ? "URL:" : "", url);
+	}
+}
+
+/* print_onelink() text callback: "<number>. <url>". */
+static void print_ref(void *data)
+{
+	const struct prurl *u = data;
+
+	printf("%4u. %s", u->ref, u->url);
+}
+
+/* Print url as entry number ref of the references list. */
+static void print_onelink_ref(const char *url, unsigned ref)
+{
+	print_onelink(url, print_ref, &(struct prurl){ .ref = ref, .url = url });
+}
+
+/* Print line l as a link to url, using the line's text as label. */
+static void print_onelink_line(struct line *l, const char *url)
+{
+	print_onelink(url, print_text_only, l);
+}
+
+/*
+ * Make link relative to the page: strip the first bnl bytes of base if
+ * link starts with them, else the first protol bytes if those match.
+ */
+static const char *resolve_link(const char *link, const char *base, size_t bnl, size_t protol)
+{
+	if (!strncmp(link, base, bnl))
+		return link + bnl;
+	if (!strncmp(link, base, protol))
+		return link + protol;
+	return link;
+}
+
+/* Print the references that were not inlined, numbered from 1. */
+static void dumprefs(char **refs, size_t n)
+{
+	unsigned r = 1;
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		if (refs[i])
+			break;
+	if (i == n)
+		return;
+
+	printf("tReferences:\nt\n");
+	for (; i < n; i++)
+		if (refs[i])
+			print_onelink_ref(refs[i], r++);
+}
+
+/* True if any of the n prefixes in pfxs is a prefix of str. */
+static bool sa_pfxof(const char * const *pfxs, size_t n, const char *str)
+{
+	while (n--)
+		if (!strncmp(pfxs[n], str, strlen(pfxs[n])))
+			return true;
+	return false;
+}
+
+/*
+ * Read "lynx -dump" output on stdin and write GPH to stdout.  argv[1] is
+ * the URL of the dumped page, used to turn links into relative ones.
+ */
+int main(int argc, char **argv)
+{
+	char linebuf[1024];
+	char **refs;
+	size_t nrefs;
+	struct lq q = { 0 };
+	struct line *l;
+	const char *baseurl;
+	const char *prot;
+	const char *bls;
+
+	if (argc < 2) {
+		fputs("Usage: lynx -dump \"$URI\" | lynx2gph \"$URI\"\n", stderr);
+		return 1;
+	}
+	baseurl = argv[1];
+	prot = strstr(baseurl, "://");
+	if (!prot || !(prot = strchr(prot + 3, '/')))
+		return 1;
+	bls = strrchr(baseurl, '/');
+
+	/* body: everything up to the "References" heading */
+	while (fgets(linebuf, sizeof(linebuf), stdin)) {
+		linebuf[strcspn(linebuf, "\r\n")] = '\0';
+
+		if (!strcmp(linebuf, "References"))
+			break;
+
+		lq_push(&q, linebuf);
+	}
+
+	/* reference list: lines of the form "  N. URL" */
+	nrefs = q.maxref ? q.maxref : 1;
+	refs = nonnull(calloc(nrefs, sizeof(*refs)));
+	while (fgets(linebuf, sizeof(linebuf), stdin)) {
+		int urlpos = 0;
+		char dummy[2];
+		unsigned ref;
+		char *url;
+
+		linebuf[strcspn(linebuf, "\r\n")] = '\0';
+		if (sscanf(linebuf, " %u%1[.] %n", &ref, dummy, &urlpos) < 2)
+			continue;
+		if (ref == 0 || ref > MAX_REF)
+			continue;
+
+		if (ref > nrefs) {
+			/* listed but numbered past the last citation in the body */
+			size_t nn = nrefs;
+
+			while (ref > nn)
+				nn *= 2;
+			refs = nonnull(realloc(refs, nn * sizeof(*refs)));
+			memset(refs + nrefs, 0, (nn - nrefs) * sizeof(*refs));
+			nrefs = nn;
+		}
+
+		url = nonnull(strdup(resolve_link(linebuf + urlpos, baseurl, (size_t)(bls - baseurl) + 1, (size_t)(prot - baseurl))));
+		if (sa_pfxof(gph_prefixes, sizeof(gph_prefixes) / sizeof(*gph_prefixes), url)) {
+			char *dp = strrchr(url, '.');
+
+			if (dp && !strncmp(dp + 1, "htm", 3))
+				strcpy(dp + 1, "gph");
+		}
+		free(refs[ref - 1]);	/* a repeated number replaces the earlier URL */
+		refs[ref - 1] = url;
+	}
+
+	/*
+	 * Move the URL of every reference that is the only link on its line
+	 * and cited nowhere else into that line.  This has to be finished
+	 * before anything is printed so that refof() and dumprefs() number
+	 * the remaining references identically.
+	 */
+	for (l = q.head; l; l = l->next) {
+		unsigned oneref = onelink(l->segs);
+
+		if (oneref && q.ref_n[oneref - 1] == 1 && refs[oneref - 1]) {
+			l->link = refs[oneref - 1];
+			refs[oneref - 1] = NULL;
+		}
+	}
+
+	for (l = q.head; l; l = l->next) {
+		if (nolinks(l->segs))
+			print_text_line(l);
+		else if (l->link)
+			print_onelink_line(l, l->link);
+		else
+			printline(l, refs);
+	}
+
+	dumprefs(refs, nrefs);
+
+	return 0;
+}