lynx2gph.c (7774B)
1 #define _POSIX_C_SOURCE 200809L 2 #include <stdio.h> 3 #include <stdlib.h> 4 #include <string.h> 5 #include <stdbool.h> 6 #include <ctype.h> 7 8 #include "config.h" 9 10 struct segment { 11 struct segment *next; 12 const char *text; 13 unsigned detail; 14 }; 15 16 struct line 17 { 18 struct segment *segs; 19 struct line *next; 20 char *text; 21 }; 22 23 struct lq { 24 unsigned *ref_n; 25 size_t maxref; 26 size_t sz; 27 struct line *head; 28 struct line *tail; 29 }; 30 31 static inline void *memdup(const void *data, size_t sz) 32 { 33 return memcpy(malloc(sz), data, sz); 34 } 35 36 #define dup(...) (memdup((__VA_ARGS__), sizeof(*(__VA_ARGS__)))) 37 38 struct cb_data { 39 struct lq *q; 40 struct segment **tail; 41 }; 42 43 static inline bool nolinks(const struct segment *segs) 44 { 45 return !segs || (segs->text && !segs->next); 46 } 47 48 static inline unsigned onelink(const struct segment *segs) 49 { 50 if (!segs) 51 return 0; 52 53 if (!segs->text) { 54 if (nolinks(segs->next)) 55 return segs->detail; 56 } else if (segs->next && !segs->next->text) { 57 if (nolinks(segs->next->next)) 58 return segs->next->detail; 59 } 60 61 return 0; 62 } 63 64 void seg_add(const char *text, unsigned detail, void *data) 65 { 66 struct cb_data *d = data; 67 struct segment *s = dup(&(struct segment){ .text = text, .detail = detail }); 68 69 *d->tail = (*d->tail)->next = s; 70 71 if (text) 72 return; 73 74 if (detail - 1 >= d->q->sz) { 75 size_t nsz = d->q->sz ? d->q->sz * 2 : 32; 76 while (detail - 1 >= nsz) 77 nsz *= 2; 78 d->q->ref_n = realloc(d->q->ref_n, nsz * sizeof(*d->q->ref_n)); 79 memset(d->q->ref_n + d->q->sz, 0, (nsz - d->q->sz) * sizeof(*d->q->ref_n)); 80 d->q->sz = nsz; 81 } 82 if (detail >= d->q->maxref) 83 d->q->maxref = detail - 1; 84 85 d->q->ref_n[detail - 1]++; 86 } 87 88 unsigned ref_foreach(const char *l, void (*cb)(const char *text, unsigned ref, void *data), void *data) 89 { 90 unsigned rv = 0; 91 int curref = -1; 92 const char *s; 93 const char *i; 94 95 for (i = l; *i; i++) { 96 switch (*i) { 97 case '[': 98 s = i; 99 curref = 0; 100 break; 101 case '0': 102 case '1': 103 case '2': 104 case '3': 105 case '4': 106 case '5': 107 case '6': 108 case '7': 109 case '8': 110 case '9': 111 if (curref >= 0) 112 curref = curref * 10 + *i - '0'; 113 break; 114 case ']': 115 if (curref < 0) 116 break; 117 if (s != l) 118 cb(l, s - l, data); 119 cb(NULL, curref, data); 120 l = i + 1; 121 rv++; 122 123 /* fall-through */ 124 default: 125 curref = -1; 126 } 127 } 128 129 if (*l) 130 cb(l, i - l, data); 131 132 return rv; 133 } 134 135 void lq_push(struct lq *q, const char *line) 136 { 137 struct line *l = malloc(sizeof(*l)); 138 139 if (q->tail) 140 q->tail->next = l; 141 else 142 q->head = l; 143 q->tail = l; 144 l->next = NULL; 145 l->text = strdup(line); 146 l->segs = NULL; 147 148 ref_foreach(l->text, seg_add, &(struct cb_data){ .q = q, .tail = (void *)&l }); 149 } 150 151 unsigned refof(const char * const *refs, unsigned ref) 152 { 153 unsigned n = 1; 154 while (--ref) 155 if (*refs++) 156 n++; 157 return n; 158 } 159 160 void printline(struct line *l, const char * const *refs) 161 { 162 struct segment *s; 163 164 printf("t"); 165 for (s = l->segs; s; s = s->next) { 166 if (s->text) 167 printf("%.*s", (int)s->detail, s->text); 168 else 169 printf("[%u]", refof(refs, s->detail)); 170 } 171 puts(""); 172 } 173 174 void print_text_line(struct line *l) 175 { 176 if (!l->segs) 177 printf("t\n"); 178 else 179 printf("t%.*s\n", (int)l->segs->detail, l->segs->text); 180 } 181 182 bool parse_gopherurl(const char *url, char *hostbuf, char *portbuf, char *selbuf, char *type) 183 { 184 size_t fs; 185 const char *s; 186 187 if (strncmp(url, "gopher", 6)) 188 return false; 189 url += 6; 190 if (*url == 's') 191 url++; 192 if (*url++ != ':' || *url++ != '/' || *url++ != '/') 193 return false; 194 195 fs = strcspn(url, "/"); 196 197 if (*url == '[') { 198 s = memchr(url, ']', fs); 199 if (!s) 200 return false; 201 sprintf(hostbuf, "%.*s", (int)(s - url - 1), url + 1); 202 if (s[1] == ':') 203 sprintf(portbuf, "%.*s", (int)(fs - (s - url) - 2), s + 2); 204 else 205 strcpy(portbuf, "70"); 206 } else if ((s = memchr(url, ':', fs))) { 207 sprintf(hostbuf, "%.*s", (int)(s - url), url); 208 sprintf(portbuf, "%.*s", (int)(fs - (s - url + 1)), s + 1); 209 } else { 210 sprintf(hostbuf, "%.*s", (int)fs, url); 211 strcpy(portbuf, "70"); 212 } 213 url += fs; 214 if (*url) 215 url++; 216 217 if (*url) 218 *type = *url++; 219 else 220 *type = '1'; 221 222 strcpy(selbuf, url); 223 224 return true; 225 } 226 227 void print_text_only(struct line *l) 228 { 229 struct segment *s; 230 231 for (s = l->segs; s; s = s->next) 232 if (s->text) 233 printf("%.*s", (int)s->detail, s->text); 234 } 235 236 const char *basename(const char *url) 237 { 238 const char *bn = strrchr(url, '/'); 239 if (bn) 240 return bn + 1; 241 return url; 242 } 243 244 const char *extof(const char *fn) 245 { 246 const char *del = strrchr(fn, '.'); 247 if (del) 248 return del + 1; 249 return fn; 250 } 251 252 char guesstype(const char *url, bool full) 253 { 254 const char *ext = extof(basename(url)); 255 size_t i; 256 257 if (!*ext) { 258 if (full) 259 return 'h'; 260 return '1'; 261 } 262 263 for (i = 0; i < sizeof(typemap) / sizeof(*typemap); i++) 264 if (!strcmp(ext, typemap[i].name)) 265 return typemap[i].type; 266 return '9'; 267 } 268 269 bool isfullurl(const char *s) 270 { 271 while (isalpha(*s)) 272 s++; 273 return !strncmp(s, "://", 3); 274 } 275 276 void print_onelink(const char *url, void (*ptxt)(void *txt), void *txt) 277 { 278 char ghost[512]; 279 char gport[24]; 280 char stor[512]; 281 char type; 282 283 if (parse_gopherurl(url, ghost, gport, stor, &type)) { 284 printf("[%c|", type); 285 ptxt(txt); 286 printf("|%s|%s|%s]\n", stor, ghost, gport); 287 } else { 288 bool full = isfullurl(url); 289 const char *upfx = full ? "URL:" : ""; 290 type = full ? 'h' : guesstype(url, !!upfx); 291 printf("[%c|", type); 292 ptxt(txt); 293 printf("|%s%s|server|port]\n", upfx, url); 294 } 295 } 296 297 struct prurl { 298 unsigned ref; 299 const char *url; 300 }; 301 302 void print_ref(void *data) 303 { 304 struct prurl *u = data; 305 printf("%4u. %s", u->ref, u->url); 306 } 307 308 void print_onelink_ref(const char *url, unsigned ref) 309 { 310 print_onelink(url, print_ref, &(struct prurl){ .ref = ref, .url = url }); 311 } 312 313 void print_onelink_line(struct line *l, const char *url) 314 { 315 print_onelink(url, print_text_only, l); 316 } 317 318 const char *resolve_link(const char *link, const char *base, size_t bnl, size_t protol) 319 { 320 if (!strncmp(link, base, bnl)) 321 return link + bnl; 322 if (!strncmp(link, base, protol)) 323 return link + protol; 324 return link; 325 } 326 327 void dumprefs(char **refs, size_t n) 328 { 329 unsigned r = 1; 330 unsigned i; 331 332 for (i = 0; i < n; i++) 333 if (refs[i]) 334 break; 335 if (i == n) 336 return; 337 338 printf("tReferences:\nt\n"); 339 for (; i < n; i++) 340 if (refs[i]) 341 print_onelink_ref(refs[i], r++); 342 } 343 344 bool sa_pfxof(const char * const *pfxs, size_t n, const char *str) 345 { 346 while (n--) 347 if (!strncmp(pfxs[n], str, strlen(pfxs[n]))) 348 return true; 349 return false; 350 } 351 352 int main(int argc, char **argv) 353 { 354 char linebuf[1024]; 355 char **refs; 356 struct lq q = { 0 }; 357 struct line *l; 358 const char *baseurl = argv[1]; 359 const char *prot = strstr(baseurl, "://"); 360 const char *bls = strrchr(baseurl, '/'); 361 362 if (!baseurl || !prot || !(prot = strchr(prot + 3, '/'))) 363 return 1; 364 365 while (fgets(linebuf, sizeof(linebuf), stdin)) { 366 size_t ll = strcspn(linebuf, "\r\n"); 367 368 linebuf[ll] = '\0'; 369 370 if (!strcmp(linebuf, "References")) 371 break; 372 373 lq_push(&q, linebuf); 374 } 375 376 refs = calloc((q.maxref + 1), sizeof(*refs)); 377 while (fgets(linebuf, sizeof(linebuf), stdin)) { 378 size_t ll = strcspn(linebuf, "\r\n"); 379 int urlpos; 380 char dummy[2]; 381 unsigned ref; 382 383 linebuf[ll] = '\0'; 384 if (sscanf(linebuf, " %u%1[.] %n", &ref, dummy, &urlpos) < 2) 385 continue; 386 387 refs[ref - 1] = strdup(resolve_link(linebuf + urlpos, baseurl, bls - baseurl + 1, prot - baseurl)); 388 if (sa_pfxof(gph_prefixes, sizeof(gph_prefixes) / sizeof(*gph_prefixes), refs[ref - 1])) { 389 char *dp = strrchr(refs[ref - 1], '.'); 390 if (dp && !strncmp(dp + 1, "htm", 3)) 391 strcpy(dp + 1, "gph"); 392 } 393 } 394 395 for (l = q.head; l; l = l->next) { 396 unsigned oneref = onelink(l->segs); 397 398 if (nolinks(l->segs)) { 399 print_text_line(l); 400 } else if (oneref && q.ref_n[oneref - 1] == 1) { 401 print_onelink_line(l, refs[oneref - 1]); 402 free(refs[oneref - 1]); 403 refs[oneref - 1] = NULL; 404 } else { 405 printline(l, refs); 406 } 407 } 408 409 dumprefs(refs, q.maxref); 410 411 return 0; 412 }