commit b783f287c8b3e77bbd1eb94892ea645ba05e8770
parent 97ff66116b7633066375bbc6cc88be5b8587453b
Author: grunfink <grunfink@comam.es>
Date: Wed, 28 May 2025 07:56:44 +0200
New function rss_to_timeline().
Diffstat:
M | rss.c | | | 124 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | snac.h | | | 2 | ++ |
2 files changed, 126 insertions(+), 0 deletions(-)
diff --git a/rss.c b/rss.c
@@ -5,6 +5,8 @@
#include "xs_html.h"
#include "xs_regex.h"
#include "xs_time.h"
+#include "xs_match.h"
+#include "xs_curl.h"
#include "snac.h"
@@ -103,3 +105,125 @@ xs_str *rss_from_timeline(snac *user, const xs_list *timeline,
return xs_html_render_s(rss, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
}
+
+
+void rss_to_timeline(snac *user, const char *url)
+/* Reads the RSS feed at url and inserts the ActivityPub posts it links
+ * to into the user's timeline.
+ *
+ * user: account whose timeline is queried and updated; also the target
+ *       of the log and debug messages emitted here
+ * url:  address of the feed, requested with an HTTP GET
+ *
+ * Returns nothing. A request with a non-valid status, a payload that is
+ * not a string, or a response whose content-type does not contain
+ * application/rss+xml ends the function silently. Inside the loop, any
+ * link that cannot be turned into an acceptable object is skipped and
+ * processing goes on with the next one. */
+{
+ xs *hdrs = xs_dict_new();
+ hdrs = xs_dict_set(hdrs, "accept", "application/rss+xml");
+ hdrs = xs_dict_set(hdrs, "user-agent", USER_AGENT);
+
+ xs *payload = NULL;
+ int status;
+ int p_size;
+ /* status, payload and p_size are passed by address to be filled in by
+ * the request; p_size is not read again in this function.
+ * NOTE(review): NULL, 0 look like an empty request body and the
+ * trailing 0 is presumably a timeout -- confirm against xs_curl.h */
+ xs *rsp = xs_http_request("GET", url, hdrs, NULL, 0, &status, &payload, &p_size, 0);
+
+ if (!valid_status(status) || !xs_is_string(payload))
+ return;
+
+ /* not an RSS? done: the content-type response header must be a string
+ * that contains application/rss+xml (xs_str_in() == -1 is taken here
+ * as "not found") */
+ const char *ctype = xs_dict_get(rsp, "content-type");
+ if (!xs_is_string(ctype) || xs_str_in(ctype, "application/rss+xml") == -1)
+ return;
+
+ snac_log(user, xs_fmt("parsing RSS %s", url));
+
+ /* yes, parsing is done with regexes (now I have two problems blah blah blah);
+ * every link element with non-empty text and no nested tag becomes
+ * one candidate in the list */
+ xs *links = xs_regex_select(payload, "<link>[^<]+</link>");
+ const char *link;
+
+ xs_list_foreach(links, link) {
+ xs *l = xs_replace(link, "<link>", "");
+ char *p = strchr(l, '<');
+ /* with the opening tag removed, the first '<' left is the start of
+ * the closing tag: cut the string there so l holds only the link */
+ if (p == NULL)
+ continue;
+ *p = '\0';
+
+ /* skip this same URL (a link identical to the feed address) */
+ if (strcmp(l, url) == 0)
+ continue;
+
+ snac_debug(user, 1, xs_fmt("RSS link: %s", l));
+ /* first duplicate check: the link exactly as written in the feed */
+ if (timeline_here(user, l)) {
+ snac_debug(user, 1, xs_fmt("RSS entry already in timeline %s", l));
+ continue;
+ }
+
+ /* special trick for Mastodon: convert from the alternate format.
+ * Applies only when the link contains '@', splits on '/' into
+ * exactly five parts and the fourth part starts with '@'; the
+ * third part (host), the fourth without its '@' and the last part
+ * are then rearranged into a users/.../statuses/... address, and
+ * that guessed id gets a second duplicate check */
+ if (strchr(l, '@') != NULL) {
+ xs *l2 = xs_split(l, "/");
+
+ if (xs_list_len(l2) == 5) {
+ const char *uid = xs_list_get(l2, 3);
+ if (*uid == '@') {
+ xs *guessed_id = xs_fmt("https:/" "/%s/users/%s/statuses/%s",
+ xs_list_get(l2, 2), uid + 1, xs_list_get(l2, -1));
+
+ if (timeline_here(user, guessed_id)) {
+ snac_debug(user, 1, xs_fmt("RSS entry already in timeline (alt) %s", guessed_id));
+ continue;
+ }
+ }
+ }
+ }
+
+ xs *obj = NULL;
+ /* object_get() is tried first; only when it does not return a valid
+ * status is the link requested through activitypub_request() */
+ if (!valid_status(object_get(l, &obj))) {
+ /* object is not here: bring it (and give up on this link if
+ * that request does not return a valid status either) */
+ if (!valid_status(activitypub_request(user, l, &obj)))
+ continue;
+ }
+
+ if (xs_is_dict(obj)) {
+ const char *id = xs_dict_get(obj, "id");
+ const char *type = xs_dict_get(obj, "type");
+ const char *attr_to = get_atto(obj);
+ /* all three values must be strings. NOTE(review): get_atto()
+ * presumably yields the attributedTo actor -- confirm */
+ if (!xs_is_string(id) || !xs_is_string(type) || !xs_is_string(attr_to))
+ continue;
+ /* only types matching POSTLIKE_OBJECT_TYPE are accepted */
+ if (!xs_match(type, POSTLIKE_OBJECT_TYPE))
+ continue;
+ /* last duplicate check, this time by the object's own id */
+ if (timeline_here(user, id)) {
+ snac_debug(user, 1, xs_fmt("RSS entry already in timeline (id) %s", id));
+ continue;
+ }
+ /* the entry is dropped unless actor_request() for attr_to returns
+ * a valid status; its result object is not kept (NULL) */
+ if (!valid_status(actor_request(user, attr_to, NULL)))
+ continue;
+
+ timeline_add(user, id, obj);
+ }
+ }
+}
+
+
+void rss_process(void)
+/* Walks every user returned by user_list() and, for each one that can be
+ * opened, feeds every entry of the "rss" list found in its configuration
+ * to rss_to_timeline(). Users that fail to open, or whose "rss" value is
+ * not a list, are passed over. */
+{
+    xs *users = user_list();
+    const char *name;
+
+    xs_list_foreach(users, name) {
+        snac account;
+
+        /* nothing to do for a user that cannot be opened */
+        if (!user_open(&account, name))
+            continue;
+
+        const xs_list *feeds = xs_dict_get(account.config, "rss");
+
+        if (xs_is_list(feeds)) {
+            const char *feed_url;
+
+            xs_list_foreach(feeds, feed_url) {
+                rss_to_timeline(&account, feed_url);
+            }
+        }
+
+        /* every successfully opened user is released again */
+        user_free(&account);
+    }
+}
diff --git a/snac.h b/snac.h
@@ -463,3 +463,5 @@ const char *lang_str(const char *str, const snac *user);
xs_str *rss_from_timeline(snac *user, const xs_list *timeline,
const char *title, const char *link, const char *desc);
+void rss_to_timeline(snac *user, const char *url);
+void rss_process(void);