Getting Bluesky (or Fediverse) RSS feeds to work
1 minute read •
Some popular news outlets don't have RSS feeds, but they do have Fediverse/Bluesky accounts. Bluesky supposedly serves an RSS feed if you add /rss to the end of a profile URL, but for some reason, I couldn't get it to work out of the box (OOTB).
No worries, worked w/ Claude to vibe-code a workaround.
- Grab the profile URL for the bsky page you want to follow
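- The script below fetches content for newsraft. Save it as `fediverseXml.sh` (the name the `nr` shell function further down calls), make it executable, and place it somewhere in your $PATH: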
#!/usr/bin/env python3
"""Fetch microblog content and normalize to RSS for newsraft.

Handles two input types:

- Standard RSS/Atom feed URL: patches titleless items, rewrites <link>
  to the first external URL found in the post body.
  Note: only RSS <item> elements are patched; Atom <entry> elements are
  not modified.
- Bluesky AT-URI (at://did:plc:.../app.bsky.feed.generator/<rkey>):
  calls the unauthenticated getFeed XRPC endpoint and emits RSS.

Usage: fediverseXml.sh <feed_url_or_at_uri> <output_file>
"""
import html
import json
import re
import sys
import urllib.parse
import urllib.request
from datetime import datetime, timezone
from email.utils import formatdate
from xml.etree import ElementTree as ET
from xml.sax.saxutils import escape
# Hosts whose links point back at the post itself rather than at
# external content.
SELF_DOMAINS = {
    "bsky.app",
    "mastodon.social",
    "mstdn.social",
}

# Unauthenticated getFeed XRPC endpoint used for Bluesky AT-URIs.
BSKY_XRPC = "https://public.api.bsky.app/xrpc/app.bsky.feed.getFeed"

# User-Agent header sent with every outgoing request.
UA = "Mozilla/5.0 (fediverseXml)"
def strip_html(text):
    """Reduce an HTML fragment to single-spaced plain text.

    Line breaks and closing block-level tags become spaces so that
    adjacent paragraphs do not run together. All other tags are removed
    without inserting anything, so text that is split across inline
    elements (for example a URL wrapped in several ``<span>`` tags) stays
    joined. HTML entities such as ``&amp;`` are decoded afterwards.

    Args:
        text: HTML (or plain text) string.

    Returns:
        The text content with runs of whitespace collapsed to one space
        and no leading or trailing whitespace.
    """
    # Block boundaries separate words; without this "</p><p>" would glue
    # the end of one paragraph (often a URL) to the start of the next.
    text = re.sub(
        r"<br\s*/?>|</(?:p|div|li|h[1-6]|blockquote)\s*>",
        " ",
        text,
        flags=re.IGNORECASE,
    )
    text = re.sub(r"<[^>]+>", "", text)
    # Decode entities only after tag removal so an escaped "&lt;b&gt;" is
    # kept as literal text instead of being stripped as a tag.
    return " ".join(html.unescape(text).split())
def find_external_url(text):
    """Return the first http(s) URL in *text* that points away from the post.

    The text is first reduced to plain text with ``strip_html``. A URL is
    considered internal when its host is one of ``SELF_DOMAINS`` or a
    subdomain of one. The host is parsed out of the URL, so a self-domain
    name that merely appears in the path or query string does not cause
    the URL to be skipped.

    Args:
        text: HTML or plain-text post body.

    Returns:
        The first external URL with trailing ``.,;:`` punctuation removed,
        or ``None`` when there is none.
    """
    plain = strip_html(text)
    for match in re.finditer(r"https?://[^\s<>\"')\]]+", plain):
        url = match.group().rstrip(".,;:")
        try:
            host = urllib.parse.urlsplit(url).hostname or ""
        except ValueError:
            # Malformed authority (e.g. an unbalanced "[") - not usable.
            continue
        if not host:
            continue
        if not any(host == d or host.endswith("." + d) for d in SELF_DOMAINS):
            return url
    return None
def fetch(url):
    """Download *url* and return the raw response body as bytes.

    The request is sent with the module's ``UA`` User-Agent header and a
    20-second timeout.
    """
    request = urllib.request.Request(url, headers={"User-Agent": UA})
    response = urllib.request.urlopen(request, timeout=20)
    with response:
        return response.read()
def patch_rss(feed_url, output):
    """Download an RSS feed, patch its items, and write it to *output*.

    For every ``<item>`` that has a ``<description>``:

    * a missing or blank ``<title>`` is set to the first 120 characters
      of the description's plain text;
    * ``<link>`` is set to the first external URL found in the
      description, and is created when the item has none.

    Items without a ``<description>`` are left as they are.

    Args:
        feed_url: URL of the feed to download.
        output: Path of the XML file to write (UTF-8).

    Raises:
        urllib.error.URLError: If the download fails.
        xml.etree.ElementTree.ParseError: If the response is not
            well-formed XML.
    """
    # NOTE(review): the feed is remote input and xml.etree is documented
    # as not hardened against maliciously constructed XML.
    root = ET.fromstring(fetch(feed_url))
    for item in root.findall(".//item"):
        desc = item.find("description")
        if desc is None:
            continue
        raw = desc.text or ""

        title = item.find("title")
        if title is None:
            title = ET.SubElement(item, "title")
        if not (title.text or "").strip():
            title.text = strip_html(raw)[:120]

        ext_url = find_external_url(raw)
        if ext_url:
            link = item.find("link")
            if link is None:
                # Items without a <link> still get the external target.
                link = ET.SubElement(item, "link")
            link.text = ext_url

    # Explicit UTF-8 keeps the bytes on disk and the XML declaration in
    # agreement regardless of locale or Python version.
    ET.ElementTree(root).write(output, encoding="utf-8", xml_declaration=True)
def _first_facet_link(record):
    """Return the first external link URI in the record's rich-text facets.

    Only ``app.bsky.richtext.facet#link`` features are considered; links
    whose host is one of ``SELF_DOMAINS`` (or a subdomain) are skipped.
    Returns ``None`` when no such link exists.
    """
    for facet in record.get("facets") or []:
        for feature in facet.get("features") or []:
            if feature.get("$type") != "app.bsky.richtext.facet#link":
                continue
            uri = feature.get("uri")
            if not uri:
                continue
            try:
                host = urllib.parse.urlsplit(uri).hostname or ""
            except ValueError:
                continue
            if not any(host == d or host.endswith("." + d) for d in SELF_DOMAINS):
                return uri
    return None


def build_rss_from_bsky(at_uri, output):
    """Fetch a Bluesky feed generator's posts and write them as RSS.

    Requests up to 30 entries from the getFeed endpoint. Posts with empty
    text are skipped. Each remaining post becomes one item whose title is
    the first 120 characters of the text and whose link is chosen in this
    order:

    1. the external embed's URI,
    2. the first external link recorded in the post's rich-text facets,
    3. the first external URL that appears literally in the text,
    4. the bsky.app URL of the post itself.

    The channel title is the last path segment of *at_uri*.

    Args:
        at_uri: AT-URI of the feed generator.
        output: Path of the RSS file to write.

    Raises:
        urllib.error.URLError: If the request fails.
        json.JSONDecodeError: If the response is not valid JSON.
    """
    qs = urllib.parse.urlencode({"feed": at_uri, "limit": 30})
    data = json.loads(fetch(f"{BSKY_XRPC}?{qs}"))
    items = []
    for entry in data.get("feed") or []:
        # "or {}" also covers keys that are present but null.
        post = entry.get("post") or {}
        record = post.get("record") or {}
        text = (record.get("text") or "").strip()
        if not text:
            continue
        embed = post.get("embed") or {}
        ext = (embed.get("external") or {}).get("uri")
        # The visible text may carry only a shortened form of a link; the
        # facet holds the full URI, so it is tried before scanning the text.
        link = (
            ext
            or _first_facet_link(record)
            or find_external_url(text)
            or post_url(post)
        )
        pub = record.get("createdAt") or post.get("indexedAt")
        items.append((text[:120], link, text, pub))
    channel_title = at_uri.rsplit("/", 1)[-1]
    write_rss(output, channel_title, items)
def post_url(post):
    """Translate a post's AT-URI into its bsky.app web address.

    Reads ``post["uri"]``; when it is absent or is not a post AT-URI the
    bare ``https://bsky.app`` address is returned instead.
    """
    at_uri = post.get("uri", "")
    # Expected shape: at://did:plc:xxx/app.bsky.feed.post/<rkey>
    matched = re.match(r"at://([^/]+)/app\.bsky\.feed\.post/(.+)", at_uri)
    if matched:
        author = matched.group(1)
        record_key = matched.group(2)
        return f"https://bsky.app/profile/{author}/post/{record_key}"
    return "https://bsky.app"
# Control characters that XML 1.0 does not allow in a document.
_XML_INVALID_CHARS = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")


def _rfc822(iso):
    """Format an ISO-8601 timestamp as an RFC 822 date in GMT.

    A timestamp without a UTC offset is taken to be UTC. When *iso* is
    empty or cannot be parsed, the current time is used so that one bad
    timestamp does not abort the whole feed.
    """
    if iso:
        try:
            parsed = datetime.fromisoformat(iso.replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError):
            parsed = None
        if parsed is not None:
            if parsed.tzinfo is None:
                parsed = parsed.replace(tzinfo=timezone.utc)
            return formatdate(parsed.timestamp(), usegmt=True)
    return formatdate(usegmt=True)


def _xml_text(value):
    """Escape *value* for XML element content, dropping forbidden control chars."""
    return escape(_XML_INVALID_CHARS.sub("", value))


def write_rss(output, channel_title, items):
    """Write a minimal RSS 2.0 document to *output*.

    Args:
        output: Path of the file to write (UTF-8).
        channel_title: Used as both the channel title and description.
        items: Iterable of ``(title, link, text, pub)`` tuples, where
            *pub* is an ISO-8601 timestamp string or ``None``. The link
            doubles as the item's non-permalink guid.
    """
    parts = [
        '<?xml version="1.0" encoding="utf-8"?>',
        '<rss version="2.0"><channel>',
        f"<title>{_xml_text(channel_title)}</title>",
        "<link>https://bsky.app</link>",
        f"<description>{_xml_text(channel_title)}</description>",
    ]
    for title, link, text, pub in items:
        safe_link = _xml_text(link)
        parts.extend(
            (
                "<item>",
                f"<title>{_xml_text(title)}</title>",
                f"<link>{safe_link}</link>",
                f"<description>{_xml_text(text)}</description>",
                f'<guid isPermaLink="false">{safe_link}</guid>',
                f"<pubDate>{_rfc822(pub)}</pubDate>",
                "</item>",
            )
        )
    parts.append("</channel></rss>")
    with open(output, "w", encoding="utf-8") as f:
        f.write("".join(parts))
def main():
    """Command-line entry point.

    Expects exactly two arguments: a feed URL or AT-URI, and the output
    file path. An ``at://`` source is converted from the Bluesky feed;
    anything else is treated as a feed URL to patch. Any failure is
    reported on stderr as ``<source>: <reason>`` and the process exits
    with status 1.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print(f"Usage: {sys.argv[0]} <feed_url_or_at_uri> <output_file>",
              file=sys.stderr)
        sys.exit(1)
    feed_url, output = cli_args

    try:
        handler = build_rss_from_bsky if feed_url.startswith("at://") else patch_rss
        handler(feed_url, output)
    except urllib.error.HTTPError as e:
        # Must come before URLError: HTTPError is a subclass of it.
        failure = f"HTTP {e.code}"
    except urllib.error.URLError as e:
        failure = e.reason
    except Exception as e:
        failure = e
    else:
        return

    print(f"{feed_url}: {failure}", file=sys.stderr)
    sys.exit(1)


if __name__ == "__main__":
    main()
- Create a file called `xmlFeeds.conf` in `$XDG_CONFIG_HOME/newsraft/feeds` or `$HOME/.config/newsraft/feeds` or `$HOME/.newsraft/feeds`
- Place the profile URL in the file, followed by a file name that ends in `xml`. It'll look something like this:
https://bsky.app/profile/did:plc:2iotnpqcz2pcanhti3apjs5j/rss wttw.xml
- In newsraft's `feeds` file, point it to your newly created file (in my case, `wttw.xml`).
#inside of `feeds` file in newsraft/config. I left some examples on excluding irrelevant content ;)
file:///path/to/newsraft/config/wttw.xml "WTTW" < item-rule NOT (title LIKE '%Blackhawks%' OR title LIKE '%Sox%' OR title LIKE '%Cubs%' OR title LIKE '%Bulls%' OR title LIKE '%Bears%')
- So now, whenever you manually run the Python script above, it will refresh the feed. I would prefer that this happen whenever I open newsraft, so I created a function inside of `.bash_aliases`:
# Refresh every feed listed in xmlFeeds.conf, then start newsraft with
# whatever arguments were given.
# Each non-empty, non-comment line of the config is "<url> <output-file>".
nr() {
    # Keep helper variables out of the interactive shell's namespace.
    local conf="${XDG_CONFIG_HOME:-$HOME/.config}/newsraft/xmlFeeds.conf"
    local dir="$HOME/path/to/newsraft/config"
    local url file

    echo "Loading .xml files.."
    if [ -r "$conf" ]; then
        # '|| [ -n "$url" ]' keeps a final line that has no trailing newline.
        while read -r url file || [ -n "$url" ]; do
            case "$url" in
                ''|'#'*) continue ;;   # skip blank lines and comments
            esac
            if [ -z "$file" ]; then
                echo "nr: no output file given for $url" >&2
                continue
            fi
            # Detach stdin so the script can never consume the config file.
            fediverseXml.sh "$url" "$dir/$file" < /dev/null
            echo "Reading links..."
        done < "$conf"
    else
        echo "nr: cannot read $conf" >&2
    fi
    command newsraft "$@"
}