Sanitize RSS feed to remove control characters (#565)

This commit is contained in:
Sascha Ißbrücker
2023-10-27 19:59:06 +02:00
committed by GitHub
parent 314e4a9b74
commit de328c78e2
2 changed files with 18 additions and 3 deletions

View File

@@ -1,11 +1,12 @@
import unicodedata
from dataclasses import dataclass from dataclasses import dataclass
from django.contrib.syndication.views import Feed from django.contrib.syndication.views import Feed
from django.db.models import QuerySet from django.db.models import QuerySet
from django.urls import reverse from django.urls import reverse
from bookmarks.models import Bookmark, BookmarkSearch, FeedToken
from bookmarks import queries from bookmarks import queries
from bookmarks.models import Bookmark, BookmarkSearch, FeedToken
@dataclass @dataclass
@@ -14,6 +15,12 @@ class FeedContext:
query_set: QuerySet[Bookmark] query_set: QuerySet[Bookmark]
def sanitize(text: str) -> str:
    """Return *text* with control characters removed.

    Newline, carriage return and tab are kept. Every other character whose
    Unicode general category starts with ``C`` is dropped.

    Args:
        text: The string to clean. An empty or ``None`` value is accepted.

    Returns:
        The cleaned string, or ``''`` when *text* is empty or ``None``.
    """
    # Without this guard a missing value raises TypeError when iterated below.
    if not text:
        return ''
    # Whitespace characters that are kept although their category is "Cc".
    valid_chars = ('\n', '\r', '\t')
    return ''.join(
        ch for ch in text
        if ch in valid_chars or unicodedata.category(ch)[0] != 'C'
    )
class BaseBookmarksFeed(Feed): class BaseBookmarksFeed(Feed):
def get_object(self, request, feed_key: str): def get_object(self, request, feed_key: str):
feed_token = FeedToken.objects.get(key__exact=feed_key) feed_token = FeedToken.objects.get(key__exact=feed_key)
@@ -22,10 +29,10 @@ class BaseBookmarksFeed(Feed):
return FeedContext(feed_token, query_set) return FeedContext(feed_token, query_set)
def item_title(self, item: Bookmark): def item_title(self, item: Bookmark):
return item.resolved_title return sanitize(item.resolved_title)
def item_description(self, item: Bookmark): def item_description(self, item: Bookmark):
return item.resolved_description return sanitize(item.resolved_description)
def item_link(self, item: Bookmark): def item_link(self, item: Bookmark):
return item.url return item.url

View File

@@ -104,6 +104,14 @@ class FeedsTestCase(TestCase, BookmarkFactoryMixin):
self.assertContains(response, '<item>', count=0) self.assertContains(response, '<item>', count=0)
def test_strip_control_characters(self):
    """Control characters are stripped from feed item titles and descriptions.

    The bookmark text contains newline, carriage return and tab followed by
    NUL and backspace. The assertions expect the first three to remain and
    the last two to be absent from the rendered feed.
    """
    self.setup_bookmark(title='test\n\r\t\0\x08title', description='test\n\r\t\0\x08description')
    response = self.client.get(reverse('bookmarks:feeds.all', args=[self.token.key]))

    self.assertEqual(response.status_code, 200)
    self.assertContains(response, '<item>', count=1)
    # Plain literals: the expected fragments have no placeholders, so no f-prefix is needed.
    self.assertContains(response, '<title>test\n\r\ttitle</title>', count=1)
    self.assertContains(response, '<description>test\n\r\tdescription</description>', count=1)
def test_unread_returns_404_for_unknown_feed_token(self): def test_unread_returns_404_for_unknown_feed_token(self):
response = self.client.get(reverse('bookmarks:feeds.unread', args=['foo'])) response = self.client.get(reverse('bookmarks:feeds.unread', args=['foo']))