From c2d8cde86b5d3600190e2b4193df4dae5952a051 Mon Sep 17 00:00:00 2001
From: Luca <33791936+luca1197@users.noreply.github.com>
Date: Thu, 12 Jan 2023 21:06:36 +0100
Subject: [PATCH] Trim website metadata title and description (#383)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: trim fetched metadata placeholders

* feat: implement trimming serverside

* Add website loader tests

* Address review comments

* Only strip the title / description when the page actually provides one
  (a <title> without plain text, or a description meta tag without a
  content attribute, now yields None instead of raising inside the
  try/finally and discarding the rest of the metadata)

Co-authored-by: Sascha Ißbrücker
---
 bookmarks/services/website_loader.py   |  4 ++--
 bookmarks/tests/test_website_loader.py | 27 ++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/bookmarks/services/website_loader.py b/bookmarks/services/website_loader.py
index fb3fc40..9835456 100644
--- a/bookmarks/services/website_loader.py
+++ b/bookmarks/services/website_loader.py
@@ -29,9 +29,9 @@ def load_website_metadata(url: str):
         page_text = load_page(url)
         soup = BeautifulSoup(page_text, 'html.parser')
 
-        title = soup.title.string if soup.title is not None else None
+        title = soup.title.string.strip() if soup.title and soup.title.string else None
         description_tag = soup.find('meta', attrs={'name': 'description'})
-        description = description_tag['content'] if description_tag is not None else None
+        description = description_tag['content'].strip() if description_tag and description_tag.get('content') else None
     finally:
         return WebsiteMetadata(url=url, title=title, description=description)
 
diff --git a/bookmarks/tests/test_website_loader.py b/bookmarks/tests/test_website_loader.py
index bc3bf9c..6ee5325 100644
--- a/bookmarks/tests/test_website_loader.py
+++ b/bookmarks/tests/test_website_loader.py
@@ -25,6 +25,19 @@ class MockStreamingResponse:
 
 
 class WebsiteLoaderTestCase(TestCase):
+    def render_html_document(self, title, description):
+        return f'''
+        <!DOCTYPE html>
+        <html lang="en">
+        <head>
+            <meta charset="UTF-8">
+            <title>{title}</title>
+            <meta name="description" content="{description}">
+        </head>
+        <body></body>
+        </html>
+        '''
+
     def test_load_page_returns_content(self):
         with mock.patch('requests.get') as mock_get:
             mock_get.return_value = MockStreamingResponse(num_chunks=10, chunk_size=1024)
@@ -51,3 +64,17 @@ class WebsiteLoaderTestCase(TestCase):
             # Should have read first chunk, and second chunk containing closing head tag
             expected_content_size = 1 * 1024 * 1000 + len('</head>')
             self.assertEqual(expected_content_size, len(content))
+
+    def test_load_website_metadata(self):
+        with mock.patch('bookmarks.services.website_loader.load_page') as mock_load_page:
+            mock_load_page.return_value = self.render_html_document('test title', 'test description')
+            metadata = website_loader.load_website_metadata('https://example.com')
+            self.assertEqual('test title', metadata.title)
+            self.assertEqual('test description', metadata.description)
+
+    def test_load_website_metadata_trims_title_and_description(self):
+        with mock.patch('bookmarks.services.website_loader.load_page') as mock_load_page:
+            mock_load_page.return_value = self.render_html_document(' test title ', ' test description ')
+            metadata = website_loader.load_website_metadata('https://example.com')
+            self.assertEqual('test title', metadata.title)
+            self.assertEqual('test description', metadata.description)