diff --git a/bookmarks/services/tasks.py b/bookmarks/services/tasks.py index 5c8300d..8587343 100644 --- a/bookmarks/services/tasks.py +++ b/bookmarks/services/tasks.py @@ -8,6 +8,7 @@ from django.contrib.auth.models import User from waybackpy.exceptions import WaybackError from bookmarks.models import Bookmark, UserProfile +from bookmarks.services.website_loader import DEFAULT_USER_AGENT logger = logging.getLogger(__name__) @@ -38,10 +39,10 @@ def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool): logger.debug(f'Create web archive link for bookmark: {bookmark}...') - wayback = waybackpy.Url(bookmark.url) + archive = waybackpy.WaybackMachineSaveAPI(bookmark.url, DEFAULT_USER_AGENT) try: - archive = wayback.save() + archive.save() except WaybackError as error: logger.exception(f'Error creating web archive link for bookmark: {bookmark}...', exc_info=error) raise diff --git a/bookmarks/services/website_loader.py b/bookmarks/services/website_loader.py index 30bbcc4..ef4cefa 100644 --- a/bookmarks/services/website_loader.py +++ b/bookmarks/services/website_loader.py @@ -45,11 +45,14 @@ def load_page(url: str): return str(results.best()) +DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36' + + def fake_request_headers(): return { "Accept": "text/html,application/xhtml+xml,application/xml", "Accept-Encoding": "gzip, deflate", "Dnt": "1", "Upgrade-Insecure-Requests": "1", - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36", + "User-Agent": DEFAULT_USER_AGENT, } diff --git a/bookmarks/tests/test_bookmarks_tasks.py b/bookmarks/tests/test_bookmarks_tasks.py index 01c0f51..d4c7e31 100644 --- a/bookmarks/tests/test_bookmarks_tasks.py +++ b/bookmarks/tests/test_bookmarks_tasks.py @@ -10,15 +10,13 @@ from bookmarks.services import tasks from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging -class MockWaybackUrl: - +class MockWaybackMachineSaveAPI: def __init__(self, archive_url: str): self.archive_url = archive_url def save(self): return self - class MockWaybackUrlWithSaveError: def save(self): raise NotImplementedError @@ -52,7 +50,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): def test_create_web_archive_snapshot_should_update_snapshot_url(self): bookmark = self.setup_bookmark() - with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://example.com')): + with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')): tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False) self.run_pending_task(tasks._create_web_archive_snapshot_task) bookmark.refresh_from_db() @@ -60,7 +58,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.assertEqual(bookmark.web_archive_snapshot_url, 'https://example.com') def test_create_web_archive_snapshot_should_handle_missing_bookmark_id(self): - with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://example.com')) as mock_wayback_url: + with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')) as mock_wayback_url: tasks._create_web_archive_snapshot_task(123, False) self.run_pending_task(tasks._create_web_archive_snapshot_task) @@ -69,7 +67,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): def test_create_web_archive_snapshot_should_handle_wayback_save_error(self): bookmark = self.setup_bookmark() - with patch.object(waybackpy, 'Url', + with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackUrlWithSaveError()): with self.assertRaises(NotImplementedError): tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False) @@ -78,7 +76,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): def test_create_web_archive_snapshot_should_skip_if_snapshot_exists(self): bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com') - with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://other.com')): + with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://other.com')): tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, False) self.run_pending_task(tasks._create_web_archive_snapshot_task) bookmark.refresh_from_db() @@ -88,7 +86,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): def test_create_web_archive_snapshot_should_force_update_snapshot(self): bookmark = self.setup_bookmark(web_archive_snapshot_url='https://example.com') - with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://other.com')): + with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://other.com')): tasks.create_web_archive_snapshot(self.get_or_create_test_user(), bookmark, True) self.run_pending_task(tasks._create_web_archive_snapshot_task) bookmark.refresh_from_db() @@ -117,7 +115,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.setup_bookmark() self.setup_bookmark() - with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://example.com')): + with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')): tasks.schedule_bookmarks_without_snapshots(user) self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task) self.run_all_pending_tasks(tasks._create_web_archive_snapshot_task) @@ -131,7 +129,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.setup_bookmark(web_archive_snapshot_url='https://example.com') self.setup_bookmark(web_archive_snapshot_url='https://example.com') - with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://other.com')): + with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://other.com')): tasks.schedule_bookmarks_without_snapshots(user) self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task) self.run_all_pending_tasks(tasks._create_web_archive_snapshot_task) @@ -149,7 +147,7 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.setup_bookmark(user=other_user) self.setup_bookmark(user=other_user) - with patch.object(waybackpy, 'Url', return_value=MockWaybackUrl('https://example.com')): + with patch.object(waybackpy, 'WaybackMachineSaveAPI', return_value=MockWaybackMachineSaveAPI('https://example.com')): tasks.schedule_bookmarks_without_snapshots(user) self.run_pending_task(tasks._schedule_bookmarks_without_snapshots_task) self.run_all_pending_tasks(tasks._create_web_archive_snapshot_task) diff --git a/requirements.prod.txt b/requirements.prod.txt index 46f5b79..ad5312f 100644 --- a/requirements.prod.txt +++ b/requirements.prod.txt @@ -22,4 +22,4 @@ supervisor==4.2.2 typing-extensions==3.10.0.0 urllib3==1.26.6 uWSGI==2.0.18 -waybackpy==2.4.3 +waybackpy==3.0.6 diff --git a/requirements.txt b/requirements.txt index ef8563d..cc832d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,4 +28,4 @@ soupsieve==1.9.2 sqlparse==0.4.2 typing-extensions==3.10.0.0 urllib3==1.26.6 -waybackpy==2.4.3 +waybackpy==3.0.6