Add black code formatter

Sascha Ißbrücker
2024-01-27 11:29:16 +01:00
parent 6775633be5
commit 98b9a9c1a0
128 changed files with 7181 additions and 4264 deletions
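
black is an opinionated, automatic formatter for Python; this commit applies it across the code base, so the hunks below are mechanical rewrites rather than behavior changes: string literals are normalized to double quotes, statements longer than the default 88-character line limit are wrapped into parenthesized argument lists with trailing commas, backslash continuations become parentheses, and slices get PEP 8 spacing. As a minimal sketch of what the formatter does to a single line (assuming black is installed via pip install black; format_str and Mode are part of black's public Python API, though a repo-wide run like this commit would normally use the command line, e.g. "black ."):

# Minimal sketch: feed the pre-commit version of one line from the first hunk
# below through black. black only parses the code, it never executes it, so
# the undefined names (Bookmark, current_user, ...) are not a problem.
import black

source = 'existing_bookmark: Bookmark = Bookmark.objects.filter(owner=current_user, url=bookmark.url).first()\n'

# black.Mode() carries the defaults seen in this commit: 88-character lines,
# double-quote string normalization, magic trailing comma handling.
print(black.format_str(source, mode=black.Mode()))

This prints the same wrapped form that appears as the added lines of the first hunk (with module-level instead of function-level indentation).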

bookmarks/services/bookmarks.py

@@ -11,7 +11,9 @@ from bookmarks.services import tasks
 def create_bookmark(bookmark: Bookmark, tag_string: str, current_user: User):
     # If URL is already bookmarked, then update it
-    existing_bookmark: Bookmark = Bookmark.objects.filter(owner=current_user, url=bookmark.url).first()
+    existing_bookmark: Bookmark = Bookmark.objects.filter(
+        owner=current_user, url=bookmark.url
+    ).first()
     if existing_bookmark is not None:
         _merge_bookmark_data(bookmark, existing_bookmark)
@@ -68,8 +70,9 @@ def archive_bookmark(bookmark: Bookmark):
 def archive_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
-    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(is_archived=True,
-                                                                                      date_modified=timezone.now())
+    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(
+        is_archived=True, date_modified=timezone.now()
+    )


 def unarchive_bookmark(bookmark: Bookmark):
@@ -82,8 +85,9 @@ def unarchive_bookmark(bookmark: Bookmark):
 def unarchive_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
-    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(is_archived=False,
-                                                                                      date_modified=timezone.now())
+    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(
+        is_archived=False, date_modified=timezone.now()
+    )


 def delete_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
@@ -94,8 +98,9 @@ def delete_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
 def tag_bookmarks(bookmark_ids: [Union[int, str]], tag_string: str, current_user: User):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
-    owned_bookmark_ids = Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).values_list('id',
-                                                                                                                flat=True)
+    owned_bookmark_ids = Bookmark.objects.filter(
+        owner=current_user, id__in=sanitized_bookmark_ids
+    ).values_list("id", flat=True)

     tag_names = parse_tag_string(tag_string)
     tags = get_or_create_tags(tag_names, current_user)
@@ -103,54 +108,69 @@ def tag_bookmarks(bookmark_ids: [Union[int, str]], tag_string: str, current_user
     relationships = []
     for tag in tags:
         for bookmark_id in owned_bookmark_ids:
-            relationships.append(BookmarkToTagRelationShip(bookmark_id=bookmark_id, tag=tag))
+            relationships.append(
+                BookmarkToTagRelationShip(bookmark_id=bookmark_id, tag=tag)
+            )

     # Insert all bookmark -> tag associations at once, should ignore errors if association already exists
     BookmarkToTagRelationShip.objects.bulk_create(relationships, ignore_conflicts=True)
-    Bookmark.objects.filter(id__in=owned_bookmark_ids).update(date_modified=timezone.now())
+    Bookmark.objects.filter(id__in=owned_bookmark_ids).update(
+        date_modified=timezone.now()
+    )


-def untag_bookmarks(bookmark_ids: [Union[int, str]], tag_string: str, current_user: User):
+def untag_bookmarks(
+    bookmark_ids: [Union[int, str]], tag_string: str, current_user: User
+):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
-    owned_bookmark_ids = Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).values_list('id',
-                                                                                                                flat=True)
+    owned_bookmark_ids = Bookmark.objects.filter(
+        owner=current_user, id__in=sanitized_bookmark_ids
+    ).values_list("id", flat=True)

     tag_names = parse_tag_string(tag_string)
     tags = get_or_create_tags(tag_names, current_user)
     BookmarkToTagRelationShip = Bookmark.tags.through
     for tag in tags:
         # Remove all bookmark -> tag associations for the owned bookmarks and the current tag
-        BookmarkToTagRelationShip.objects.filter(bookmark_id__in=owned_bookmark_ids, tag=tag).delete()
+        BookmarkToTagRelationShip.objects.filter(
+            bookmark_id__in=owned_bookmark_ids, tag=tag
+        ).delete()

-    Bookmark.objects.filter(id__in=owned_bookmark_ids).update(date_modified=timezone.now())
+    Bookmark.objects.filter(id__in=owned_bookmark_ids).update(
+        date_modified=timezone.now()
+    )


 def mark_bookmarks_as_read(bookmark_ids: [Union[int, str]], current_user: User):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
-    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(unread=False,
-                                                                                      date_modified=timezone.now())
+    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(
+        unread=False, date_modified=timezone.now()
+    )


 def mark_bookmarks_as_unread(bookmark_ids: [Union[int, str]], current_user: User):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
-    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(unread=True,
-                                                                                      date_modified=timezone.now())
+    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(
+        unread=True, date_modified=timezone.now()
+    )


 def share_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
-    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(shared=True,
-                                                                                      date_modified=timezone.now())
+    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(
+        shared=True, date_modified=timezone.now()
+    )


 def unshare_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
-    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(shared=False,
-                                                                                      date_modified=timezone.now())
+    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(
+        shared=False, date_modified=timezone.now()
+    )


 def _merge_bookmark_data(from_bookmark: Bookmark, to_bookmark: Bookmark):

bookmarks/services/exporter.py

@@ -13,40 +13,41 @@ def export_netscape_html(bookmarks: List[Bookmark]):
     [append_bookmark(doc, bookmark) for bookmark in bookmarks]
     append_list_end(doc)
-    return '\n\r'.join(doc)
+    return "\n\r".join(doc)


 def append_header(doc: BookmarkDocument):
-    doc.append('<!DOCTYPE NETSCAPE-Bookmark-file-1>')
+    doc.append("<!DOCTYPE NETSCAPE-Bookmark-file-1>")
     doc.append('<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">')
-    doc.append('<TITLE>Bookmarks</TITLE>')
-    doc.append('<H1>Bookmarks</H1>')
+    doc.append("<TITLE>Bookmarks</TITLE>")
+    doc.append("<H1>Bookmarks</H1>")


 def append_list_start(doc: BookmarkDocument):
-    doc.append('<DL><p>')
+    doc.append("<DL><p>")


 def append_bookmark(doc: BookmarkDocument, bookmark: Bookmark):
     url = bookmark.url
-    title = html.escape(bookmark.resolved_title or '')
-    desc = html.escape(bookmark.resolved_description or '')
+    title = html.escape(bookmark.resolved_title or "")
+    desc = html.escape(bookmark.resolved_description or "")
     if bookmark.notes:
-        desc += f'[linkding-notes]{html.escape(bookmark.notes)}[/linkding-notes]'
+        desc += f"[linkding-notes]{html.escape(bookmark.notes)}[/linkding-notes]"
     tag_names = bookmark.tag_names
     if bookmark.is_archived:
-        tag_names.append('linkding:archived')
-    tags = ','.join(tag_names)
-    toread = '1' if bookmark.unread else '0'
-    private = '0' if bookmark.shared else '1'
+        tag_names.append("linkding:archived")
+    tags = ",".join(tag_names)
+    toread = "1" if bookmark.unread else "0"
+    private = "0" if bookmark.shared else "1"
     added = int(bookmark.date_added.timestamp())

     doc.append(
-        f'<DT><A HREF="{url}" ADD_DATE="{added}" PRIVATE="{private}" TOREAD="{toread}" TAGS="{tags}">{title}</A>')
+        f'<DT><A HREF="{url}" ADD_DATE="{added}" PRIVATE="{private}" TOREAD="{toread}" TAGS="{tags}">{title}</A>'
+    )

     if desc:
-        doc.append(f'<DD>{desc}')
+        doc.append(f"<DD>{desc}")


 def append_list_end(doc: BookmarkDocument):
-    doc.append('</DL><p>')
+    doc.append("</DL><p>")

bookmarks/services/favicon_loader.py

@@ -15,7 +15,7 @@ logger = logging.getLogger(__name__)
 # register mime type for .ico files, which is not included in the default
 # mimetypes of the Docker image
-mimetypes.add_type('image/x-icon', '.ico')
+mimetypes.add_type("image/x-icon", ".ico")


 def _ensure_favicon_folder():
@@ -23,16 +23,16 @@ def _ensure_favicon_folder():
 def _url_to_filename(url: str) -> str:
-    return re.sub(r'\W+', '_', url)
+    return re.sub(r"\W+", "_", url)


 def _get_url_parameters(url: str) -> dict:
     parsed_uri = urlparse(url)
     return {
         # https://example.com/foo?bar -> https://example.com
-        'url': f'{parsed_uri.scheme}://{parsed_uri.hostname}',
+        "url": f"{parsed_uri.scheme}://{parsed_uri.hostname}",
         # https://example.com/foo?bar -> example.com
-        'domain': parsed_uri.hostname,
+        "domain": parsed_uri.hostname,
     }
@@ -63,21 +63,21 @@ def load_favicon(url: str) -> str:
     # Create favicon folder if not exists
     _ensure_favicon_folder()
     # Use scheme+hostname as favicon filename to reuse icon for all pages on the same domain
-    favicon_name = _url_to_filename(url_parameters['url'])
+    favicon_name = _url_to_filename(url_parameters["url"])
     favicon_file = _check_existing_favicon(favicon_name)

     if not favicon_file:
         # Load favicon from provider, save to file
         favicon_url = settings.LD_FAVICON_PROVIDER.format(**url_parameters)
-        logger.debug(f'Loading favicon from: {favicon_url}')
+        logger.debug(f"Loading favicon from: {favicon_url}")
         with requests.get(favicon_url, stream=True) as response:
-            content_type = response.headers['Content-Type']
+            content_type = response.headers["Content-Type"]
             file_extension = mimetypes.guess_extension(content_type)
-            favicon_file = f'{favicon_name}{file_extension}'
+            favicon_file = f"{favicon_name}{file_extension}"
             favicon_path = _get_favicon_path(favicon_file)
-            with open(favicon_path, 'wb') as file:
+            with open(favicon_path, "wb") as file:
                 for chunk in response.iter_content(chunk_size=8192):
                     file.write(chunk)
-        logger.debug(f'Saved favicon as: {favicon_path}')
+        logger.debug(f"Saved favicon as: {favicon_path}")

     return favicon_file

bookmarks/services/importer.py

@@ -55,18 +55,20 @@ class TagCache:
         self.cache[tag.name.lower()] = tag


-def import_netscape_html(html: str, user: User, options: ImportOptions = ImportOptions()) -> ImportResult:
+def import_netscape_html(
+    html: str, user: User, options: ImportOptions = ImportOptions()
+) -> ImportResult:
     result = ImportResult()
     import_start = timezone.now()

     try:
         netscape_bookmarks = parse(html)
     except:
-        logging.exception('Could not read bookmarks file.')
+        logging.exception("Could not read bookmarks file.")
         raise

     parse_end = timezone.now()
-    logger.debug(f'Parse duration: {parse_end - import_start}')
+    logger.debug(f"Parse duration: {parse_end - import_start}")

     # Create and cache all tags beforehand
     _create_missing_tags(netscape_bookmarks, user)
@@ -83,7 +85,7 @@ def import_netscape_html(html: str, user: User, options: ImportOptions = ImportO
     tasks.schedule_bookmarks_without_favicons(user)

     end = timezone.now()
-    logger.debug(f'Import duration: {end - import_start}')
+    logger.debug(f"Import duration: {end - import_start}")

     return result
@@ -110,7 +112,7 @@ def _get_batches(items: List, batch_size: int):
     num_items = len(items)
     while offset < num_items:
-        batch = items[offset:min(offset + batch_size, num_items)]
+        batch = items[offset : min(offset + batch_size, num_items)]
         if len(batch) > 0:
             batches.append(batch)
         offset = offset + batch_size
@@ -118,11 +120,13 @@ def _get_batches(items: List, batch_size: int):
     return batches


-def _import_batch(netscape_bookmarks: List[NetscapeBookmark],
-                  user: User,
-                  options: ImportOptions,
-                  tag_cache: TagCache,
-                  result: ImportResult):
+def _import_batch(
+    netscape_bookmarks: List[NetscapeBookmark],
+    user: User,
+    options: ImportOptions,
+    tag_cache: TagCache,
+    result: ImportResult,
+):
     # Query existing bookmarks
     batch_urls = [bookmark.href for bookmark in netscape_bookmarks]
     existing_bookmarks = Bookmark.objects.filter(owner=user, url__in=batch_urls)
@@ -136,7 +140,13 @@ def _import_batch(netscape_bookmarks: List[NetscapeBookmark],
         try:
             # Lookup existing bookmark by URL, or create new bookmark if there is no bookmark for that URL yet
             bookmark = next(
-                (bookmark for bookmark in existing_bookmarks if bookmark.url == netscape_bookmark.href), None)
+                (
+                    bookmark
+                    for bookmark in existing_bookmarks
+                    if bookmark.url == netscape_bookmark.href
+                ),
+                None,
+            )
             if not bookmark:
                 bookmark = Bookmark(owner=user)
                 is_update = False
@@ -146,7 +156,7 @@ def _import_batch(netscape_bookmarks: List[NetscapeBookmark],
             _copy_bookmark_data(netscape_bookmark, bookmark, options)
             # Validate bookmark fields, exclude owner to prevent n+1 database query,
             # also there is no specific validation on owner
-            bookmark.clean_fields(exclude=['owner'])
+            bookmark.clean_fields(exclude=["owner"])
             # Schedule for update or insert
             if is_update:
                 bookmarks_to_update.append(bookmark)
@@ -155,20 +165,25 @@ def _import_batch(netscape_bookmarks: List[NetscapeBookmark],
             result.success = result.success + 1
         except:
-            shortened_bookmark_tag_str = str(netscape_bookmark)[:100] + '...'
-            logging.exception('Error importing bookmark: ' + shortened_bookmark_tag_str)
+            shortened_bookmark_tag_str = str(netscape_bookmark)[:100] + "..."
+            logging.exception("Error importing bookmark: " + shortened_bookmark_tag_str)
             result.failed = result.failed + 1

     # Bulk update bookmarks in DB
-    Bookmark.objects.bulk_update(bookmarks_to_update, ['url',
-                                                       'date_added',
-                                                       'date_modified',
-                                                       'unread',
-                                                       'shared',
-                                                       'title',
-                                                       'description',
-                                                       'notes',
-                                                       'owner'])
+    Bookmark.objects.bulk_update(
+        bookmarks_to_update,
+        [
+            "url",
+            "date_added",
+            "date_modified",
+            "unread",
+            "shared",
+            "title",
+            "description",
+            "notes",
+            "owner",
+        ],
+    )

     # Bulk insert new bookmarks into DB
     Bookmark.objects.bulk_create(bookmarks_to_create)
@@ -183,13 +198,20 @@ def _import_batch(netscape_bookmarks: List[NetscapeBookmark],
     for netscape_bookmark in netscape_bookmarks:
         # Lookup bookmark by URL again
         bookmark = next(
-            (bookmark for bookmark in existing_bookmarks if bookmark.url == netscape_bookmark.href), None)
+            (
+                bookmark
+                for bookmark in existing_bookmarks
+                if bookmark.url == netscape_bookmark.href
+            ),
+            None,
+        )
         if not bookmark:
             # Something is wrong, we should have just created this bookmark
-            shortened_bookmark_tag_str = str(netscape_bookmark)[:100] + '...'
+            shortened_bookmark_tag_str = str(netscape_bookmark)[:100] + "..."
             logging.warning(
-                f'Failed to assign tags to the bookmark: {shortened_bookmark_tag_str}. Could not find bookmark by URL.')
+                f"Failed to assign tags to the bookmark: {shortened_bookmark_tag_str}. Could not find bookmark by URL."
+            )
             continue

         # Get tag models by string, schedule inserts for bookmark -> tag associations
@@ -201,7 +223,9 @@ def _import_batch(netscape_bookmarks: List[NetscapeBookmark],
     BookmarkToTagRelationShip.objects.bulk_create(relationships, ignore_conflicts=True)


-def _copy_bookmark_data(netscape_bookmark: NetscapeBookmark, bookmark: Bookmark, options: ImportOptions):
+def _copy_bookmark_data(
+    netscape_bookmark: NetscapeBookmark, bookmark: Bookmark, options: ImportOptions
+):
     bookmark.url = netscape_bookmark.href
     if netscape_bookmark.date_added:
         bookmark.date_added = parse_timestamp(netscape_bookmark.date_added)

bookmarks/services/parser.py

@@ -25,29 +25,29 @@ class BookmarkParser(HTMLParser):
         self.current_tag = None
         self.bookmark = None
-        self.href = ''
-        self.add_date = ''
-        self.tags = ''
-        self.title = ''
-        self.description = ''
-        self.notes = ''
-        self.toread = ''
-        self.private = ''
+        self.href = ""
+        self.add_date = ""
+        self.tags = ""
+        self.title = ""
+        self.description = ""
+        self.notes = ""
+        self.toread = ""
+        self.private = ""

     def handle_starttag(self, tag: str, attrs: list):
-        name = 'handle_start_' + tag.lower()
+        name = "handle_start_" + tag.lower()
         if name in dir(self):
             getattr(self, name)({k.lower(): v for k, v in attrs})
         self.current_tag = tag

     def handle_endtag(self, tag: str):
-        name = 'handle_end_' + tag.lower()
+        name = "handle_end_" + tag.lower()
         if name in dir(self):
             getattr(self, name)()
         self.current_tag = None

     def handle_data(self, data):
-        name = f'handle_{self.current_tag}_data'
+        name = f"handle_{self.current_tag}_data"
         if name in dir(self):
             getattr(self, name)(data)
@@ -60,22 +60,22 @@ class BookmarkParser(HTMLParser):
     def handle_start_a(self, attrs: Dict[str, str]):
         vars(self).update(attrs)
         tag_names = parse_tag_string(self.tags)
-        archived = 'linkding:archived' in self.tags
+        archived = "linkding:archived" in self.tags
         try:
-            tag_names.remove('linkding:archived')
+            tag_names.remove("linkding:archived")
         except ValueError:
             pass

         self.bookmark = NetscapeBookmark(
             href=self.href,
-            title='',
-            description='',
-            notes='',
+            title="",
+            description="",
+            notes="",
             date_added=self.add_date,
             tag_names=tag_names,
-            to_read=self.toread == '1',
+            to_read=self.toread == "1",
             # Mark as private by default, also when attribute is not specified
-            private=self.private != '0',
+            private=self.private != "0",
             archived=archived,
         )
@@ -84,9 +84,9 @@ class BookmarkParser(HTMLParser):
     def handle_dd_data(self, data):
         desc = data.strip()
-        if '[linkding-notes]' in desc:
-            self.notes = desc.split('[linkding-notes]')[1].split('[/linkding-notes]')[0]
-            self.description = desc.split('[linkding-notes]')[0]
+        if "[linkding-notes]" in desc:
+            self.notes = desc.split("[linkding-notes]")[1].split("[/linkding-notes]")[0]
+            self.description = desc.split("[linkding-notes]")[0]

     def add_bookmark(self):
         if self.bookmark:
@@ -95,14 +95,14 @@ class BookmarkParser(HTMLParser):
             self.bookmark.notes = self.notes
             self.bookmarks.append(self.bookmark)
         self.bookmark = None
-        self.href = ''
-        self.add_date = ''
-        self.tags = ''
-        self.title = ''
-        self.description = ''
-        self.notes = ''
-        self.toread = ''
-        self.private = ''
+        self.href = ""
+        self.add_date = ""
+        self.tags = ""
+        self.title = ""
+        self.description = ""
+        self.notes = ""
+        self.toread = ""
+        self.private = ""


 def parse(html: str) -> List[NetscapeBookmark]:

bookmarks/services/tags.py

@@ -13,7 +13,7 @@ logger = logging.getLogger(__name__)
 def get_or_create_tags(tag_names: List[str], user: User):
     tags = [get_or_create_tag(tag_name, user) for tag_name in tag_names]
-    return unique(tags, operator.attrgetter('id'))
+    return unique(tags, operator.attrgetter("id"))


 def get_or_create_tag(name: str, user: User):

bookmarks/services/tasks.py

@@ -18,8 +18,10 @@ logger = logging.getLogger(__name__)
 def is_web_archive_integration_active(user: User) -> bool:
     background_tasks_enabled = not settings.LD_DISABLE_BACKGROUND_TASKS
-    web_archive_integration_enabled = \
-        user.profile.web_archive_integration == UserProfile.WEB_ARCHIVE_INTEGRATION_ENABLED
+    web_archive_integration_enabled = (
+        user.profile.web_archive_integration
+        == UserProfile.WEB_ARCHIVE_INTEGRATION_ENABLED
+    )

     return background_tasks_enabled and web_archive_integration_enabled
@@ -31,28 +33,36 @@ def create_web_archive_snapshot(user: User, bookmark: Bookmark, force_update: bo
 def _load_newest_snapshot(bookmark: Bookmark):
     try:
-        logger.info(f'Load existing snapshot for bookmark. url={bookmark.url}')
-        cdx_api = bookmarks.services.wayback.CustomWaybackMachineCDXServerAPI(bookmark.url)
+        logger.info(f"Load existing snapshot for bookmark. url={bookmark.url}")
+        cdx_api = bookmarks.services.wayback.CustomWaybackMachineCDXServerAPI(
+            bookmark.url
+        )
         existing_snapshot = cdx_api.newest()

         if existing_snapshot:
             bookmark.web_archive_snapshot_url = existing_snapshot.archive_url
-            bookmark.save(update_fields=['web_archive_snapshot_url'])
-            logger.info(f'Using newest snapshot. url={bookmark.url} from={existing_snapshot.datetime_timestamp}')
+            bookmark.save(update_fields=["web_archive_snapshot_url"])
+            logger.info(
+                f"Using newest snapshot. url={bookmark.url} from={existing_snapshot.datetime_timestamp}"
+            )

     except NoCDXRecordFound:
-        logger.info(f'Could not find any snapshots for bookmark. url={bookmark.url}')
+        logger.info(f"Could not find any snapshots for bookmark. url={bookmark.url}")
     except WaybackError as error:
-        logger.error(f'Failed to load existing snapshot. url={bookmark.url}', exc_info=error)
+        logger.error(
+            f"Failed to load existing snapshot. url={bookmark.url}", exc_info=error
+        )


 def _create_snapshot(bookmark: Bookmark):
-    logger.info(f'Create new snapshot for bookmark. url={bookmark.url}...')
-    archive = waybackpy.WaybackMachineSaveAPI(bookmark.url, DEFAULT_USER_AGENT, max_tries=1)
+    logger.info(f"Create new snapshot for bookmark. url={bookmark.url}...")
+    archive = waybackpy.WaybackMachineSaveAPI(
+        bookmark.url, DEFAULT_USER_AGENT, max_tries=1
+    )
     archive.save()
     bookmark.web_archive_snapshot_url = archive.archive_url
-    bookmark.save(update_fields=['web_archive_snapshot_url'])
-    logger.info(f'Successfully created new snapshot for bookmark:. url={bookmark.url}')
+    bookmark.save(update_fields=["web_archive_snapshot_url"])
+    logger.info(f"Successfully created new snapshot for bookmark:. url={bookmark.url}")


 @background()
@@ -72,10 +82,13 @@ def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
         return
     except TooManyRequestsError:
         logger.error(
-            f'Failed to create snapshot due to rate limiting, trying to load newest snapshot as fallback. url={bookmark.url}')
+            f"Failed to create snapshot due to rate limiting, trying to load newest snapshot as fallback. url={bookmark.url}"
+        )
     except WaybackError as error:
-        logger.error(f'Failed to create snapshot, trying to load newest snapshot as fallback. url={bookmark.url}',
-                     exc_info=error)
+        logger.error(
+            f"Failed to create snapshot, trying to load newest snapshot as fallback. url={bookmark.url}",
+            exc_info=error,
+        )

     # Load the newest snapshot as fallback
     _load_newest_snapshot(bookmark)
@@ -102,7 +115,9 @@ def schedule_bookmarks_without_snapshots(user: User):
 @background()
 def _schedule_bookmarks_without_snapshots_task(user_id: int):
     user = get_user_model().objects.get(id=user_id)
-    bookmarks_without_snapshots = Bookmark.objects.filter(web_archive_snapshot_url__exact='', owner=user)
+    bookmarks_without_snapshots = Bookmark.objects.filter(
+        web_archive_snapshot_url__exact="", owner=user
+    )

     for bookmark in bookmarks_without_snapshots:
         # To prevent rate limit errors from the Wayback API only try to load the latest snapshots instead of creating
@@ -128,14 +143,16 @@ def _load_favicon_task(bookmark_id: int):
     except Bookmark.DoesNotExist:
         return

-    logger.info(f'Load favicon for bookmark. url={bookmark.url}')
+    logger.info(f"Load favicon for bookmark. url={bookmark.url}")

     new_favicon_file = favicon_loader.load_favicon(bookmark.url)

     if new_favicon_file != bookmark.favicon_file:
         bookmark.favicon_file = new_favicon_file
-        bookmark.save(update_fields=['favicon_file'])
-        logger.info(f'Successfully updated favicon for bookmark. url={bookmark.url} icon={new_favicon_file}')
+        bookmark.save(update_fields=["favicon_file"])
+        logger.info(
+            f"Successfully updated favicon for bookmark. url={bookmark.url} icon={new_favicon_file}"
+        )


 def schedule_bookmarks_without_favicons(user: User):
@@ -146,11 +163,13 @@ def schedule_bookmarks_without_favicons(user: User):
 @background()
 def _schedule_bookmarks_without_favicons_task(user_id: int):
     user = get_user_model().objects.get(id=user_id)
-    bookmarks = Bookmark.objects.filter(favicon_file__exact='', owner=user)
+    bookmarks = Bookmark.objects.filter(favicon_file__exact="", owner=user)
     tasks = []

     for bookmark in bookmarks:
-        task = Task.objects.new_task(task_name='bookmarks.services.tasks._load_favicon_task', args=(bookmark.id,))
+        task = Task.objects.new_task(
+            task_name="bookmarks.services.tasks._load_favicon_task", args=(bookmark.id,)
+        )
         tasks.append(task)

     Task.objects.bulk_create(tasks)
@@ -168,7 +187,9 @@ def _schedule_refresh_favicons_task(user_id: int):
     tasks = []

     for bookmark in bookmarks:
-        task = Task.objects.new_task(task_name='bookmarks.services.tasks._load_favicon_task', args=(bookmark.id,))
+        task = Task.objects.new_task(
+            task_name="bookmarks.services.tasks._load_favicon_task", args=(bookmark.id,)
+        )
         tasks.append(task)

     Task.objects.bulk_create(tasks)

bookmarks/services/wayback.py

@@ -14,8 +14,10 @@ class CustomWaybackMachineCDXServerAPI(waybackpy.WaybackMachineCDXServerAPI):
     def newest(self):
         unix_timestamp = int(time.time())
-        self.closest = waybackpy.utils.unix_timestamp_to_wayback_timestamp(unix_timestamp)
-        self.sort = 'closest'
+        self.closest = waybackpy.utils.unix_timestamp_to_wayback_timestamp(
+            unix_timestamp
+        )
+        self.sort = "closest"
         self.limit = -5

         newest_snapshot = None
@@ -37,4 +39,4 @@ class CustomWaybackMachineCDXServerAPI(waybackpy.WaybackMachineCDXServerAPI):
         super().add_payload(payload)
         # Set fastLatest query param, as we are only using this API to get the latest snapshot and using fastLatest
         # makes searching for latest snapshots faster
-        payload['fastLatest'] = 'true'
+        payload["fastLatest"] = "true"

bookmarks/services/website_loader.py

@@ -18,9 +18,9 @@ class WebsiteMetadata:
     def to_dict(self):
         return {
-            'url': self.url,
-            'title': self.title,
-            'description': self.description,
+            "url": self.url,
+            "title": self.title,
+            "description": self.description,
         }
@@ -34,22 +34,29 @@ def load_website_metadata(url: str):
         start = timezone.now()
         page_text = load_page(url)
         end = timezone.now()
-        logger.debug(f'Load duration: {end - start}')
+        logger.debug(f"Load duration: {end - start}")

         start = timezone.now()
-        soup = BeautifulSoup(page_text, 'html.parser')
+        soup = BeautifulSoup(page_text, "html.parser")

         title = soup.title.string.strip() if soup.title is not None else None
-        description_tag = soup.find('meta', attrs={'name': 'description'})
-        description = description_tag['content'].strip() if description_tag and description_tag[
-            'content'] else None
+        description_tag = soup.find("meta", attrs={"name": "description"})
+        description = (
+            description_tag["content"].strip()
+            if description_tag and description_tag["content"]
+            else None
+        )

         if not description:
-            description_tag = soup.find('meta', attrs={'property': 'og:description'})
-            description = description_tag['content'].strip() if description_tag and description_tag['content'] else None
+            description_tag = soup.find("meta", attrs={"property": "og:description"})
+            description = (
+                description_tag["content"].strip()
+                if description_tag and description_tag["content"]
+                else None
+            )

         end = timezone.now()
-        logger.debug(f'Parsing duration: {end - start}')
+        logger.debug(f"Parsing duration: {end - start}")
     finally:
         return WebsiteMetadata(url=url, title=title, description=description)
@@ -73,30 +80,30 @@ def load_page(url: str):
             else:
                 content = content + chunk

-            logger.debug(f'Loaded chunk (iteration={iteration}, total={size / 1024})')
+            logger.debug(f"Loaded chunk (iteration={iteration}, total={size / 1024})")

             # Stop reading if we have parsed end of head tag
-            end_of_head = '</head>'.encode('utf-8')
+            end_of_head = "</head>".encode("utf-8")
             if end_of_head in content:
-                logger.debug(f'Found closing head tag after {size} bytes')
+                logger.debug(f"Found closing head tag after {size} bytes")
                 content = content.split(end_of_head)[0] + end_of_head
                 break

             # Stop reading if we exceed limit
             if size > MAX_CONTENT_LIMIT:
-                logger.debug(f'Cancel reading document after {size} bytes')
+                logger.debug(f"Cancel reading document after {size} bytes")
                 break

-        if hasattr(r, '_content_consumed'):
-            logger.debug(f'Request consumed: {r._content_consumed}')
+        if hasattr(r, "_content_consumed"):
+            logger.debug(f"Request consumed: {r._content_consumed}")

     # Use charset_normalizer to determine encoding that best matches the response content
     # Several sites seem to specify the response encoding incorrectly, so we ignore it and use custom logic instead
     # This is different from Response.text which does respect the encoding specified in the response first,
     # before trying to determine one
-    results = from_bytes(content or '')
+    results = from_bytes(content or "")
     return str(results.best())


-DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36'
+DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36"


 def fake_request_headers():