Add black code formatter
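This commit (to linkding, https://github.com/sissbruecker/linkding.git) applies the black code formatter across the bookmarks service modules. Every hunk below is mechanical: single-quoted strings become double-quoted, and statements longer than black's default 88-character line limit are wrapped. As a minimal sketch of the transformation (not part of the commit itself; it assumes the black package is installed), the same result can be reproduced with black's format_str API:

import black

# One of the overlong, single-quoted lines from the left-hand side of the first hunk
source = (
    "existing_bookmark: Bookmark = Bookmark.objects.filter("
    "owner=current_user, url=bookmark.url).first()\n"
)

# black.Mode() carries the defaults this diff reflects: an 88-character
# line length and double-quote string normalization
formatted = black.format_str(source, mode=black.Mode())
print(formatted)

In practice a commit like this is produced by running the CLI over the tree, for example with "black ." from the repository root.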
bookmarks/services/bookmarks.py
@@ -11,7 +11,9 @@ from bookmarks.services import tasks
 
 def create_bookmark(bookmark: Bookmark, tag_string: str, current_user: User):
     # If URL is already bookmarked, then update it
-    existing_bookmark: Bookmark = Bookmark.objects.filter(owner=current_user, url=bookmark.url).first()
+    existing_bookmark: Bookmark = Bookmark.objects.filter(
+        owner=current_user, url=bookmark.url
+    ).first()
 
     if existing_bookmark is not None:
         _merge_bookmark_data(bookmark, existing_bookmark)
@@ -68,8 +70,9 @@ def archive_bookmark(bookmark: Bookmark):
 def archive_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
 
-    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(is_archived=True,
-                                                                                      date_modified=timezone.now())
+    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(
+        is_archived=True, date_modified=timezone.now()
+    )
 
 
 def unarchive_bookmark(bookmark: Bookmark):
@@ -82,8 +85,9 @@ def unarchive_bookmark(bookmark: Bookmark):
 def unarchive_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
 
-    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(is_archived=False,
-                                                                                      date_modified=timezone.now())
+    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(
+        is_archived=False, date_modified=timezone.now()
+    )
 
 
 def delete_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
@@ -94,8 +98,9 @@ def delete_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
 
 
 def tag_bookmarks(bookmark_ids: [Union[int, str]], tag_string: str, current_user: User):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
-    owned_bookmark_ids = Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).values_list('id',
-                                                                                                                flat=True)
+    owned_bookmark_ids = Bookmark.objects.filter(
+        owner=current_user, id__in=sanitized_bookmark_ids
+    ).values_list("id", flat=True)
     tag_names = parse_tag_string(tag_string)
     tags = get_or_create_tags(tag_names, current_user)
@@ -103,54 +108,69 @@ def tag_bookmarks(bookmark_ids: [Union[int, str]], tag_string: str, current_user
     relationships = []
     for tag in tags:
         for bookmark_id in owned_bookmark_ids:
-            relationships.append(BookmarkToTagRelationShip(bookmark_id=bookmark_id, tag=tag))
+            relationships.append(
+                BookmarkToTagRelationShip(bookmark_id=bookmark_id, tag=tag)
+            )
 
     # Insert all bookmark -> tag associations at once, should ignore errors if association already exists
     BookmarkToTagRelationShip.objects.bulk_create(relationships, ignore_conflicts=True)
-    Bookmark.objects.filter(id__in=owned_bookmark_ids).update(date_modified=timezone.now())
+    Bookmark.objects.filter(id__in=owned_bookmark_ids).update(
+        date_modified=timezone.now()
+    )
 
 
-def untag_bookmarks(bookmark_ids: [Union[int, str]], tag_string: str, current_user: User):
+def untag_bookmarks(
+    bookmark_ids: [Union[int, str]], tag_string: str, current_user: User
+):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
-    owned_bookmark_ids = Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).values_list('id',
-                                                                                                                flat=True)
+    owned_bookmark_ids = Bookmark.objects.filter(
+        owner=current_user, id__in=sanitized_bookmark_ids
+    ).values_list("id", flat=True)
     tag_names = parse_tag_string(tag_string)
     tags = get_or_create_tags(tag_names, current_user)
 
     BookmarkToTagRelationShip = Bookmark.tags.through
     for tag in tags:
         # Remove all bookmark -> tag associations for the owned bookmarks and the current tag
-        BookmarkToTagRelationShip.objects.filter(bookmark_id__in=owned_bookmark_ids, tag=tag).delete()
+        BookmarkToTagRelationShip.objects.filter(
+            bookmark_id__in=owned_bookmark_ids, tag=tag
+        ).delete()
 
-    Bookmark.objects.filter(id__in=owned_bookmark_ids).update(date_modified=timezone.now())
+    Bookmark.objects.filter(id__in=owned_bookmark_ids).update(
+        date_modified=timezone.now()
+    )
 
 
 def mark_bookmarks_as_read(bookmark_ids: [Union[int, str]], current_user: User):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
 
-    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(unread=False,
-                                                                                      date_modified=timezone.now())
+    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(
+        unread=False, date_modified=timezone.now()
+    )
 
 
 def mark_bookmarks_as_unread(bookmark_ids: [Union[int, str]], current_user: User):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
 
-    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(unread=True,
-                                                                                      date_modified=timezone.now())
+    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(
+        unread=True, date_modified=timezone.now()
+    )
 
 
 def share_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
 
-    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(shared=True,
-                                                                                      date_modified=timezone.now())
+    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(
+        shared=True, date_modified=timezone.now()
+    )
 
 
 def unshare_bookmarks(bookmark_ids: [Union[int, str]], current_user: User):
     sanitized_bookmark_ids = _sanitize_id_list(bookmark_ids)
 
-    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(shared=False,
-                                                                                      date_modified=timezone.now())
+    Bookmark.objects.filter(owner=current_user, id__in=sanitized_bookmark_ids).update(
+        shared=False, date_modified=timezone.now()
+    )
 
 
 def _merge_bookmark_data(from_bookmark: Bookmark, to_bookmark: Bookmark):
bookmarks/services/exporter.py
@@ -13,40 +13,41 @@ def export_netscape_html(bookmarks: List[Bookmark]):
     [append_bookmark(doc, bookmark) for bookmark in bookmarks]
     append_list_end(doc)
 
-    return '\n\r'.join(doc)
+    return "\n\r".join(doc)
 
 
 def append_header(doc: BookmarkDocument):
-    doc.append('<!DOCTYPE NETSCAPE-Bookmark-file-1>')
+    doc.append("<!DOCTYPE NETSCAPE-Bookmark-file-1>")
     doc.append('<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">')
-    doc.append('<TITLE>Bookmarks</TITLE>')
-    doc.append('<H1>Bookmarks</H1>')
+    doc.append("<TITLE>Bookmarks</TITLE>")
+    doc.append("<H1>Bookmarks</H1>")
 
 
 def append_list_start(doc: BookmarkDocument):
-    doc.append('<DL><p>')
+    doc.append("<DL><p>")
 
 
 def append_bookmark(doc: BookmarkDocument, bookmark: Bookmark):
     url = bookmark.url
-    title = html.escape(bookmark.resolved_title or '')
-    desc = html.escape(bookmark.resolved_description or '')
+    title = html.escape(bookmark.resolved_title or "")
+    desc = html.escape(bookmark.resolved_description or "")
     if bookmark.notes:
-        desc += f'[linkding-notes]{html.escape(bookmark.notes)}[/linkding-notes]'
+        desc += f"[linkding-notes]{html.escape(bookmark.notes)}[/linkding-notes]"
     tag_names = bookmark.tag_names
     if bookmark.is_archived:
-        tag_names.append('linkding:archived')
-    tags = ','.join(tag_names)
-    toread = '1' if bookmark.unread else '0'
-    private = '0' if bookmark.shared else '1'
+        tag_names.append("linkding:archived")
+    tags = ",".join(tag_names)
+    toread = "1" if bookmark.unread else "0"
+    private = "0" if bookmark.shared else "1"
     added = int(bookmark.date_added.timestamp())
 
     doc.append(
-        f'<DT><A HREF="{url}" ADD_DATE="{added}" PRIVATE="{private}" TOREAD="{toread}" TAGS="{tags}">{title}</A>')
+        f'<DT><A HREF="{url}" ADD_DATE="{added}" PRIVATE="{private}" TOREAD="{toread}" TAGS="{tags}">{title}</A>'
+    )
 
     if desc:
-        doc.append(f'<DD>{desc}')
+        doc.append(f"<DD>{desc}")
 
 
 def append_list_end(doc: BookmarkDocument):
-    doc.append('</DL><p>')
+    doc.append("</DL><p>")
bookmarks/services/favicon_loader.py
@@ -15,7 +15,7 @@ logger = logging.getLogger(__name__)
 
 # register mime type for .ico files, which is not included in the default
 # mimetypes of the Docker image
-mimetypes.add_type('image/x-icon', '.ico')
+mimetypes.add_type("image/x-icon", ".ico")
 
 
 def _ensure_favicon_folder():
@@ -23,16 +23,16 @@ def _ensure_favicon_folder():
 
 
 def _url_to_filename(url: str) -> str:
-    return re.sub(r'\W+', '_', url)
+    return re.sub(r"\W+", "_", url)
 
 
 def _get_url_parameters(url: str) -> dict:
     parsed_uri = urlparse(url)
     return {
         # https://example.com/foo?bar -> https://example.com
-        'url': f'{parsed_uri.scheme}://{parsed_uri.hostname}',
+        "url": f"{parsed_uri.scheme}://{parsed_uri.hostname}",
         # https://example.com/foo?bar -> example.com
-        'domain': parsed_uri.hostname,
+        "domain": parsed_uri.hostname,
     }
 
 
@@ -63,21 +63,21 @@ def load_favicon(url: str) -> str:
     # Create favicon folder if not exists
     _ensure_favicon_folder()
     # Use scheme+hostname as favicon filename to reuse icon for all pages on the same domain
-    favicon_name = _url_to_filename(url_parameters['url'])
+    favicon_name = _url_to_filename(url_parameters["url"])
     favicon_file = _check_existing_favicon(favicon_name)
 
     if not favicon_file:
         # Load favicon from provider, save to file
         favicon_url = settings.LD_FAVICON_PROVIDER.format(**url_parameters)
-        logger.debug(f'Loading favicon from: {favicon_url}')
+        logger.debug(f"Loading favicon from: {favicon_url}")
         with requests.get(favicon_url, stream=True) as response:
-            content_type = response.headers['Content-Type']
+            content_type = response.headers["Content-Type"]
             file_extension = mimetypes.guess_extension(content_type)
-            favicon_file = f'{favicon_name}{file_extension}'
+            favicon_file = f"{favicon_name}{file_extension}"
             favicon_path = _get_favicon_path(favicon_file)
-            with open(favicon_path, 'wb') as file:
+            with open(favicon_path, "wb") as file:
                 for chunk in response.iter_content(chunk_size=8192):
                     file.write(chunk)
-        logger.debug(f'Saved favicon as: {favicon_path}')
+        logger.debug(f"Saved favicon as: {favicon_path}")
 
     return favicon_file
bookmarks/services/importer.py
@@ -55,18 +55,20 @@ class TagCache:
         self.cache[tag.name.lower()] = tag
 
 
-def import_netscape_html(html: str, user: User, options: ImportOptions = ImportOptions()) -> ImportResult:
+def import_netscape_html(
+    html: str, user: User, options: ImportOptions = ImportOptions()
+) -> ImportResult:
     result = ImportResult()
     import_start = timezone.now()
 
     try:
         netscape_bookmarks = parse(html)
     except:
-        logging.exception('Could not read bookmarks file.')
+        logging.exception("Could not read bookmarks file.")
         raise
 
     parse_end = timezone.now()
-    logger.debug(f'Parse duration: {parse_end - import_start}')
+    logger.debug(f"Parse duration: {parse_end - import_start}")
 
     # Create and cache all tags beforehand
     _create_missing_tags(netscape_bookmarks, user)
@@ -83,7 +85,7 @@ def import_netscape_html(html: str, user: User, options: ImportOptions = ImportO
     tasks.schedule_bookmarks_without_favicons(user)
 
     end = timezone.now()
-    logger.debug(f'Import duration: {end - import_start}')
+    logger.debug(f"Import duration: {end - import_start}")
 
     return result
 
@@ -110,7 +112,7 @@ def _get_batches(items: List, batch_size: int):
     num_items = len(items)
 
     while offset < num_items:
-        batch = items[offset:min(offset + batch_size, num_items)]
+        batch = items[offset : min(offset + batch_size, num_items)]
         if len(batch) > 0:
             batches.append(batch)
         offset = offset + batch_size
@@ -118,11 +120,13 @@ def _get_batches(items: List, batch_size: int):
     return batches
 
 
-def _import_batch(netscape_bookmarks: List[NetscapeBookmark],
-                  user: User,
-                  options: ImportOptions,
-                  tag_cache: TagCache,
-                  result: ImportResult):
+def _import_batch(
+    netscape_bookmarks: List[NetscapeBookmark],
+    user: User,
+    options: ImportOptions,
+    tag_cache: TagCache,
+    result: ImportResult,
+):
     # Query existing bookmarks
     batch_urls = [bookmark.href for bookmark in netscape_bookmarks]
     existing_bookmarks = Bookmark.objects.filter(owner=user, url__in=batch_urls)
@@ -136,7 +140,13 @@ def _import_batch(netscape_bookmarks: List[NetscapeBookmark],
         try:
             # Lookup existing bookmark by URL, or create new bookmark if there is no bookmark for that URL yet
             bookmark = next(
-                (bookmark for bookmark in existing_bookmarks if bookmark.url == netscape_bookmark.href), None)
+                (
+                    bookmark
+                    for bookmark in existing_bookmarks
+                    if bookmark.url == netscape_bookmark.href
+                ),
+                None,
+            )
             if not bookmark:
                 bookmark = Bookmark(owner=user)
                 is_update = False
@@ -146,7 +156,7 @@ def _import_batch(netscape_bookmarks: List[NetscapeBookmark],
             _copy_bookmark_data(netscape_bookmark, bookmark, options)
             # Validate bookmark fields, exclude owner to prevent n+1 database query,
             # also there is no specific validation on owner
-            bookmark.clean_fields(exclude=['owner'])
+            bookmark.clean_fields(exclude=["owner"])
             # Schedule for update or insert
             if is_update:
                 bookmarks_to_update.append(bookmark)
@@ -155,20 +165,25 @@ def _import_batch(netscape_bookmarks: List[NetscapeBookmark],
 
             result.success = result.success + 1
         except:
-            shortened_bookmark_tag_str = str(netscape_bookmark)[:100] + '...'
-            logging.exception('Error importing bookmark: ' + shortened_bookmark_tag_str)
+            shortened_bookmark_tag_str = str(netscape_bookmark)[:100] + "..."
+            logging.exception("Error importing bookmark: " + shortened_bookmark_tag_str)
             result.failed = result.failed + 1
 
     # Bulk update bookmarks in DB
-    Bookmark.objects.bulk_update(bookmarks_to_update, ['url',
-                                                       'date_added',
-                                                       'date_modified',
-                                                       'unread',
-                                                       'shared',
-                                                       'title',
-                                                       'description',
-                                                       'notes',
-                                                       'owner'])
+    Bookmark.objects.bulk_update(
+        bookmarks_to_update,
+        [
+            "url",
+            "date_added",
+            "date_modified",
+            "unread",
+            "shared",
+            "title",
+            "description",
+            "notes",
+            "owner",
+        ],
+    )
     # Bulk insert new bookmarks into DB
     Bookmark.objects.bulk_create(bookmarks_to_create)
 
@@ -183,13 +198,20 @@ def _import_batch(netscape_bookmarks: List[NetscapeBookmark],
     for netscape_bookmark in netscape_bookmarks:
         # Lookup bookmark by URL again
         bookmark = next(
-            (bookmark for bookmark in existing_bookmarks if bookmark.url == netscape_bookmark.href), None)
+            (
+                bookmark
+                for bookmark in existing_bookmarks
+                if bookmark.url == netscape_bookmark.href
+            ),
+            None,
+        )
 
         if not bookmark:
             # Something is wrong, we should have just created this bookmark
-            shortened_bookmark_tag_str = str(netscape_bookmark)[:100] + '...'
+            shortened_bookmark_tag_str = str(netscape_bookmark)[:100] + "..."
             logging.warning(
-                f'Failed to assign tags to the bookmark: {shortened_bookmark_tag_str}. Could not find bookmark by URL.')
+                f"Failed to assign tags to the bookmark: {shortened_bookmark_tag_str}. Could not find bookmark by URL."
+            )
             continue
 
         # Get tag models by string, schedule inserts for bookmark -> tag associations
@@ -201,7 +223,9 @@ def _import_batch(netscape_bookmarks: List[NetscapeBookmark],
     BookmarkToTagRelationShip.objects.bulk_create(relationships, ignore_conflicts=True)
 
 
-def _copy_bookmark_data(netscape_bookmark: NetscapeBookmark, bookmark: Bookmark, options: ImportOptions):
+def _copy_bookmark_data(
+    netscape_bookmark: NetscapeBookmark, bookmark: Bookmark, options: ImportOptions
+):
     bookmark.url = netscape_bookmark.href
     if netscape_bookmark.date_added:
         bookmark.date_added = parse_timestamp(netscape_bookmark.date_added)
bookmarks/services/parser.py
@@ -25,29 +25,29 @@ class BookmarkParser(HTMLParser):
 
         self.current_tag = None
         self.bookmark = None
-        self.href = ''
-        self.add_date = ''
-        self.tags = ''
-        self.title = ''
-        self.description = ''
-        self.notes = ''
-        self.toread = ''
-        self.private = ''
+        self.href = ""
+        self.add_date = ""
+        self.tags = ""
+        self.title = ""
+        self.description = ""
+        self.notes = ""
+        self.toread = ""
+        self.private = ""
 
     def handle_starttag(self, tag: str, attrs: list):
-        name = 'handle_start_' + tag.lower()
+        name = "handle_start_" + tag.lower()
         if name in dir(self):
             getattr(self, name)({k.lower(): v for k, v in attrs})
         self.current_tag = tag
 
     def handle_endtag(self, tag: str):
-        name = 'handle_end_' + tag.lower()
+        name = "handle_end_" + tag.lower()
         if name in dir(self):
             getattr(self, name)()
         self.current_tag = None
 
     def handle_data(self, data):
-        name = f'handle_{self.current_tag}_data'
+        name = f"handle_{self.current_tag}_data"
         if name in dir(self):
             getattr(self, name)(data)
 
@@ -60,22 +60,22 @@ class BookmarkParser(HTMLParser):
     def handle_start_a(self, attrs: Dict[str, str]):
         vars(self).update(attrs)
         tag_names = parse_tag_string(self.tags)
-        archived = 'linkding:archived' in self.tags
+        archived = "linkding:archived" in self.tags
         try:
-            tag_names.remove('linkding:archived')
+            tag_names.remove("linkding:archived")
         except ValueError:
             pass
 
         self.bookmark = NetscapeBookmark(
             href=self.href,
-            title='',
-            description='',
-            notes='',
+            title="",
+            description="",
+            notes="",
             date_added=self.add_date,
             tag_names=tag_names,
-            to_read=self.toread == '1',
+            to_read=self.toread == "1",
             # Mark as private by default, also when attribute is not specified
-            private=self.private != '0',
+            private=self.private != "0",
             archived=archived,
         )
 
@@ -84,9 +84,9 @@ class BookmarkParser(HTMLParser):
 
     def handle_dd_data(self, data):
         desc = data.strip()
-        if '[linkding-notes]' in desc:
-            self.notes = desc.split('[linkding-notes]')[1].split('[/linkding-notes]')[0]
-            self.description = desc.split('[linkding-notes]')[0]
+        if "[linkding-notes]" in desc:
+            self.notes = desc.split("[linkding-notes]")[1].split("[/linkding-notes]")[0]
+            self.description = desc.split("[linkding-notes]")[0]
 
     def add_bookmark(self):
         if self.bookmark:
@@ -95,14 +95,14 @@ class BookmarkParser(HTMLParser):
             self.bookmark.notes = self.notes
             self.bookmarks.append(self.bookmark)
         self.bookmark = None
-        self.href = ''
-        self.add_date = ''
-        self.tags = ''
-        self.title = ''
-        self.description = ''
-        self.notes = ''
-        self.toread = ''
-        self.private = ''
+        self.href = ""
+        self.add_date = ""
+        self.tags = ""
+        self.title = ""
+        self.description = ""
+        self.notes = ""
+        self.toread = ""
+        self.private = ""
 
 
 def parse(html: str) -> List[NetscapeBookmark]:
bookmarks/services/tags.py
@@ -13,7 +13,7 @@ logger = logging.getLogger(__name__)
 
 def get_or_create_tags(tag_names: List[str], user: User):
     tags = [get_or_create_tag(tag_name, user) for tag_name in tag_names]
-    return unique(tags, operator.attrgetter('id'))
+    return unique(tags, operator.attrgetter("id"))
 
 
 def get_or_create_tag(name: str, user: User):
bookmarks/services/tasks.py
@@ -18,8 +18,10 @@ logger = logging.getLogger(__name__)
 
 def is_web_archive_integration_active(user: User) -> bool:
     background_tasks_enabled = not settings.LD_DISABLE_BACKGROUND_TASKS
-    web_archive_integration_enabled = \
-        user.profile.web_archive_integration == UserProfile.WEB_ARCHIVE_INTEGRATION_ENABLED
+    web_archive_integration_enabled = (
+        user.profile.web_archive_integration
+        == UserProfile.WEB_ARCHIVE_INTEGRATION_ENABLED
+    )
 
     return background_tasks_enabled and web_archive_integration_enabled
 
@@ -31,28 +33,36 @@ def create_web_archive_snapshot(user: User, bookmark: Bookmark, force_update: bo
 
 def _load_newest_snapshot(bookmark: Bookmark):
     try:
-        logger.info(f'Load existing snapshot for bookmark. url={bookmark.url}')
-        cdx_api = bookmarks.services.wayback.CustomWaybackMachineCDXServerAPI(bookmark.url)
+        logger.info(f"Load existing snapshot for bookmark. url={bookmark.url}")
+        cdx_api = bookmarks.services.wayback.CustomWaybackMachineCDXServerAPI(
+            bookmark.url
+        )
         existing_snapshot = cdx_api.newest()
 
         if existing_snapshot:
             bookmark.web_archive_snapshot_url = existing_snapshot.archive_url
-            bookmark.save(update_fields=['web_archive_snapshot_url'])
-            logger.info(f'Using newest snapshot. url={bookmark.url} from={existing_snapshot.datetime_timestamp}')
+            bookmark.save(update_fields=["web_archive_snapshot_url"])
+            logger.info(
+                f"Using newest snapshot. url={bookmark.url} from={existing_snapshot.datetime_timestamp}"
+            )
 
     except NoCDXRecordFound:
-        logger.info(f'Could not find any snapshots for bookmark. url={bookmark.url}')
+        logger.info(f"Could not find any snapshots for bookmark. url={bookmark.url}")
     except WaybackError as error:
-        logger.error(f'Failed to load existing snapshot. url={bookmark.url}', exc_info=error)
+        logger.error(
+            f"Failed to load existing snapshot. url={bookmark.url}", exc_info=error
+        )
 
 
 def _create_snapshot(bookmark: Bookmark):
-    logger.info(f'Create new snapshot for bookmark. url={bookmark.url}...')
-    archive = waybackpy.WaybackMachineSaveAPI(bookmark.url, DEFAULT_USER_AGENT, max_tries=1)
+    logger.info(f"Create new snapshot for bookmark. url={bookmark.url}...")
+    archive = waybackpy.WaybackMachineSaveAPI(
+        bookmark.url, DEFAULT_USER_AGENT, max_tries=1
+    )
     archive.save()
     bookmark.web_archive_snapshot_url = archive.archive_url
-    bookmark.save(update_fields=['web_archive_snapshot_url'])
-    logger.info(f'Successfully created new snapshot for bookmark:. url={bookmark.url}')
+    bookmark.save(update_fields=["web_archive_snapshot_url"])
+    logger.info(f"Successfully created new snapshot for bookmark:. url={bookmark.url}")
 
 
 @background()
@@ -72,10 +82,13 @@ def _create_web_archive_snapshot_task(bookmark_id: int, force_update: bool):
         return
     except TooManyRequestsError:
         logger.error(
-            f'Failed to create snapshot due to rate limiting, trying to load newest snapshot as fallback. url={bookmark.url}')
+            f"Failed to create snapshot due to rate limiting, trying to load newest snapshot as fallback. url={bookmark.url}"
+        )
     except WaybackError as error:
-        logger.error(f'Failed to create snapshot, trying to load newest snapshot as fallback. url={bookmark.url}',
-                     exc_info=error)
+        logger.error(
+            f"Failed to create snapshot, trying to load newest snapshot as fallback. url={bookmark.url}",
+            exc_info=error,
+        )
 
     # Load the newest snapshot as fallback
     _load_newest_snapshot(bookmark)
@@ -102,7 +115,9 @@ def schedule_bookmarks_without_snapshots(user: User):
 @background()
 def _schedule_bookmarks_without_snapshots_task(user_id: int):
     user = get_user_model().objects.get(id=user_id)
-    bookmarks_without_snapshots = Bookmark.objects.filter(web_archive_snapshot_url__exact='', owner=user)
+    bookmarks_without_snapshots = Bookmark.objects.filter(
+        web_archive_snapshot_url__exact="", owner=user
+    )
 
     for bookmark in bookmarks_without_snapshots:
         # To prevent rate limit errors from the Wayback API only try to load the latest snapshots instead of creating
@@ -128,14 +143,16 @@ def _load_favicon_task(bookmark_id: int):
     except Bookmark.DoesNotExist:
         return
 
-    logger.info(f'Load favicon for bookmark. url={bookmark.url}')
+    logger.info(f"Load favicon for bookmark. url={bookmark.url}")
 
     new_favicon_file = favicon_loader.load_favicon(bookmark.url)
 
     if new_favicon_file != bookmark.favicon_file:
         bookmark.favicon_file = new_favicon_file
-        bookmark.save(update_fields=['favicon_file'])
-        logger.info(f'Successfully updated favicon for bookmark. url={bookmark.url} icon={new_favicon_file}')
+        bookmark.save(update_fields=["favicon_file"])
+        logger.info(
+            f"Successfully updated favicon for bookmark. url={bookmark.url} icon={new_favicon_file}"
+        )
 
 
 def schedule_bookmarks_without_favicons(user: User):
@@ -146,11 +163,13 @@ def schedule_bookmarks_without_favicons(user: User):
 @background()
 def _schedule_bookmarks_without_favicons_task(user_id: int):
     user = get_user_model().objects.get(id=user_id)
-    bookmarks = Bookmark.objects.filter(favicon_file__exact='', owner=user)
+    bookmarks = Bookmark.objects.filter(favicon_file__exact="", owner=user)
     tasks = []
 
     for bookmark in bookmarks:
-        task = Task.objects.new_task(task_name='bookmarks.services.tasks._load_favicon_task', args=(bookmark.id,))
+        task = Task.objects.new_task(
+            task_name="bookmarks.services.tasks._load_favicon_task", args=(bookmark.id,)
+        )
         tasks.append(task)
 
     Task.objects.bulk_create(tasks)
@@ -168,7 +187,9 @@ def _schedule_refresh_favicons_task(user_id: int):
     tasks = []
 
     for bookmark in bookmarks:
-        task = Task.objects.new_task(task_name='bookmarks.services.tasks._load_favicon_task', args=(bookmark.id,))
+        task = Task.objects.new_task(
+            task_name="bookmarks.services.tasks._load_favicon_task", args=(bookmark.id,)
+        )
         tasks.append(task)
 
     Task.objects.bulk_create(tasks)
bookmarks/services/wayback.py
@@ -14,8 +14,10 @@ class CustomWaybackMachineCDXServerAPI(waybackpy.WaybackMachineCDXServerAPI):
 
     def newest(self):
         unix_timestamp = int(time.time())
-        self.closest = waybackpy.utils.unix_timestamp_to_wayback_timestamp(unix_timestamp)
-        self.sort = 'closest'
+        self.closest = waybackpy.utils.unix_timestamp_to_wayback_timestamp(
+            unix_timestamp
+        )
+        self.sort = "closest"
         self.limit = -5
 
         newest_snapshot = None
@@ -37,4 +39,4 @@ class CustomWaybackMachineCDXServerAPI(waybackpy.WaybackMachineCDXServerAPI):
         super().add_payload(payload)
         # Set fastLatest query param, as we are only using this API to get the latest snapshot and using fastLatest
         # makes searching for latest snapshots faster
-        payload['fastLatest'] = 'true'
+        payload["fastLatest"] = "true"
bookmarks/services/website_loader.py
@@ -18,9 +18,9 @@ class WebsiteMetadata:
 
     def to_dict(self):
         return {
-            'url': self.url,
-            'title': self.title,
-            'description': self.description,
+            "url": self.url,
+            "title": self.title,
+            "description": self.description,
         }
 
 
@@ -34,22 +34,29 @@ def load_website_metadata(url: str):
         start = timezone.now()
         page_text = load_page(url)
         end = timezone.now()
-        logger.debug(f'Load duration: {end - start}')
+        logger.debug(f"Load duration: {end - start}")
 
         start = timezone.now()
-        soup = BeautifulSoup(page_text, 'html.parser')
+        soup = BeautifulSoup(page_text, "html.parser")
 
         title = soup.title.string.strip() if soup.title is not None else None
-        description_tag = soup.find('meta', attrs={'name': 'description'})
-        description = description_tag['content'].strip() if description_tag and description_tag[
-            'content'] else None
+        description_tag = soup.find("meta", attrs={"name": "description"})
+        description = (
+            description_tag["content"].strip()
+            if description_tag and description_tag["content"]
+            else None
+        )
 
         if not description:
-            description_tag = soup.find('meta', attrs={'property': 'og:description'})
-            description = description_tag['content'].strip() if description_tag and description_tag['content'] else None
+            description_tag = soup.find("meta", attrs={"property": "og:description"})
+            description = (
+                description_tag["content"].strip()
+                if description_tag and description_tag["content"]
+                else None
+            )
 
         end = timezone.now()
-        logger.debug(f'Parsing duration: {end - start}')
+        logger.debug(f"Parsing duration: {end - start}")
     finally:
         return WebsiteMetadata(url=url, title=title, description=description)
 
@@ -73,30 +80,30 @@ def load_page(url: str):
             else:
                 content = content + chunk
 
-            logger.debug(f'Loaded chunk (iteration={iteration}, total={size / 1024})')
+            logger.debug(f"Loaded chunk (iteration={iteration}, total={size / 1024})")
 
             # Stop reading if we have parsed end of head tag
-            end_of_head = '</head>'.encode('utf-8')
+            end_of_head = "</head>".encode("utf-8")
             if end_of_head in content:
-                logger.debug(f'Found closing head tag after {size} bytes')
+                logger.debug(f"Found closing head tag after {size} bytes")
                 content = content.split(end_of_head)[0] + end_of_head
                 break
             # Stop reading if we exceed limit
             if size > MAX_CONTENT_LIMIT:
-                logger.debug(f'Cancel reading document after {size} bytes')
+                logger.debug(f"Cancel reading document after {size} bytes")
                 break
-        if hasattr(r, '_content_consumed'):
-            logger.debug(f'Request consumed: {r._content_consumed}')
+        if hasattr(r, "_content_consumed"):
+            logger.debug(f"Request consumed: {r._content_consumed}")
 
     # Use charset_normalizer to determine encoding that best matches the response content
     # Several sites seem to specify the response encoding incorrectly, so we ignore it and use custom logic instead
     # This is different from Response.text which does respect the encoding specified in the response first,
    # before trying to determine one
-    results = from_bytes(content or '')
+    results = from_bytes(content or "")
     return str(results.best())
 
 
-DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36'
+DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36"
 
 
 def fake_request_headers():
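To keep a tree formatted after a commit like this, projects typically add black's check mode to CI (for example, running "black --check ."), which exits non-zero when any file would be reformatted, without modifying it; whether linkding wires that up is outside this diff.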