Compare commits

...

10 Commits

Author SHA1 Message Date
Sascha Ißbrücker
076c5d7658 Bump version 2021-08-26 12:40:03 +02:00
Sascha Ißbrücker
e47c00bd07 Add support for micro-, nanosecond timestamps in importer (#151) 2021-08-26 12:33:54 +02:00
Sascha Ißbrücker
55a0d189dd Update CHANGELOG.md 2021-08-25 12:36:38 +02:00
Sascha Ißbrücker
d39ce076ec Bump version 2021-08-25 10:25:22 +02:00
Chris Cesare
aa0258d3b6 remove duplicate word in README (#136) 2021-08-25 10:20:35 +02:00
Taku Izumi
937858cf58 Fix website scraper decoding content incorrectly (#126)
* Avoid stall on web scraping

This patch fixes stall on web scraping.
I encountered a stall (scraping never ends) when adding
a bookmark of some site.
To avoid this case, adding a timeout parameter at requests.get()
function is a solution.

Signed-off-by: Taku Izumi <admin@orz-style.com>

* Avoid character corruption of scraping some Japanese sites

This patch fixes character corruption of scraping some Japanese
sites. To avoid character corruption, I use r.content instead
of r.text in load_page function.

The reason of character corruption is encoding problem, I think.
r.text handles data as unicode encoded text, so if scraping
web site's charset is not unicode encoded, character corruption
occurs. r.content handles data as str[], we can avoid encoding
problem.

Signed-off-by: Taku Izumi <admin@orz-style.com>

* use charset_normalizer to determine response encoding

Co-authored-by: Taku Izumi <admin@orz-style.com>
Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@googlemail.com>
2021-08-25 10:16:23 +02:00
Sascha Ißbrücker
8047ba6c63 Fix importer not validating bookmark models (#149) 2021-08-25 09:20:01 +02:00
Damanpreet Singh
de903bc341 Add about section in settings (#134)
* About section in settings

* Added about section in settings tab

* fix code style

Co-authored-by: Sascha Ißbrücker <sascha.issbruecker@googlemail.com>
2021-08-24 19:47:58 +02:00
Sascha Ißbrücker
c8fcc426b0 Update CHANGELOG.md 2021-08-17 06:07:40 +02:00
Sascha Ißbrücker
eb915210d3 Update CHANGELOG.md 2021-08-17 06:02:07 +02:00
15 changed files with 196 additions and 11 deletions

View File

@@ -1,5 +1,18 @@
# Changelog
## v1.7.1 (25/08/2021)
- [**bug**] umlaut/non-ascii characters broken when using bookmarklet (firefox) [#148](https://github.com/sissbruecker/linkding/issues/148)
- [**bug**] Bookmark import accepts empty URL values [#124](https://github.com/sissbruecker/linkding/issues/124)
- [**enhancement**] Show the version in the settings [#104](https://github.com/sissbruecker/linkding/issues/104)
---
## v1.7.0 (17/08/2021)
- Upgrade to Django 3
- Bump other dependencies
---
## v1.6.5 (15/08/2021)
- [**enhancement**] query with multiple hashtags very slow [#112](https://github.com/sissbruecker/linkding/issues/112)
---

View File

@@ -88,7 +88,7 @@ If you can not or don't want to use Docker you can install the application manua
### Hosting
The application runs in a web-server called [uWSGI](https://uwsgi-docs.readthedocs.io/en/latest/) that is production-ready and that you can expose to the web. If you don't know how to configure your server to expose the application to the web there are several more steps involved. I can not support support the process here, but I can give some pointers on what to search for:
The application runs in a web-server called [uWSGI](https://uwsgi-docs.readthedocs.io/en/latest/) that is production-ready and that you can expose to the web. If you don't know how to configure your server to expose the application to the web there are several more steps involved. I can not support the process here, but I can give some pointers on what to search for:
- first get the app running (described in this document)
- open the port that the application is running on in your servers firewall
- depending on your network configuration, forward the opened port in your network router, so that the application can be addressed from the internet using your public IP address and the opened port

View File

@@ -1,6 +1,5 @@
import logging
from dataclasses import dataclass
from datetime import datetime
from django.contrib.auth.models import User
from django.utils import timezone
@@ -8,6 +7,7 @@ from django.utils import timezone
from bookmarks.models import Bookmark, parse_tag_string
from bookmarks.services.parser import parse, NetscapeBookmark
from bookmarks.services.tags import get_or_create_tags
from bookmarks.utils import parse_timestamp
logger = logging.getLogger(__name__)
@@ -47,7 +47,7 @@ def _import_bookmark_tag(netscape_bookmark: NetscapeBookmark, user: User):
bookmark.url = netscape_bookmark.href
if netscape_bookmark.date_added:
bookmark.date_added = datetime.utcfromtimestamp(int(netscape_bookmark.date_added)).astimezone()
bookmark.date_added = parse_timestamp(netscape_bookmark.date_added)
else:
bookmark.date_added = timezone.now()
bookmark.date_modified = bookmark.date_added
@@ -57,6 +57,7 @@ def _import_bookmark_tag(netscape_bookmark: NetscapeBookmark, user: User):
bookmark.description = netscape_bookmark.description
bookmark.owner = user
bookmark.full_clean()
bookmark.save()
# Set tags

View File

@@ -2,6 +2,7 @@ from dataclasses import dataclass
import requests
from bs4 import BeautifulSoup
from charset_normalizer import from_bytes
@dataclass
@@ -33,5 +34,11 @@ def load_website_metadata(url: str):
def load_page(url: str):
r = requests.get(url)
return r.text
r = requests.get(url, timeout=10)
# Use charset_normalizer to determine encoding that best matches the response content
# Several sites seem to specify the response encoding incorrectly, so we ignore it and use custom logic instead
# This is different from Response.text which does respect the encoding specified in the response first,
# before trying to determine one
results = from_bytes(r.content)
return str(results.best())

View File

@@ -69,6 +69,15 @@
{% endif %}
</section>
{# About section #}
<section class="content-area">
<h2>About</h2>
<p>Version: {{ app_version }}</p>
<p>
Code: <a href="https://github.com/sissbruecker/linkding/"
target="_blank">GitHub</a>
</p>
</section>
</div>
{% endblock %}

View File

@@ -1,4 +1,5 @@
import random
import logging
from django.contrib.auth.models import User
from django.utils import timezone
@@ -117,3 +118,14 @@ def random_sentence(num_words: int = None, including_word: str = ''):
random.shuffle(selected_words)
return ' '.join(selected_words)
def disable_logging(f):
def wrapper(*args):
logging.disable(logging.CRITICAL)
result = f(*args)
logging.disable(logging.NOTSET)
return result
return wrapper

View File

@@ -11,10 +11,10 @@
<DT><A HREF="https://example.com/1" ADD_DATE="1616337559" PRIVATE="0" TOREAD="0" TAGS="tag1">test title 1</A>
<DD>test description 1
<DT><A HREF="https://example.com/2" ADD_DATE="1616337559" PRIVATE="0" TOREAD="0" TAGS="tag2">test title 2</A>
<DT><A HREF="https://example.com/2" ADD_DATE="1616337559000" PRIVATE="0" TOREAD="0" TAGS="tag2">test title 2</A>
<DD>test description 2
<DT><A HREF="https://example.com/3" ADD_DATE="1616337559" PRIVATE="0" TOREAD="0" TAGS="tag3">test title 3</A>
<DT><A HREF="https://example.com/3" ADD_DATE="1616337559000000" PRIVATE="0" TOREAD="0" TAGS="tag3">test title 3</A>
<DD>test description 3
</DL><p>

View File

@@ -34,6 +34,27 @@ class BookmarkValidationTestCase(TestCase):
def setUp(self) -> None:
self.user = User.objects.create_user('testuser', 'test@example.com', 'password123')
def test_bookmark_model_should_not_allow_missing_url(self):
bookmark = Bookmark(
date_added=datetime.datetime.now(),
date_modified=datetime.datetime.now(),
owner=self.user
)
with self.assertRaises(ValidationError):
bookmark.full_clean()
def test_bookmark_model_should_not_allow_empty_url(self):
bookmark = Bookmark(
url='',
date_added=datetime.datetime.now(),
date_modified=datetime.datetime.now(),
owner=self.user
)
with self.assertRaises(ValidationError):
bookmark.full_clean()
@override_settings(LD_DISABLE_URL_VALIDATION=False)
def test_bookmark_model_should_validate_url_if_not_disabled_in_settings(self):
self._run_bookmark_model_url_validity_checks(ENABLED_URL_VALIDATION_TEST_CASES)

View File

@@ -0,0 +1,33 @@
from django.test import TestCase
from bookmarks.services.importer import import_netscape_html
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging
class ImporterTestCase(TestCase, BookmarkFactoryMixin):
def create_import_html(self, bookmark_tags_string: str):
return f'''
<!DOCTYPE NETSCAPE-Bookmark-file-1>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
<TITLE>Bookmarks</TITLE>
<H1>Bookmarks</H1>
<DL><p>
{bookmark_tags_string}
</DL><p>
'''
@disable_logging
def test_validate_empty_or_missing_bookmark_url(self):
test_html = self.create_import_html(f'''
<!-- Empty URL -->
<DT><A HREF="" ADD_DATE="1616337559" PRIVATE="0" TOREAD="0" TAGS="tag3">Empty URL</A>
<DD>Empty URL
<!-- Missing URL -->
<DT><A ADD_DATE="1616337559" PRIVATE="0" TOREAD="0" TAGS="tag3">Missing URL</A>
<DD>Missing URL
''')
import_result = import_netscape_html(test_html, self.get_or_create_test_user())
self.assertEqual(import_result.success, 0)

View File

@@ -1,7 +1,7 @@
from django.test import TestCase
from django.urls import reverse
from bookmarks.tests.helpers import BookmarkFactoryMixin
from bookmarks.tests.helpers import BookmarkFactoryMixin, disable_logging
class SettingsImportViewTestCase(TestCase, BookmarkFactoryMixin):
@@ -52,6 +52,7 @@ class SettingsImportViewTestCase(TestCase, BookmarkFactoryMixin):
self.assertNoFormSuccessHint(response)
self.assertFormErrorHint(response, 'Please select a file to import.')
@disable_logging
def test_should_show_hint_if_import_raises_exception(self):
with open('bookmarks/tests/resources/invalid_import_file.png', 'rb') as import_file:
response = self.client.post(
@@ -64,6 +65,7 @@ class SettingsImportViewTestCase(TestCase, BookmarkFactoryMixin):
self.assertNoFormSuccessHint(response)
self.assertFormErrorHint(response, 'An error occurred during bookmark import.')
@disable_logging
def test_should_show_respective_hints_if_not_all_bookmarks_were_imported_successfully(self):
with open('bookmarks/tests/resources/simple_valid_import_file_with_one_invalid_bookmark.html') as import_file:
response = self.client.post(

View File

@@ -1,9 +1,10 @@
from unittest.mock import patch
from dateutil.relativedelta import relativedelta
from django.test import TestCase
from django.utils import timezone
from bookmarks.utils import humanize_absolute_date, humanize_relative_date
from bookmarks.utils import humanize_absolute_date, humanize_relative_date, parse_timestamp
class UtilsTestCase(TestCase):
@@ -63,3 +64,45 @@ class UtilsTestCase(TestCase):
# Regression: Test that subsequent calls use current date instead of cached date (#107)
with patch.object(timezone, 'now', return_value=timezone.datetime(2021, 1, 13)):
self.assertEqual(humanize_relative_date(timezone.datetime(2021, 1, 13)), 'Today')
def verify_timestamp(self, date, factor=1):
timestamp_string = str(int(date.timestamp() * factor))
parsed_date = parse_timestamp(timestamp_string)
self.assertEqual(date, parsed_date)
def test_parse_timestamp_fails_for_invalid_timestamps(self):
with self.assertRaises(ValueError):
parse_timestamp('invalid')
def test_parse_timestamp_parses_millisecond_timestamps(self):
now = timezone.now().replace(microsecond=0)
fifty_years_ago = now - relativedelta(year=50)
fifty_years_from_now = now + relativedelta(year=50)
self.verify_timestamp(now)
self.verify_timestamp(fifty_years_ago)
self.verify_timestamp(fifty_years_from_now)
def test_parse_timestamp_parses_microsecond_timestamps(self):
now = timezone.now().replace(microsecond=0)
fifty_years_ago = now - relativedelta(year=50)
fifty_years_from_now = now + relativedelta(year=50)
self.verify_timestamp(now, 1000)
self.verify_timestamp(fifty_years_ago, 1000)
self.verify_timestamp(fifty_years_from_now, 1000)
def test_parse_timestamp_parses_nanosecond_timestamps(self):
now = timezone.now().replace(microsecond=0)
fifty_years_ago = now - relativedelta(year=50)
fifty_years_from_now = now + relativedelta(year=50)
self.verify_timestamp(now, 1000000)
self.verify_timestamp(fifty_years_ago, 1000000)
self.verify_timestamp(fifty_years_from_now, 1000000)
def test_parse_timestamp_fails_for_out_of_range_timestamp(self):
now = timezone.now().replace(microsecond=0)
with self.assertRaises(ValueError):
self.verify_timestamp(now, 1000000000)

View File

@@ -58,3 +58,40 @@ def humanize_relative_date(value: datetime, now: Optional[datetime] = None):
return 'Yesterday'
else:
return weekday_names[value.isoweekday()]
def parse_timestamp(value: str):
"""
Parses a string timestamp into a datetime value
First tries to parse the timestamp as milliseconds.
If that fails with an error indicating that the timestamp exceeds the maximum,
it tries to parse the timestamp as microseconds, and then as nanoseconds
:param value:
:return:
"""
try:
timestamp = int(value)
except ValueError:
raise ValueError(f'{value} is not a valid timestamp')
try:
return datetime.utcfromtimestamp(timestamp).astimezone()
except (OverflowError, ValueError, OSError):
pass
# Value exceeds the max. allowed timestamp
# Try parsing as microseconds
try:
return datetime.utcfromtimestamp(timestamp / 1000).astimezone()
except (OverflowError, ValueError, OSError):
pass
# Value exceeds the max. allowed timestamp
# Try parsing as nanoseconds
try:
return datetime.utcfromtimestamp(timestamp / 1000000).astimezone()
except (OverflowError, ValueError, OSError):
pass
# Timestamp is out of range
raise ValueError(f'{value} exceeds maximum value for a timestamp')

View File

@@ -14,6 +14,12 @@ from bookmarks.services import importer
logger = logging.getLogger(__name__)
try:
with open("version.txt", "r") as f:
app_version = f.read().strip("\n")
except Exception as exc:
logging.exception(exc)
pass
@login_required
def general(request):
@@ -30,6 +36,7 @@ def general(request):
'form': form,
'import_success_message': import_success_message,
'import_errors_message': import_errors_message,
'app_version': app_version
})

View File

@@ -1,6 +1,6 @@
{
"name": "linkding",
"version": "1.7.0",
"version": "1.7.2",
"description": "",
"main": "index.js",
"scripts": {

View File

@@ -1 +1 @@
1.7.0
1.7.2