mirror of
https://github.com/sissbruecker/linkding.git
synced 2025-08-07 10:58:25 +02:00
Remove ads and cookie banners from HTML snapshots (#695)
* integrate ublock with single-file * reuse chromium profile
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -191,3 +191,6 @@ typings/
|
|||||||
/tmp
|
/tmp
|
||||||
# Database file
|
# Database file
|
||||||
/data
|
/data
|
||||||
|
# ublock + chromium
|
||||||
|
/uBlock0.chromium
|
||||||
|
/chromium-profile
|
||||||
|
@@ -18,11 +18,12 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
def create_snapshot(url: str, filepath: str):
|
def create_snapshot(url: str, filepath: str):
|
||||||
singlefile_path = settings.LD_SINGLEFILE_PATH
|
singlefile_path = settings.LD_SINGLEFILE_PATH
|
||||||
# parse string to list of arguments
|
# parse options to list of arguments
|
||||||
singlefile_options = shlex.split(settings.LD_SINGLEFILE_OPTIONS)
|
ublock_options = shlex.split(settings.LD_SINGLEFILE_UBLOCK_OPTIONS)
|
||||||
|
custom_options = shlex.split(settings.LD_SINGLEFILE_OPTIONS)
|
||||||
temp_filepath = filepath + ".tmp"
|
temp_filepath = filepath + ".tmp"
|
||||||
# concat lists
|
# concat lists
|
||||||
args = [singlefile_path] + singlefile_options + [url, temp_filepath]
|
args = [singlefile_path] + ublock_options + custom_options + [url, temp_filepath]
|
||||||
try:
|
try:
|
||||||
# Use start_new_session=True to create a new process group
|
# Use start_new_session=True to create a new process group
|
||||||
process = subprocess.Popen(args, start_new_session=True)
|
process = subprocess.Popen(args, start_new_session=True)
|
||||||
|
@@ -61,6 +61,10 @@ class SingleFileServiceTestCase(TestCase):
|
|||||||
|
|
||||||
expected_args = [
|
expected_args = [
|
||||||
"single-file",
|
"single-file",
|
||||||
|
'--browser-arg="--headless=new"',
|
||||||
|
'--browser-arg="--user-data-dir=./chromium-profile"',
|
||||||
|
'--browser-arg="--no-sandbox"',
|
||||||
|
'--browser-arg="--load-extension=uBlock0.chromium"',
|
||||||
"http://example.com",
|
"http://example.com",
|
||||||
self.html_filepath + ".tmp",
|
self.html_filepath + ".tmp",
|
||||||
]
|
]
|
||||||
@@ -79,6 +83,10 @@ class SingleFileServiceTestCase(TestCase):
|
|||||||
|
|
||||||
expected_args = [
|
expected_args = [
|
||||||
"single-file",
|
"single-file",
|
||||||
|
'--browser-arg="--headless=new"',
|
||||||
|
'--browser-arg="--user-data-dir=./chromium-profile"',
|
||||||
|
'--browser-arg="--no-sandbox"',
|
||||||
|
'--browser-arg="--load-extension=uBlock0.chromium"',
|
||||||
"--some-option",
|
"--some-option",
|
||||||
"some value",
|
"some value",
|
||||||
"--another-option",
|
"--another-option",
|
||||||
@@ -97,9 +105,9 @@ class SingleFileServiceTestCase(TestCase):
|
|||||||
with mock.patch("subprocess.Popen", return_value=mock_process):
|
with mock.patch("subprocess.Popen", return_value=mock_process):
|
||||||
singlefile.create_snapshot("http://example.com", self.html_filepath)
|
singlefile.create_snapshot("http://example.com", self.html_filepath)
|
||||||
|
|
||||||
mock_process.wait.assert_called_with(timeout=60)
|
mock_process.wait.assert_called_with(timeout=120)
|
||||||
|
|
||||||
@override_settings(LD_SINGLEFILE_TIMEOUT_SEC=120)
|
@override_settings(LD_SINGLEFILE_TIMEOUT_SEC=180)
|
||||||
def test_create_snapshot_custom_timeout_setting(self):
|
def test_create_snapshot_custom_timeout_setting(self):
|
||||||
mock_process = mock.Mock()
|
mock_process = mock.Mock()
|
||||||
mock_process.wait.return_value = 0
|
mock_process.wait.return_value = 0
|
||||||
@@ -108,4 +116,4 @@ class SingleFileServiceTestCase(TestCase):
|
|||||||
with mock.patch("subprocess.Popen", return_value=mock_process):
|
with mock.patch("subprocess.Popen", return_value=mock_process):
|
||||||
singlefile.create_snapshot("http://example.com", self.html_filepath)
|
singlefile.create_snapshot("http://example.com", self.html_filepath)
|
||||||
|
|
||||||
mock_process.wait.assert_called_with(timeout=120)
|
mock_process.wait.assert_called_with(timeout=180)
|
||||||
|
@@ -9,6 +9,8 @@ mkdir -p data
|
|||||||
mkdir -p data/favicons
|
mkdir -p data/favicons
|
||||||
# Create assets folder if it does not exist
|
# Create assets folder if it does not exist
|
||||||
mkdir -p data/assets
|
mkdir -p data/assets
|
||||||
|
# Create chromium profile folder if it does not exist
|
||||||
|
mkdir -p chromium-profile
|
||||||
|
|
||||||
# Generate secret key file if it does not exist
|
# Generate secret key file if it does not exist
|
||||||
python manage.py generate_secret_key
|
python manage.py generate_secret_key
|
||||||
@@ -21,8 +23,9 @@ python manage.py create_initial_superuser
|
|||||||
# Migrate legacy background tasks to Huey
|
# Migrate legacy background tasks to Huey
|
||||||
python manage.py migrate_tasks
|
python manage.py migrate_tasks
|
||||||
|
|
||||||
# Ensure the DB folder is owned by the right user
|
# Ensure folders are owned by the right user
|
||||||
chown -R www-data: /etc/linkding/data
|
chown -R www-data: /etc/linkding/data
|
||||||
|
chown -R www-data: /etc/linkding/chromium-profile
|
||||||
|
|
||||||
# Start background task processor using supervisord, unless explicitly disabled
|
# Start background task processor using supervisord, unless explicitly disabled
|
||||||
if [ "$LD_DISABLE_BACKGROUND_TASKS" != "True" ]; then
|
if [ "$LD_DISABLE_BACKGROUND_TASKS" != "True" ]; then
|
||||||
|
@@ -99,10 +99,29 @@ CMD curl -f http://localhost:${LD_SERVER_PORT:-9090}/${LD_CONTEXT_PATH}health ||
|
|||||||
CMD ["./bootstrap.sh"]
|
CMD ["./bootstrap.sh"]
|
||||||
|
|
||||||
|
|
||||||
|
FROM node:18-alpine AS ublock-build
|
||||||
|
WORKDIR /etc/linkding
|
||||||
|
# Install necessary tools
|
||||||
|
RUN apk add --no-cache curl jq unzip
|
||||||
|
# Fetch the latest release tag
|
||||||
|
# Download the library
|
||||||
|
# Unzip the library
|
||||||
|
RUN TAG=$(curl -sL https://api.github.com/repos/gorhill/uBlock/releases/latest | jq -r '.tag_name') && \
|
||||||
|
DOWNLOAD_URL=https://github.com/gorhill/uBlock/releases/download/$TAG/uBlock0_$TAG.chromium.zip && \
|
||||||
|
curl -L -o uBlock0.zip $DOWNLOAD_URL && \
|
||||||
|
unzip uBlock0.zip
|
||||||
|
# Patch assets.json to enable easylist-cookies by default
|
||||||
|
RUN curl -L -o ./uBlock0.chromium/assets/thirdparties/easylist/easylist-cookies.txt https://ublockorigin.github.io/uAssets/thirdparties/easylist-cookies.txt
|
||||||
|
RUN jq '."fanboy-cookiemonster" |= del(.off) | ."fanboy-cookiemonster".contentURL += ["assets/thirdparties/easylist/easylist-cookies.txt"]' ./uBlock0.chromium/assets/assets.json > temp.json && \
|
||||||
|
mv temp.json ./uBlock0.chromium/assets/assets.json
|
||||||
|
|
||||||
|
|
||||||
FROM linkding AS linkding-plus
|
FROM linkding AS linkding-plus
|
||||||
# install node, chromium
|
# install node, chromium
|
||||||
RUN apk update && apk add nodejs npm chromium
|
RUN apk update && apk add nodejs npm chromium
|
||||||
# install single-file from fork for now, which contains several hotfixes
|
# install single-file from fork for now, which contains several hotfixes
|
||||||
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/f3730995a52f27d5041a1ad9e7528af4b6b4cf4b
|
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/4c54b3bc704cfb3e96cec2d24854caca3df0b3b6
|
||||||
|
# copy uBlock0
|
||||||
|
COPY --from=ublock-build /etc/linkding/uBlock0.chromium uBlock0.chromium/
|
||||||
# enable snapshot support
|
# enable snapshot support
|
||||||
ENV LD_ENABLE_SNAPSHOTS=True
|
ENV LD_ENABLE_SNAPSHOTS=True
|
||||||
|
@@ -96,6 +96,24 @@ CMD curl -f http://localhost:${LD_SERVER_PORT:-9090}/${LD_CONTEXT_PATH}health ||
|
|||||||
|
|
||||||
CMD ["./bootstrap.sh"]
|
CMD ["./bootstrap.sh"]
|
||||||
|
|
||||||
|
|
||||||
|
FROM node:18-alpine AS ublock-build
|
||||||
|
WORKDIR /etc/linkding
|
||||||
|
# Install necessary tools
|
||||||
|
RUN apk add --no-cache curl jq unzip
|
||||||
|
# Fetch the latest release tag
|
||||||
|
# Download the library
|
||||||
|
# Unzip the library
|
||||||
|
RUN TAG=$(curl -sL https://api.github.com/repos/gorhill/uBlock/releases/latest | jq -r '.tag_name') && \
|
||||||
|
DOWNLOAD_URL=https://github.com/gorhill/uBlock/releases/download/$TAG/uBlock0_$TAG.chromium.zip && \
|
||||||
|
curl -L -o uBlock0.zip $DOWNLOAD_URL && \
|
||||||
|
unzip uBlock0.zip
|
||||||
|
# Patch assets.json to enable easylist-cookies by default
|
||||||
|
RUN curl -L -o ./uBlock0.chromium/assets/thirdparties/easylist/easylist-cookies.txt https://ublockorigin.github.io/uAssets/thirdparties/easylist-cookies.txt
|
||||||
|
RUN jq '."fanboy-cookiemonster" |= del(.off) | ."fanboy-cookiemonster".contentURL += ["assets/thirdparties/easylist/easylist-cookies.txt"]' ./uBlock0.chromium/assets/assets.json > temp.json && \
|
||||||
|
mv temp.json ./uBlock0.chromium/assets/assets.json
|
||||||
|
|
||||||
|
|
||||||
FROM linkding AS linkding-plus
|
FROM linkding AS linkding-plus
|
||||||
# install chromium
|
# install chromium
|
||||||
RUN apt-get update && apt-get -y install chromium
|
RUN apt-get update && apt-get -y install chromium
|
||||||
@@ -106,6 +124,8 @@ RUN apt-get install -y gnupg2 apt-transport-https ca-certificates && \
|
|||||||
echo "deb [signed-by=/usr/share/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
|
echo "deb [signed-by=/usr/share/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
|
||||||
apt-get update && apt-get install -y nodejs
|
apt-get update && apt-get install -y nodejs
|
||||||
# install single-file from fork for now, which contains several hotfixes
|
# install single-file from fork for now, which contains several hotfixes
|
||||||
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/f3730995a52f27d5041a1ad9e7528af4b6b4cf4b
|
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/4c54b3bc704cfb3e96cec2d24854caca3df0b3b6
|
||||||
|
# copy uBlock0
|
||||||
|
COPY --from=ublock-build /etc/linkding/uBlock0.chromium uBlock0.chromium/
|
||||||
# enable snapshot support
|
# enable snapshot support
|
||||||
ENV LD_ENABLE_SNAPSHOTS=True
|
ENV LD_ENABLE_SNAPSHOTS=True
|
||||||
|
13
scripts/setup-ublock.sh
Executable file
13
scripts/setup-ublock.sh
Executable file
@@ -0,0 +1,13 @@
|
|||||||
|
#!/bin/sh
# Download the latest uBlock Origin release for Chromium into ./uBlock0.chromium,
# enable the EasyList cookie list by default, and create the chromium-profile
# folder.
#
# Requires curl, jq and unzip. All paths are relative to the current working
# directory (presumably the project root -- confirm against how this is invoked).

# Stop at the first failing command or unset variable instead of carrying on
# with a partial or broken download.
set -eu

# Remove any previous copy of the extension. The name must match the folder
# that unzip creates below exactly (case-sensitive); otherwise a stale copy is
# left in place and unzip stops to ask whether existing files may be overwritten.
rm -rf uBlock0.chromium

# Resolve the tag of the latest release. A failed or rate-limited API call
# yields an empty string or "null", which must not end up in the download URL.
TAG=$(curl -fsSL https://api.github.com/repos/gorhill/uBlock/releases/latest | jq -r '.tag_name')
if [ -z "$TAG" ] || [ "$TAG" = "null" ]; then
  echo "Could not determine the latest uBlock release tag" >&2
  exit 1
fi

# Download and unpack the Chromium build of the extension.
# -f makes curl fail on HTTP errors instead of saving the error page as the zip.
DOWNLOAD_URL="https://github.com/gorhill/uBlock/releases/download/$TAG/uBlock0_$TAG.chromium.zip"
curl -fL -o uBlock0.zip "$DOWNLOAD_URL"
unzip uBlock0.zip
rm uBlock0.zip

# Bundle the cookie filter list with the extension and patch assets.json so the
# "fanboy-cookiemonster" entry is no longer switched off and also points at the
# bundled copy.
curl -fL -o ./uBlock0.chromium/assets/thirdparties/easylist/easylist-cookies.txt https://ublockorigin.github.io/uAssets/thirdparties/easylist-cookies.txt
jq '."fanboy-cookiemonster" |= del(.off) | ."fanboy-cookiemonster".contentURL += ["assets/thirdparties/easylist/easylist-cookies.txt"]' ./uBlock0.chromium/assets/assets.json > temp.json
mv temp.json ./uBlock0.chromium/assets/assets.json

# Create the chromium profile folder if it does not exist
mkdir -p chromium-profile
|
@@ -12,6 +12,7 @@ https://docs.djangoproject.com/en/2.2/ref/settings/
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import shlex
|
||||||
|
|
||||||
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
|
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
|
||||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
@@ -294,8 +295,19 @@ LD_ENABLE_SNAPSHOTS = os.getenv("LD_ENABLE_SNAPSHOTS", False) in (
|
|||||||
"1",
|
"1",
|
||||||
)
|
)
|
||||||
LD_SINGLEFILE_PATH = os.getenv("LD_SINGLEFILE_PATH", "single-file")
|
LD_SINGLEFILE_PATH = os.getenv("LD_SINGLEFILE_PATH", "single-file")
|
||||||
|
LD_SINGLEFILE_UBLOCK_OPTIONS = os.getenv(
|
||||||
|
"LD_SINGLEFILE_UBLOCK_OPTIONS",
|
||||||
|
shlex.join(
|
||||||
|
[
|
||||||
|
'--browser-arg="--headless=new"',
|
||||||
|
'--browser-arg="--user-data-dir=./chromium-profile"',
|
||||||
|
'--browser-arg="--no-sandbox"',
|
||||||
|
'--browser-arg="--load-extension=uBlock0.chromium"',
|
||||||
|
]
|
||||||
|
),
|
||||||
|
)
|
||||||
LD_SINGLEFILE_OPTIONS = os.getenv("LD_SINGLEFILE_OPTIONS", "")
|
LD_SINGLEFILE_OPTIONS = os.getenv("LD_SINGLEFILE_OPTIONS", "")
|
||||||
LD_SINGLEFILE_TIMEOUT_SEC = float(os.getenv("LD_SINGLEFILE_TIMEOUT_SEC", 60))
|
LD_SINGLEFILE_TIMEOUT_SEC = float(os.getenv("LD_SINGLEFILE_TIMEOUT_SEC", 120))
|
||||||
|
|
||||||
# Monolith isn't used at the moment, as the local snapshot implementation
|
# Monolith isn't used at the moment, as the local snapshot implementation
|
||||||
# switched to single-file after the prototype. Keeping this around in case
|
# switched to single-file after the prototype. Keeping this around in case
|
||||||
|
Reference in New Issue
Block a user