mirror of
				https://github.com/sissbruecker/linkding.git
				synced 2025-11-03 20:44:05 +01:00 
			
		
		
		
	Remove ads and cookie banners from HTML snapshots (#695)
* integrate ublock with single-file
* reuse chromium profile
This commit is contained in:
		
							
								
								
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -191,3 +191,6 @@ typings/
 | 
			
		||||
/tmp
 | 
			
		||||
# Database file
 | 
			
		||||
/data
 | 
			
		||||
# ublock + chromium
 | 
			
		||||
/uBlock0.chromium
 | 
			
		||||
/chromium-profile
 | 
			
		||||
 
 | 
			
		||||
@@ -18,11 +18,12 @@ logger = logging.getLogger(__name__)
 | 
			
		||||
 | 
			
		||||
def create_snapshot(url: str, filepath: str):
 | 
			
		||||
    singlefile_path = settings.LD_SINGLEFILE_PATH
 | 
			
		||||
    # parse string to list of arguments
 | 
			
		||||
    singlefile_options = shlex.split(settings.LD_SINGLEFILE_OPTIONS)
 | 
			
		||||
    # parse options to list of arguments
 | 
			
		||||
    ublock_options = shlex.split(settings.LD_SINGLEFILE_UBLOCK_OPTIONS)
 | 
			
		||||
    custom_options = shlex.split(settings.LD_SINGLEFILE_OPTIONS)
 | 
			
		||||
    temp_filepath = filepath + ".tmp"
 | 
			
		||||
    # concat lists
 | 
			
		||||
    args = [singlefile_path] + singlefile_options + [url, temp_filepath]
 | 
			
		||||
    args = [singlefile_path] + ublock_options + custom_options + [url, temp_filepath]
 | 
			
		||||
    try:
 | 
			
		||||
        # Use start_new_session=True to create a new process group
 | 
			
		||||
        process = subprocess.Popen(args, start_new_session=True)
 | 
			
		||||
 
 | 
			
		||||
@@ -61,6 +61,10 @@ class SingleFileServiceTestCase(TestCase):
 | 
			
		||||
 | 
			
		||||
            expected_args = [
 | 
			
		||||
                "single-file",
 | 
			
		||||
                '--browser-arg="--headless=new"',
 | 
			
		||||
                '--browser-arg="--user-data-dir=./chromium-profile"',
 | 
			
		||||
                '--browser-arg="--no-sandbox"',
 | 
			
		||||
                '--browser-arg="--load-extension=uBlock0.chromium"',
 | 
			
		||||
                "http://example.com",
 | 
			
		||||
                self.html_filepath + ".tmp",
 | 
			
		||||
            ]
 | 
			
		||||
@@ -79,6 +83,10 @@ class SingleFileServiceTestCase(TestCase):
 | 
			
		||||
 | 
			
		||||
            expected_args = [
 | 
			
		||||
                "single-file",
 | 
			
		||||
                '--browser-arg="--headless=new"',
 | 
			
		||||
                '--browser-arg="--user-data-dir=./chromium-profile"',
 | 
			
		||||
                '--browser-arg="--no-sandbox"',
 | 
			
		||||
                '--browser-arg="--load-extension=uBlock0.chromium"',
 | 
			
		||||
                "--some-option",
 | 
			
		||||
                "some value",
 | 
			
		||||
                "--another-option",
 | 
			
		||||
@@ -97,9 +105,9 @@ class SingleFileServiceTestCase(TestCase):
 | 
			
		||||
        with mock.patch("subprocess.Popen", return_value=mock_process):
 | 
			
		||||
            singlefile.create_snapshot("http://example.com", self.html_filepath)
 | 
			
		||||
 | 
			
		||||
            mock_process.wait.assert_called_with(timeout=60)
 | 
			
		||||
            mock_process.wait.assert_called_with(timeout=120)
 | 
			
		||||
 | 
			
		||||
    @override_settings(LD_SINGLEFILE_TIMEOUT_SEC=120)
 | 
			
		||||
    @override_settings(LD_SINGLEFILE_TIMEOUT_SEC=180)
 | 
			
		||||
    def test_create_snapshot_custom_timeout_setting(self):
 | 
			
		||||
        mock_process = mock.Mock()
 | 
			
		||||
        mock_process.wait.return_value = 0
 | 
			
		||||
@@ -108,4 +116,4 @@ class SingleFileServiceTestCase(TestCase):
 | 
			
		||||
        with mock.patch("subprocess.Popen", return_value=mock_process):
 | 
			
		||||
            singlefile.create_snapshot("http://example.com", self.html_filepath)
 | 
			
		||||
 | 
			
		||||
            mock_process.wait.assert_called_with(timeout=120)
 | 
			
		||||
            mock_process.wait.assert_called_with(timeout=180)
 | 
			
		||||
 
 | 
			
		||||
@@ -9,6 +9,8 @@ mkdir -p data
 | 
			
		||||
mkdir -p data/favicons
 | 
			
		||||
# Create assets folder if it does not exist
 | 
			
		||||
mkdir -p data/assets
 | 
			
		||||
# Create chromium profile folder if it does not exist
 | 
			
		||||
mkdir -p chromium-profile
 | 
			
		||||
 | 
			
		||||
# Generate secret key file if it does not exist
 | 
			
		||||
python manage.py generate_secret_key
 | 
			
		||||
@@ -21,8 +23,9 @@ python manage.py create_initial_superuser
 | 
			
		||||
# Migrate legacy background tasks to Huey
 | 
			
		||||
python manage.py migrate_tasks
 | 
			
		||||
 | 
			
		||||
# Ensure the DB folder is owned by the right user
 | 
			
		||||
# Ensure folders are owned by the right user
 | 
			
		||||
chown -R www-data: /etc/linkding/data
 | 
			
		||||
chown -R www-data: /etc/linkding/chromium-profile
 | 
			
		||||
 | 
			
		||||
# Start background task processor using supervisord, unless explicitly disabled
 | 
			
		||||
if [ "$LD_DISABLE_BACKGROUND_TASKS" != "True" ]; then
 | 
			
		||||
 
 | 
			
		||||
@@ -99,10 +99,29 @@ CMD curl -f http://localhost:${LD_SERVER_PORT:-9090}/${LD_CONTEXT_PATH}health ||
 | 
			
		||||
CMD ["./bootstrap.sh"]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
FROM node:18-alpine AS ublock-build
 | 
			
		||||
WORKDIR /etc/linkding
 | 
			
		||||
# Install necessary tools
 | 
			
		||||
RUN apk add --no-cache curl jq unzip
 | 
			
		||||
# Fetch the latest release tag
 | 
			
		||||
# Download the library
 | 
			
		||||
# Unzip the library
 | 
			
		||||
RUN TAG=$(curl -sL https://api.github.com/repos/gorhill/uBlock/releases/latest | jq -r '.tag_name') && \
 | 
			
		||||
    DOWNLOAD_URL=https://github.com/gorhill/uBlock/releases/download/$TAG/uBlock0_$TAG.chromium.zip && \
 | 
			
		||||
    curl -L -o uBlock0.zip $DOWNLOAD_URL && \
 | 
			
		||||
    unzip uBlock0.zip
 | 
			
		||||
# Patch assets.json to enable easylist-cookies by default
 | 
			
		||||
RUN curl -L -o ./uBlock0.chromium/assets/thirdparties/easylist/easylist-cookies.txt https://ublockorigin.github.io/uAssets/thirdparties/easylist-cookies.txt
 | 
			
		||||
RUN jq '."fanboy-cookiemonster" |= del(.off) | ."fanboy-cookiemonster".contentURL += ["assets/thirdparties/easylist/easylist-cookies.txt"]' ./uBlock0.chromium/assets/assets.json > temp.json &&  \
 | 
			
		||||
    mv temp.json ./uBlock0.chromium/assets/assets.json
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
FROM linkding AS linkding-plus
 | 
			
		||||
# install node, chromium
 | 
			
		||||
RUN apk update && apk add nodejs npm chromium
 | 
			
		||||
# install single-file from fork for now, which contains several hotfixes
 | 
			
		||||
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/f3730995a52f27d5041a1ad9e7528af4b6b4cf4b
 | 
			
		||||
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/4c54b3bc704cfb3e96cec2d24854caca3df0b3b6
 | 
			
		||||
# copy uBlock0
 | 
			
		||||
COPY --from=ublock-build /etc/linkding/uBlock0.chromium uBlock0.chromium/
 | 
			
		||||
# enable snapshot support
 | 
			
		||||
ENV LD_ENABLE_SNAPSHOTS=True
 | 
			
		||||
 
 | 
			
		||||
@@ -96,6 +96,24 @@ CMD curl -f http://localhost:${LD_SERVER_PORT:-9090}/${LD_CONTEXT_PATH}health ||
 | 
			
		||||
 | 
			
		||||
CMD ["./bootstrap.sh"]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
FROM node:18-alpine AS ublock-build
 | 
			
		||||
WORKDIR /etc/linkding
 | 
			
		||||
# Install necessary tools
 | 
			
		||||
RUN apk add --no-cache curl jq unzip
 | 
			
		||||
# Fetch the latest release tag
 | 
			
		||||
# Download the library
 | 
			
		||||
# Unzip the library
 | 
			
		||||
RUN TAG=$(curl -sL https://api.github.com/repos/gorhill/uBlock/releases/latest | jq -r '.tag_name') && \
 | 
			
		||||
    DOWNLOAD_URL=https://github.com/gorhill/uBlock/releases/download/$TAG/uBlock0_$TAG.chromium.zip && \
 | 
			
		||||
    curl -L -o uBlock0.zip $DOWNLOAD_URL && \
 | 
			
		||||
    unzip uBlock0.zip
 | 
			
		||||
# Patch assets.json to enable easylist-cookies by default
 | 
			
		||||
RUN curl -L -o ./uBlock0.chromium/assets/thirdparties/easylist/easylist-cookies.txt https://ublockorigin.github.io/uAssets/thirdparties/easylist-cookies.txt
 | 
			
		||||
RUN jq '."fanboy-cookiemonster" |= del(.off) | ."fanboy-cookiemonster".contentURL += ["assets/thirdparties/easylist/easylist-cookies.txt"]' ./uBlock0.chromium/assets/assets.json > temp.json &&  \
 | 
			
		||||
    mv temp.json ./uBlock0.chromium/assets/assets.json
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
FROM linkding AS linkding-plus
 | 
			
		||||
# install chromium
 | 
			
		||||
RUN apt-get update && apt-get -y install chromium
 | 
			
		||||
@@ -106,6 +124,8 @@ RUN apt-get install -y gnupg2 apt-transport-https ca-certificates && \
 | 
			
		||||
    echo "deb [signed-by=/usr/share/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
 | 
			
		||||
    apt-get update && apt-get install -y nodejs
 | 
			
		||||
# install single-file from fork for now, which contains several hotfixes
 | 
			
		||||
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/f3730995a52f27d5041a1ad9e7528af4b6b4cf4b
 | 
			
		||||
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/4c54b3bc704cfb3e96cec2d24854caca3df0b3b6
 | 
			
		||||
# copy uBlock0
 | 
			
		||||
COPY --from=ublock-build /etc/linkding/uBlock0.chromium uBlock0.chromium/
 | 
			
		||||
# enable snapshot support
 | 
			
		||||
ENV LD_ENABLE_SNAPSHOTS=True
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										13
									
								
								scripts/setup-ublock.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										13
									
								
								scripts/setup-ublock.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,13 @@
 | 
			
		||||
rm -rf ublock0.chromium
 | 
			
		||||
 | 
			
		||||
TAG=$(curl -sL https://api.github.com/repos/gorhill/uBlock/releases/latest | jq -r '.tag_name')
 | 
			
		||||
DOWNLOAD_URL=https://github.com/gorhill/uBlock/releases/download/$TAG/uBlock0_$TAG.chromium.zip
 | 
			
		||||
curl -L -o uBlock0.zip $DOWNLOAD_URL
 | 
			
		||||
unzip uBlock0.zip
 | 
			
		||||
rm uBlock0.zip
 | 
			
		||||
 | 
			
		||||
curl -L -o ./uBlock0.chromium/assets/thirdparties/easylist/easylist-cookies.txt https://ublockorigin.github.io/uAssets/thirdparties/easylist-cookies.txt
 | 
			
		||||
jq '."fanboy-cookiemonster" |= del(.off) | ."fanboy-cookiemonster".contentURL += ["assets/thirdparties/easylist/easylist-cookies.txt"]' ./uBlock0.chromium/assets/assets.json > temp.json
 | 
			
		||||
mv temp.json ./uBlock0.chromium/assets/assets.json
 | 
			
		||||
 | 
			
		||||
mkdir -p chromium-profile
 | 
			
		||||
@@ -12,6 +12,7 @@ https://docs.djangoproject.com/en/2.2/ref/settings/
 | 
			
		||||
 | 
			
		||||
import json
 | 
			
		||||
import os
 | 
			
		||||
import shlex
 | 
			
		||||
 | 
			
		||||
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
 | 
			
		||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
			
		||||
@@ -294,8 +295,19 @@ LD_ENABLE_SNAPSHOTS = os.getenv("LD_ENABLE_SNAPSHOTS", False) in (
 | 
			
		||||
    "1",
 | 
			
		||||
)
 | 
			
		||||
LD_SINGLEFILE_PATH = os.getenv("LD_SINGLEFILE_PATH", "single-file")
 | 
			
		||||
LD_SINGLEFILE_UBLOCK_OPTIONS = os.getenv(
 | 
			
		||||
    "LD_SINGLEFILE_UBLOCK_OPTIONS",
 | 
			
		||||
    shlex.join(
 | 
			
		||||
        [
 | 
			
		||||
            '--browser-arg="--headless=new"',
 | 
			
		||||
            '--browser-arg="--user-data-dir=./chromium-profile"',
 | 
			
		||||
            '--browser-arg="--no-sandbox"',
 | 
			
		||||
            '--browser-arg="--load-extension=uBlock0.chromium"',
 | 
			
		||||
        ]
 | 
			
		||||
    ),
 | 
			
		||||
)
 | 
			
		||||
LD_SINGLEFILE_OPTIONS = os.getenv("LD_SINGLEFILE_OPTIONS", "")
 | 
			
		||||
LD_SINGLEFILE_TIMEOUT_SEC = float(os.getenv("LD_SINGLEFILE_TIMEOUT_SEC", 60))
 | 
			
		||||
LD_SINGLEFILE_TIMEOUT_SEC = float(os.getenv("LD_SINGLEFILE_TIMEOUT_SEC", 120))
 | 
			
		||||
 | 
			
		||||
# Monolith isn't used at the moment, as the local snapshot implementation
 | 
			
		||||
# switched to single-file after the prototype. Keeping this around in case
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user