feat: split pdf

2025-09-26 01:19:33 +02:00 · 2025-03-26 05:43:59 +00:00
parent c0297a187d
commit e6f54a3f2b
11 changed files with 427 additions and 68 deletions
--- a/src/pages/tools/pdf/index.ts
+++ b/src/pages/tools/pdf/index.ts
@@ -0,0 +1,4 @@
+import { meta as splitPdfMeta } from './split-pdf/meta';
+import { DefinedTool } from '@tools/defineTool';
+
+/** PDF tool definitions exported by this module; currently only the split-pdf meta. */
+export const pdfTools: DefinedTool[] = [splitPdfMeta];
--- a/src/pages/tools/pdf/split-pdf/index.tsx
+++ b/src/pages/tools/pdf/split-pdf/index.tsx
@@ -0,0 +1,180 @@
+import { Box, Typography } from '@mui/material';
+import React, { useEffect, useRef, useState } from 'react';
+import ToolFileInput from '@components/input/ToolFileInput';
+import ToolFileResult from '@components/result/ToolFileResult';
+import TextFieldWithDesc from '@components/options/TextFieldWithDesc';
+import ToolContent from '@components/ToolContent';
+import { ToolComponentProps } from '@tools/defineTool';
+import { parsePageRanges, splitPdf } from './service';
+import { CardExampleType } from '@components/examples/ToolExamples';
+import { PDFDocument } from 'pdf-lib';
+import { FormikProps } from 'formik';
+
+/** Option values the user can edit for the split-PDF tool. */
+type InitialValuesType = {
+  /** Comma-separated page numbers and ranges, e.g. "1,5-8". */
+  pageRanges: string;
+};
+
+/** Starting option values: no page ranges entered. */
+const initialValues: InitialValuesType = {
+  pageRanges: ''
+};
+
+/**
+ * Example cards handed to ToolContent. Each card only supplies a `pageRanges`
+ * option; `sampleText` and `sampleResult` are left empty.
+ */
+const exampleCards: CardExampleType<InitialValuesType>[] = [
+  {
+    title: 'Extract Specific Pages',
+    description: 'Extract pages 1, 5, 6, 7, and 8 from a PDF document.',
+    sampleText: '',
+    sampleResult: '',
+    sampleOptions: {
+      pageRanges: '1,5-8'
+    }
+  },
+  {
+    // NOTE(review): '1,10' selects the last page only when the PDF has exactly
+    // 10 pages — confirm the description matches the intended sample document.
+    title: 'Extract First and Last Pages',
+    description: 'Extract only the first and last pages from a PDF document.',
+    sampleText: '',
+    sampleResult: '',
+    sampleOptions: {
+      pageRanges: '1,10'
+    }
+  },
+  {
+    title: 'Extract a Range of Pages',
+    description: 'Extract a continuous range of pages from a PDF document.',
+    sampleText: '',
+    sampleResult: '',
+    sampleOptions: {
+      pageRanges: '3-7'
+    }
+  }
+];
+
+/**
+ * Split PDF tool page.
+ *
+ * Lets the user pick a PDF, enter page numbers/ranges, and produces a new PDF
+ * containing only the selected pages via `splitPdf`. While a file is loaded it
+ * shows the document's page count and a live preview of how many pages the
+ * current range text selects.
+ *
+ * @param title Heading forwarded to ToolContent.
+ */
+export default function SplitPdf({ title }: ToolComponentProps) {
+  const [input, setInput] = useState<File | null>(null);
+  const [result, setResult] = useState<File | null>(null);
+  const [isProcessing, setIsProcessing] = useState<boolean>(false);
+  const [totalPages, setTotalPages] = useState<number>(0);
+  // Latest range text reported by the form. The preview is derived from this
+  // and `totalPages` on every render, so it also refreshes when a PDF is loaded
+  // or replaced after the ranges were typed.
+  const [pageRanges, setPageRanges] = useState<string>('');
+
+  // Get the total number of pages when a PDF is uploaded
+  useEffect(() => {
+    // Set by the cleanup so that a slow load of a previous file cannot
+    // overwrite the page count of the file that is selected now.
+    let cancelled = false;
+
+    const getPdfInfo = async () => {
+      if (!input) {
+        setTotalPages(0);
+        return;
+      }
+
+      try {
+        const arrayBuffer = await input.arrayBuffer();
+        const pdf = await PDFDocument.load(arrayBuffer);
+        if (!cancelled) {
+          setTotalPages(pdf.getPageCount());
+        }
+      } catch (error) {
+        console.error('Error getting PDF info:', error);
+        if (!cancelled) {
+          setTotalPages(0);
+        }
+      }
+    };
+
+    getPdfInfo();
+
+    return () => {
+      cancelled = true;
+    };
+  }, [input]);
+
+  // Preview text; empty when no PDF is loaded or no ranges were entered.
+  let pageRangePreview = '';
+  if (totalPages && pageRanges.trim()) {
+    try {
+      const count = parsePageRanges(pageRanges, totalPages).length;
+      pageRangePreview = `${count} page${count !== 1 ? 's' : ''} will be extracted`;
+    } catch {
+      pageRangePreview = '';
+    }
+  }
+
+  const onValuesChange = (values: InitialValuesType) => {
+    setPageRanges(values.pageRanges ?? '');
+  };
+
+  // Runs the extraction; the processing flag is always reset, even on failure.
+  const compute = async (values: InitialValuesType, input: File | null) => {
+    if (!input) return;
+
+    try {
+      setIsProcessing(true);
+      const splitResult = await splitPdf(input, values.pageRanges);
+      setResult(splitResult);
+    } catch (error) {
+      throw new Error('Error splitting PDF:' + error);
+    } finally {
+      setIsProcessing(false);
+    }
+  };
+
+  return (
+    <ToolContent
+      title={title}
+      input={input}
+      setInput={setInput}
+      initialValues={initialValues}
+      compute={compute}
+      exampleCards={exampleCards}
+      inputComponent={
+        <ToolFileInput
+          value={input}
+          onChange={setInput}
+          accept={['application/pdf']}
+          title={'Input PDF'}
+        />
+      }
+      resultComponent={
+        <ToolFileResult
+          title={'Output PDF with selected pages'}
+          value={result}
+          extension={'pdf'}
+          loading={isProcessing}
+          loadingText={'Extracting pages'}
+        />
+      }
+      getGroups={({ values, updateField }) => [
+        {
+          title: 'Page Selection',
+          component: (
+            <Box>
+              {totalPages > 0 && (
+                <Typography variant="body2" sx={{ mb: 1 }}>
+                  PDF has {totalPages} page{totalPages !== 1 ? 's' : ''}
+                </Typography>
+              )}
+              <TextFieldWithDesc
+                value={values.pageRanges}
+                onOwnChange={(val) => {
+                  updateField('pageRanges', val);
+                }}
+                description={
+                  'Enter page numbers or ranges separated by commas (e.g., 1,3,5-7)'
+                }
+                placeholder={'e.g., 1,5-8'}
+              />
+              {pageRangePreview && (
+                <Typography
+                  variant="body2"
+                  sx={{ mt: 1, color: 'primary.main' }}
+                >
+                  {pageRangePreview}
+                </Typography>
+              )}
+            </Box>
+          )
+        }
+      ]}
+      onValuesChange={onValuesChange}
+      toolInfo={{
+        title: 'How to Use the Split PDF Tool',
+        description: `This tool allows you to extract specific pages from a PDF document. You can specify individual page numbers (e.g., 1,3,5) or page ranges (e.g., 2-6) or a combination of both (e.g., 1,3-5,8).
+
+Leave the page ranges field empty to include all pages from the PDF.
+
+Examples:
+- "1,5,9" extracts pages 1, 5, and 9
+- "1-5" extracts pages 1 through 5
+- "1,3-5,8-10" extracts pages 1, 3, 4, 5, 8, 9, and 10`
+      }}
+    />
+  );
+}
--- a/src/pages/tools/pdf/split-pdf/meta.ts
+++ b/src/pages/tools/pdf/split-pdf/meta.ts
@@ -0,0 +1,13 @@
+import { defineTool } from '@tools/defineTool';
+import { lazy } from 'react';
+
+/**
+ * Tool definition for Split PDF, built with `defineTool('pdf', …)`.
+ * The page component is loaded lazily from `./index`; the tool is served at
+ * the `split-pdf` path segment.
+ */
+export const meta = defineTool('pdf', {
+  name: 'Split PDF',
+  shortDescription: 'Extract specific pages from a PDF file',
+  description:
+    'Extract specific pages from a PDF file using page numbers or ranges (e.g., 1,5-8)',
+  icon: 'mdi:file-pdf-box',
+  component: lazy(() => import('./index')),
+  keywords: ['pdf', 'split', 'extract', 'pages', 'range', 'document'],
+  path: 'split-pdf'
+});
--- a/src/pages/tools/pdf/split-pdf/service.test.ts
+++ b/src/pages/tools/pdf/split-pdf/service.test.ts
@@ -0,0 +1,43 @@
+import { parsePageRanges } from './service';
+
+// Unit tests for parsePageRanges, all against a 5-page document: empty input,
+// single pages, ranges, mixed input, whitespace, invalid and out-of-range
+// tokens, clamping of ranges, reversed ranges and de-duplication.
+describe('parsePageRanges', () => {
+  test('should return all pages when input is empty', () => {
+    expect(parsePageRanges('', 5)).toEqual([1, 2, 3, 4, 5]);
+  });
+
+  test('should parse single page numbers', () => {
+    expect(parsePageRanges('1,3,5', 5)).toEqual([1, 3, 5]);
+  });
+
+  test('should parse page ranges', () => {
+    expect(parsePageRanges('2-4', 5)).toEqual([2, 3, 4]);
+  });
+
+  test('should parse mixed page numbers and ranges', () => {
+    expect(parsePageRanges('1,3-5', 5)).toEqual([1, 3, 4, 5]);
+  });
+
+  test('should handle whitespace', () => {
+    expect(parsePageRanges(' 1, 3 - 5 ', 5)).toEqual([1, 3, 4, 5]);
+  });
+
+  test('should ignore invalid page numbers', () => {
+    expect(parsePageRanges('1,a,3', 5)).toEqual([1, 3]);
+  });
+
+  test('should ignore out-of-range page numbers', () => {
+    expect(parsePageRanges('1,6,3', 5)).toEqual([1, 3]);
+  });
+
+  // The whole range is clamped to 1..totalPages rather than dropped.
+  test('should limit ranges to valid pages', () => {
+    expect(parsePageRanges('0-6', 5)).toEqual([1, 2, 3, 4, 5]);
+  });
+
+  // A descending range is expected to yield the same pages as the ascending one.
+  test('should handle reversed ranges', () => {
+    expect(parsePageRanges('4-2', 5)).toEqual([2, 3, 4]);
+  });
+
+  test('should remove duplicates', () => {
+    expect(parsePageRanges('1,1,2,2-4,3', 5)).toEqual([1, 2, 3, 4]);
+  });
+});
--- a/src/pages/tools/pdf/split-pdf/service.ts
+++ b/src/pages/tools/pdf/split-pdf/service.ts
@@ -0,0 +1,66 @@
+import { PDFDocument } from 'pdf-lib';
+
+/**
+ * Parses a page range string and returns the selected 1-based page numbers,
+ * de-duplicated and sorted ascending.
+ *
+ * Accepted comma-separated tokens (surrounding whitespace is ignored):
+ * - a single page: `"3"`
+ * - a range in either order: `"3-5"` or `"5-3"`
+ * - an open-ended range: `"-4"` (first page to 4) or `"3-"` (3 to last page)
+ *
+ * Ranges are clamped to `1..totalPages`; single pages outside that interval
+ * and tokens that are not whole numbers (e.g. `"a"`, `"1.5-3"`, `"1-2-3"`)
+ * are ignored.
+ *
+ * @param pageRangeStr String like "1,3-5,7" to extract pages 1, 3, 4, 5, and 7.
+ *   An empty or whitespace-only string selects every page.
+ * @param totalPages Total number of pages in the PDF
+ * @returns Array of page numbers to extract
+ */
+export function parsePageRanges(
+  pageRangeStr: string,
+  totalPages: number
+): number[] {
+  if (!pageRangeStr.trim()) {
+    return Array.from({ length: totalPages }, (_, i) => i + 1);
+  }
+
+  // Only plain digit runs count as page numbers; Number()/parseInt() would
+  // also accept '', '1.5', '1e1' or '3abc'.
+  const singlePattern = /^\d+$/;
+  const rangePattern = /^(\d*)\s*-\s*(\d*)$/;
+
+  const pageNumbers = new Set<number>();
+
+  for (const rawToken of pageRangeStr.split(',')) {
+    const token = rawToken.trim();
+
+    if (singlePattern.test(token)) {
+      const pageNum = Number(token);
+      if (pageNum >= 1 && pageNum <= totalPages) {
+        pageNumbers.add(pageNum);
+      }
+      continue;
+    }
+
+    const match = rangePattern.exec(token);
+    if (!match) continue;
+
+    const startText = match[1] ?? '';
+    const endText = match[2] ?? '';
+    // A lone '-' carries no information.
+    if (startText === '' && endText === '') continue;
+
+    // A missing bound means "from the first page" / "to the last page".
+    const start = startText === '' ? 1 : Number(startText);
+    const end = endText === '' ? totalPages : Number(endText);
+
+    // Normalise reversed ranges, then clamp to the document.
+    const lower = Math.max(1, Math.min(start, end));
+    const upper = Math.min(totalPages, Math.max(start, end));
+    for (let page = lower; page <= upper; page++) {
+      pageNumbers.add(page);
+    }
+  }
+
+  return [...pageNumbers].sort((a, b) => a - b);
+}
+
+/**
+ * Splits a PDF file based on specified page ranges
+ *
+ * The output file is named after the input with its trailing `.pdf` extension
+ * (any letter case) replaced by `-extracted.pdf`; a name without that
+ * extension simply gets the suffix appended.
+ *
+ * @param pdfFile The input PDF file
+ * @param pageRanges String specifying which pages to extract (e.g., "1,3-5,7");
+ *   parsed by `parsePageRanges` against the document's page count
+ * @returns Promise resolving to a new PDF file with only the selected pages
+ * @throws Error when the page ranges select no page of the document
+ */
+export async function splitPdf(
+  pdfFile: File,
+  pageRanges: string
+): Promise<File> {
+  const arrayBuffer = await pdfFile.arrayBuffer();
+  const sourcePdf = await PDFDocument.load(arrayBuffer);
+  const totalPages = sourcePdf.getPageCount();
+  const pagesToExtract = parsePageRanges(pageRanges, totalPages);
+
+  // Fail loudly instead of saving a document without any of the source pages.
+  // NOTE(review): pdf-lib's save() adds a blank default page to an empty
+  // document unless `addDefaultPage` is disabled — confirm against its docs.
+  if (pagesToExtract.length === 0) {
+    throw new Error(
+      `No valid pages selected: the PDF has ${totalPages} page${
+        totalPages !== 1 ? 's' : ''
+      }`
+    );
+  }
+
+  const newPdf = await PDFDocument.create();
+  // copyPages expects zero-based indices, page numbers are one-based.
+  const copiedPages = await newPdf.copyPages(
+    sourcePdf,
+    pagesToExtract.map((pageNum) => pageNum - 1)
+  );
+  copiedPages.forEach((page) => newPdf.addPage(page));
+
+  const newPdfBytes = await newPdf.save();
+  // Strip only a trailing extension, case-insensitively, so names such as
+  // "REPORT.PDF" or "a.pdf.v2.pdf" are renamed correctly.
+  const baseName = pdfFile.name.replace(/\.pdf$/i, '');
+  const newFileName = `${baseName}-extracted.pdf`;
+  return new File([newPdfBytes], newFileName, { type: 'application/pdf' });
+}