feat: split pdf

This commit is contained in:
Ibrahima G. Coulibaly
2025-03-26 05:43:59 +00:00
parent c0297a187d
commit e6f54a3f2b
11 changed files with 427 additions and 68 deletions

View File

@@ -0,0 +1,4 @@
import { meta as splitPdfMeta } from './split-pdf/meta';
import { DefinedTool } from '@tools/defineTool';
// Registry of the tools in the PDF category; currently only the Split PDF tool.
export const pdfTools: DefinedTool[] = [splitPdfMeta];

View File

@@ -0,0 +1,180 @@
import { Box, Typography } from '@mui/material';
import React, { useEffect, useRef, useState } from 'react';
import ToolFileInput from '@components/input/ToolFileInput';
import ToolFileResult from '@components/result/ToolFileResult';
import TextFieldWithDesc from '@components/options/TextFieldWithDesc';
import ToolContent from '@components/ToolContent';
import { ToolComponentProps } from '@tools/defineTool';
import { parsePageRanges, splitPdf } from './service';
import { CardExampleType } from '@components/examples/ToolExamples';
import { PDFDocument } from 'pdf-lib';
import { FormikProps } from 'formik';
// Option values edited by the user for the Split PDF tool.
type InitialValuesType = {
// Raw page selection text as typed by the user, e.g. "1,5-8".
pageRanges: string;
};
// Starting option values: no page selection has been entered yet.
const initialValues: InitialValuesType = {
pageRanges: ''
};
/**
 * Example cards offered for this tool. Each row of the table below is
 * `[title, description, pageRanges]`; the sample text and result are left
 * empty because the tool operates on an uploaded file rather than on text.
 */
const exampleCards: CardExampleType<InitialValuesType>[] = (
  [
    [
      'Extract Specific Pages',
      'Extract pages 1, 5, 6, 7, and 8 from a PDF document.',
      '1,5-8'
    ],
    [
      'Extract First and Last Pages',
      'Extract only the first and last pages from a PDF document.',
      '1,10'
    ],
    [
      'Extract a Range of Pages',
      'Extract a continuous range of pages from a PDF document.',
      '3-7'
    ]
  ] as const
).map(([title, description, pageRanges]) => ({
  title,
  description,
  sampleText: '',
  sampleResult: '',
  sampleOptions: { pageRanges }
}));
/**
 * UI for the Split PDF tool.
 *
 * Holds the uploaded PDF, the extracted result, a processing flag, the page
 * count of the uploaded file and a short preview message, and wires them into
 * `ToolContent` together with the page-range option field.
 *
 * @param title Title forwarded to `ToolContent`.
 */
export default function SplitPdf({ title }: ToolComponentProps) {
  const [input, setInput] = useState<File | null>(null);
  const [result, setResult] = useState<File | null>(null);
  const [isProcessing, setIsProcessing] = useState<boolean>(false);
  const [totalPages, setTotalPages] = useState<number>(0);
  const [pageRangePreview, setPageRangePreview] = useState<string>('');

  // Get the total number of pages when a PDF is uploaded
  useEffect(() => {
    // Loading is asynchronous, so a load started for a previous file may
    // finish after a newer one (or after unmount). The cleanup below marks
    // that older run as stale so it cannot overwrite the current page count.
    let isStale = false;

    const getPdfInfo = async () => {
      if (!input) {
        setTotalPages(0);
        return;
      }
      try {
        const arrayBuffer = await input.arrayBuffer();
        const pdf = await PDFDocument.load(arrayBuffer);
        if (!isStale) {
          setTotalPages(pdf.getPageCount());
        }
      } catch (error) {
        console.error('Error getting PDF info:', error);
        if (!isStale) {
          setTotalPages(0);
        }
      }
    };

    // Errors are handled inside getPdfInfo; the promise is intentionally not awaited.
    void getPdfInfo();

    return () => {
      isStale = true;
    };
  }, [input]);

  // Refreshes the "N pages will be extracted" hint. The hint is cleared when
  // no PDF page count is known or the field is blank.
  const onValuesChange = (values: InitialValuesType) => {
    const { pageRanges } = values;
    if (!totalPages || !pageRanges?.trim()) {
      setPageRangePreview('');
      return;
    }
    try {
      const count = parsePageRanges(pageRanges, totalPages).length;
      setPageRangePreview(
        `${count} page${count !== 1 ? 's' : ''} will be extracted`
      );
    } catch (error) {
      setPageRangePreview('');
    }
  };

  // Runs the extraction for the current options and stores the output file.
  // The processing flag is always reset, whether the split succeeds or fails.
  const compute = async (values: InitialValuesType, input: File | null) => {
    if (!input) return;
    try {
      setIsProcessing(true);
      const splitResult = await splitPdf(input, values.pageRanges);
      setResult(splitResult);
    } catch (error) {
      throw new Error('Error splitting PDF:' + error);
    } finally {
      setIsProcessing(false);
    }
  };

  return (
    <ToolContent
      title={title}
      input={input}
      setInput={setInput}
      initialValues={initialValues}
      compute={compute}
      exampleCards={exampleCards}
      inputComponent={
        <ToolFileInput
          value={input}
          onChange={setInput}
          accept={['application/pdf']}
          title={'Input PDF'}
        />
      }
      resultComponent={
        <ToolFileResult
          title={'Output PDF with selected pages'}
          value={result}
          extension={'pdf'}
          loading={isProcessing}
          loadingText={'Extracting pages'}
        />
      }
      getGroups={({ values, updateField }) => [
        {
          title: 'Page Selection',
          component: (
            <Box>
              {totalPages > 0 && (
                <Typography variant="body2" sx={{ mb: 1 }}>
                  PDF has {totalPages} page{totalPages !== 1 ? 's' : ''}
                </Typography>
              )}
              <TextFieldWithDesc
                value={values.pageRanges}
                onOwnChange={(val) => {
                  updateField('pageRanges', val);
                }}
                description={
                  'Enter page numbers or ranges separated by commas (e.g., 1,3,5-7)'
                }
                placeholder={'e.g., 1,5-8'}
              />
              {pageRangePreview && (
                <Typography
                  variant="body2"
                  sx={{ mt: 1, color: 'primary.main' }}
                >
                  {pageRangePreview}
                </Typography>
              )}
            </Box>
          )
        }
      ]}
      onValuesChange={onValuesChange}
      toolInfo={{
        title: 'How to Use the Split PDF Tool',
        description: `This tool allows you to extract specific pages from a PDF document. You can specify individual page numbers (e.g., 1,3,5) or page ranges (e.g., 2-6) or a combination of both (e.g., 1,3-5,8).
Leave the page ranges field empty to include all pages from the PDF.
Examples:
- "1,5,9" extracts pages 1, 5, and 9
- "1-5" extracts pages 1 through 5
- "1,3-5,8-10" extracts pages 1, 3, 4, 5, 8, 9, and 10`
      }}
    />
  );
}

View File

@@ -0,0 +1,13 @@
import { defineTool } from '@tools/defineTool';
import { lazy } from 'react';
// Tool definition for "Split PDF", registered under the 'pdf' category.
export const meta = defineTool('pdf', {
name: 'Split PDF',
shortDescription: 'Extract specific pages from a PDF file',
description:
'Extract specific pages from a PDF file using page numbers or ranges (e.g., 1,5-8)',
icon: 'mdi:file-pdf-box',
// The component module is only imported when React first renders the lazy component.
component: lazy(() => import('./index')),
keywords: ['pdf', 'split', 'extract', 'pages', 'range', 'document'],
path: 'split-pdf'
});

View File

@@ -0,0 +1,43 @@
import { parsePageRanges } from './service';
describe('parsePageRanges', () => {
  // Every scenario feeds one range string and a page count to the parser and
  // compares the result with the expected, ascending list of page numbers.
  const scenarios: ReadonlyArray<{
    title: string;
    rangeText: string;
    pageCount: number;
    expected: number[];
  }> = [
    {
      title: 'should return all pages when input is empty',
      rangeText: '',
      pageCount: 5,
      expected: [1, 2, 3, 4, 5]
    },
    {
      title: 'should parse single page numbers',
      rangeText: '1,3,5',
      pageCount: 5,
      expected: [1, 3, 5]
    },
    {
      title: 'should parse page ranges',
      rangeText: '2-4',
      pageCount: 5,
      expected: [2, 3, 4]
    },
    {
      title: 'should parse mixed page numbers and ranges',
      rangeText: '1,3-5',
      pageCount: 5,
      expected: [1, 3, 4, 5]
    },
    {
      title: 'should handle whitespace',
      rangeText: ' 1, 3 - 5 ',
      pageCount: 5,
      expected: [1, 3, 4, 5]
    },
    {
      title: 'should ignore invalid page numbers',
      rangeText: '1,a,3',
      pageCount: 5,
      expected: [1, 3]
    },
    {
      title: 'should ignore out-of-range page numbers',
      rangeText: '1,6,3',
      pageCount: 5,
      expected: [1, 3]
    },
    {
      title: 'should limit ranges to valid pages',
      rangeText: '0-6',
      pageCount: 5,
      expected: [1, 2, 3, 4, 5]
    },
    {
      title: 'should handle reversed ranges',
      rangeText: '4-2',
      pageCount: 5,
      expected: [2, 3, 4]
    },
    {
      title: 'should remove duplicates',
      rangeText: '1,1,2,2-4,3',
      pageCount: 5,
      expected: [1, 2, 3, 4]
    }
  ];

  for (const { title, rangeText, pageCount, expected } of scenarios) {
    test(title, () => {
      expect(parsePageRanges(rangeText, pageCount)).toEqual(expected);
    });
  }
});

View File

@@ -0,0 +1,66 @@
import { PDFDocument } from 'pdf-lib';
/**
 * Parses a page selection string into a sorted list of unique, 1-based page
 * numbers that exist in the document.
 *
 * Rules:
 * - A blank string selects every page (1..totalPages).
 * - Tokens are separated by commas; surrounding whitespace is ignored.
 * - A token is either a single page ("7") or a range ("3-5", "3 - 5").
 * - Range endpoints may be given in either order ("5-3" equals "3-5").
 * - Ranges are clamped to 1..totalPages; single pages outside it are dropped.
 * - Tokens that are not whole non-negative numbers ("a", "1.5", "3-", "-5",
 *   "1-2-3") are ignored rather than partially interpreted.
 *
 * @param pageRangeStr String like "1,3-5,7" to extract pages 1, 3, 4, 5, and 7
 * @param totalPages Total number of pages in the PDF
 * @returns Ascending array of distinct page numbers to extract
 */
export function parsePageRanges(
  pageRangeStr: string,
  totalPages: number
): number[] {
  if (!pageRangeStr.trim()) {
    return Array.from({ length: totalPages }, (_, i) => i + 1);
  }

  // One unsigned integer, optionally followed by "-" and a second one.
  const tokenPattern = /^(\d+)(?:\s*-\s*(\d+))?$/;
  const pageNumbers = new Set<number>();

  for (const rawToken of pageRangeStr.split(',')) {
    const match = tokenPattern.exec(rawToken.trim());
    if (!match) {
      continue; // malformed token: skip it instead of guessing
    }

    const first = Number(match[1]);
    // A single page is handled as the one-element range "n-n".
    const second = match[2] === undefined ? first : Number(match[2]);

    // Normalise the order so reversed ranges work, then clamp to the document.
    const lower = Math.max(1, Math.min(first, second));
    const upper = Math.min(totalPages, Math.max(first, second));

    for (let page = lower; page <= upper; page++) {
      pageNumbers.add(page);
    }
  }

  return [...pageNumbers].sort((a, b) => a - b);
}
/**
 * Derives the output file name by swapping a trailing ".pdf" extension
 * (any letter case) for "-extracted.pdf", or appending it when absent.
 */
function buildExtractedFileName(originalName: string): string {
  const suffix = '-extracted.pdf';
  const pdfExtension = /\.pdf$/i;
  return pdfExtension.test(originalName)
    ? originalName.replace(pdfExtension, suffix)
    : `${originalName}${suffix}`;
}

/**
 * Splits a PDF file based on specified page ranges
 *
 * The selection is resolved with `parsePageRanges` against the page count of
 * the loaded document, so the copied pages are in ascending order without
 * duplicates, and a blank selection copies every page.
 *
 * @param pdfFile The input PDF file
 * @param pageRanges String specifying which pages to extract (e.g., "1,3-5,7")
 * @returns Promise resolving to a new PDF file with only the selected pages,
 *   named after the input with "-extracted.pdf" in place of its extension
 */
export async function splitPdf(
  pdfFile: File,
  pageRanges: string
): Promise<File> {
  const sourceBytes = await pdfFile.arrayBuffer();
  const sourcePdf = await PDFDocument.load(sourceBytes);
  const pagesToExtract = parsePageRanges(pageRanges, sourcePdf.getPageCount());

  // NOTE(review): when no token matches a real page, nothing is copied and
  // the saved document holds no source pages — confirm whether that should
  // be reported to the user instead.
  const newPdf = await PDFDocument.create();
  const copiedPages = await newPdf.copyPages(
    sourcePdf,
    // Page numbers are 1-based; copyPages is given 0-based indices.
    pagesToExtract.map((pageNum) => pageNum - 1)
  );
  for (const page of copiedPages) {
    newPdf.addPage(page);
  }

  const newPdfBytes = await newPdf.save();
  return new File([newPdfBytes], buildExtractedFileName(pdfFile.name), {
    type: 'application/pdf'
  });
}