feat: add PDF to EPUB conversion tool

- Updated package.json to include new dependencies for PDF to EPUB conversion.
- Implemented PdfToEpub component for converting PDF files to EPUB format.
- Added meta information for the new PDF to EPUB tool.
- Created service to handle PDF processing and EPUB file generation.
- Integrated the new tool into the existing PDF tools list.
This commit is contained in:
ARRY7686
2025-05-28 22:11:33 +05:30
parent 7412d19cd1
commit 15bd83075f
6 changed files with 1005 additions and 33 deletions

794
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -41,11 +41,16 @@
"@types/morsee": "^1.0.2",
"@types/omggif": "^1.0.5",
"browser-image-compression": "^2.0.2",
"buffer": "^6.0.3",
"color": "^4.2.3",
"dayjs": "^1.11.13",
"epub-gen-memory": "^1.1.2",
"epubconventer": "^1.0.2",
"file-saver": "^2.0.5",
"formik": "^2.4.6",
"jimp": "^0.22.12",
"js-quantities": "^1.8.0",
"jszip": "^3.10.1",
"lint-staged": "^15.4.3",
"lodash": "^4.17.21",
"mime": "^4.0.6",
@@ -53,17 +58,22 @@
"nerdamer-prime": "^1.2.4",
"notistack": "^3.0.1",
"omggif": "^1.0.10",
"path-browserify": "^1.0.1",
"pdf-lib": "^1.17.1",
"pdfjs-dist": "^5.2.133",
"playwright": "^1.45.0",
"process": "^0.11.10",
"rc-slider": "^11.1.8",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-helmet": "^6.1.0",
"react-image-crop": "^11.0.7",
"react-router-dom": "^6.23.1",
"stream-browserify": "^3.0.0",
"tesseract.js": "^6.0.0",
"type-fest": "^4.35.0",
"use-deep-compare-effect": "^1.8.1",
"util": "^0.12.5",
"yup": "^1.4.0"
},
"devDependencies": {
@@ -77,6 +87,7 @@
"@types/node": "^20.12.12",
"@types/react": "^18.3.3",
"@types/react-dom": "^18.3.0",
"@types/jszip": "^3.10.1",
"@types/react-helmet": "^6.1.11",
"@typescript-eslint/eslint-plugin": "^6.21.0",
"@typescript-eslint/parser": "^6.21.0",

View File

@@ -4,11 +4,13 @@ import { meta as mergePdf } from './merge-pdf/meta';
import { DefinedTool } from '@tools/defineTool';
import { tool as compressPdfTool } from './compress-pdf/meta';
import { tool as protectPdfTool } from './protect-pdf/meta';
import { meta as pdfToEpub } from './pdf-to-epub/meta';
/**
 * Tool definitions exported by this module, in the order they are listed.
 *
 * Each entry is a `DefinedTool` coming from the corresponding tool's `meta`
 * module; `pdfToEpub` is appended after `mergePdf`.
 */
export const pdfTools: DefinedTool[] = [
  splitPdfMeta,
  pdfRotatePdf,
  compressPdfTool,
  protectPdfTool,
  mergePdf,
  pdfToEpub
];

View File

@@ -0,0 +1,67 @@
import { useEffect, useRef, useState } from 'react';
import ToolFileResult from '@components/result/ToolFileResult';
import ToolContent from '@components/ToolContent';
import { ToolComponentProps } from '@tools/defineTool';
import ToolPdfInput from '@components/input/ToolPdfInput';
import { convertPdfToEpub } from './service';
/**
 * Tool page that converts an uploaded PDF into an EPUB file.
 *
 * The conversion starts automatically whenever a new input file is selected.
 * Only the most recently started conversion is allowed to publish its result,
 * so picking a second file while the first is still converting can never show
 * the first file's output. Removing the input clears the previous result.
 *
 * @param title - Title forwarded to `ToolContent`.
 */
export default function PdfToEpub({ title }: ToolComponentProps) {
  const [input, setInput] = useState<File | null>(null);
  const [result, setResult] = useState<File | null>(null);
  const [isProcessing, setIsProcessing] = useState<boolean>(false);
  // Id of the latest conversion that was started (or invalidated). A run may
  // only touch `result` / `isProcessing` while its own id is still the latest.
  const latestRunRef = useRef(0);

  const compute = async (files: File[]) => {
    const pdf = files?.[0];
    if (!pdf) return;

    const runId = ++latestRunRef.current;
    try {
      setIsProcessing(true);
      setResult(null); // Clear previous result
      const epub = await convertPdfToEpub(pdf);
      if (runId === latestRunRef.current) {
        setResult(epub);
      }
    } catch (error) {
      console.error('Failed to convert PDF to EPUB:', error);
      // Handle error appropriately - maybe set an error state
    } finally {
      // A newer run owns the loading flag; do not switch it off on its behalf.
      if (runId === latestRunRef.current) {
        setIsProcessing(false);
      }
    }
  };

  // Auto-trigger conversion when a file is uploaded; reset when it is removed.
  useEffect(() => {
    if (input) {
      void compute([input]);
      return;
    }
    // Input removed: invalidate any in-flight run and drop the stale output.
    latestRunRef.current += 1;
    setResult(null);
    setIsProcessing(false);
  }, [input]);

  return (
    <ToolContent
      title={title}
      input={input}
      setInput={setInput}
      initialValues={input ? [input] : []}
      compute={compute}
      inputComponent={
        <ToolPdfInput
          value={input}
          onChange={(file) => setInput(file)}
          accept={['application/pdf']}
          title={'Input PDF'}
        />
      }
      getGroups={null}
      resultComponent={
        <ToolFileResult
          title={'EPUB Output'}
          value={result}
          extension={'epub'}
          loading={isProcessing}
          loadingText={'Converting PDF to EPUB...'}
        />
      }
      toolInfo={{
        title: 'How to Use PDF to EPUB?',
        description: `Upload a PDF file and this tool will convert it into an EPUB format, suitable for most e-reader devices.`
      }}
    />
  );
}

View File

@@ -0,0 +1,13 @@
import { defineTool } from '@tools/defineTool';
import { lazy } from 'react';
/**
 * Tool definition for the PDF to EPUB converter, registered under the `pdf`
 * category via `defineTool`.
 */
export const meta = defineTool('pdf', {
  path: 'pdf-to-epub',
  name: 'PDF to EPUB',
  icon: 'material-symbols:import-contacts',
  shortDescription: 'Convert PDF files to EPUB format',
  description:
    'Transform PDF documents into EPUB files for better e-reader compatibility.',
  keywords: ['pdf', 'epub', 'convert', 'ebook'],
  // The component module is imported on demand through React.lazy.
  component: lazy(() => import('./index'))
});

View File

@@ -0,0 +1,149 @@
import * as pdfjsLib from 'pdfjs-dist';
import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.min?url';
import JSZip from 'jszip';
// Set worker source for PDF.js: `pdfjsWorker` is the default export of the
// `pdfjs-dist/build/pdf.worker.min?url` import above (presumably the
// bundler-resolved URL of the worker script — confirm against the build setup).
// This runs once, as a side effect of importing this module.
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsWorker;
/**
 * Converts raw extracted text into a sequence of XHTML `<p>` elements.
 *
 * - Line endings are normalised (`\r\n` and lone `\r` become `\n`) so that
 *   paragraph breaks are detected regardless of the source's newline style.
 * - Two or more consecutive line breaks separate paragraphs; single line
 *   breaks inside a paragraph are replaced by a space.
 * - Characters that are not allowed in XML 1.0 documents are removed, and
 *   `&`, `<`, `>` are escaped so the result is always well-formed markup.
 *
 * @param raw - Plain text, e.g. the text content of one PDF page.
 * @returns The paragraphs joined by `\n`, or an empty string when `raw`
 *   contains no non-whitespace text.
 */
function formatTextToParagraphs(raw: string): string {
  // Escape the characters that would otherwise be parsed as markup.
  const escapeXml = (text: string): string =>
    text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

  return raw
    // Control characters (other than tab, LF, CR) are illegal in XML 1.0.
    .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\uFFFE\uFFFF]/g, '')
    .replace(/\r\n?/g, '\n')
    .split(/\n{2,}/) // Split on blank lines
    .map((paragraph) => paragraph.trim())
    .filter((paragraph) => paragraph.length > 0)
    .map((paragraph) => `<p>${escapeXml(paragraph.replace(/\n/g, ' '))}</p>`)
    .join('\n');
}
/**
 * Converts a PDF file into an EPUB 2 book containing one chapter per page.
 *
 * The text of every page is extracted with PDF.js, turned into paragraphs by
 * `formatTextToParagraphs`, and packaged with JSZip together with the
 * `mimetype`, `META-INF/container.xml`, `OEBPS/content.opf` and
 * `OEBPS/toc.ncx` files. Only text is carried over; images and layout are not.
 *
 * @param pdfFile - The PDF to convert. Its name (without a trailing `.pdf`)
 *   becomes the book title and the base name of the returned file.
 * @returns A `File` of type `application/epub+zip` named `<title>.epub`.
 * @throws If PDF.js cannot load or read the document, or if the archive
 *   folders cannot be created.
 */
export async function convertPdfToEpub(pdfFile: File): Promise<File> {
  // The title is inserted into XML documents, so markup characters coming
  // from the file name must be escaped to keep those documents well-formed.
  const escapeXml = (text: string): string =>
    text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;');

  const arrayBuffer = await pdfFile.arrayBuffer();

  // Load the PDF and extract the text of every page.
  const loadingTask = pdfjsLib.getDocument({ data: arrayBuffer });
  const pages: string[] = [];
  try {
    const pdfDoc = await loadingTask.promise;
    for (let pageNumber = 1; pageNumber <= pdfDoc.numPages; pageNumber++) {
      const page = await pdfDoc.getPage(pageNumber);
      const textContent = await page.getTextContent();
      // Marked-content items carry no `str`; they contribute an empty line.
      const pageText = textContent.items
        .map((item) => ('str' in item ? item.str : ''))
        .join('\n'); // Preserve line breaks better
      pages.push(pageText);
    }
  } finally {
    // Release the document and its worker whether or not extraction succeeded.
    await loadingTask.destroy();
  }

  const bookTitle = pdfFile.name.replace(/\.pdf$/i, '');
  const xmlTitle = escapeXml(bookTitle);
  // content.opf and toc.ncx must carry the same identifier, so compute it once.
  const bookId = String(Date.now());
  const chapterNumbers = pages.map((_, index) => index + 1);

  // Create EPUB structure using JSZip. `mimetype` goes first and uncompressed.
  const zip = new JSZip();
  zip.file('mimetype', 'application/epub+zip', { compression: 'STORE' });

  const metaInf = zip.folder('META-INF');
  if (!metaInf) {
    throw new Error('Failed to create META-INF folder in EPUB archive');
  }
  metaInf.file(
    'container.xml',
    `<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>`
  );

  const oebps = zip.folder('OEBPS');
  if (!oebps) {
    throw new Error('Failed to create OEBPS folder in EPUB archive');
  }

  const manifestItems = chapterNumbers
    .map(
      (n) =>
        `<item id="chapter${n}" href="chapter${n}.xhtml" media-type="application/xhtml+xml"/>`
    )
    .join('\n ');
  const spineItems = chapterNumbers
    .map((n) => `<itemref idref="chapter${n}"/>`)
    .join('\n ');

  const contentOpf = `<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="bookid" version="2.0">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:title>${xmlTitle}</dc:title>
<dc:creator>Converted by omni-tools</dc:creator>
<dc:identifier id="bookid">${bookId}</dc:identifier>
<dc:language>en</dc:language>
</metadata>
<manifest>
<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
${manifestItems}
</manifest>
<spine toc="ncx">
${spineItems}
</spine>
</package>`;
  oebps.file('content.opf', contentOpf);

  const navPoints = chapterNumbers
    .map(
      (n) =>
        `<navPoint id="navpoint-${n}" playOrder="${n}">
<navLabel>
<text>Page ${n}</text>
</navLabel>
<content src="chapter${n}.xhtml"/>
</navPoint>`
    )
    .join('\n ');

  const tocNcx = `<?xml version="1.0" encoding="UTF-8"?>
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
<head>
<meta name="dtb:uid" content="${bookId}"/>
<meta name="dtb:depth" content="1"/>
<meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
</head>
<docTitle>
<text>${xmlTitle}</text>
</docTitle>
<navMap>
${navPoints}
</navMap>
</ncx>`;
  oebps.file('toc.ncx', tocNcx);

  pages.forEach((pageText, index) => {
    const chapterNumber = index + 1;
    const formattedBody = formatTextToParagraphs(pageText);
    const chapterXhtml = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Page ${chapterNumber}</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<style>
body {
font-family: serif;
line-height: 1.6;
margin: 1em;
}
p {
margin-bottom: 1em;
text-align: justify;
}
</style>
</head>
<body>
<h1>Page ${chapterNumber}</h1>
${formattedBody}
</body>
</html>`;
    oebps.file(`chapter${chapterNumber}.xhtml`, chapterXhtml);
  });

  const epubBuffer = await zip.generateAsync({ type: 'arraybuffer' });
  // Build the name from the stripped title so the result always ends in
  // `.epub`, even when the input name had no `.pdf` suffix.
  return new File([epubBuffer], `${bookTitle}.epub`, {
    type: 'application/epub+zip'
  });
}