From 3ab760b5bb6a7d0a30745fb060a6407bde41487e Mon Sep 17 00:00:00 2001 From: Jerome Wu Date: Tue, 3 Dec 2019 22:06:44 +0800 Subject: [PATCH] Use IDBFS and NODEFS to process big file --- examples/browser/image2video.html | 7 +- examples/browser/run.html | 2 +- examples/browser/transcode.html | 5 +- examples/node/image2video.js | 7 +- examples/node/run.js | 2 +- package-lock.json | 5 + package.json | 3 +- src/createWorker.js | 109 ++++++++++-------- src/utils/extractProgress.js | 24 ++-- src/worker-script/browser/index.js | 2 + src/worker-script/index.js | 60 +++------- src/worker-script/node/index.js | 2 + .../browser/{loadMedia.js => fetchFile.js} | 22 ++-- src/worker/browser/fs.js | 34 ++++++ src/worker/browser/index.js | 6 +- src/worker/node/fetchFile.js | 26 +++++ src/worker/node/fs.js | 16 +++ src/worker/node/index.js | 15 +-- src/worker/node/loadMedia.js | 28 ----- tests/ffmpeg.test.js | 2 - 20 files changed, 215 insertions(+), 162 deletions(-) rename src/worker/browser/{loadMedia.js => fetchFile.js} (61%) create mode 100644 src/worker/browser/fs.js create mode 100644 src/worker/node/fetchFile.js create mode 100644 src/worker/node/fs.js delete mode 100644 src/worker/node/loadMedia.js diff --git a/examples/browser/image2video.html b/examples/browser/image2video.html index 8f2aeee..76bfb64 100644 --- a/examples/browser/image2video.html +++ b/examples/browser/image2video.html @@ -38,8 +38,13 @@ await worker.write(`tmp.${num}.png`, `../../tests/assets/triangle/tmp.${num}.png`); } message.innerHTML = 'Start transcoding'; - await worker.run('-framerate 30 -pattern_type glob -i *.png -i audio.ogg -c:a copy -shortest -c:v libx264 -pix_fmt yuv420p out.mp4'); + await worker.run('-framerate 30 -pattern_type glob -i /data/*.png -i /data/audio.ogg -c:a copy -shortest -c:v libx264 -pix_fmt yuv420p out.mp4', { outputPath: 'out.mp4' }); const { data } = await worker.read('out.mp4'); + await worker.remove('audio.ogg'); + for (let i = 0; i < 60; i += 1) { + const num = `00${i}`.slice(-3); + await worker.remove(`tmp.${num}.png`); + } const video = document.getElementById('output-video'); video.src = URL.createObjectURL(new Blob([data.buffer], { type: 'video/mp4' })); diff --git a/examples/browser/run.html b/examples/browser/run.html index dbb0c97..565d4a0 100644 --- a/examples/browser/run.html +++ b/examples/browser/run.html @@ -33,7 +33,7 @@ await worker.load(); message.innerHTML = 'Start transcoding'; await worker.write(name, files[0]); - await worker.run(`-i ${name} output.mp4`); + await worker.run(`-i /data/${name} output.mp4`, { inputPath: name, outputPath: 'output.mp4' }); message.innerHTML = 'Complete transcoding'; const { data } = await worker.read('output.mp4'); diff --git a/examples/browser/transcode.html b/examples/browser/transcode.html index 3fa215f..bf36b59 100644 --- a/examples/browser/transcode.html +++ b/examples/browser/transcode.html @@ -23,7 +23,7 @@ const { createWorker } = FFmpeg; const worker = createWorker({ corePath: '../../node_modules/@ffmpeg/core/ffmpeg-core.js', - logger: ({ message }) => console.log(message), + progress: p => console.log(p), }); const transcode = async ({ target: { files } }) => { @@ -31,11 +31,12 @@ const { name } = files[0]; message.innerHTML = 'Loading ffmpeg-core.js'; await worker.load(); - message.innerHTML = 'Start transcoding'; await worker.write(name, files[0]); + message.innerHTML = 'Start transcoding'; await worker.transcode(name, 'output.mp4'); message.innerHTML = 'Complete transcoding'; const { data } = await worker.read('output.mp4'); + console.log(data); const video = document.getElementById('output-video'); video.src = URL.createObjectURL(new Blob([data.buffer], { type: 'video/mp4' })); diff --git a/examples/node/image2video.js b/examples/node/image2video.js index ca14e2e..d9202e6 100755 --- a/examples/node/image2video.js +++ b/examples/node/image2video.js @@ -15,9 +15,14 @@ const worker = createWorker({ await worker.write(`tmp.${num}.png`, `../../tests/assets/triangle/tmp.${num}.png`); } console.log('Start transcoding'); - await worker.run('-framerate 30 -pattern_type glob -i *.png -i audio.ogg -c:a copy -shortest -c:v libx264 -pix_fmt yuv420p out.mp4'); + await worker.run('-framerate 30 -pattern_type glob -i /data/*.png -i /data/audio.ogg -c:a copy -shortest -c:v libx264 -pix_fmt yuv420p out.mp4', { outputPath: 'out.mp4' }); const { data } = await worker.read('out.mp4'); console.log('Complete transcoding'); + await worker.remove('audio.ogg'); + for (let i = 0; i < 60; i += 1) { + const num = `00${i}`.slice(-3); + await worker.remove(`tmp.${num}.png`); + } fs.writeFileSync('out.mp4', Buffer.from(data)); process.exit(0); })(); diff --git a/examples/node/run.js b/examples/node/run.js index 022cb1d..c55082f 100755 --- a/examples/node/run.js +++ b/examples/node/run.js @@ -9,7 +9,7 @@ const worker = createWorker({ await worker.load(); console.log('Start transcoding'); await worker.write('flame.avi', '../../tests/assets/flame.avi'); - await worker.run('-i flame.avi flame.mp4'); + await worker.run('-i /data/flame.avi flame.mp4', { inputPath: 'flame.avi', outputPath: 'flame.mp4' }); const { data } = await worker.read('flame.mp4'); console.log('Complete transcoding'); fs.writeFileSync('flame.mp4', Buffer.from(data)); diff --git a/package-lock.json b/package-lock.json index 7845bb4..99c2cce 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4258,6 +4258,11 @@ "safer-buffer": ">= 2.1.2 < 3" } }, + "idb": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/idb/-/idb-4.0.5.tgz", + "integrity": "sha512-P+Fk9HT2h1DhXoE1YNK183SY+CRh2GHNh28de94sGwhe0bUA75JJeVJWt3SenE5p0BXK7maflIq29dl6UZHrFw==" + }, "ieee754": { "version": "1.1.13", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.1.13.tgz", diff --git a/package.json b/package.json index 9179e8c..d776022 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,7 @@ "lint": "eslint src", "wait": "rimraf dist && wait-on http://localhost:3000/dist/ffmpeg.dev.js", "test": "npm-run-all -p -r start test:all", - "test:all": "npm-run-all wait test:browser:* test:node:all", + "test:all": "npm-run-all wait test:node:all", "test:node": "nyc mocha --exit --bail --require ./scripts/test-helper.js", "test:node:all": "npm run test:node -- ./tests/*.test.js", "test:browser": "mocha-headless-chrome -a incognito -a no-sandbox -a disable-setuid-sandbox -a disable-logging -t 300000", @@ -39,6 +39,7 @@ "homepage": "https://github.com/ffmpegjs/ffmpeg.js#readme", "dependencies": { "@ffmpeg/core": "^0.4.0", + "idb": "^4.0.5", "is-url": "^1.2.4", "node-fetch": "^2.6.0", "regenerator-runtime": "^0.13.3", diff --git a/src/createWorker.js b/src/createWorker.js index 1531f52..d04a724 100644 --- a/src/createWorker.js +++ b/src/createWorker.js @@ -8,8 +8,9 @@ const { spawnWorker, terminateWorker, onMessage, - loadMedia, send, + fetchFile, + fs, } = require('./worker/node'); let workerCounter = 0; @@ -58,57 +59,72 @@ module.exports = (_options = {}) => { })) ); - const write = async (path, data, jobId) => ( + const syncfs = (populate, jobId) => ( startJob(createJob({ - id: jobId, - action: 'write', - payload: { - path, - data: await loadMedia(data), - }, + id: jobId, action: 'syncfs', payload: { populate }, })) ); - const writeText = async (path, text, jobId) => ( + const write = async (path, data) => { + await syncfs(); + await fs.writeFile(path, await fetchFile(data)); + await syncfs(true); + return { + path: `/data/${path}`, + }; + }; + + const writeText = async (path, text) => { + await syncfs(true); + await fs.writeFile(path, text); + await syncfs(true); + return { + path: `/data/${path}`, + }; + }; + + const read = async (path, del = true) => { + const data = await fs.readFile(path); + if (del) { + await fs.deleteFile(path); + } + return { + data, + }; + }; + + const remove = async (path) => { + await fs.deleteFile(path); + return { + path: `/data/${path}`, + }; + }; + + const run = (args, opts = {}, jobId) => ( startJob(createJob({ - id: jobId, - action: 'writeText', - payload: { - path, - text, - }, + id: jobId, action: 'run', payload: { args, options: opts }, })) ); - const run = (args, jobId) => ( + const transcode = (inputPath, outputPath, opts = '', del = true, jobId) => ( + run( + `${opts} -i /data/${inputPath} ${outputPath}`, + { inputPath, outputPath, del }, + jobId, + ) + ); + + const trim = (inputPath, outputPath, from, to, opts = '', del = true, jobId) => ( + run( + `${opts} -ss ${from} -i /data/${inputPath} -t ${to} -c copy ${outputPath}`, + { inputPath, outputPath, del }, + jobId, + ) + ); + + const ls = (path, jobId) => ( startJob(createJob({ - id: jobId, action: 'run', payload: { args }, - })) - ); - - const transcode = (inputPath, outputPath, opts = '', jobId) => ( - run(`${opts} -i ${inputPath} ${outputPath}`, jobId) - ); - - const trim = (inputPath, outputPath, from, to, opts = '', jobId) => ( - run(`${opts} -ss ${from} -i ${inputPath} -t ${to} -c copy ${outputPath}`, jobId) - ); - - const read = (path, jobId) => ( - startJob(createJob({ - id: jobId, action: 'read', payload: { path }, - })) - ); - - const remove = (path, jobId) => ( - startJob(createJob({ - id: jobId, action: 'remove', payload: { path }, - })) - ); - - const mkdir = (path, jobId) => ( - startJob(createJob({ - id: jobId, action: 'mkdir', payload: { path }, + id: jobId, action: 'ls', payload: { path }, })) ); @@ -151,14 +167,15 @@ module.exports = (_options = {}) => { setResolve, setReject, load, + syncfs, write, writeText, - transcode, - trim, read, remove, - mkdir, run, + transcode, + trim, + ls, terminate, }; }; diff --git a/src/utils/extractProgress.js b/src/utils/extractProgress.js index 7a02a85..20fc521 100644 --- a/src/utils/extractProgress.js +++ b/src/utils/extractProgress.js @@ -6,17 +6,19 @@ const ts2sec = (ts) => { }; module.exports = ({ message }, progress) => { - if (message.startsWith(' Duration')) { - const ts = message.split(', ')[0].split(': ')[1]; - const d = ts2sec(ts); - if (duration === 0 || duration > d) { - duration = d; + if (typeof message === 'string') { + if (message.startsWith(' Duration')) { + const ts = message.split(', ')[0].split(': ')[1]; + const d = ts2sec(ts); + if (duration === 0 || duration > d) { + duration = d; + } + } else if (message.startsWith('frame')) { + const ts = message.split('time=')[1].split(' ')[0]; + const t = ts2sec(ts); + progress({ ratio: t / duration }); + } else if (message.startsWith('video:')) { + progress({ ratio: 1 }); } - } else if (message.startsWith('frame')) { - const ts = message.split('time=')[1].split(' ')[0]; - const t = ts2sec(ts); - progress({ ratio: t / duration }); - } else if (message.startsWith('video:')) { - progress({ ratio: 1 }); } }; diff --git a/src/worker-script/browser/index.js b/src/worker-script/browser/index.js index 0ce1497..83d244b 100644 --- a/src/worker-script/browser/index.js +++ b/src/worker-script/browser/index.js @@ -1,5 +1,6 @@ const worker = require('../'); const getCore = require('./getCore'); +const fs = require('../../worker/browser/fs'); global.addEventListener('message', ({ data }) => { worker.dispatchHandlers(data, (obj) => postMessage(obj)); @@ -7,4 +8,5 @@ global.addEventListener('message', ({ data }) => { worker.setAdapter({ getCore, + fs, }); diff --git a/src/worker-script/index.js b/src/worker-script/index.js index a63eb57..bc46e16 100644 --- a/src/worker-script/index.js +++ b/src/worker-script/index.js @@ -30,7 +30,7 @@ const load = ({ workerId, payload: { options: { corePath } } }, res) => { if (Module == null) { const Core = adapter.getCore(corePath); Core() - .then((_Module) => { + .then(async (_Module) => { Module = _Module; Module.setLogger((message, type) => { res.progress({ @@ -45,60 +45,37 @@ const load = ({ workerId, payload: { options: { corePath } } }, res) => { } }; -const write = ({ +const syncfs = async ({ payload: { - path, - data, + populate = false, }, }, res) => { - const d = Uint8Array.from({ ...data, length: Object.keys(data).length }); - Module.FS.writeFile(path, d); - res.resolve({ message: `Write ${path} (${d.length} bytes)` }); + await Module.syncfs(populate); + res.resolve({ message: `Sync file system with populate=${populate}` }); }; -const writeText = ({ - payload: { - path, - text, - }, -}, res) => { - Module.FS.writeFile(path, text); - res.resolve({ message: `Write ${path} (${text.length} bytes)` }); -}; - -const read = ({ +const ls = ({ payload: { path, }, }, res) => { - res.resolve(Module.FS.readFile(path)); + const dirs = Module.FS.readdir(path); + res.resolve({ message: `List path ${path}`, dirs }); }; -const remove = ({ - payload: { - path, - }, -}, res) => { - Module.FS.unlink(path); - res.resolve({ message: `Delete ${path}` }); -}; - -const mkdir = ({ - payload: { - path, - }, -}, res) => { - Module.FS.mkdir(path); - res.resolve({ message: `Create Directory ${path}` }); -}; - -const run = ({ +const run = async ({ payload: { args: _args, + options: { inputPath, outputPath, del }, }, }, res) => { const args = [...defaultArgs, ..._args.trim().split(' ')]; ffmpeg(args.length, strList2ptr(args)); + await adapter.fs.writeFile(outputPath, Module.FS.readFile(outputPath)); + Module.FS.unlink(outputPath); + if (del && typeof inputPath === 'string') { + await adapter.fs.deleteFile(inputPath); + } res.resolve({ message: `Complete ${args.join(' ')}` }); }; @@ -118,11 +95,8 @@ exports.dispatchHandlers = (packet, send) => { try { ({ load, - write, - writeText, - read, - remove, - mkdir, + ls, + syncfs, run, })[packet.action](packet, res); } catch (err) { diff --git a/src/worker-script/node/index.js b/src/worker-script/node/index.js index f59ff58..0085455 100644 --- a/src/worker-script/node/index.js +++ b/src/worker-script/node/index.js @@ -1,5 +1,6 @@ const worker = require('../'); const getCore = require('./getCore'); +const fs = require('../../worker/node/fs'); process.on('message', (packet) => { worker.dispatchHandlers(packet, (obj) => process.send(obj)); @@ -7,4 +8,5 @@ process.on('message', (packet) => { worker.setAdapter({ getCore, + fs, }); diff --git a/src/worker/browser/loadMedia.js b/src/worker/browser/fetchFile.js similarity index 61% rename from src/worker/browser/loadMedia.js rename to src/worker/browser/fetchFile.js index 0440331..823aa45 100644 --- a/src/worker/browser/loadMedia.js +++ b/src/worker/browser/fetchFile.js @@ -20,27 +20,25 @@ const readFromBlobOrFile = (blob) => ( }) ); -const loadMedia = async (image) => { - let data = image; - if (typeof image === 'undefined') { +module.exports = async (_data) => { + let data = _data; + if (typeof _data === 'undefined') { return 'undefined'; } - if (typeof image === 'string') { - // Base64 Media - if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) { - data = atob(image.split(',')[1]) + if (typeof _data === 'string') { + // Base64 _data + if (/data:_data\/([a-zA-Z]*);base64,([^"]*)/.test(_data)) { + data = atob(_data.split(',')[1]) .split('') .map((c) => c.charCodeAt(0)); } else { - const res = await fetch(resolveURL(image)); + const res = await fetch(resolveURL(_data)); data = await res.arrayBuffer(); } - } else if (image instanceof File || image instanceof Blob) { - data = await readFromBlobOrFile(image); + } else if (_data instanceof File || _data instanceof Blob) { + data = await readFromBlobOrFile(_data); } return new Uint8Array(data); }; - -module.exports = loadMedia; diff --git a/src/worker/browser/fs.js b/src/worker/browser/fs.js new file mode 100644 index 0000000..319405e --- /dev/null +++ b/src/worker/browser/fs.js @@ -0,0 +1,34 @@ +const { openDB } = require('idb'); + +const getDB = () => openDB('/data', 21); + +const getDataKeyAndMode = async (db) => { + const dummy = await db.get('FILE_DATA', '/data/.DUMMY'); + const dataKey = Object.keys(dummy).filter((k) => !['mode', 'timestamp'].includes(k)).pop(); + return { dataKey, mode: dummy.mode }; +}; + +module.exports = { + readFile: async (path) => { + const db = await getDB(); + const { dataKey } = await getDataKeyAndMode(db); + return (await db.get('FILE_DATA', `/data/${path}`))[dataKey]; + }, + writeFile: async (path, data) => { + const db = await getDB(); + const { dataKey, mode } = await getDataKeyAndMode(db); + await db.put( + 'FILE_DATA', + { + [dataKey]: data, + mode, + timestamp: new Date(), + }, + `/data/${path}`, + ); + }, + deleteFile: async (path) => { + const db = await getDB(); + await db.delete('FILE_DATA', `/data/${path}`); + }, +}; diff --git a/src/worker/browser/index.js b/src/worker/browser/index.js index ab562a0..5289f98 100644 --- a/src/worker/browser/index.js +++ b/src/worker/browser/index.js @@ -12,7 +12,8 @@ const spawnWorker = require('./spawnWorker'); const terminateWorker = require('./terminateWorker'); const onMessage = require('./onMessage'); const send = require('./send'); -const loadMedia = require('./loadMedia'); +const fetchFile = require('./fetchFile'); +const fs = require('./fs'); module.exports = { defaultOptions, @@ -20,5 +21,6 @@ module.exports = { terminateWorker, onMessage, send, - loadMedia, + fetchFile, + fs, }; diff --git a/src/worker/node/fetchFile.js b/src/worker/node/fetchFile.js new file mode 100644 index 0000000..8686465 --- /dev/null +++ b/src/worker/node/fetchFile.js @@ -0,0 +1,26 @@ +const util = require('util'); +const fs = require('fs'); +const fetch = require('node-fetch'); +const isURL = require('is-url'); + +module.exports = async (_data) => { + let data = _data; + if (typeof _data === 'undefined') { + return _data; + } + + if (typeof _data === 'string') { + if (isURL(_data) || _data.startsWith('chrome-extension://') || _data.startsWith('file://')) { + const res = await fetch(_data); + data = await res.arrayBuffer(); + } else if (/data:_data\/([a-zA-Z]*);base64,([^"]*)/.test(_data)) { + data = Buffer.from(_data.split(',')[1], 'base64'); + } else { + data = await util.promisify(fs.readFile)(_data); + } + } else if (Buffer.isBuffer(_data)) { + data = _data; + } + + return data; +}; diff --git a/src/worker/node/fs.js b/src/worker/node/fs.js new file mode 100644 index 0000000..d5f3eb3 --- /dev/null +++ b/src/worker/node/fs.js @@ -0,0 +1,16 @@ +const util = require('util'); +const fs = require('fs'); + +const readFile = util.promisify(fs.readFile); +const writeFile = util.promisify(fs.writeFile); +const deleteFile = util.promisify(fs.unlink); + +module.exports = (path) => ( + readFile(`./data/${path}`) +); + +module.exports = { + readFile: (path) => readFile(`./data/${path}`), + writeFile: (path, data) => writeFile(`./data/${path}`, data), + deleteFile: (path) => deleteFile(`./data/${path}`), +}; diff --git a/src/worker/node/index.js b/src/worker/node/index.js index 1911335..3aa7147 100644 --- a/src/worker/node/index.js +++ b/src/worker/node/index.js @@ -1,18 +1,10 @@ -/** - * - * Tesseract Worker impl. for node (using child_process) - * - * @fileoverview Tesseract Worker impl. for node - * @author Kevin Kwok - * @author Guillermo Webster - * @author Jerome Wu - */ const defaultOptions = require('./defaultOptions'); const spawnWorker = require('./spawnWorker'); const terminateWorker = require('./terminateWorker'); const onMessage = require('./onMessage'); const send = require('./send'); -const loadMedia = require('./loadMedia'); +const fetchFile = require('./fetchFile'); +const fs = require('./fs'); module.exports = { defaultOptions, @@ -20,5 +12,6 @@ module.exports = { terminateWorker, onMessage, send, - loadMedia, + fetchFile, + fs, }; diff --git a/src/worker/node/loadMedia.js b/src/worker/node/loadMedia.js deleted file mode 100644 index da35f00..0000000 --- a/src/worker/node/loadMedia.js +++ /dev/null @@ -1,28 +0,0 @@ -const util = require('util'); -const fs = require('fs'); -const fetch = require('node-fetch'); -const isURL = require('is-url'); - -const readFile = util.promisify(fs.readFile); - -module.exports = async (media) => { - let data = media; - if (typeof media === 'undefined') { - return media; - } - - if (typeof media === 'string') { - if (isURL(media) || media.startsWith('chrome-extension://') || media.startsWith('file://')) { - const res = await fetch(media); - data = await res.arrayBuffer(); - } else if (/data:media\/([a-zA-Z]*);base64,([^"]*)/.test(media)) { - data = Buffer.from(media.split(',')[1], 'base64'); - } else { - data = await readFile(media); - } - } else if (Buffer.isBuffer(media)) { - data = media; - } - - return new Uint8Array(data); -}; diff --git a/tests/ffmpeg.test.js b/tests/ffmpeg.test.js index 5a48e0e..56258e9 100644 --- a/tests/ffmpeg.test.js +++ b/tests/ffmpeg.test.js @@ -12,9 +12,7 @@ describe('transcode()', () => { it(`transcode ${name}`, async () => { await worker.write(name, `${BASE_URL}/${name}`); await worker.transcode(name, 'output.mp4'); - await worker.remove(name); const { data } = await worker.read('output.mp4'); - await worker.remove('output.mp4'); expect(data.length).to.be(FLAME_MP4_LENGTH); }).timeout(TIMEOUT) ));