
Large File Upload

Implementing large file upload: the principles and practice of chunked upload and resumable upload.

Chunked Upload

Client

  • Split the file into multiple chunks with the Blob.prototype.slice method (a usage sketch follows the snippet below)
// Split the file into chunks; size: chunk size in bytes
const SIZE = 10 * 1024 * 1024; // default chunk size used when none is passed (10 MB here as an example)
function createFileChunk(file, size = SIZE) {
    const fileChunkList = [];
    let currentSize = 0;
    let i = 0;
    while (currentSize < file.size) {
        // end of the current chunk, clamped to the file size
        const end =
            file.size < currentSize + size ? file.size : currentSize + size;
        fileChunkList.push({
            chunk: file.slice(currentSize, currentSize + size),
            // chunk hash: file name + chunk index
            hash: file.name + "-" + i,
            percentage: 0,
            index: i,
            size: end - currentSize,
        });
        currentSize += size;
        i++;
    }
    return fileChunkList;
}
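
A minimal usage sketch, assuming the file comes from an <input type="file"> element; the element id and the 10 MB chunk size are illustrative, not part of the original code:
// Hypothetical usage: slice the file chosen in a file input
const input = document.querySelector("#file-input"); // illustrative element id
input.addEventListener("change", (e) => {
    const [file] = e.target.files;
    if (!file) return;
    const chunks = createFileChunk(file, 10 * 1024 * 1024); // 10 MB chunks
    console.log(`created ${chunks.length} chunks for ${file.name}`);
});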

  • Use asyncPool to limit uploads to N concurrent requests, and upload every chunk together with its index so the server can reassemble them in order (an upload sketch follows the snippet below)
// Limit the number of concurrent uploads
async function asyncPool(poolLimit, iterable, iteratorFn) {
    const ret = [];
    const executing = new Set();
    for (const item of iterable) {
        const p = Promise.resolve().then(() => iteratorFn(item, iterable));
        ret.push(p);
        executing.add(p);
        const clean = () => executing.delete(p);
        p.then(clean).catch(clean);
        // at the concurrency limit: wait for one in-flight upload to settle
        if (executing.size >= poolLimit) {
            await Promise.race(executing);
        }
    }
    return Promise.all(ret);
}
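
A minimal sketch of uploading the chunks through asyncPool, assuming axios and a hypothetical /upload endpoint; the field names and the concurrency of 4 are illustrative:
// Upload all chunks with at most 4 concurrent requests
async function uploadChunks(fileChunkList, filename) {
    const uploadChunk = ({ chunk, hash }) => {
        const formData = new FormData();
        formData.append("chunk", chunk);
        formData.append("hash", hash);
        formData.append("filename", filename);
        return axios.post("/upload", formData); // assumed endpoint
    };
    await asyncPool(4, fileChunkList, uploadChunk);
}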
  • Once every chunk has been uploaded, the frontend notifies the backend to merge all chunks, as sketched below
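A sketch of that notification, assuming a hypothetical /merge endpoint that receives the original filename and the chunk size used on the client:
// Illustrative merge notification, sent once uploadChunks has resolved
async function mergeRequest(filename, size) {
    await axios.post("/merge", { filename, size });
}
// e.g. await uploadChunks(chunks, file.name); await mergeRequest(file.name, 10 * 1024 * 1024);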

Server

  • Use Node.js read/write streams (readStream/writeStream) to pipe every chunk's stream into the final file's stream (a sketch of the merge endpoint follows the snippets below)
const fse = require("fs-extra");
const path = require("path");

// Pipe one chunk file into the target write stream, then delete the chunk
const pipeStream = (chunkPath, writeStream) => {
    return new Promise((resolve) => {
        const readStream = fse.createReadStream(chunkPath);
        readStream.on("end", () => {
            // chunk fully written: remove the temporary chunk file
            fse.unlinkSync(chunkPath);
            resolve();
        });
        readStream.pipe(writeStream);
    });
};

// Merge all chunks into the final file
// UPLOAD_DIR is the upload root directory (defined elsewhere); size is the chunk size used by the client
const mergeFileChunk = async (filePath, filename, size) => {
    const chunkDir = path.resolve(UPLOAD_DIR, "chunkDir" + filename);
    const chunkPaths = await fse.readdir(chunkDir);
    // sort by chunk index (the last "-"-separated segment of the chunk name),
    // otherwise the order returned by readdir may be wrong
    chunkPaths.sort((a, b) => a.split("-").pop() - b.split("-").pop());
    // write the chunks concurrently
    await Promise.all(
        chunkPaths.map((chunkPath, index) =>
            pipeStream(
                path.resolve(chunkDir, chunkPath),
                // create a write stream starting at this chunk's offset (index * size)
                fse.createWriteStream(filePath, {
                    start: index * size,
                })
            )
        )
    );
    // remove the chunk directory after merging
    fse.rmdirSync(chunkDir);
};
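
A sketch of how the merge endpoint might call mergeFileChunk. It assumes an Express app (Express is not part of the original article) and that UPLOAD_DIR is defined as above; route and field names are illustrative:
const express = require("express");
const app = express();
app.use(express.json());

// Hypothetical merge endpoint, hit by the client after all chunks are uploaded
app.post("/merge", async (req, res) => {
    const { filename, size } = req.body;
    const filePath = path.resolve(UPLOAD_DIR, filename);
    await mergeFileChunk(filePath, filename, size);
    res.json({ code: 0, message: "file merged" });
});

app.listen(3000);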

Resumable Upload

Client

  • Use spark-md5 to generate a unique hash for the file, so that after an upload is paused, chunks (or the whole file) that were already uploaded can be skipped
    • Use FileReader to read each chunk as an ArrayBuffer and feed it into spark-md5
    • Compute the hash in a worker thread so the main UI stays responsive
    • Communicate via postMessage/onmessage to report hashing progress in real time
// Create the unique hash for the file in a web worker
function createFileHash(fileChunkList) {
    return new Promise((resolve) => {
        // keep a reference to the worker (container and hashPercentage are reactive refs)
        container.value.worker = new Worker("/hash.js");
        // send the chunk list to the worker
        container.value.worker.postMessage({ fileChunkList });
        container.value.worker.onmessage = (e) => {
            const { percentage, hash } = e.data;
            // track hash-generation progress for the progress bar
            hashPercentage.value = parseInt(percentage);
            if (hash) {
                resolve(hash);
            }
        };
    });
}
// hash.js

// import spark-md5 into the worker
self.importScripts("/spark-md5.min.js");

// generate the file hash incrementally, chunk by chunk
self.onmessage = e => {
    const { fileChunkList } = e.data;
    const spark = new self.SparkMD5.ArrayBuffer();
    let percentage = 0;
    let count = 0;
    const loadNext = index => {
        const reader = new FileReader();
        // read the chunk Blob produced by createFileChunk as an ArrayBuffer
        reader.readAsArrayBuffer(fileChunkList[index].chunk);
        reader.onload = e => {
            count++;
            spark.append(e.target.result);
            if (count === fileChunkList.length) {
                // all chunks appended: emit the final hash and close the worker
                self.postMessage({
                    percentage: 100,
                    hash: spark.end()
                });
                self.close();
            } else {
                // report progress and process the next chunk
                percentage += 100 / fileChunkList.length;
                self.postMessage({
                    percentage
                });
                loadNext(count);
            }
        };
    };
    loadNext(0);
};
  • Pause the upload by cancelling the in-flight chunk requests, e.g. by passing an AbortController signal to axios and calling abort(), as sketched below
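A minimal sketch of pausing via cancellation, assuming axios v0.22+ (which accepts an AbortController via the signal option); the endpoint and function names are illustrative:
// One controller per upload session; abort() rejects every pending chunk request
let controller = new AbortController();

function uploadChunkWithSignal(formData) {
    return axios.post("/upload", formData, { signal: controller.signal });
}

function handlePause() {
    controller.abort();                 // cancel in-flight chunk uploads
    controller = new AbortController(); // fresh controller for when the upload resumes
}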

Server

  • Use the unique hash to look up which chunks, or the whole file, have already been uploaded, and return that to the client, as sketched below
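A minimal sketch of this lookup, assuming a hypothetical Express-style /verify route and that chunk directories and merged files are named after the file hash (the merge snippet above keys them by filename instead; either works as long as the upload and verify endpoints agree):
// Hypothetical verify endpoint: report what already exists for this file hash
app.post("/verify", async (req, res) => {
    const { fileHash } = req.body;
    const filePath = path.resolve(UPLOAD_DIR, fileHash);
    const chunkDir = path.resolve(UPLOAD_DIR, "chunkDir" + fileHash);
    if (fse.existsSync(filePath)) {
        // the merged file already exists: the client can skip the upload entirely
        return res.json({ shouldUpload: false });
    }
    // otherwise return the chunks already on disk so the client only uploads the missing ones
    const uploadedList = fse.existsSync(chunkDir) ? await fse.readdir(chunkDir) : [];
    res.json({ shouldUpload: true, uploadedList });
});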

Full Source Code

Code repository