转存服务器

作者: 我就是L | 来源:发表于2017-03-09 21:03 被阅读46次

Buffer、Stream、Promise、async await、request、分片上传

什么是转存服务器？

即向服务器发送一个图片的url地址，服务负责去将图片下载到服务器上，之后再将这个图片上传到存储服务，得到一个可以访问（通常情况都是CDN服务）的图片地址。

当服务器下在一个大型文件时，需要完全下载完，然后缓存到本地硬盘的缓存文件中，而且一次性上传大文件，过程中由于耗时较长，因此存在较高的失败率，通常采用分片法来解决，如果分片失败则只需重新上传该分片即可。

在下载时，如果下载量满足一个分片大小则上传。所以第一步就是监听下载内容。ReadStream在接收数据时会不断的触发data事件，因此只需监听data事件就可以准确捕获到每一次数据传输过程，ReadStream分为两种模式流动模式和暂停模式，流动模式下数据会源源不断的流出供需要者使用，而暂停模式只有调用read()方法才会有数据流出。这里我们通过pipe把ReadStream与WriteStream相连，让数据流动起来。

const request = require('request');
const fs      = require('fs');
const path    = require('path');
const url     = 'https://baobao-3d.bj.bcebos.com/16-0-205.shuimian.mp4';


const httpReadStream  = request({method: 'GET', url: url});
const fileWriteStream = fs.createWriteStream(path.join(__dirname, 'download', 'lyf.mp4'));

httpReadStream.pipe(fileWriteStream);

let totalLength = 0;
httpReadStream
    .on('response', res=> {
        console.log('response.headers', res.statusCode);
    })
    .on('data', chunk=> {
        totalLength += chunk.length;
    })
    .on('error', err=> {
        console.log('err', err);
    });

fileWriteStream.on('close', ()=> {
    console.log(`已下载文件大小: ${(totalLength / 1000 / 1024).toFixed(2)} MB`)
});```

每次data事件获取的chunk大小因网络而变化，假设每次上传分片大小为2MB，每一次chunk有可能大于2MB也可能小于2MB，所以可在中间设置一缓冲区，当缓冲区满足2MB时就取出2MB作为一个分片进行上传。

于是我们使用Buffer实现一个缓冲区，主要用于分片。
```javascript
class BufferCache {
    constructor(cutSize = 2 * 1024 * 1000) {
        this._cache      = Buffer.alloc(0);
        this._cutSzie    = cutSize;
        this._readyCache = [];
    }

    push(buf) {
        let cacheLength = this._cache.length;
        let bufLength   = buf.length;
        this._cache     = Buffer.concat([this._cache, buf], bufLength + cacheLength)
        this.cut();
    }

    pull() {
        return this._readyCache.shift();
    }


    cut() {
        if (this._cache.length >= this._cutSzie) {
            const totalCacheLength = this._cache.length;
            let cutCount           = Math.floor(totalCacheLength / this._cutSzie);

            for (let i = 0; i < cutCount; i++) {
                let newBuffer = Buffer.alloc(this._cutSzie);
                this._cache.copy(newBuffer, 0, i * this._cutSzie, (i + 1) * this._cutSzie);
                this._readyCache.push(newBuffer);
            }
            this._cache = this._cache.slice(cutCount * this._cutSzie);
        }
    }

    getReadChunks() {
        return this._readyCache;
    }

    getRemainChunks() {
        if (this._cache.length < this._cutSzie)
            return this._cache;
        else {
            this.cut();
            return this.getRemainChunks();
        }
    }
}

exports = module.exports = BufferCache;

为了便于后面的编码，提高可扩展性和可读性，我们将下载过程封装如下。通过四个回调函数轻易掌控下载开始、中途、结束、异常四种状态。

const request     = require('request');
const fs          = require('fs');
const path        = require('path');
const BufferCache = require('./bufferCache');
const url         = 'https://baobao-3d.bj.bcebos.com/16-0-205.shuimian.mp4';
const _cutSize    = 1024 * 1000 * 2;
const bufferCache = new BufferCache(_cutSize);
let isFinished    = false;

function getChunks(options, onStart, onData, onFinish, onError) {
    const httpReadStream  = request({method: options.method, url: options.url});
    const fileWriteStream = fs.createWriteStream(path.join(__dirname, 'download', 'lyf.mp4'));

    httpReadStream.pipe(fileWriteStream);

    let downloadedLength = 0;
    httpReadStream
        .on('response', res=>onStart(res))
        .on('data', chunk=> {
            downloadedLength += chunk.length;
            onData(chunk, downloadedLength)
        })
        .on('error', err=>onError(err));
    
    fileWriteStream.on('close', ()=> {
        onFinish(downloadedLength)
    });
}

function onStart(res) {
    console.log('start downloading, statusCode is :', res.statusCode);
}

function onData(chunk, downloadedLength) {
    bufferCache.push(chunk);
}

function onFinished(totalLength) {
    let chunkCount = Math.ceil(totalLength / _cutSize);
    console.log('total chunk count is:' + chunkCount);
}

function onError(err) {
    console.log('err', err)
}

getChunks({method: 'GET', url: url}, onStart, onData, onFinished, onError);

截止目前，我们已经完成下载、分片接下来需要考虑如下：

如何连续获取准备好的分片?
如何上传分片?
上传分片失败的重传问题?
上传完所有分片之后的统一处理接口?
分片的并发上传?以及并发数的控制

如何连续获取准备好的分片?
在onStart执行之后即数据开始传输时，我们可以使用Node自带的间隔计时器setInterval，每隔200ms获取一次分片。一个文件在经过多次相同大小的切割之后，总会遗留下小的一块分片，因此我们还需要对最后一个分片进行特殊处理。当 readyCache 的长度为0的时候，而且下载已经完成，不会再调用 pushBuf 函数，就是获取最后一段分片的时机。于是重写onStart函数完成以上业务

function onStart(res) {
    console.log('start downloading, statusCode is :', res.statusCode);
    let interval = setInterval(function () {
        if (bufferCache.getReadChunks().length > 0) {
            let readCache = bufferCache.pull();
            console.log('recives', readCache.length)
        }
        if (isFinished) {
            clearInterval(interval);
            let lastChunk = bufferCache.getRemainChunks();
            console.log('the last chunk', lastChunk.length);
        }
    }, 200)
}

如何上传分片？
使用HTTP进行文件上传，文件在传输过程中为一个byte序列，其 content-type 为 multipart/form-data，我们先通过Promise封装一个上传函数

function upload(url, data) {
    return new Promise((resolve, reject) => {
        request.post({
            url: url,
            formData: data
        }, function (err, response, body) {
            if (!err && response.statusCode === 200) {
                resolve(body);
            }
            else {
                reject(err);
            }
        });
    });
}

我们现在需要从缓存中拿分片，如国还有剩余着继续，没有则通知发送完成，对于这样的逻辑可以使用递归。
假设当前网络环境拥堵，会导致上传一个分片的时间 > 200ms， 200ms之后下一次轮询开始运行时，原先的分片还没上传完毕，由于没有一个状态值进行判断，依然会调用上传函数，又再一次进行分片上传，就会更加剧的网络拥堵环境，导致分片上传时间更短。如此反复，时间一长就会导致崩溃，造成分片上传全部大面积失败。为了避免这样的情况，我们就需要一个变量来表示当前这个上传流程的状态，目前我们只关心单个流程进行上传，可以只需要保证最大同时上传的值为1即可。

function sendChunks() {
    let chunkId     = 0; // 给每个分片划分ID
    let sending     = 0; // 当前并行上传的数量
    let MAX_SENDING = 1; // 最大并行上传数

    function send(readCaches) {
        if (readCaches.length <= 0)
            return;
        console.log(`发送第 ${chunkId} 块分片`)
        const chunk       = readCaches.shift();
        const sendPromise = upload('http://localhost:3000/upload', {
            chunk: {
                value: chunk,
                options: {
                    // 在文件名称上添加chunkId，可以方便后端服务进行分片整理
                    filename: 'example.mp4_IDSPLIT_' + chunkId
                }
            }
        });
        sending++;
        sendPromise.then(resBody=> {
            sending--;
            if (resBody.uploadStatus === 0 && readCaches.length > 0)
                send(readCaches);
        });
        chunkId++;
    }

    return new Promise((resolve, reject)=> {
        let readyCaches = bufferCache.getReadChunks();
        let interval    = setInterval(function () {
            if (readyCaches.length >= 0 && sending <= MAX_SENDING) {
                send(readyCaches);
            }
            if (isFinished && readyCaches.length === 0) {
                clearInterval(interval);
                const lastChunk = bufferCache.getRemainChunks();
                readyCaches.push(lastChunk);
                send(readyCaches)
            }
        }, 200)
    })
}

截止此我们已经完成下载－分片－连续上传分片的简单实现，但如果某一分片上传失败又该怎么办呢？send()函数可以看作一个发送单个分片(不考虑递归)的控制器，只需在其内部捕获上传错误的分片，保存下来重传即可。于是我们修改sendChunks函数如下：在send().cathc(fn)内进行重传控制，在可尝试次数之内进行重传，如果失败则抛出异常。

function sendChunks() {
    let chunkId = 0;
    let sending = 0; // 当前并行上传的数量
    let MAX_SENDING = 1; // 最大并行上传数
    let stopSend = false;

    function send(options) {
        let readyCache = options.readyCache;
        let fresh = options.fresh;
        let retryCount = options.retry;
        let chunkIndex;

        let chunk = null;

        // 新的数据
        if (fresh) {
            if (readyCache.length === 0) {
                return;
            }

            chunk = readyCache.shift();
            chunkIndex = chunkId;
            chunkId++;
        }
        // 失败重试的数据
        else {
            chunk = options.data;
            chunkIndex = options.index;
        }

        sending++;
        let sendP = upload('http://localhost:3000', {
            chunk: {
                value: chunk,
                options: {
                    filename: 'example.mp4_IDSPLIT_' + chunkIndex
                }
            }
        }).then((response) => {
            sending--;
            let json = JSON.parse(response);

            if (json.errno === 0 && readyCache.length > 0) {
                return send({
                    retry: RETRY_COUNT,
                    fresh: true,
                    readyCache: readyCache
                });
            }

            // 这里一直返回成功
            return Promise.resolve(json);
        }).catch(err => {
            if (retryCount > 0) {
                // 这里递归下去，如果成功的话，就等同于错误已经处理
                return send({
                    retry: retryCount - 1,
                    index: chunkIndex,
                    fresh: false,
                    data: chunk,
                    readyCache: readyCache
                });
            }
            else {
                console.log(`upload failed of chunkIndex: ${chunkIndex}`);
                // 停止上传标识，会直接停止上传
                stopSend = true;
                // 返回reject，异常抛出
                return Promise.reject(err);
            }
        });
    }

    return new Promise((resolve, reject) => {
        let readyCache = bufferCache.getChunks();

        let sendTimer = setInterval(() => {
            if (sending < MAX_SENDING && readyCache.length > 0) {
                // 改用传入对象
                send({
                    retry: 3, // 最大重试3次
                    fresh: true, // 用这个字段来区分是新的分片，还是由于失败重试的
                    readyCache: readyCache
                }).catch(err => {
                    console.log('upload failed, errmsg: ', err);
                });
            }
            else if (isFinished && readyCache.length === 0 || stopSend) {
                clearTimeout(sendTimer);

                // 已经成功走到最后一个分片了。
                if (!stopSend) {
                    let lastChunk = bufferCache.getRemainChunks();
                    readyCache.push(lastChunk);

                    send({
                        retry: 3,
                        fresh: true,
                        readyCache: readyCache
                    }).catch(err => {
                        console.log('upload failed, errmsg: ', err);
                    });
                }
            }

            // 到这里是为分片正在下载，同时又正在上传
            // 或者上传比下载快，已经下载好的分片都传完了，等待下载完成
        }, 200);
    });
}

上传完所有分片之后的统一处理接口?
由于上传send()在成功上传一个分片后会返回一个Promise对象，上传失败时会抛出异常，所以只需使用Promsie.all()方法捕获即可。

let readyCache = bufferCache.getChunks();
let sendPromise = [];

let sendTimer = setInterval(() => {
    if (sending < MAX_SENDING && readyCache.length > 0) {
        // 把Promise塞进数组
        sendPromise.push(send({
            retry: RETRY_COUNT,
            fresh: true,
            readyCache: readyCache
        }));
    }
    else if ((isFinished && readyCache.length === 0) || stopSend) {
        clearTimeout(sendTimer);

        if (!stopSend) {
            console.log('got last chunk');
            let lastChunk = bufferCache.getRemainChunks();
            readyCache.push(lastChunk);
            // 把Promise塞进数组
            sendPromise.push(send({
                retry: RETRY_COUNT,
                fresh: true,
                readyCache: readyCache
            }));
        }

        // 当所有的分片都发送之后触发，
        Promise.all(sendPromise).then(() => {
            console.log('send success');
        }).catch(err => {
            console.log('send failed');
        });
    }
    // not ready, wait for next interval
}, 200);

分片的并发上传?以及并发数的控制?现在还剩最后一个问题，Node本身就是非阻塞IO、事件驱动的，我们只需使用send()去同步的获得执行，而真正的上传逻辑upload却是异步，所以不需要考虑资源竞争、死锁等问题，只需同步扩展send方法即可。


let readyCache = bufferCache.getChunks();
let threadPool = [];

let sendTimer = setInterval(() => {
    if (sending < MAX_SENDING && readyCache.length > 0) {
        // 这个例子同时开启4个分片上传
        for (let i = 0; i < MAX_SENDING; i++) {
            let thread = send({
                retry: RETRY_COUNT,
                fresh: true,
                readyCache: readyCache
            });

            threadPool.push(thread);
        }
    }
    else if ((isFinished && readyCache.length === 0) || stopSend) {
        clearTimeout(sendTimer);

        if (!stopSend) {
            console.log('got last chunk');
            let lastChunk = bufferCache.getRemainChunks();
            readyCache.push(lastChunk);
            threadPool.push(send({
                retry: RETRY_COUNT,
                fresh: true,
                readyCache: readyCache
            }));
        }

        Promise.all(threadPool).then(() => {
            console.log('send success');
        }).catch(err => {
            console.log('send failed');
        });
    }
}, 200);

这里我们通过文件的md5值去判断是否属于同一文件。


function toMd5(buffer) {
    let md5 = crypto.createHash('md5');
    md5.update(buffer);
    return md5.digest('hex');
}

存储服务器上由于是分片后的文件，所以我们先把目录中的文件以Buffer的形式读入内存,在求文件的md5值即可。

function filesInDirToBuffer(dirPath) {
    let totalBuffer = Buffer.allocUnsafe(0);
    const dirsInfo  = fs.readdirSync(dirPath);
    dirsInfo.forEach(file=> {
        if (file != '.DS_Store') {
            const currentFileBuffer = fs.readFileSync(path.join(dirPath, file));
            totalBuffer             = Buffer.concat([totalBuffer, currentFileBuffer], totalBuffer.length + currentFileBuffer.length);
        }
    });
    return totalBuffer;
}

转存服务器

什么是转存服务器？

相关文章

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读

Node程序员的技术日常