美文网首页PHP我爱编程
php实现多进程下载百度网盘文件

php实现多进程下载百度网盘文件

作者: PeterQ1998 | 来源:发表于2018-04-11 16:42 被阅读626次

    大家知道, 百度网盘对非会员的下载是限速的, 最大速度基本维持在 100KB/s 以内, 要下个电影啥的, 那就有得等了. 之前还可以把网盘里的文件链接解析出来, 放到 uGet 或迅雷之类的下载工具里多线程下载, 但是现在百度的文件服务器对 User-Agent (UA) 做了校验, 又没找到好用的、可以编辑 HTTP header 的下载工具, 于是就自己动手写了一个, 基本上可以充分利用已有的带宽.

    download_demo.gif

    PS: 请自行安装 swoole 扩展和 Guzzle HTTP 包. 另外, 代码里用到的 dump() 函数来自 symfony/var-dumper, 如果没有安装, 请一并安装或改用 var_dump().
    直接上代码吧:

    <?php

    // Composer autoloader. `require` (rather than `include`) stops the script right here
    // when vendor/ is missing, instead of failing later with a "Class not found" error.
    require 'vendor/autoload.php';
    // Alternative sample link:
    // $service = new Service('http://peterq.cn/movie/api/video_redirect?fid=543468589252145', __DIR__);
    // Download the linked file into the directory that contains this script.
    $service = new Service('http://peterq.cn/movie/api/video_redirect?fid=364402848596280', __DIR__);
    $service->start();
    
    use GuzzleHttp\Client;
    
    
    /**
     * Master side of the downloader.
     *
     * Forks a pool of worker processes, probes the URL for the file name and size,
     * hands out fixed-size byte ranges to idle workers over their pipes, and prints
     * progress as workers report the bytes they have written.
     */
    class Service
    {

        /**
         * @var Client Guzzle client created in start(); the worker closures use the same property.
         */
        protected $client;

        protected $worker_pool = []; // download worker processes, indexed by worker number

        protected $available_worker_queue; // SplQueue of workers that currently have no segment assigned

        protected $worker_number = 16; // how many workers to fork; small files may not use all of them, depending on the segment size

        protected $started = false; // whether start() has already run

        protected $url; // download URL

        protected $length; // file size in bytes, taken from Content-Length

        protected $dir; // directory the file is saved into

        protected $filename; // absolute path of the target file

        protected $downloaded = 0; // bytes reported as downloaded so far

        protected $speedArr = []; // bytes received per unix second, used for the speed display

        protected $distributed = 0; // file offset up to which segments have been handed out

        /**
         * @param string $url download URL
         * @param string $dir existing directory to save the file into
         *
         * @throws InvalidArgumentException when $dir does not resolve to a directory
         */
        public function __construct($url, $dir)
        {
            $this->url = $url;
            // realpath() returns false for a missing path; without this check the
            // target path would silently become "/<filename>".
            $resolved = realpath($dir);
            if ($resolved === false || !is_dir($resolved)) {
                throw new InvalidArgumentException('save directory does not exist: ' . $dir);
            }
            $this->dir = $resolved;
        }

        /**
         * Creates the HTTP client and the worker pool, installs the SIGCHLD reaper
         * and kicks off the download. Calling it a second time is a no-op.
         */
        public function start()
        {
            if ($this->started) return;
            $this->available_worker_queue = new SplQueue();
            $this->started = true;
            // Build the client with browser-like headers.
            $this->client = new Client([
                'headers' => [
                    "Accept"           => "application/json, text/javascript, text/html, */*; q=0.01",
                    "Accept-Encoding"  => "gzip, deflate, sdch",
                    "Accept-Language"  => "en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2",
                    "Referer"          => "http://pan.baidu.com/disk/home",
                    "X-Requested-With" => "XMLHttpRequest",
                    "User-Agent"       => "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36",
                    "Connection"       => "keep-alive",
                ],
            ]);
            // Name the master process.
            swoole_set_process_name('download-master');
            echo 'master pid:' . posix_getpid() . PHP_EOL;
            // Fork the download workers.
            for ($i = 0; $i < $this->worker_number; $i++) {
                $process = $this->createProcess($i);
                $this->worker_pool[$i] = $process;
                // Register the worker's pipe on the event loop so its messages are handled asynchronously.
                swoole_event_add($process->pipe, function ($pipe) use ($process) {
                    $raw = $process->read();
                    if ($raw === false || $raw === '') return;
                    // Messages are plain arrays/scalars, so object instantiation is disabled.
                    $message = unserialize($raw, ['allowed_classes' => false]);
                    if (!is_array($message) || !isset($message['type'])) return;
                    $this->handleChildMessage($process, $message['type'], $message['data'] ?? null);
                });
                $process->start();
                $this->available_worker_queue->enqueue($process);
            }

            // Reap exited children; the master exits once every worker is gone.
            swoole_process::signal(SIGCHLD, function ($sig) {
                static $exited = 0;
                // wait() must be non-blocking (false) inside the signal handler.
                while ($ret = swoole_process::wait(false)) {
                    echo "child process exited, PID={$ret['pid']}\n";
                    $exited++;
                    if ($exited == count($this->worker_pool)) exit();
                }
            });

            $this->initDownload();

        }

        /**
         * Probes the URL, derives the target file name and size from the response
         * headers, creates an empty target file and starts distributing segments.
         * When the response is not a usable file download, the workers are told to
         * exit and nothing is written.
         */
        protected function initDownload()
        {
            $options = [
                'stream'          => true,
                'read_timeout'    => 10,
            ];
            $resp = $this->client->request('GET', $this->url, $options);
            // Redirect responses that reach us here carry the next URL in the body.
            while (in_array($resp->getStatusCode(), [301, 302], true)) {
                $this->url = $resp->getBody()->read(1024);
                dump('redirect: ' . $this->url);
                $resp = $this->client->request('GET', $this->url, $options);
            }

            $dispositions = $resp->getHeader('Content-Disposition');
            $fname = isset($dispositions[0]) ? $this->parseFilename($dispositions[0]) : null;
            if ($fname === null) {
                dump('not a file download url');
                $this->shutdownWorkers();
                return;
            }

            $lengths = $resp->getHeader('Content-Length');
            $this->length = isset($lengths[0]) ? intval($lengths[0]) : 0;
            if ($this->length <= 0) {
                // Without a size no segment can be handed out and the pool would idle forever.
                dump('missing or zero Content-Length, nothing to download');
                $this->shutdownWorkers();
                return;
            }

            // Only the headers were needed from the probe request.
            $resp->getBody()->close();

            $this->filename = $this->dir . '/' . $fname;
            dump([
                '文件' => $this->filename,
                '大小' => round($this->length / 1024 / 1024, 2) . 'MB'
            ]);
            file_put_contents($this->filename, '');
            $this->download();
        }

        /**
         * Extracts a safe file name from a Content-Disposition header value.
         *
         * Handles both quoted and unquoted `filename=` parameters, URL-decodes the
         * value and keeps only the part after the last "/" so a server-supplied name
         * cannot point outside the save directory.
         *
         * @param string $disposition raw header value
         * @return string|null the file name, or null when none can be derived
         */
        protected function parseFilename($disposition)
        {
            if (!preg_match('/filename=(?:"([^"]*)"|([^;]*))/', $disposition, $m)) {
                return null;
            }
            $raw = $m[1] !== '' ? $m[1] : ($m[2] ?? '');
            $fname = urldecode(trim($raw));
            $slash = strrpos($fname, '/');
            if ($slash !== false) {
                $fname = substr($fname, $slash + 1);
            }
            if ($fname === '' || $fname === '.' || $fname === '..') {
                return null;
            }
            return $fname;
        }

        /**
         * Hands out segments to idle workers until either the whole file has been
         * distributed or no idle worker is left.
         */
        protected function download()
        {
            while (
                $this->distributed < $this->length
                && $this->available_worker_queue->count()
                && $process = $this->available_worker_queue->dequeue()) {
                $this->distributeSegment($process);
            }
        }

        /**
         * Assigns the next undistributed byte range to the given worker.
         *
         * @param swoole_process $process worker that receives the 'new-segment' message
         */
        protected function distributeSegment($process)
        {
            // Segments are 1 MB; the last one is whatever remains.
            $size = 1 * 1024 * 1024;
            $length = min($size, $this->length - $this->distributed);
            $process->write(serialize([
                'type' => 'new-segment',
                'data' => [
                    'url' => $this->url,
                    'file' => $this->filename,
                    'start' => $this->distributed,
                    'length' => $length,
                ]
            ]));
            $this->distributed += $length;
        }

        /**
         * Dispatches a worker message to the matching on<Type>() handler, if one exists.
         *
         * @param swoole_process $process worker that sent the message
         * @param string         $type    message type, e.g. 'range' or 'taskFinished'
         * @param mixed          $data    message payload
         */
        protected function handleChildMessage($process, $type, $data)
        {
            $handler = 'on' . ucfirst($type);
            if (method_exists($this, $handler)) {
                $this->{$handler}($process, $data);
            }
        }

        /**
         * Called when a worker reports that it has written another chunk.
         * Updates the totals and redraws the progress line.
         *
         * @param swoole_process $process reporting worker
         * @param int            $data    number of bytes in the chunk
         */
        protected function onRange(swoole_process $process, $data)
        {
            $this->downloaded += $data;
            static $lastClearTime = 0;
            $time = time();
            $this->speedArr[$time] = $this->speedArr[$time] ?? 0;
            $this->speedArr[$time] += $data;
            // The displayed speed is the mean of the per-second buckets of roughly the
            // last 5 seconds; it is a rough figure. Old buckets are pruned once per second.
            if ($time > $lastClearTime) {
                $lastClearTime = $time;
                foreach ($this->speedArr as $t => $size) {
                    if ($t < $time - 5) unset($this->speedArr[$t]);
                }
            }
            $speed = array_sum($this->speedArr) / count($this->speedArr);
            $percent = $this->downloaded / $this->length * 100;
            $percent = round($percent, 2);
            $size = humanSize($this->downloaded);
            $speed = humanSize($speed);
            // "\r\033[2K" returns to column 0 and clears the line so the status is redrawn in place.
            echo "\r\033[2K" . "已下载: $size, $percent%; 当前速度: $speed/s";
        }

        /**
         * Called when a worker has finished its segment: gives it the next segment,
         * or parks it; once every worker is parked the download is complete and the
         * workers are told to exit.
         *
         * @param swoole_process $process worker that finished
         * @param mixed          $data    unused payload
         */
        protected function onTaskFinished($process, $data)
        {
            if ($this->distributed < $this->length) {
                $this->distributeSegment($process);
                return;
            }
            $this->available_worker_queue->enqueue($process);
            if ($this->available_worker_queue->count() == count($this->worker_pool)) {
                dump('文件下载完成');
                $this->shutdownWorkers();
            }
        }

        /**
         * Sends the 'exit' message to every worker in the pool.
         */
        protected function shutdownWorkers()
        {
            foreach ($this->worker_pool as $worker) {
                $worker->write(serialize([
                    'type' => 'exit', 'data' => ''
                ]));
            }
        }

        /**
         * Builds (but does not start) one worker process.
         *
         * The worker listens on its pipe: a 'new-segment' message makes it download
         * that range and answer with 'taskFinished'; an 'exit' message terminates it.
         *
         * @param int|null $index worker number, used for the process name and log line
         * @return swoole_process
         */
        protected function createProcess($index = null)
        {
            $process = new swoole_process(function (swoole_process $process) use ($index) {
                swoole_set_process_name('download worker' . $index);
                echo sprintf('worker:%s, pid:%s', $index, posix_getpid()) . PHP_EOL;
                $downloader = null;
                // Register the pipe on the worker's event loop to receive commands from the master.
                swoole_event_add($process->pipe, function ($pipe) use ($process, &$downloader) {
                    $raw = $process->read();
                    if ($raw === false || $raw === '') return;
                    $message = unserialize($raw, ['allowed_classes' => false]);
                    if (!is_array($message) || !isset($message['type'])) return;
                    $type = $message['type'];
                    $data = $message['data'] ?? null;
                    // The download below blocks this callback until the segment is done (known limitation).
                    if ($type == 'new-segment') {
                        $downloader = new Downloader($process, $this->client, $data['url'], $data['file'], $data['start'], $data['length']);
                        $downloader->download();
                        $process->write(serialize([
                            'type' => 'taskFinished',
                            'data' => ''
                        ]));
                        $downloader = null;
                        return;
                    }
                    if ($type == 'exit') exit(0);
                });
            }, false, 2);

            return $process;
        }
    }
    
    
    /**
     * Downloads one byte range of a remote file into the matching position of a
     * local file and reports progress to the master through the worker's pipe.
     */
    class Downloader
    {

        protected $client; // Guzzle client instance

        protected $process; // swoole process this downloader runs in; used to message the master

        protected $file; // path of the local target file

        protected $url; // remote URL

        protected $start; // first byte offset of the segment

        protected $length; // number of bytes in the segment

        protected $offset; // file offset reached so far

        /**
         * @param swoole_process $process worker process whose pipe carries the progress messages
         * @param Client         $client  Guzzle client used for the range request
         * @param string         $url     remote URL
         * @param string         $file    existing local file to write into
         * @param int            $start   first byte offset of the segment
         * @param int            $length  number of bytes to download
         */
        public function __construct(swoole_process $process, Client $client, $url, $file, $start, $length)
        {
            $this->process = $process;
            $this->client = $client;
            $this->url = $url;
            $this->file = $file;
            $this->start = $start;
            $this->length = $length;
        }


        /**
         * Fetches the segment and writes it at its offset in the target file,
         * sending a 'range' message with the byte count after every chunk.
         *
         * @throws RuntimeException when the target file cannot be opened
         */
        public function download()
        {
            $this->offset = $this->start;
            if ($this->length <= 0) return;

            $res = fopen($this->file, 'rb+');
            if ($res === false) {
                throw new RuntimeException('cannot open target file: ' . $this->file);
            }

            try {
                fseek($res, $this->start, SEEK_SET);
                // Byte ranges are inclusive on both ends, hence the "- 1".
                $lastByte = $this->start + $this->length - 1;
                $resp = $this->client->request('GET', $this->url, [
                    'stream' => true,
                    'headers' => [
                        'Range' => 'bytes=' . $this->start . '-' . $lastByte,
                    ],
                ]);
                $body = $resp->getBody();
                $loaded = 0;
                // eof() alone was observed not to end the loop, so the byte count is the primary stop condition.
                while ($loaded < $this->length && !$body->eof()) {
                    // Read in chunks of at most 5 KB, never past the end of this segment.
                    $size = min(1024 * 5, $this->length - $loaded);
                    $data = $body->read($size);
                    $bytes = strlen($data);
                    if ($bytes === 0) continue;
                    $loaded += $bytes;
                    $this->offset += $bytes;
                    fwrite($res, $data);
                    $this->process->write(serialize([
                        'type' => 'range',
                        'data' => $bytes
                    ]));
                }
            } finally {
                // Release the handle even when the request or a read throws.
                fclose($res);
            }
            dump($this->length / 1024 / 1024 . 'MB下载完成');
        }
    }
    
    /**
     * Formats a byte count with the largest fitting unit out of B, KB, MB and GB,
     * rounded to two decimals (e.g. 1536 becomes "1.5KB").
     *
     * Division stops at the last unit, so anything from 1024 GB upwards is still
     * expressed in GB rather than being divided once more and mislabelled.
     *
     * @param int|float $size size in bytes
     * @return string number immediately followed by its unit
     */
    function humanSize($size) {
        $units = ['B', 'KB', 'MB', 'GB'];
        $last = count($units) - 1;
        $index = 0;
        while ($size >= 1024 && $index < $last) {
            $size /= 1024;
            $index++;
        }
        return round($size, 2) . $units[$index];
    }
    
    
    

    相关文章

      网友评论

        本文标题:php实现多进程下载百度网盘文件

        本文链接:https://www.haomeiwen.com/subject/sslzhftx.html