美文网首页
WAV格式与PCM简介

WAV格式与PCM简介

作者: 沐文乘光 | 来源:发表于2020-07-15 15:11 被阅读0次

    最近的工作涉及到语音识别相关的研究,因此先简单地梳理一下WAV格式和PCM。以前用C++实现过mp3 player,这次正好回顾一下当时的代码实现,把其中WAV播放的部分重新看了一遍。

    什么是PCM

    直接上百度百科的定义:PCM(Pulse Code Modulation)脉冲编码调制是数字通信的编码方式之一。主要过程是将话音、图像等模拟信号每隔一定时间进行取样,使其离散化,同时将抽样值按分层单位四舍五入取整量化,同时将抽样值按一组二进制码来表示抽样脉冲的幅值。

    简单的说,PCM就是直接对原始的模拟声波信号 进行 采样 后得到的 数据

    什么是 采样率 sample rate

    采样率是指一秒钟内从模拟信号中采样了多少个sample。
    举例:sample rate为16000 Hz,意思是一秒钟从模拟信号中采样16000个点(sample)。

    什么是比特率 bits per second

    从字面定义,即:一秒钟的声音信号占有的比特数。对PCM而言:比特率 = 采样率 * 声道数 * 每个sample占的bit数,例如16000 Hz、单声道、16bit的PCM,比特率 = 16000*1*16 = 256000 bit/s。
    简言之:比特率表征采样数据的信息逼真程度,比特率越高,数据的逼真程度越高。

    使用 alsa sound 进行 pcm 数据的播放

    关于alsa sound

    从网上找了alsa sound的解释如下:ALSA(Advanced Linux Sound Architecture)是linux上主流的音频结构,在没有出现ALSA架构之前,一直使用的是OSS(Open Sound System)音频架构。
    简单的说,alsa sound是linux下的音频框架,使用alsa sound的 接口,即可以播放pcm数据。这里,正好可以用这个接口,来体验pcm的声音效果,以便有个 真切的 将pcm传到 电脑喇叭的 声音 体验

    使用 alsa sound 播放pcm
    • 计算320ms的pcm数据长度,因为我的代码是每次播放320ms长度的sample数据,所以先计算出320ms的采样数据长度。
      直接给出公式如下:
    简单的说下:
    sample_rate 表示每个声道1秒内采样的sample数:单声道时1秒的采样数=采样率,多声道时1秒的采样数=采样率*声道数(channels)。320ms的采样数 = 1秒的采样数*320/1000 = 采样率*声道数*32/100
    samples_per_320ms_ = header_.sample_rate*header_.channels*32/100;
    

    wav格式

    以.wav为后缀的文件就是wav格式的文件,先上百度百科的定义:是微软公司专门为Windows开发的一种标准数字音频文件,该文件能记录各种单声道或立体声的声音信息,并能保证声音不失真。
    简单的说:WAV文件就是在PCM数据前加了个PCM的信息说明头,仅此而已
    这个头部数据占有字节内容如下:

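    "RIFF" 标识 4个字节
    RIFF chunk 大小(文件总长度 - 8) 4个字节,小端序
    "WAVE" 标识 4 bytes
    "fmt " 标识 4bytes(注意末尾有一个空格)
    fmt chunk 大小(即下文的chunk_size,通常为16或18) 4bytes,小端序
    格式标签(2)、声道数(2)、采样率(4)、平均每秒字节数(4,即代码中的bit_rate字段)、块对齐(2)、一个sample占的bit数(2) 共16bytes
    extra 2bytes //仅当fmt的chunk_size == 18时才有
    fact chunk 可选,共12bytes("fact"标识4bytes + 大小4bytes + 数据4bytes)
    "data" 标识 4bytes
    data chunk 大小(即PCM数据的字节数) 4bytes,小端序
    ---------------
    到这一步,就得到了接下来PCM数据的长度,接下来的位置就是PCM数据了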
    

    最后附上使用alsa sound播放pcm的源码

    /**
     * Parser for a RIFF/WAVE image held entirely in memory.
     *
     * The constructor locates the "RIFF" tag in the supplied buffer and decodes
     * the header; Get320msSample() then hands out the PCM payload in consecutive
     * 320 ms slices. The buffer is borrowed, not copied or freed, so it must
     * outlive the parser.
     */
    class WAVParser {
    public:
        /**
         * Fields decoded from the "fmt " and "data" chunks.
         *
         * The structs below carry real tag names instead of
         * `typedef struct { ... } Name;`: an unnamed struct that declares a
         * member function (FrameInfo::Init) may not take its name from a typedef
         * since C++20, and compilers already warn about it.
         */
        struct FrameHeader {
            int format_tag;            // 16-bit format tag from the fmt chunk
            int channels;              // number of interleaved channels
            unsigned int sample_rate;  // samples per second, per channel
            // 32-bit field that follows the sample rate in the fmt chunk.
            // NOTE(review): the canonical WAVE layout defines this field as
            // average *bytes* per second, despite the name used here.
            unsigned int bit_rate;
            int block_align;           // 16-bit block alignment field
            int bits_per_sample;       // bits in one sample of one channel
            int data_pos;              // offset of the first PCM byte
            int data_size;             // PCM payload size in bytes
        };

        /** Placeholder for per-frame information; Init() currently does nothing. */
        struct FrameInfo {

            int Init(const FrameHeader& header);
        };

        /**
         * Checks that a parsable WAV header exists in data[0, len).
         * @param frame_len always set to 0 by the current implementation.
         * @param info      not modified by the current implementation.
         * @return 0 when a header was found and parsed, -1 otherwise.
         */
        static int GetFrame(const unsigned char* data, int len, int& frame_len, FrameInfo&info);

        /**
         * Parses the WAV image in data[0, len). The buffer is not copied.
         */
        WAVParser(const unsigned char* data, int len);
        ~WAVParser();

        /**
         * Copies the next 320 ms of PCM into `data` (capacity `len` bytes).
         * @param samples    receives the number of samples copied (all channels).
         * @param channels   receives the channel count from the header.
         * @param samplerate receives the sample rate from the header.
         * @return 0 on success, -1 when a full slice is not available or does
         *         not fit in `len`.
         */
        int Get320msSample(const unsigned char* data, int len, int& samples, int& channels, int&samplerate);

    protected:
        /** @return offset of the "RIFF" tag inside data, or -1 if absent. */
        static int FindFrameHeader(const unsigned char* data, int len);

        /**
         * Decodes the header starting at data[0] into `header`.
         * @return 0 on success, -1 on failure.
         */
        static int ParseFrameHeader(const unsigned char* data, int len, FrameHeader& header);

        /** Dumps the format fields of `header` through PRINT. */
        static void TypeFrameHeader(const FrameHeader& header) {
            PRINT("=============================================");
            // %d for the signed int fields, %u only for the unsigned ones.
            PRINT("header.format_tag = %d", header.format_tag);
            PRINT("header.channels = %d", header.channels);
            PRINT("header.sample_rate = %u", header.sample_rate);
            PRINT("header.bit_rate = %u", header.bit_rate);
            PRINT("header.block_align = %d", header.block_align);
            PRINT("header.bits_per_sample = %d", header.bits_per_sample);
        }

    private:
        int index_;                  // bytes of PCM payload already handed out
        FrameHeader header_;         // header decoded by the constructor
        const unsigned char* data_;  // borrowed WAV image (not owned)
        const int data_len_;         // size of data_ in bytes
        int samples_per_320ms_;      // samples (all channels) in one 320 ms slice

    };
    ///////////////////////////////////
    //////////////WAV//////////////////
    ///////////////////////////////////
    // Stub initializer: the header is accepted but not inspected, and the call
    // always reports success (0).
    int WAVParser::FrameInfo::Init(const FrameHeader& header) {
        (void)header;
        return 0;
    }
    
    /**
     * Checks whether data[0, len) contains a WAV header that can be parsed.
     *
     * @param data      buffer to search.
     * @param len       number of bytes in data.
     * @param frame_len always reset to 0; no frame length is computed here.
     * @param info      left untouched.
     * @return 0 when a "RIFF" tag is found and the header behind it parses,
     *         -1 otherwise.
     */
    int WAVParser::GetFrame(const unsigned char* data, int len, int& frame_len, FrameInfo&info) {
        (void)info;
        frame_len = 0;

        const int pos = FindFrameHeader(data, len);
        if (pos < 0)
            return -1;

        // The parser is handed a pointer advanced by pos, so the length it is
        // given must shrink by the same amount; the original passed the full
        // len and thereby overstated the readable range.
        FrameHeader header;
        if (ParseFrameHeader(&data[pos], len - pos, header) != 0)
            return -1;

        //TypeFrameHeader(header);
        return 0;
    }
    
    /**
     * Searches data[0, len) for the 4-byte "RIFF" tag.
     *
     * @param data buffer to search; may be NULL, in which case nothing is found.
     * @param len  number of bytes in data.
     * @return offset of the first "RIFF" occurrence, or -1 if there is none.
     */
    int WAVParser::FindFrameHeader(const unsigned char* data, int len) {
        const int kTagLen = 4;
        const char* RIFF = "RIFF";

        if (data == NULL || len < kTagLen)
            return -1;

        // The last position where a 4-byte tag still fits is len - 4, so the
        // bound is inclusive. The original used '<' and therefore never tested
        // that final position (e.g. a buffer that is exactly "RIFF").
        for (int pos = 0; pos <= len - kTagLen; ++pos) {
            if (memcmp(&data[pos], RIFF, kTagLen) == 0) {
                PRINT("found header");
                return pos;
            }
        }
        return -1;
    }
    
    // Decodes the 16-bit little-endian unsigned value stored at p[0..1].
    static unsigned int WavReadLE16(const unsigned char* p) {
        return static_cast<unsigned int>(p[0]) |
               (static_cast<unsigned int>(p[1]) << 8);
    }

    // Decodes the 32-bit little-endian unsigned value stored at p[0..3].
    static unsigned int WavReadLE32(const unsigned char* p) {
        return static_cast<unsigned int>(p[0]) |
               (static_cast<unsigned int>(p[1]) << 8) |
               (static_cast<unsigned int>(p[2]) << 16) |
               (static_cast<unsigned int>(p[3]) << 24);
    }

    /**
     * Decodes a WAV header that starts at data[0].
     *
     * Layout consumed, in order: "RIFF", RIFF size, "WAVE", "fmt ", fmt chunk
     * size, 16 bytes of format fields, 2 extra bytes when the fmt chunk size is
     * 18, an optional 12-byte "fact" chunk, then a byte-wise search for the
     * "data" tag followed by its 4-byte size.
     *
     * Changes relative to the original implementation:
     *  - the low byte of the data chunk size is now read from the buffer (the
     *    original added a stale temporary instead of data[pos]);
     *  - every read is checked against len, and the search for "data" stops at
     *    the end of the buffer instead of running off it;
     *  - data_size is limited to the bytes actually present after the header,
     *    so consumers of data_pos/data_size stay inside the buffer.
     *
     * @param data   start of the header (the "RIFF" tag).
     * @param len    number of readable bytes at data.
     * @param header receives the decoded fields; data_pos is relative to data.
     *               The playback duration in seconds can be derived as
     *               data_size*8 / bits_per_sample / channels / sample_rate.
     * @return 0 on success, -1 if the buffer is truncated or a tag is missing.
     */
    int WAVParser::ParseFrameHeader(const unsigned char* data, int len, FrameHeader& header) {
        // "RIFF" + size + "WAVE" + "fmt " + size + 16 bytes of format fields.
        const int kFixedHeaderSize = 4 + 4 + 4 + 4 + 4 + 16;

        if (data == NULL || len < kFixedHeaderSize) {
            PRINT("header truncated");
            return -1;
        }

        int pos = 0;
        if (memcmp(&data[pos], "RIFF", 4) != 0) {
            PRINT("not RIFF");
            return -1;
        }
        pos += 4;

        // The RIFF chunk size is not needed for playback; skip it.
        pos += 4;

        if (memcmp(&data[pos], "WAVE", 4) != 0) {
            PRINT("not WAVE");
            return -1;
        }
        pos += 4;

        if (memcmp(&data[pos], "fmt ", 4) != 0) {
            PRINT("not fmt");
            return -1;
        }
        pos += 4;

        const unsigned int fmt_size = WavReadLE32(&data[pos]);
        pos += 4;

        header.format_tag = WavReadLE16(&data[pos]);
        pos += 2;
        header.channels = WavReadLE16(&data[pos]);
        pos += 2;
        header.sample_rate = WavReadLE32(&data[pos]);
        pos += 4;
        header.bit_rate = WavReadLE32(&data[pos]);
        pos += 4;
        header.block_align = WavReadLE16(&data[pos]);
        pos += 2;
        header.bits_per_sample = WavReadLE16(&data[pos]);
        pos += 2;

        // extra 2 bytes
        if (fmt_size == 18) {
            pos += 2;
        }

        // fact chunk optional: tag + size + 4 bytes of payload
        if (len - pos >= 4 && memcmp(&data[pos], "fact", 4) == 0) {
            PRINT("fact");
            pos += 12;
        }

        // Scan forward for the "data" tag; 8 bytes (tag + size) must remain.
        while (len - pos >= 8 && memcmp(&data[pos], "data", 4) != 0) {
            pos++;
        }
        if (len - pos < 8) {
            PRINT("no data chunk");
            return -1;
        }
        pos += 4;

        unsigned int chunk_size = WavReadLE32(&data[pos]);
        pos += 4;

        PRINT("data chunk_size = %u", chunk_size);

        // A declared size larger than what the buffer holds would let readers
        // run past the end; keep only what is really there. This also keeps the
        // value inside the range of int.
        const unsigned int available = static_cast<unsigned int>(len - pos);
        if (chunk_size > available) {
            PRINT("data chunk truncated, only %u bytes available", available);
            chunk_size = available;
        }

        header.data_size = static_cast<int>(chunk_size);
        header.data_pos = pos;

        return 0;
    }
    
    /**
     * Locates and decodes the WAV header inside data[0, len).
     *
     * The buffer is borrowed: it is neither copied nor freed and must outlive
     * the parser. In debug builds a missing or unparsable header trips an
     * assert, as before. With asserts compiled out the parser is left in a
     * state where the payload size is negative, so every Get320msSample() call
     * fails instead of reading through uninitialized fields.
     *
     * @param data WAV image.
     * @param len  size of the image in bytes.
     */
    WAVParser::WAVParser(const unsigned char* data, int len)
        // Listed in member declaration order, which is the order C++ runs them.
        : index_(0), header_(), data_(data), data_len_(len), samples_per_320ms_(0) {
        const int pos = FindFrameHeader(data_, data_len_);
        assert(pos >= 0);

        int ret = -1;
        if (pos >= 0) {
            // Only data_len_ - pos bytes are readable from &data_[pos].
            ret = ParseFrameHeader(&data_[pos], data_len_ - pos, header_);
        }
        assert(ret == 0);

        if (ret != 0) {
            header_.data_pos = 0;
            header_.data_size = -1;   // makes every slice request fail
            return;
        }

        // ParseFrameHeader reports data_pos relative to the RIFF tag, while the
        // reader indexes data_ from its very beginning; rebase it so a header
        // that does not start at offset 0 still yields the right payload.
        header_.data_pos += pos;

        samples_per_320ms_ = header_.sample_rate*header_.channels*32/100;
    }
    
    // Empty on purpose: the destructor releases nothing, and in particular does
    // not free the buffer passed to the constructor.
    WAVParser::~WAVParser() {}
    
    /**
     * Copies the next 320 ms slice of PCM into the caller's buffer.
     *
     * @param data       destination buffer. It is declared const only because
     *                   the public signature is kept unchanged; the function
     *                   writes through it, so the caller must pass memory that
     *                   is really writable.
     * @param len        capacity of data in bytes.
     * @param samples    receives the number of samples copied (all channels
     *                   together); 0 on failure.
     * @param channels   receives the channel count; 0 on failure.
     * @param samplerate receives the sample rate; left untouched on failure.
     * @return 0 on success; -1 when the slice size is not positive, does not
     *         fit into len, or fewer bytes than one full slice remain.
     */
    int WAVParser::Get320msSample(const unsigned char* data, int len, int& samples, int& channels, int&samplerate) {
        samples = 0;
        channels = 0;

        if (data == NULL) {
            return -1;
        }

        // Widened so that a large sample count times the sample width cannot
        // overflow int.
        const long long bytes =
            static_cast<long long>(samples_per_320ms_)*header_.bits_per_sample/8;

        // A slice of zero (or negative) bytes would report success without ever
        // advancing index_, and a caller looping until failure would spin
        // forever.
        if (bytes <= 0 || bytes > len) {
            return -1;
        }
        // Written as a subtraction so index_ + size cannot overflow.
        if (bytes > static_cast<long long>(header_.data_size) - index_) {
            return -1;
        }
        const int size = static_cast<int>(bytes);

        const unsigned char* src = &data_[header_.data_pos] + index_;

        // memcpy needs a non-const destination; the original passed the const
        // pointer directly, which C++ rejects at compile time.
        memcpy(const_cast<unsigned char*>(data), src, size);
        index_ += size;

        samples = samples_per_320ms_;
        channels = header_.channels;
        samplerate = header_.sample_rate;

        return 0;
    }
    
    

    播放PCM的相关代码

    // Plays the WAV image in data[0, len): the parser hands out 320 ms slices
    // and each slice is pushed to the player until the parser reports that no
    // further slice is available.
    void wav_play(const unsigned char* data, int len) {
        const int kBufferBytes = 1024*1024;

        PCMPlayer player(16000, 1);
        WAVParser parser(data, len);
        unsigned char* buffer = new unsigned char[kBufferBytes];

        int samples;
        int channels;
        int samplerate;
        while (parser.Get320msSample(buffer, kBufferBytes, samples, channels, samplerate) == 0) {
            player.play(reinterpret_cast<short*>(buffer), samples, samplerate, channels);
        }

        delete []buffer;
    }
    
    #ifndef _PLAY_PCM_H
    #define _PLAY_PCM_H
    #include <alsa/asoundlib.h>
    #include "util.h"
    class PCMPlayer {
    public:
        PCMPlayer(int sample_rate, int channels):
                channels_(channels),
                sample_rate_(sample_rate),
                init_(false) {
    
            snd_pcm_hw_params_t *params;
            int rc = snd_pcm_open(&handle_, "default",SND_PCM_STREAM_PLAYBACK, 0);
            if (rc < 0) {
                fprintf(stderr, "unable to open pcm device: %s\n", snd_strerror(rc));
                exit(1);
            }
    
            /* Allocate a hardware parameters object. */
            snd_pcm_hw_params_alloca(&params);
            /* Fill it in with default values. */
            snd_pcm_hw_params_any(handle_, params);
            /* Set the desired hardware parameters. */
            /* Interleaved mode */
            snd_pcm_hw_params_set_access(handle_, params, SND_PCM_ACCESS_RW_INTERLEAVED);
    
            /* Signed 16-bit little-endian format */
            snd_pcm_hw_params_set_format(handle_, params, SND_PCM_FORMAT_S16_LE);
    
            /* Two channels (stereo) */
            snd_pcm_hw_params_set_channels(handle_, params, channels_);
    
            /* 44100 bits/second sampling rate (CD quality) */
            unsigned int val = sample_rate_;
            int dir;
            snd_pcm_hw_params_set_rate_near(handle_, params, &val, &dir); 
    
            /* Set period size to 32 frames. */
            snd_pcm_uframes_t frames = 0;
            snd_pcm_hw_params_set_period_size_near(handle_, params, &frames, &dir);
            
            /* Write the parameters to the driver */
            rc = snd_pcm_hw_params(handle_, params);
            if (rc < 0) {
                fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
                exit(1);
            }
    
            /* Use a buffer large enough to hold one period */
            snd_pcm_hw_params_get_period_size(params, &frames, &dir);
            frame_size_ = frames;
        }
    
        void play(short* sample, int samples, int sample_rate, int channels) {
            SetSamplerateAndChannel(sample_rate, channels);
    
            int pos = 0;
            int size = frame_size_*channels_;
            while (pos < samples) {
                int rc = snd_pcm_writei(handle_, &sample[pos], frame_size_);
                if (rc == -EPIPE) {
                    /* EPIPE means underrun */
                    fprintf(stderr, "underrun occurred\n");
                    snd_pcm_prepare(handle_);
                } else if (rc < 0) {
                    fprintf(stderr, "error from writei: %s\n", snd_strerror(rc));
                }  else if (rc != (int)frame_size_) {
                    fprintf(stderr, "short write, write %d frames\n", rc);
                }
                pos += size;
            }
        }
    
        ~PCMPlayer() {
            snd_pcm_drain(handle_);
            snd_pcm_close(handle_);
        }
        
    protected:
        void SetSamplerateAndChannel(int sample_rate, int channels) {
            if (sample_rate_ != sample_rate || channels_ != channels) {
                PRINT("SetSamplerateAndChannel: sample rate = %d  chs = %d", sample_rate, channels);
                sample_rate_ = sample_rate;
                channels_ = channels;
    
                /* Allocate a hardware parameters object. */
                snd_pcm_hw_params_alloca(&params_);
                /* Fill it in with default values. */
                snd_pcm_hw_params_any(handle_, params_);
                /* Set the desired hardware parameters. */
                /* Interleaved mode */
                snd_pcm_hw_params_set_access(handle_, params_, SND_PCM_ACCESS_RW_INTERLEAVED);
    
                /* Signed 16-bit little-endian format */
                snd_pcm_hw_params_set_format(handle_, params_, SND_PCM_FORMAT_S16_LE);
    
                int dir;
                unsigned int val = sample_rate_;
                snd_pcm_hw_params_set_rate_near(handle_, params_, &val, &dir);
    
                /* Two channels (stereo) */
                snd_pcm_hw_params_set_channels(handle_, params_, channels_);
    
                /* Set period size to 32 frames. */
                snd_pcm_uframes_t frames = 0;
                snd_pcm_hw_params_set_period_size_near(handle_, params_, &frames, &dir);
    
                /* Write the parameters to the driver */
                int rc = snd_pcm_hw_params(handle_, params_);
                if (rc < 0) {
                    fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
                    exit(1);
                }
    
                /* Use a buffer large enough to hold one period */
                snd_pcm_hw_params_get_period_size(params_, &frames, &dir);
                frame_size_ = frames;
    
                PRINT("SetSamplerateAndChannel--<");
                sleep(1);
            }
        }
    
    private:
        int channels_;
        int sample_rate_;
        int frame_size_;
        bool init_;
        snd_pcm_t *handle_;
        snd_pcm_hw_params_t *params_;
    };
    
    #endif
    

    相关文章

      网友评论

          本文标题:WAV格式与PCM简介

          本文链接:https://www.haomeiwen.com/subject/zpydhktx.html