WAV格式与PCM简介

作者: 沐文乘光 | 来源:发表于2020-07-15 15:11 被阅读0次

最近的工作涉及语音识别相关的研究，因此先简单地梳理一下 WAV 格式和 PCM。以前用 C++ 实现过一个 mp3 player，这次顺便回顾了当时的代码实现，把其中 WAV 播放的部分重新看了一遍。

什么是PCM

直接上百度百科的定义：PCM（Pulse Code Modulation）脉冲编码调制是数字通信的编码方式之一。主要过程是将话音、图像等模拟信号每隔一定时间进行取样，使其离散化，同时将抽样值按分层单位四舍五入取整量化，同时将抽样值按一组二进制码来表示抽样脉冲的幅值。

简单的说，PCM就是直接对原始的模拟声波信号进行采样后得到的数据

什么是采样率 sample rate

采样率是指一秒钟内从模拟信号中采集多少个采样点（sample）。
举例：sample rate 为 16000 Hz，意思是每秒钟从信号中采集 16000 个点（sample）。

什么是比特率 bits per second

从字面定义，即：一秒钟的声音信号所占用的比特数。对未压缩的 PCM 而言，比特率 = 采样率 × 声道数 × 每个 sample 的位数，例如 16000 Hz、单声道、16 bit 的比特率为 256000 bit/s。
简言之：比特率表征采样数据的逼真程度，比特率越高，数据的逼真程度越高。

使用 alsa sound 进行 pcm 数据的播放

关于alsa sound

从网上找了alsa sound的解释如下：ALSA(Advanced Linux Sound Architecture)是linux上主流的音频结构，在没有出现ALSA架构之前，一直使用的是OSS(Open Sound System)音频架构。
简单地说，alsa sound 是 linux 下的音频框架，使用 alsa sound 的接口即可播放 pcm 数据。这里正好可以用这个接口把 pcm 数据送到电脑喇叭播放出来，真切地体验一下 pcm 的声音效果。

使用 alsa sound 播放pcm

计算320ms的pcm数据长度，因为我的代码是每次播放320ms长度的sample数据，所以先计算出320ms的采样数据长度。
直接给出公式如下：

简单的说下：
sample_rate 表示每个声道 1 秒内的采样数（sample 数）。单声道时 1 秒的采样数 = 采样率，多声道时 1 秒的采样数 = 采样率 × 声道数。因此 320ms 的采样数 = 1 秒的采样数 × 320/1000 = 采样率 × 声道数 × 32/100
samples_per_320ms_ = header_.sample_rate*header_.channels*32/100;

wav格式

以.wav为后缀的文件就是wav格式的文件，先上百度百科的定义：是微软公司专门为Windows开发的一种标准数字音频文件，该文件能记录各种单声道或立体声的声音信息，并能保证声音不失真。
简单的说:WAV文件就是在PCM数据前加了个PCM的信息说明头，仅此而已
这个头部数据占有字节内容如下：

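RIFF 4个字节 // 固定标识 "RIFF"
UNKNOWN 4个字节 // RIFF chunk 的长度（小端存储），解析时读取但未使用
WAVE 4 bytes // 固定标识 "WAVE"
fmt 4bytes // 固定标识 "fmt "（注意末尾带一个空格）
UNKNOWN 4bytes // fmt chunk 的长度 chunk_size（小端存储）
格式标签、声道数、采样率、比特率（WAV 头中该字段以“每秒字节数”存储）、block align、一个sample占的bit数 16bytes
extra 2bytes //仅当 fmt 的 chunk_size == 18 时才有
fact 4bytes // 可选的 fact chunk 标识；代码中遇到它时连同其后的 8 个字节一共跳过 12 个字节
data 4bytes // 固定标识 "data"
UNKNOWN 4bytes // data chunk 的长度，即后面 PCM 数据的字节数（小端存储）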
---------------
到这一步，就计算出接下来PCM数据的长度，接下来的位置就是PCM数据了

最后附上解析 WAV 并使用 alsa sound 播放 pcm 的源码

// Parser for a RIFF/WAVE image held in memory. It locates and decodes the
// header and then hands out the PCM payload in fixed 320 ms slices.
// The constructor keeps the caller's pointer (no copy is made), so the buffer
// must outlive the parser.
class WAVParser {
public:
// Values decoded from the "fmt " chunk plus the location of the "data" chunk.
// Declared as a named struct: a typedef of an unnamed struct is a C idiom and
// is restricted by newer C++ standards.
struct FrameHeader {
    int format_tag;            // 16-bit format tag field of the fmt chunk
    int channels;              // number of channels
    unsigned int sample_rate;  // samples per second, per channel
    unsigned int bit_rate;     // the 32-bit rate field that follows sample_rate in the fmt chunk
    int block_align;           // 16-bit block-align field
    int bits_per_sample;       // bits in one sample of one channel
    int data_pos;              // byte offset of the first PCM byte
    int data_size;             // number of PCM bytes in the data chunk
};

// Per-frame information filled by GetFrame(); currently carries no fields.
// Named (not typedef'd-unnamed) because it declares a member function.
struct FrameInfo {

    // Initializes the info from a parsed header. Returns 0 on success.
    int Init(const FrameHeader& header);
};

    // Looks for a WAV header in data[0, len) and parses it.
    // Returns 0 when a header was found and parsed, -1 otherwise.
    static int GetFrame(const unsigned char* data, int len, int& frame_len, FrameInfo&info);

    // Binds the parser to data[0, len) and parses the header immediately.
    WAVParser(const unsigned char* data, int len);
    ~WAVParser();

    // Copies the next 320 ms of PCM into `data` (an OUTPUT buffer of `len`
    // bytes, despite the const qualifier in the signature) and reports the
    // slice's sample count, channel count and sample rate.
    // Returns 0 on success, -1 when no full slice is left or `len` is too small.
    int Get320msSample(const unsigned char* data, int len, int& samples, int& channels, int&samplerate);

protected:
    // Returns the offset of the first "RIFF" tag in data, or -1 if absent.
    static int FindFrameHeader(const unsigned char* data, int len);

    // Decodes the header starting at data[0] into `header`.
    // Returns 0 on success, -1 on a malformed header.
    static int ParseFrameHeader(const unsigned char* data, int len, FrameHeader& header);
    
    // Dumps the decoded fmt fields through PRINT for debugging.
    // Signed fields are printed with %d; only the two unsigned rates use %u.
    static void TypeFrameHeader(const FrameHeader& header) {
        PRINT("=============================================");
        PRINT("header.format_tag = %d", header.format_tag);
        PRINT("header.channels = %d", header.channels);
        PRINT("header.sample_rate = %u", header.sample_rate);
        PRINT("header.bit_rate = %u", header.bit_rate);
        PRINT("header.block_align = %d", header.block_align);
        PRINT("header.bits_per_sample = %d", header.bits_per_sample);
    }

private:
    int index_;                  // bytes of PCM payload already handed out
    FrameHeader header_;         // header decoded by the constructor
    const unsigned char* data_;  // caller-owned WAV image
    const int data_len_;         // size of data_ in bytes
    int samples_per_320ms_;      // samples (all channels) in one 320 ms slice

};
///////////////////////////////////
//////////////WAV//////////////////
///////////////////////////////////
// Placeholder initializer: the header is accepted but nothing is stored yet,
// and success (0) is always reported.
int WAVParser::FrameInfo::Init(const FrameHeader& header) {
    (void)header;  // intentionally unused
    return 0;
}

// Checks whether data[0, len) contains a parsable WAV header.
//
// data      - buffer to inspect.
// len       - number of valid bytes in data.
// frame_len - always reset to 0; no frame length is computed yet.
// info      - currently left untouched.
//
// Returns 0 when a "RIFF" tag was found and the header behind it parsed
// successfully, -1 otherwise (including null or empty input).
int WAVParser::GetFrame(const unsigned char* data, int len, int& frame_len, FrameInfo&info) {
    frame_len = 0;
    (void)info;  // not populated yet

    if (data == 0 || len <= 0)
        return -1;

    const int pos = FindFrameHeader(data, len);
    if (pos < 0)
        return -1;

    // The header starts `pos` bytes into the buffer, so only len - pos bytes
    // remain behind the pointer handed to the parser.
    FrameHeader header;
    if (ParseFrameHeader(&data[pos], len - pos, header) != 0)
        return -1;

    //TypeFrameHeader(header);
    return 0;
}

// Scans data[0, len) for the 4-byte tag "RIFF".
//
// Returns the offset of the first match, or -1 when the tag is absent, the
// buffer is shorter than 4 bytes, or data is null.
int WAVParser::FindFrameHeader(const unsigned char* data, int len) {
    if (data == 0)
        return -1;

    // Last offset at which 4 bytes still fit; the tag may sit exactly there.
    const int last = len - 4;
    for (int pos = 0; pos <= last; ++pos) {
        if (memcmp(&data[pos], "RIFF", 4) == 0) {
            PRINT("found header");
            return pos;
        }
    }
    return -1;
}

// Decodes a 16-bit little-endian unsigned integer from p[0..1].
static unsigned int WavReadLE16(const unsigned char* p) {
    return static_cast<unsigned int>(p[0])
         | (static_cast<unsigned int>(p[1]) << 8);
}

// Decodes a 32-bit little-endian unsigned integer from p[0..3].
static unsigned int WavReadLE32(const unsigned char* p) {
    return static_cast<unsigned int>(p[0])
         | (static_cast<unsigned int>(p[1]) << 8)
         | (static_cast<unsigned int>(p[2]) << 16)
         | (static_cast<unsigned int>(p[3]) << 24);
}

// Parses a RIFF/WAVE header that starts at data[0].
//
// data   - first byte of the "RIFF" tag.
// len    - number of valid bytes starting at data; nothing beyond it is read.
// header - receives the fmt fields, plus data_pos (offset of the first PCM
//          byte, relative to `data`) and data_size (PCM byte count, clamped
//          to what is actually present in the buffer).
//
// Returns 0 on success. Returns -1 when the buffer is too short, one of the
// "RIFF"/"WAVE"/"fmt " tags is missing, or no "data" chunk is found; in that
// case `header` may be partially written.
int WAVParser::ParseFrameHeader(const unsigned char* data, int len, FrameHeader& header) {
    // "RIFF" + size + "WAVE" + "fmt " + size + the 16 mandatory fmt bytes.
    const int kFixedPrefixBytes = 4 + 4 + 4 + 4 + 4 + 16;
    if (data == 0 || len < kFixedPrefixBytes) {
        PRINT("wav header truncated");
        return -1;
    }

    int pos = 0;
    if (memcmp(&data[pos], "RIFF", 4) != 0) {
        PRINT("not RIFF");
        return -1;
    }
    pos += 8;  // tag + RIFF chunk size (the size itself is not needed)

    if (memcmp(&data[pos], "WAVE", 4) != 0) {
        PRINT("not WAVE");
        return -1;
    }
    pos += 4;

    if (memcmp(&data[pos], "fmt ", 4) != 0) {
        PRINT("not fmt");
        return -1;
    }
    pos += 4;

    const unsigned int fmt_size = WavReadLE32(&data[pos]);
    pos += 4;
    if (fmt_size < 16) {
        PRINT("fmt chunk too small");
        return -1;
    }

    // The 16 mandatory bytes are guaranteed present by the length check above.
    header.format_tag      = static_cast<int>(WavReadLE16(&data[pos]));
    header.channels        = static_cast<int>(WavReadLE16(&data[pos + 2]));
    header.sample_rate     = WavReadLE32(&data[pos + 4]);
    header.bit_rate        = WavReadLE32(&data[pos + 8]);
    header.block_align     = static_cast<int>(WavReadLE16(&data[pos + 12]));
    header.bits_per_sample = static_cast<int>(WavReadLE16(&data[pos + 14]));

    // Skip the whole fmt body (16, 18 or larger), honouring RIFF's rule that
    // chunks are padded to an even size. If the body reaches the end of the
    // buffer there is no room left for a data chunk.
    unsigned int remaining = static_cast<unsigned int>(len - pos);
    if (fmt_size >= remaining) {
        PRINT("no data chunk");
        return -1;
    }
    pos += static_cast<int>(fmt_size + (fmt_size & 1u));

    // Walk the remaining chunks by their declared sizes until "data" shows up.
    // Stepping chunk by chunk (instead of scanning byte by byte) cannot run
    // off the buffer and cannot mistake payload bytes for a chunk tag.
    for (;;) {
        if (len - pos < 8) {
            PRINT("no data chunk");
            return -1;
        }
        const unsigned char* chunk_id = &data[pos];
        const unsigned int chunk_size = WavReadLE32(&data[pos + 4]);
        pos += 8;
        remaining = static_cast<unsigned int>(len - pos);

        if (memcmp(chunk_id, "data", 4) == 0) {
            PRINT("data chunk_size = %u", chunk_size);
            // Never report more payload than the buffer really holds.
            header.data_size = static_cast<int>(chunk_size < remaining ? chunk_size : remaining);
            header.data_pos = pos;
            return 0;
        }

        if (memcmp(chunk_id, "fact", 4) == 0) {
            PRINT("fact");
        }
        if (chunk_size >= remaining) {
            PRINT("no data chunk");
            return -1;
        }
        pos += static_cast<int>(chunk_size + (chunk_size & 1u));
    }
}

// Binds the parser to the caller-owned buffer data[0, len) and parses the WAV
// header right away.
//
// Debug builds assert that a header was found and parsed. When asserts are
// compiled out and parsing fails, the object is left in a defined "nothing to
// play" state: every Get320msSample() call then returns -1.
WAVParser::WAVParser(const unsigned char* data, int len)
    : index_(0),
      header_(),
      data_(data),
      data_len_(len),
      samples_per_320ms_(0) {  // initializers follow the member declaration order
    const int pos = FindFrameHeader(data_, data_len_);
    assert(pos >= 0);

    int ret = -1;
    if (pos >= 0) {
        // Only data_len_ - pos bytes remain behind the header start.
        ret = ParseFrameHeader(&data_[pos], data_len_ - pos, header_);
    }
    assert(ret == 0);

    if (ret != 0) {
        header_ = FrameHeader();
        header_.data_size = -1;  // negative size: no readable PCM payload
        return;
    }

    // ParseFrameHeader reports data_pos relative to the header start, while
    // Get320msSample indexes from the start of data_.
    header_.data_pos += pos;
    samples_per_320ms_ = header_.sample_rate*header_.channels*32/100;
}

// Nothing to release: the parser never allocates and does not own data_.
WAVParser::~WAVParser() {}

int WAVParser::Get320msSample(const unsigned char* data, int len, int& samples, int& channels, int&samplerate) {
    samples = 0;
    channels = 0;

    const unsigned char* temp = &data_[header_.data_pos];

    temp += index_;
    int size = samples_per_320ms_*header_.bits_per_sample/8;
    if (index_ + size > header_.data_size || size > len) {
        return -1;
    }

    memcpy(data, temp, size);
    index_ += size;
    samples = samples_per_320ms_;
    channels = header_.channels;
    samplerate = header_.sample_rate;

    return 0;
}

播放PCM的相关代码

// Plays an in-memory WAV image: the parser is asked for 320 ms slices until
// it reports failure, and every slice is handed to the PCM player.
void wav_play(const unsigned char* data, int len) {
    const int kBufferBytes = 1024*1024;  // scratch space for one slice

    PCMPlayer player(16000, 1);
    WAVParser parser(data, len);
    unsigned char* pcm = new unsigned char[kBufferBytes];

    int samples;
    int channels;
    int samplerate;
    for (;;) {
        const int ret = parser.Get320msSample(pcm, kBufferBytes, samples, channels, samplerate);
        if (ret != 0) {
            break;
        }
        player.play(reinterpret_cast<short*>(pcm), samples, samplerate, channels);
    }

    delete[] pcm;
}

#ifndef _PLAY_PCM_H
#define _PLAY_PCM_H
#include <alsa/asoundlib.h>
#include "util.h"
class PCMPlayer {
public:
    PCMPlayer(int sample_rate, int channels):
            channels_(channels),
            sample_rate_(sample_rate),
            init_(false) {

        snd_pcm_hw_params_t *params;
        int rc = snd_pcm_open(&handle_, "default",SND_PCM_STREAM_PLAYBACK, 0);
        if (rc < 0) {
            fprintf(stderr, "unable to open pcm device: %s\n", snd_strerror(rc));
            exit(1);
        }

        /* Allocate a hardware parameters object. */
        snd_pcm_hw_params_alloca(&params);
        /* Fill it in with default values. */
        snd_pcm_hw_params_any(handle_, params);
        /* Set the desired hardware parameters. */
        /* Interleaved mode */
        snd_pcm_hw_params_set_access(handle_, params, SND_PCM_ACCESS_RW_INTERLEAVED);

        /* Signed 16-bit little-endian format */
        snd_pcm_hw_params_set_format(handle_, params, SND_PCM_FORMAT_S16_LE);

        /* Two channels (stereo) */
        snd_pcm_hw_params_set_channels(handle_, params, channels_);

        /* 44100 bits/second sampling rate (CD quality) */
        unsigned int val = sample_rate_;
        int dir;
        snd_pcm_hw_params_set_rate_near(handle_, params, &val, &dir); 

        /* Set period size to 32 frames. */
        snd_pcm_uframes_t frames = 0;
        snd_pcm_hw_params_set_period_size_near(handle_, params, &frames, &dir);
        
        /* Write the parameters to the driver */
        rc = snd_pcm_hw_params(handle_, params);
        if (rc < 0) {
            fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
            exit(1);
        }

        /* Use a buffer large enough to hold one period */
        snd_pcm_hw_params_get_period_size(params, &frames, &dir);
        frame_size_ = frames;
    }

    void play(short* sample, int samples, int sample_rate, int channels) {
        SetSamplerateAndChannel(sample_rate, channels);

        int pos = 0;
        int size = frame_size_*channels_;
        while (pos < samples) {
            int rc = snd_pcm_writei(handle_, &sample[pos], frame_size_);
            if (rc == -EPIPE) {
                /* EPIPE means underrun */
                fprintf(stderr, "underrun occurred\n");
                snd_pcm_prepare(handle_);
            } else if (rc < 0) {
                fprintf(stderr, "error from writei: %s\n", snd_strerror(rc));
            }  else if (rc != (int)frame_size_) {
                fprintf(stderr, "short write, write %d frames\n", rc);
            }
            pos += size;
        }
    }

    ~PCMPlayer() {
        snd_pcm_drain(handle_);
        snd_pcm_close(handle_);
    }
    
protected:
    void SetSamplerateAndChannel(int sample_rate, int channels) {
        if (sample_rate_ != sample_rate || channels_ != channels) {
            PRINT("SetSamplerateAndChannel: sample rate = %d  chs = %d", sample_rate, channels);
            sample_rate_ = sample_rate;
            channels_ = channels;

            /* Allocate a hardware parameters object. */
            snd_pcm_hw_params_alloca(&params_);
            /* Fill it in with default values. */
            snd_pcm_hw_params_any(handle_, params_);
            /* Set the desired hardware parameters. */
            /* Interleaved mode */
            snd_pcm_hw_params_set_access(handle_, params_, SND_PCM_ACCESS_RW_INTERLEAVED);

            /* Signed 16-bit little-endian format */
            snd_pcm_hw_params_set_format(handle_, params_, SND_PCM_FORMAT_S16_LE);

            int dir;
            unsigned int val = sample_rate_;
            snd_pcm_hw_params_set_rate_near(handle_, params_, &val, &dir);

            /* Two channels (stereo) */
            snd_pcm_hw_params_set_channels(handle_, params_, channels_);

            /* Set period size to 32 frames. */
            snd_pcm_uframes_t frames = 0;
            snd_pcm_hw_params_set_period_size_near(handle_, params_, &frames, &dir);

            /* Write the parameters to the driver */
            int rc = snd_pcm_hw_params(handle_, params_);
            if (rc < 0) {
                fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
                exit(1);
            }

            /* Use a buffer large enough to hold one period */
            snd_pcm_hw_params_get_period_size(params_, &frames, &dir);
            frame_size_ = frames;

            PRINT("SetSamplerateAndChannel--<");
            sleep(1);
        }
    }

private:
    int channels_;
    int sample_rate_;
    int frame_size_;
    bool init_;
    snd_pcm_t *handle_;
    snd_pcm_hw_params_t *params_;
};

#endif

WAV格式与PCM简介

什么是PCM

什么是采样率 sample rate

什么是比特率 bits per second

使用 alsa sound 进行 pcm 数据的播放

关于alsa sound

使用 alsa sound 播放pcm

wav格式

最后附播放alsa sound 播放pcm的源码

播放PCM的相关代码

相关文章

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读

WAV格式与PCM简介

什么是PCM

什么是 采样率 sample rate

什么是比特率 bits per second

使用 alsa sound 进行 pcm 数据的播放

关于alsa sound

使用 alsa sound 播放pcm

wav格式

最后附播放alsa sound 播放pcm的源码

播放PCM的相关代码

相关文章

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读

什么是采样率 sample rate