最近的工作涉及到语音识别相关的研究,因此先简单地梳理一下WAV格式和PCM。以前用C++实现过mp3 player,这次正好回顾一下当时的代码实现,把WAV播放的部分也看了一下。
什么是PCM
直接上百度百科的定义:PCM(Pulse Code Modulation)脉冲编码调制是数字通信的编码方式之一。主要过程是将话音、图像等模拟信号每隔一定时间进行取样,使其离散化,同时将抽样值按分层单位四舍五入取整量化,同时将抽样值按一组二进制码来表示抽样脉冲的幅值。
简单地说,PCM就是直接对原始的模拟声波信号进行采样后得到的数据。
什么是 采样率 sample rate
采样率是指一秒钟内从模拟信号中采样了多少个sample。
举例:sample rate为16000 Hz,意思是一秒钟从模拟信号中采样16000个点(sample)。
什么是比特率 bits per second
从字面定义,即:一秒钟的声音信号占有的比特数。对未压缩的PCM而言,比特率 = 采样率 × 每个sample的bit数 × 声道数,例如16000 Hz、16 bit、单声道时为256000 bit/s。
简言之:比特率表征采样数据的逼真程度,比特率越高,数据的逼真程度越高。
使用 alsa sound 进行 pcm 数据的播放
关于alsa sound
从网上找了alsa sound的解释如下:ALSA(Advanced Linux Sound Architecture)是linux上主流的音频结构,在没有出现ALSA架构之前,一直使用的是OSS(Open Sound System)音频架构。
简单地说,alsa sound是linux下的音频框架,使用alsa sound的接口即可播放pcm数据。这里正好可以用这个接口来体验pcm的声音效果,以便对"把pcm数据送到电脑喇叭播放"有一个真切的体验。
使用 alsa sound 播放pcm
- 计算320ms的pcm数据长度,因为我的代码是每次播放320ms长度的sample数据,所以先计算出320ms的采样数据长度。
直接给出公式如下:
简单的说下:
sample_rate表示1秒内每个声道采样的sample数:单声道时1秒的采样数 = 采样率,多声道时1秒的采样数 = 采样率*声道数。因此320ms的采样数 = 1秒的采样数*320/1000 = 采样率*声道数*32/100,即:
samples_per_320ms_ = header_.sample_rate*header_.channels*32/100;
wav格式
以.wav为后缀的文件就是wav格式的文件,先上百度百科的定义:是微软公司专门为Windows开发的一种标准数字音频文件,该文件能记录各种单声道或立体声的声音信息,并能保证声音不失真。
简单的说:WAV文件就是在PCM数据前加了个PCM的信息说明头,仅此而已
这个头部数据占有字节内容如下:
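"RIFF" 4 bytes
RIFF块大小(chunk size) 4 bytes
"WAVE" 4 bytes
"fmt " 4 bytes
fmt块大小(chunk size,常见为16或18) 4 bytes
格式标签(2)、声道数(2)、采样率(4)、码率(每秒字节数,4)、块对齐(2)、一个sample占的bit数(2) 共16 bytes
extra 2 bytes(仅当fmt块大小 == 18时才有)
fact块(可选):"fact" 4 bytes + 块大小 4 bytes + 数据 4 bytes
"data" 4 bytes
data块大小(即后面PCM数据的字节数) 4 bytes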
---------------
到这一步,就计算出接下来PCM数据的长度,接下来的位置就是PCM数据了
最后附上使用alsa sound播放pcm的源码
// Parser for a RIFF/WAVE image held entirely in memory.
//
// The constructor locates and decodes the header; Get320msSample() then
// hands out the PCM payload in consecutive 320 ms chunks. The parser keeps
// a pointer to the caller's buffer and never copies or frees it.
class WAVParser {
public:
    // Values decoded from the "fmt " chunk plus the location of the "data"
    // chunk payload. Declared as a named struct (not `typedef struct {...}`)
    // because an unnamed struct that gets its linkage name from a typedef
    // must not declare member functions or be used in ways C cannot express.
    struct FrameHeader {
        int format_tag;            // 16-bit format tag from the fmt chunk
        int channels;              // number of interleaved channels
        unsigned int sample_rate;  // samples per second, per channel
        unsigned int bit_rate;     // 32-bit field that follows the sample rate
                                   // (in the standard WAVE layout this is the
                                   // average number of BYTES per second)
        int block_align;           // 16-bit block-align field
        int bits_per_sample;       // bits in one sample of one channel
        int data_pos;              // byte offset of the PCM payload
        int data_size;             // payload size in bytes
    };

    // Placeholder for per-frame information; Init() currently does nothing.
    struct FrameInfo {
        int Init(const FrameHeader& header);
    };

    // Checks that [data, data+len) contains a parsable WAV header.
    // Returns 0 on success and -1 otherwise; frame_len is reset to 0.
    static int GetFrame(const unsigned char* data, int len, int& frame_len, FrameInfo&info);

    // Binds the parser to the buffer [data, data+len) and parses its header.
    WAVParser(const unsigned char* data, int len);
    ~WAVParser();

    // Copies the next 320 ms of PCM into `data` (capacity `len` bytes) and
    // reports the sample count, channel count and sample rate.
    // Returns 0 on success and -1 when no full chunk is available.
    int Get320msSample(const unsigned char* data, int len, int& samples, int& channels, int&samplerate);

protected:
    // Returns the offset of the first "RIFF" tag in the buffer, or -1.
    static int FindFrameHeader(const unsigned char* data, int len);

    // Decodes the header that starts at `data` into `header`; 0 on success.
    static int ParseFrameHeader(const unsigned char* data, int len, FrameHeader& header);

    // Debug helper: dumps the decoded fmt fields through PRINT.
    // Signed fields use %d, unsigned fields use %u, matching their types.
    static void TypeFrameHeader(const FrameHeader& header) {
        PRINT("=============================================");
        PRINT("header.format_tag = %d", header.format_tag);
        PRINT("header.channels = %d", header.channels);
        PRINT("header.sample_rate = %u", header.sample_rate);
        PRINT("header.bit_rate = %u", header.bit_rate);
        PRINT("header.block_align = %d", header.block_align);
        PRINT("header.bits_per_sample = %d", header.bits_per_sample);
    }

private:
    int index_;                  // bytes of payload already handed out
    FrameHeader header_;         // decoded header
    const unsigned char* data_;  // caller-owned WAV image
    const int data_len_;         // size of data_ in bytes
    int samples_per_320ms_;      // interleaved samples in one 320 ms chunk
};
///////////////////////////////////
//////////////WAV//////////////////
///////////////////////////////////
// Stub initialiser: the header is accepted but not inspected, and the call
// always reports success (0).
int WAVParser::FrameInfo::Init(const FrameHeader& header) {
    (void)header;  // intentionally unused
    return 0;
}
// Checks whether [data, data+len) contains a WAV header that parses cleanly.
//
// frame_len is always reset to 0 and `info` is left untouched: this function
// only validates, it does not report frame geometry.
// Returns 0 when a RIFF header is found and parsed, -1 otherwise.
int WAVParser::GetFrame(const unsigned char* data, int len, int& frame_len, FrameInfo&info) {
    (void)info;  // not filled in by the current implementation
    frame_len = 0;
    if (!data || len <= 0)
        return -1;

    const int pos = FindFrameHeader(data, len);
    if (pos < 0)
        return -1;

    // The header starts `pos` bytes into the buffer, so only len - pos bytes
    // remain for the parser to look at.
    FrameHeader header;
    if (ParseFrameHeader(data + pos, len - pos, header) != 0)
        return -1;
    //TypeFrameHeader(header);
    return 0;
}
// Scans [data, data+len) for the 4-byte tag "RIFF".
//
// Returns the byte offset of the first match, or -1 when the tag does not
// occur (including when the buffer is null or shorter than 4 bytes).
int WAVParser::FindFrameHeader(const unsigned char* data, int len) {
    if (!data || len < 4)
        return -1;

    const char* RIFF = "RIFF";
    // The last offset at which a 4-byte tag still fits is len - 4, so the
    // bound is inclusive; a strict '<' would miss a tag ending exactly at
    // the end of the buffer.
    for (int pos = 0; pos <= len - 4; ++pos) {
        if (memcmp(data + pos, RIFF, 4) == 0) {
            PRINT("found header");
            return pos;
        }
    }
    return -1;
}
// Reads an unsigned 16-bit little-endian value from p[0..1].
static unsigned int WavLoadLE16(const unsigned char* p) {
    return static_cast<unsigned int>(p[0]) |
           (static_cast<unsigned int>(p[1]) << 8);
}

// Reads an unsigned 32-bit little-endian value from p[0..3].
static unsigned int WavLoadLE32(const unsigned char* p) {
    return static_cast<unsigned int>(p[0]) |
           (static_cast<unsigned int>(p[1]) << 8) |
           (static_cast<unsigned int>(p[2]) << 16) |
           (static_cast<unsigned int>(p[3]) << 24);
}

// Decodes the WAV header that starts at data[0].
//
// Expected layout: "RIFF" <size> "WAVE" "fmt " <fmt size> <16 bytes of fmt
// fields> [2 extra bytes when fmt size == 18] [optional 12-byte "fact"
// chunk] ... "data" <data size> <PCM payload>.
//
// data    first byte of the "RIFF" tag
// len     number of readable bytes starting at `data`
// header  receives the decoded fields; data_pos is the offset of the PCM
//         payload relative to `data`, data_size its length in bytes
//
// Returns 0 on success, -1 when the buffer is too short, a tag does not
// match, or no "data" chunk is found. On failure `header` may be partially
// written.
int WAVParser::ParseFrameHeader(const unsigned char* data, int len, FrameHeader& header) {
    // RIFF(4) + size(4) + WAVE(4) + "fmt "(4) + size(4) + 16 bytes of fields.
    const int kFixedHeaderBytes = 36;
    if (!data || len < kFixedHeaderBytes) {
        PRINT("header too short");
        return -1;
    }

    int pos = 0;
    if (memcmp(data + pos, "RIFF", 4) != 0) {
        PRINT("not RIFF");
        return -1;
    }
    pos += 4;
    pos += 4;  // RIFF chunk size: not needed by the parser
    if (memcmp(data + pos, "WAVE", 4) != 0) {
        PRINT("not WAVE");
        return -1;
    }
    pos += 4;
    if (memcmp(data + pos, "fmt ", 4) != 0) {
        PRINT("not fmt");
        return -1;
    }
    pos += 4;
    const unsigned int fmt_size = WavLoadLE32(data + pos);
    pos += 4;

    header.format_tag = WavLoadLE16(data + pos);
    pos += 2;
    header.channels = WavLoadLE16(data + pos);
    pos += 2;
    header.sample_rate = WavLoadLE32(data + pos);
    pos += 4;
    header.bit_rate = WavLoadLE32(data + pos);
    pos += 4;
    header.block_align = WavLoadLE16(data + pos);
    pos += 2;
    header.bits_per_sample = WavLoadLE16(data + pos);
    pos += 2;

    // An 18-byte fmt chunk carries two extra bytes after the 16 fixed ones.
    if (fmt_size == 18) {
        pos += 2;
    }
    // Optional fact chunk: tag + size + 4 bytes of data.
    if (pos <= len - 12 && memcmp(data + pos, "fact", 4) == 0) {
        PRINT("fact");
        pos += 12;
    }
    // Scan forward for the data chunk, never letting the 8-byte chunk
    // header (tag + size) extend past the end of the buffer.
    while (pos <= len - 8 && memcmp(data + pos, "data", 4) != 0) {
        pos++;
    }
    if (pos > len - 8) {
        PRINT("no data chunk");
        return -1;
    }
    pos += 4;
    // All four size bytes are read here; the low byte must come from
    // data[pos], not from whatever was decoded last.
    unsigned int data_size = WavLoadLE32(data + pos);
    pos += 4;
    PRINT("data chunk_size = %u", data_size);

    // Never advertise more payload than the buffer holds (truncated files,
    // or writers that leave a placeholder size in the header).
    const unsigned int available = static_cast<unsigned int>(len - pos);
    if (data_size > available) {
        data_size = available;
    }
    header.data_size = static_cast<int>(data_size);
    header.data_pos = pos;
    return 0;
}
// Binds the parser to the caller-owned buffer [data, data+len) and decodes
// its WAV header.
//
// A missing or malformed header trips an assert in debug builds. In release
// builds the parser is left in a state where Get320msSample() reports that
// no data is available.
WAVParser::WAVParser(const unsigned char* data, int len)
    : index_(0), header_(), data_(data), data_len_(len), samples_per_320ms_(0) {
    const int pos = FindFrameHeader(data_, data_len_);
    assert(pos >= 0);

    int ret = -1;
    if (pos >= 0) {
        // Only data_len_ - pos bytes remain once the header offset is skipped.
        ret = ParseFrameHeader(data_ + pos, data_len_ - pos, header_);
    }
    assert(ret == 0);

    if (ret != 0) {
        // Discard any partially decoded fields; a negative payload size makes
        // the "enough data left?" check in Get320msSample() fail.
        header_ = FrameHeader();
        header_.data_size = -1;
        return;
    }

    // ParseFrameHeader reports data_pos relative to the RIFF tag, while
    // Get320msSample() indexes data_ directly, so rebase the offset.
    header_.data_pos += pos;
    samples_per_320ms_ = header_.sample_rate*header_.channels*32/100;
}
// Nothing to release: the parser allocates no resources of its own.
WAVParser::~WAVParser() {}
int WAVParser::Get320msSample(const unsigned char* data, int len, int& samples, int& channels, int&samplerate) {
samples = 0;
channels = 0;
const unsigned char* temp = &data_[header_.data_pos];
temp += index_;
int size = samples_per_320ms_*header_.bits_per_sample/8;
if (index_ + size > header_.data_size || size > len) {
return -1;
}
memcpy(data, temp, size);
index_ += size;
samples = samples_per_320ms_;
channels = header_.channels;
samplerate = header_.sample_rate;
return 0;
}
播放PCM的相关代码
// Plays a WAV image held in memory: parses it with WAVParser and feeds the
// PCM payload to a PCMPlayer one 320 ms chunk at a time until the parser
// reports that no further chunk is available.
//
// The player is opened at 16000 Hz mono; each play() call is given the rate
// and channel count reported by the parser.
void wav_play(const unsigned char* data, int len) {
    const int kChunkCapacity = 1024*1024;  // scratch buffer size in bytes

    PCMPlayer player(16000, 1);
    WAVParser parser(data, len);
    unsigned char* chunk = new unsigned char[kChunkCapacity];

    int samples;
    int channels;
    int samplerate;
    for (;;) {
        const int status = parser.Get320msSample(chunk, kChunkCapacity, samples, channels, samplerate);
        if (status != 0) {
            break;
        }
        player.play(reinterpret_cast<short*>(chunk), samples, samplerate, channels);
    }

    delete [] chunk;
}
#ifndef _PLAY_PCM_H
#define _PLAY_PCM_H
#include <alsa/asoundlib.h>
#include "util.h"
class PCMPlayer {
public:
PCMPlayer(int sample_rate, int channels):
channels_(channels),
sample_rate_(sample_rate),
init_(false) {
snd_pcm_hw_params_t *params;
int rc = snd_pcm_open(&handle_, "default",SND_PCM_STREAM_PLAYBACK, 0);
if (rc < 0) {
fprintf(stderr, "unable to open pcm device: %s\n", snd_strerror(rc));
exit(1);
}
/* Allocate a hardware parameters object. */
snd_pcm_hw_params_alloca(¶ms);
/* Fill it in with default values. */
snd_pcm_hw_params_any(handle_, params);
/* Set the desired hardware parameters. */
/* Interleaved mode */
snd_pcm_hw_params_set_access(handle_, params, SND_PCM_ACCESS_RW_INTERLEAVED);
/* Signed 16-bit little-endian format */
snd_pcm_hw_params_set_format(handle_, params, SND_PCM_FORMAT_S16_LE);
/* Two channels (stereo) */
snd_pcm_hw_params_set_channels(handle_, params, channels_);
/* 44100 bits/second sampling rate (CD quality) */
unsigned int val = sample_rate_;
int dir;
snd_pcm_hw_params_set_rate_near(handle_, params, &val, &dir);
/* Set period size to 32 frames. */
snd_pcm_uframes_t frames = 0;
snd_pcm_hw_params_set_period_size_near(handle_, params, &frames, &dir);
/* Write the parameters to the driver */
rc = snd_pcm_hw_params(handle_, params);
if (rc < 0) {
fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
exit(1);
}
/* Use a buffer large enough to hold one period */
snd_pcm_hw_params_get_period_size(params, &frames, &dir);
frame_size_ = frames;
}
void play(short* sample, int samples, int sample_rate, int channels) {
SetSamplerateAndChannel(sample_rate, channels);
int pos = 0;
int size = frame_size_*channels_;
while (pos < samples) {
int rc = snd_pcm_writei(handle_, &sample[pos], frame_size_);
if (rc == -EPIPE) {
/* EPIPE means underrun */
fprintf(stderr, "underrun occurred\n");
snd_pcm_prepare(handle_);
} else if (rc < 0) {
fprintf(stderr, "error from writei: %s\n", snd_strerror(rc));
} else if (rc != (int)frame_size_) {
fprintf(stderr, "short write, write %d frames\n", rc);
}
pos += size;
}
}
~PCMPlayer() {
snd_pcm_drain(handle_);
snd_pcm_close(handle_);
}
protected:
void SetSamplerateAndChannel(int sample_rate, int channels) {
if (sample_rate_ != sample_rate || channels_ != channels) {
PRINT("SetSamplerateAndChannel: sample rate = %d chs = %d", sample_rate, channels);
sample_rate_ = sample_rate;
channels_ = channels;
/* Allocate a hardware parameters object. */
snd_pcm_hw_params_alloca(¶ms_);
/* Fill it in with default values. */
snd_pcm_hw_params_any(handle_, params_);
/* Set the desired hardware parameters. */
/* Interleaved mode */
snd_pcm_hw_params_set_access(handle_, params_, SND_PCM_ACCESS_RW_INTERLEAVED);
/* Signed 16-bit little-endian format */
snd_pcm_hw_params_set_format(handle_, params_, SND_PCM_FORMAT_S16_LE);
int dir;
unsigned int val = sample_rate_;
snd_pcm_hw_params_set_rate_near(handle_, params_, &val, &dir);
/* Two channels (stereo) */
snd_pcm_hw_params_set_channels(handle_, params_, channels_);
/* Set period size to 32 frames. */
snd_pcm_uframes_t frames = 0;
snd_pcm_hw_params_set_period_size_near(handle_, params_, &frames, &dir);
/* Write the parameters to the driver */
int rc = snd_pcm_hw_params(handle_, params_);
if (rc < 0) {
fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
exit(1);
}
/* Use a buffer large enough to hold one period */
snd_pcm_hw_params_get_period_size(params_, &frames, &dir);
frame_size_ = frames;
PRINT("SetSamplerateAndChannel--<");
sleep(1);
}
}
private:
int channels_;
int sample_rate_;
int frame_size_;
bool init_;
snd_pcm_t *handle_;
snd_pcm_hw_params_t *params_;
};
#endif
网友评论