美文网首页
pydub及其示例

pydub及其示例

作者: KyoDante | 来源:发表于2021-03-04 10:37 被阅读0次

    该库是比较好的音频处理库,适用于音频切分等功能。


    Windows下安装:

    pip install pydub
    

    如果要处理非wav格式的文件(例如mp3),还需要安装ffmpeg(见ffmpeg.org)或libav,两者任选其一即可。

    下面以libav为例,按以下步骤添加:

    1. Download and extract libav from Windows binaries provided here
    2. Add the libav /bin folder to your PATH envvar

    翻译:

    1. 下载libav,因为音频处理底层需要,从上面选一个和系统位数一样的压缩包下载(x64后缀的即可)。
    2. 把libav下面的/bin路径添加到环境系统变量里面的PATH。

    完成后,运行以下导入语句,应该不会出现任何错误;如果没有正确添加libav,则会提示缺少文件之类的错误。

    import pydub
    

    注意:切分的时候,单位是毫秒(ms)
    具体使用请参考网上的教程或者官方文档。
    以下仅提供一些实例,核心类为AudioSegment:


    # Import the main pydub class
    from pydub import AudioSegment
    # Load an audio file, naming the container format explicitly
    wav_file = AudioSegment.from_file(file="wav_file.wav", format="wav")
    # The same load without `format`; above it was passed only for readability
    wav_file = AudioSegment.from_file(file="wav_file.wav")
    # Inspect the type of the loaded object
    type(wav_file)
    

    输出:pydub.audio_segment.AudioSegment

    # Playback of wav audio requires the simpleaudio package.
    # The install step is a shell command, not Python -- run it in a terminal:
    #     pip install simpleaudio
    # Import play function
    from pydub.playback import play
    # Import audio file
    wav_file = AudioSegment.from_file(file="wav_file.wav")
    # Play audio file
    play(wav_file)
    

    音频的一些属性:比如采样率,通道数,数据位宽,最大振幅,时长等。

    # Import audio files
    wav_file = AudioSegment.from_file(file="wav_file.wav")
    two_speakers = AudioSegment.from_file(file="two_speakers.wav")
    # Check number of channels
    wav_file.channels, two_speakers.channels
    # Output: 1, 2
    # Check the frame rate (sample rate)
    wav_file.frame_rate
    # Output: 48000
    # Find the number of bytes per sample
    wav_file.sample_width
    # Output: 2
    # Find the max amplitude
    wav_file.max
    # Output: 8488
    # Duration of audio file in milliseconds
    len(wav_file)
    # Output: 3284
    

    改变一些属性:

    # General pattern (a template, not runnable code): ATTRIBUTENAME is one of
    # sample_width, frame_rate, channels, ... and the result is assigned to a
    # new variable:
    #     changed_audio_segment = audio_segment.set_ATTRIBUTENAME(x)
    # Change sample width to 1 -- use the setter; `sample_width` itself is a
    # plain attribute and cannot be called
    wav_file_width_1 = wav_file.set_sample_width(1)
    wav_file_width_1.sample_width
    # Output: 1

    # Change sample rate
    wav_file_16k = wav_file.set_frame_rate(16000)
    wav_file_16k.frame_rate
    # Output: 16000
    # Change number of channels
    wav_file_1_channel = wav_file.set_channels(1)
    wav_file_1_channel.channels
    # Output: 1
    
    

    操作音频文件(加减音量、标准化、切片、拼接、拆分声道):

    # Import audio file
    wav_file = AudioSegment.from_file("wav_file.wav")
    # Subtracting a number lowers the volume: minus 60 dB
    quiet_wav_file = wav_file - 60
    # Try to recognize quiet audio
    # NOTE(review): `recognizer` is not defined anywhere in this article --
    # presumably a speech_recognition Recognizer created beforehand; confirm
    # (and confirm it accepts an AudioSegment) before running.
    recognizer.recognize_google(quiet_wav_file)
    # Output: UnknownValueError:
    
    # Increase the volume by 10 dB
    louder_wav_file = wav_file + 10
    # Try to recognize
    recognizer.recognize_google(louder_wav_file)
    # Output: this is a wav file
    
    # Import AudioSegment, the normalize effect and the play function
    from pydub import AudioSegment
    from pydub.effects import normalize
    from pydub.playback import play
    # Import an audio file whose sound level is uneven
    loud_quiet = AudioSegment.from_file("loud_quiet.wav")
    # Normalize the sound levels
    normalized_loud_quiet = normalize(loud_quiet)
    # Check the sound by playing the normalized result
    play(normalized_loud_quiet)
    
    # Drop the first 5 seconds of the audio.
    # Import audio with static at start
    static_at_start = AudioSegment.from_file("static_at_start.wav")
    # Remove the static via slicing; slice indices are in milliseconds,
    # so 5000 is the 5-second mark
    no_static_at_start = static_at_start[5000:]
    # Check the new sound
    play(no_static_at_start)
    
    # Concatenate two wav files.
    # Import two audio files
    wav_file_1 = AudioSegment.from_file("wav_file_1.wav")
    wav_file_2 = AudioSegment.from_file("wav_file_2.wav")
    # Combine the two audio files (segment + segment appends the second to the first)
    wav_file_3 = wav_file_1 + wav_file_2
    # Check the sound
    play(wav_file_3)
    # Combine two wav files and make the combination louder.
    # `+` is evaluated left to right: the files are joined first, then the
    # joined audio is raised by 10 dB
    louder_wav_file_3 = wav_file_1 + wav_file_2 + 10
    
    # Split multi-channel audio into separate mono channels.
    # Import phone call audio
    phone_call = AudioSegment.from_file("phone_call.wav")
    # Find number of channels
    phone_call.channels
    # Output: 2
    # Split stereo to mono
    phone_call_channels = phone_call.split_to_mono()
    phone_call_channels
    # Output: [<pydub.audio_segment.AudioSegment>, <pydub.audio_segment.AudioSegment>]

    # Find number of channels of first list item
    phone_call_channels[0].channels
    # Output: 1
    # Recognize the first channel; bind it to a name first, since the
    # original snippet used `phone_call_channel_1` without ever defining it
    phone_call_channel_1 = phone_call_channels[0]
    # NOTE(review): `recognizer` is not defined anywhere in this article --
    # presumably a speech_recognition Recognizer created beforehand; confirm.
    recognizer.recognize_google(phone_call_channel_1)
    # Output: the pydub library is really useful
    

    转换并保存(其他格式转wav再保存)

    from pydub import AudioSegment
    # Import audio file
    wav_file = AudioSegment.from_file("wav_file.wav")
    # Increase by 10 decibels
    louder_wav_file = wav_file + 10
    # Export louder audio file; `format` selects the output container
    louder_wav_file.export(out_f="louder_wav_file.wav", format="wav")
    # Output: <_io.BufferedRandom name='louder_wav_file.wav'>
    
    
    def make_wav(wrong_folder_path, right_folder_path):
        """Convert every .mp3/.flac file in one folder to .wav in another.

        Args:
            wrong_folder_path: Folder scanned (non-recursively) for the wrongly
                formatted audio files.
            right_folder_path: Folder the converted ``.wav`` files are written
                to. A trailing path separator is optional.
        """
        # Loop through wrongly formatted files
        for entry in os.scandir(wrong_folder_path):
            # Only work with files with audio extensions we're fixing
            if not entry.path.endswith((".mp3", ".flac")):
                continue
            # Create the new .wav filename. os.path.join inserts a separator only
            # when right_folder_path does not already end with one, so both
            # "out/" and "out" produce a path inside the folder.
            stem = os.path.splitext(entry.name)[0]
            out_file = os.path.join(right_folder_path, stem + ".wav")
            # Read in the audio file and export it in wav format. export() hands
            # back the open output file, so close it instead of leaking one
            # handle per converted file.
            AudioSegment.from_file(entry.path).export(out_file, format="wav").close()
            print(f"Creating {out_file}")
    
    # Call our new function
    make_wav("data/wrong_formats/", "data/right_format/")
    # Output (every path starts with the destination folder passed above):
    # Creating data/right_format/wav_file.wav
    # Creating data/right_format/flac_file.wav
    # Creating data/right_format/mp3_file.wav
    
    def make_no_static_louder(static_quiet, louder_no_static, skip_ms=3100, gain_db=10):
        """Trim the start of every audio file in a folder and raise its volume.

        Args:
            static_quiet: Folder scanned (non-recursively) for the input files
                (expected to be in wav format already).
            louder_no_static: Folder the processed ``.wav`` files are written
                to. A trailing path separator is optional.
            skip_ms: Milliseconds cut from the start of each file. The default
                of 3100 (about three seconds) is the value the original code
                hard-coded.
            gain_db: Decibels added to the remaining audio (default 10).
        """
        # Loop through files with static and quiet (already in wav format).
        # The folder comes from the `static_quiet` parameter; the original body
        # scanned an undefined name and raised NameError on every call.
        for entry in os.scandir(static_quiet):
            # Create new file path
            stem = os.path.splitext(entry.name)[0]
            out_file = os.path.join(louder_no_static, stem + ".wav")
            # Read the audio file
            audio_file = AudioSegment.from_file(entry.path)
            # Drop the leading static, boost the volume and export. export()
            # hands back the open output file, so close it rather than leak it.
            (audio_file[skip_ms:] + gain_db).export(out_file, format="wav").close()
            print(f"Creating {out_file}")
    
    # Remove static and make louder
    make_no_static_louder("data/static_quiet/", "data/louder_no_static/")
    # Output:
    # Creating data/louder_no_static/speech-recognition-services.wav
    # Creating data/louder_no_static/order-issue.wav
    # Creating data/louder_no_static/help-with-acount.wav
    

    相关文章

      网友评论

          本文标题:pydub及其示例

          本文链接:https://www.haomeiwen.com/subject/pxbcqltx.html