Labeling Speech Audio

Author: yanghedada | Published 2018-09-08 10:07

The script below trains a simple per-timestep audio labeler in TensorFlow: precomputed spectrograms X (5511 frames x 101 frequency bins) and per-step labels Y (1375 steps) are fed through a convolution, a two-layer LSTM, and a per-step sigmoid; the trained model is then used to overlay a chime on the detected activations in a .wav clip.
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 28 20:14:38 2018

@author: yanghe
"""

import numpy as np
import tensorflow as tf
import pygame
from voice_tool import *   # graph_spectrogram, chime_on_activate, ...

# Play one of the dev-set clips to confirm audio playback works.
pygame.mixer.init()
track4 = pygame.mixer.Sound("./raw_data/dev/2.wav")
track4.play()


# Training and dev sets: X holds spectrograms (N, 5511, 101),
# Y holds per-frame labels flattened to (N, 1375).
X = np.load("./XY_train/X.npy")
Y = np.load("./XY_train/Y.npy")
Y = np.reshape(Y, (-1, 1375))
X_dev = np.load("./XY_dev/X_dev.npy")
Y_dev = np.load("./XY_dev/Y_dev.npy")
Y_dev = np.reshape(Y_dev, (-1, 1375))
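
# Quick shape check (a sketch; it just restates what the placeholders defined
# further down expect: 5511 spectrogram frames x 101 bins, 1375 label steps).
assert X.shape[1:] == (5511, 101) and X_dev.shape[1:] == (5511, 101)
assert Y.shape[1] == 1375 and Y_dev.shape[1] == 1375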
def get_weight_l2(shape, name, regularization_rate=None):
    """Create a conv weight/bias pair; optionally add an L2 penalty to the 'loss' collection."""
    weight = tf.get_variable('weight_%s' % str(name), shape=shape,
                             initializer=tf.truncated_normal_initializer(stddev=0.1))
    bias = tf.get_variable('bias_%s' % str(name), shape=[shape[-1]],
                           initializer=tf.constant_initializer(0.1))
    if regularization_rate is not None:
        tf.add_to_collection('loss', tf.contrib.layers.l2_regularizer(regularization_rate)(weight))
    return bias, weight

def model(inputs):
    # (N, 5511, 101) spectrogram -> NHWC tensor with the 101 frequency bins as channels.
    inputs = tf.reshape(inputs, (-1, 5511, 1, 101))
    with tf.variable_scope('conv1'):
        conv1_biases, conv1_weight = get_weight_l2(shape=[15, 1, 101, 128], name='layer1')
        conv1 = tf.nn.conv2d(inputs, conv1_weight, strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))
        # Window 15 / stride 4 with VALID padding: (5511 - 15) // 4 + 1 = 1375 time steps.
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 15, 1, 1], strides=[1, 4, 1, 1], padding='VALID')
        drop1 = tf.nn.dropout(pool1, keep_prob)
        # dynamic_rnn expects batch-major (batch, time, features) by default,
        # so keep the batch dimension first instead of transposing to time-major.
        drop1 = tf.reshape(drop1, (-1, 1375, 128))
    with tf.variable_scope('rnn'):
        # Build two independent LSTM cells; reusing one cell object twice in
        # MultiRNNCell makes the two layers clash over the same variables.
        def lstm_cell():
            cell = tf.nn.rnn_cell.BasicLSTMCell(128, forget_bias=1.0)
            return tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)
        cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell() for _ in range(2)])
        outputs, _ = tf.nn.dynamic_rnn(cell, drop1, dtype=tf.float32)
        outputs = tf.reshape(outputs, (-1, 1375, 128, 1))
    with tf.variable_scope('conv2'):
        # A 1x128 filter with VALID padding collapses the 128 LSTM features
        # into a single sigmoid score per time step.
        conv2_biases, conv2_weight = get_weight_l2(shape=[1, 128, 1, 1], name='layer2')
        conv2 = tf.nn.conv2d(outputs, conv2_weight, strides=[1, 1, 1, 1], padding='VALID')
        sig1 = tf.nn.sigmoid(tf.nn.bias_add(conv2, conv2_biases))
        sig1 = tf.reshape(sig1, (-1, 1375))
    return sig1
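
# Time-axis bookkeeping (a sketch, not part of the original script): VALID
# pooling with window 15 and stride 4 maps the 5511 spectrogram frames onto
# the 1375 label steps that Y provides.
def pooled_length(n_frames, ksize=15, stride=4):
    return (n_frames - ksize) // stride + 1

assert pooled_length(5511) == 1375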

def train():
    global_step = tf.Variable(0, trainable=False)
    with tf.variable_scope('voice'):
        pred = model(input_data)
    # Apply the exponential moving average after the model variables exist;
    # before model() is called, tf.trainable_variables() is still empty.
    with tf.variable_scope("moving_average"):
        variable_averages = tf.train.ExponentialMovingAverage(moving_average_decay, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.variable_scope("train_step"):
        # With decay_steps=1 and staircase=True, the learning rate shrinks by
        # learning_rate_decay after every training step.
        learning_rate = tf.train.exponential_decay(
                        learning_rate_base,
                        global_step,
                        1,
                        learning_rate_decay,
                        staircase=True)
    # Per-step binary cross-entropy; predictions are clipped so log() never sees 0.
    pred_clipped = tf.clip_by_value(pred, 1e-10, 1.0 - 1e-10)
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(
        targets * tf.log(pred_clipped) + (1 - targets) * tf.log(1 - pred_clipped), axis=[1]))
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy, global_step=global_step)

    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(training_steps):
            _ = sess.run(train_op, feed_dict={input_data: X, targets: Y, keep_prob: 0.8})
            if i % 2 == 0:
                loss = sess.run(cross_entropy, feed_dict={input_data: X_dev, targets: Y_dev, keep_prob: 1.0})
                print("After %d training step(s), the model loss is %g " % (i, loss))
        saver.save(sess, 'saver/moedl_voce_3.ckpt')
            
def predict():
    filename = "./raw_data/dev/2.wav"
    # graph_spectrogram returns (freq, time); swap to (time, freq) and add a batch axis.
    x = graph_spectrogram(filename)
    x = x.swapaxes(0, 1)
    x = np.expand_dims(x, axis=0)
    with tf.variable_scope('voice') as scope:
        scope.reuse_variables()          # reuse the variables created in train()
        prediction = model(input_data)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # Note: this restores an earlier checkpoint; train() above saves moedl_voce_3.ckpt.
        saver.restore(sess, 'saver/moedl_voce_1.ckpt')
        predict_ = sess.run(prediction, feed_dict={input_data: x, keep_prob: 1.0})
        print(predict_)
        # Overlay a chime wherever the score crosses the threshold, then play the result.
        chime_on_activate(filename, predict_, 0.5)
        track4 = pygame.mixer.Sound("./chime_output.wav")
        track4.play()
        
# Placeholders: dropout keep probability, spectrogram input, and per-step targets.
keep_prob = tf.placeholder(tf.float32)
input_data = tf.placeholder(tf.float32, [None, 5511, 101])
targets = tf.placeholder(tf.float32, [None, 1375])
training_steps = 50
learning_rate_base = 0.01
learning_rate_decay = 0.99
moving_average_decay = 0.99
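
# Worked example of the decay schedule (a sketch, not part of the original
# script): with decay_steps=1 and staircase=True, the learning rate after
# step k is learning_rate_base * learning_rate_decay ** k.
final_lr = learning_rate_base * learning_rate_decay ** training_steps
print("learning rate after %d steps: about %.4f" % (training_steps, final_lr))  # ~0.006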

train()
#predict()


# Run the trained model on a dev clip and overlay a chime on the detections.
filename = "./raw_data/dev/2.wav"
x = graph_spectrogram(filename)
x = x.swapaxes(0, 1)                 # (freq, time) -> (time, freq)
x = np.expand_dims(x, axis=0)        # add the batch dimension
with tf.variable_scope('voice') as scope:
    scope.reuse_variables()          # reuse the variables built by train()
    prediction = model(input_data)
saver = tf.train.Saver()
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    saver.restore(sess, 'saver/moedl_voce_3.ckpt')
    predict_ = sess.run(prediction, feed_dict={input_data: x, keep_prob: 1.0})
    chime_on_activate(filename, predict_, 0.9995656)
    track4 = pygame.mixer.Sound("./chime_output.wav")
    track4.play()
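
The chime_on_activate helper comes from voice_tool and is not shown here. As a rough sketch of the same idea (threshold the 1375 sigmoid scores and map each firing step back to a position in the clip), something like the function below could be used; the 10-second clip length is an assumption, not something the script itself states.

import numpy as np

def activation_times(scores, threshold, clip_seconds=10.0):
    """Return approximate timestamps (seconds) of output steps above the threshold."""
    scores = np.asarray(scores).reshape(-1)     # flatten the (1, 1375) prediction
    steps = np.where(scores > threshold)[0]     # indices of steps that fire
    return steps * clip_seconds / len(scores)   # step index -> seconds into the clip

# e.g. activation_times(predict_, 0.9995656) lists where the chime would be inserted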

Original link: https://www.haomeiwen.com/subject/xuskgftx.html