    数据+代码 TensorFlow实现


    import tensorflow as tf
    import numpy as np
    from sklearn.model_selection import train_test_split
    import time
    import matplotlib.pyplot as plt
    import pickle
    # Load the preprocessed parallel corpus: encoded sentences plus the
    # vocabulary lookup tables for both languages.
    # NOTE(review): unpickling can execute arbitrary code; only load a
    # data.pkl that comes from a trusted source.
    with open("data.pkl", 'rb') as data_file:  # closes the file handle deterministically
        (X, Y, en_word2idx, en_idx2word, en_vocab,
         de_word2idx, de_idx2word, de_vocab) = pickle.load(data_file, encoding='utf-8')
    # Show the first pair as raw word ids.
    print('Sentence in English - encoded:', X[0])
    print('Sentence in German - encoded:', Y[0])
    # Show the second pair decoded back into words.
    print('英语句子:', end=' ')
    for word_id in X[1]:
        print(en_idx2word[word_id], end=' ')
    print('\n德语句子:', end=' ')
    for word_id in Y[1]:
        print(de_idx2word[word_id], end=' ')
    def data_padding(x, y, length = 15):
        """Pad the encoded sentence pairs in place.

        Every source sentence in ``x`` is right-padded with the English
        ``<pad>`` id up to ``length`` tokens. Every target sentence in ``y``
        is wrapped as ``<go> ... <eos>`` and then right-padded with the German
        ``<pad>`` id, giving ``length + 2`` tokens. A sentence already longer
        than ``length`` receives no padding. Both lists are mutated; nothing
        is returned.
        """
        for row in range(len(x)):
            source = x[row]
            x[row] = source + [en_word2idx['<pad>']] * (length - len(source))
            target = y[row]
            filler = [de_word2idx['<pad>']] * (length - len(target))
            y[row] = [de_word2idx['<go>']] + target + [de_word2idx['<eos>']] + filler
    # Pad all sentence pairs in place, then hold out 10% of them for testing.
    data_padding(X, Y)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1)
    # The unsplit lists are no longer needed.
    del X, Y
    # Sequence lengths after padding: 15 source tokens; 17 target tokens
    # (the 15 padded tokens plus <go> and <eos>).
    input_seq_len = 15
    output_seq_len = 17
    # Vocabulary sizes include the special tokens.
    en_vocab_size = 2 + len(en_vocab)  # + <pad>, <ukn>
    de_vocab_size = 4 + len(de_vocab)  # + <pad>, <ukn>, <eos>, <go>
    # Placeholders, one per time step:
    # len(encoder_inputs) = 15, len(decoder_inputs) = 17, len(targets) = 17, len(target_weights) = 17
    encoder_inputs = [tf.placeholder(dtype = tf.int32, shape = [None], name = 'encoder{}'.format(i)) for i in range(input_seq_len)]
    decoder_inputs = [tf.placeholder(dtype = tf.int32, shape = [None], name = 'decoder{}'.format(i)) for i in range(output_seq_len)]
    # Targets are the decoder inputs shifted left by one step; one extra
    # placeholder supplies the target for the final step.
    targets = decoder_inputs[1:]
    targets.append(tf.placeholder(dtype = tf.int32, shape = [None], name = 'last_target'))
    target_weights = [tf.placeholder(dtype = tf.float32, shape = [None], name = 'target_w{}'.format(i)) for i in range(output_seq_len)]
    # Width of the output projection; the RNN cell below uses the same number
    # of units so its outputs can be multiplied with the projection matrix.
    size = 512
    w_t = tf.get_variable('proj_w', [de_vocab_size, size], tf.float32)  # projection weights, one row per German token
    b = tf.get_variable('proj_b', [de_vocab_size], tf.float32)  # projection bias, one entry per German token
    w = tf.transpose(w_t)
    output_projection = (w, b)
    # The seq2seq builder requires the encoder inputs, the decoder inputs and
    # an RNN cell as its first three arguments.
    outputs, states = tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
                                                encoder_inputs,
                                                decoder_inputs,
                                                tf.contrib.rnn.BasicLSTMCell(size),
                                                num_encoder_symbols = en_vocab_size,
                                                num_decoder_symbols = de_vocab_size,
                                                embedding_size = 100,
                                                feed_previous = False,  # training: decoder is fed the ground-truth inputs
                                                output_projection = output_projection,
                                                dtype = tf.float32)
    # sampled softmax loss - returns: A batch_size 1-D tensor of per-example sampled softmax losses
    def sampled_loss(labels, logits):
        """Compute the sampled softmax loss for one decoder time step.

        ``labels`` is reshaped into a single column before being handed to
        ``tf.nn.sampled_softmax_loss``, which reuses the module-level output
        projection (``w_t``, ``b``) and draws 512 sampled classes out of
        ``de_vocab_size``.
        """
        label_column = tf.reshape(labels, [-1, 1])
        return tf.nn.sampled_softmax_loss(weights = w_t,
                                          biases = b,
                                          labels = label_column,
                                          inputs = logits,
                                          num_sampled = 512,
                                          num_classes = de_vocab_size)
    # Weighted log cross-entropy loss between the predicted and target sequences.
    loss = tf.contrib.legacy_seq2seq.sequence_loss(
        logits = outputs,
        targets = targets,
        weights = target_weights,
        softmax_loss_function = sampled_loss)
    # Hand-written softmax function
    def softmax(x):
        """Return the softmax of ``x``, normalised over all of its elements.

        The maximum is subtracted before exponentiating so that large inputs
        do not overflow.
        """
        shifted = x - np.max(x)
        exponentials = np.exp(shifted)
        total = exponentials.sum()
        return exponentials / total
    # Helper that builds the placeholder feed for one batch
    def feed_dict(x, y, batch_size = 64):
        """Sample a random batch and return the feed dictionary for it.

        ``batch_size`` distinct rows are drawn from ``x``/``y``. Each encoder
        and decoder placeholder receives the tokens of its time step, the
        extra last target is filled with ``<pad>``, and each target weight is
        0.0 where the target token (the next decoder input) is ``<pad>`` and
        1.0 elsewhere. The final time step always gets zero weight.
        """
        feed = {}
        rows = np.random.choice(len(x), size = batch_size, replace = False)
        for step in range(input_seq_len):
            step_tokens = [x[row][step] for row in rows]
            feed[encoder_inputs[step].name] = np.array(step_tokens, dtype = np.int32)
        for step in range(output_seq_len):
            step_tokens = [y[row][step] for row in rows]
            feed[decoder_inputs[step].name] = np.array(step_tokens, dtype = np.int32)
        pad_id = de_word2idx['<pad>']
        feed[targets[-1].name] = np.full(shape = [batch_size], fill_value = pad_id, dtype = np.int32)
        for step in range(output_seq_len - 1):
            # The target of step t is the decoder input of step t + 1.
            next_tokens = feed[decoder_inputs[step + 1].name]
            feed[target_weights[step].name] = (next_tokens != pad_id).astype(np.float32)
        feed[target_weights[output_seq_len - 1].name] = np.zeros(batch_size, dtype = np.float32)
        return feed
    # Helper that decodes a projected output sequence into words
    def decode_output(output_seq):
        """Turn per-step output scores into a list of German words.

        For each of the ``output_seq_len`` time steps, the scores in
        ``output_seq[i]`` are passed through ``softmax`` and the most probable
        token id is looked up in ``de_idx2word``.

        Returns:
            A list with one word per time step (special tokens included).
        """
        words = []
        for i in range(output_seq_len):
            smax = softmax(output_seq[i])
            idx = np.argmax(smax)
            # Previously the chosen id was discarded and the result stayed empty.
            words.append(de_idx2word[idx])
        return words
    # ops and hyperparameters
    learning_rate = 5e-3
    batch_size = 64
    steps = 10 ### NOTE: the original value was 1000; 10 is used to quickly check that the model runs
    # ops for projecting outputs: apply the (w, b) output projection to the
    # decoder output of every time step
    outputs_proj = [tf.matmul(outputs[i], output_projection[0]) + output_projection[1] for i in range(output_seq_len)]
    # training op: one RMSProp update that minimizes the sequence loss
    optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)
    # init op
    # NOTE(review): created after the optimizer, presumably so that variables
    # added by the optimizer are initialized as well - confirm before reordering.
    init = tf.global_variables_initializer()
    # forward step
    def forward_step(sess, feed):
        """Evaluate the projected decoder outputs for ``feed`` and return them."""
        return sess.run(outputs_proj, feed_dict = feed)
    # training step
    def backward_step(sess, feed):
        """Run one optimizer update on the batch in ``feed``; returns nothing."""
        sess.run(fetches = optimizer, feed_dict = feed)
    losses = []  # loss values recorded during training, plotted afterwards
    saver = tf.train.Saver()  # saves model checkpoints
    with tf.Session() as sess:
        # Variables must be initialized before the first training step.
        sess.run(init)
        t = time.time()
        for step in range(steps):
            feed = feed_dict(X_train, Y_train, batch_size)
            backward_step(sess, feed)
            # Report on the first step and then every fifth step.
            if step % 5 == 4 or step == 0:
                loss_value = sess.run(loss, feed_dict = feed)
                print('step: {}, loss: {}'.format(step, loss_value))
                losses.append(loss_value)  # keep the value for the loss plot
            if step % 20 == 19:
                saver.save(sess, 'checkpoints/', global_step=step)
                print('Checkpoint is saved')
        print('Training time for {} steps: {}s'.format(steps, time.time() - t))
        # Make sure the final weights are on disk even when the run did not
        # end on a periodic checkpoint step (e.g. steps = 10).
        if steps > 0 and steps % 20 != 0:
            saver.save(sess, 'checkpoints/', global_step=steps - 1)
            print('Checkpoint is saved')
    # Plot the recorded loss values with the 'fivethirtyeight' style, y-axis fixed to 0..12.
    with plt.style.context('fivethirtyeight'):
        plt.plot(losses, linewidth=1)
        plt.ylim(0, 12)
    # Rebuild the model in a fresh graph for inference and translate a few
    # sample sentences with the most recent checkpoint.
    with tf.Graph().as_default():
        # placeholders
        encoder_inputs = [tf.placeholder(dtype = tf.int32, shape = [None], name = 'encoder{}'.format(i)) for i in range(input_seq_len)]
        decoder_inputs = [tf.placeholder(dtype = tf.int32, shape = [None], name = 'decoder{}'.format(i)) for i in range(output_seq_len)]
        # output projection (same variable names and shapes as in training)
        size = 512
        w_t = tf.get_variable('proj_w', [de_vocab_size, size], tf.float32)
        b = tf.get_variable('proj_b', [de_vocab_size], tf.float32)
        w = tf.transpose(w_t)
        output_projection = (w, b)
        # change the model so that output at time t can be fed as input at time t+1
        # The builder requires the encoder inputs, decoder inputs and RNN cell
        # as its first three arguments.
        outputs, states = tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
                                                    encoder_inputs,
                                                    decoder_inputs,
                                                    tf.contrib.rnn.BasicLSTMCell(size),
                                                    num_encoder_symbols = en_vocab_size,
                                                    num_decoder_symbols = de_vocab_size,
                                                    embedding_size = 100,
                                                    feed_previous = True, # <-----this is changed----->
                                                    output_projection = output_projection,
                                                    dtype = tf.float32)
        # ops for projecting outputs
        outputs_proj = [tf.matmul(outputs[i], output_projection[0]) + output_projection[1] for i in range(output_seq_len)]
        # let's translate these sentences
        en_sentences = ["What' s your name", 'My name is', 'What are you doing', 'I am reading a book',\
                        'How are you', 'I am good', 'Do you speak English', 'What time is it', 'Hi', 'Goodbye', 'Yes', 'No']
        # unknown words map to id 0
        en_sentences_encoded = [[en_word2idx.get(word, 0) for word in en_sentence.split()] for en_sentence in en_sentences]
        # padding to fit encoder input
        for encoded in en_sentences_encoded:
            encoded += (input_seq_len - len(encoded)) * [en_word2idx['<pad>']]
        # restore all variables - use the last checkpoint saved
        saver = tf.train.Saver()
        path = tf.train.latest_checkpoint('checkpoints')
        with tf.Session() as sess:
            # restore
            saver.restore(sess, path)
            # feed data into placeholders
            feed = {}
            for i in range(input_seq_len):
                feed[encoder_inputs[i].name] = np.array([encoded[i] for encoded in en_sentences_encoded], dtype = np.int32)
            # only the first decoder input (<go>) is fed here
            feed[decoder_inputs[0].name] = np.array([de_word2idx['<go>']] * len(en_sentences_encoded), dtype = np.int32)
            # translate
            output_sequences = sess.run(outputs_proj, feed_dict = feed)
            # decode seq.
            for i in range(len(en_sentences_encoded)):
                # gather the scores of sentence i across all time steps
                ouput_seq = [output_sequences[j][i] for j in range(output_seq_len)]
                #decode output sequence
                words = decode_output(ouput_seq)
                # show the source sentence, then its translation on its own line
                print(en_sentences[i])
                for word in words:
                    if word not in ['<eos>', '<pad>', '<go>']:
                        print(word, end=' ')
                print()



    When I was in my 20s, I saw my very first psychotherapy client. 
    I was a Ph.D. student in clinical psychology at Berkeley. 
    She was a 26-year-old woman named Alex. 
    Now Alex walked into her first session wearing jeans and a big slouchy top, and she dropped onto the couch in my office and kicked off her flats and told me she was there to talk about guy problems. 
    Now when I heard this, I was so relieved. 
    My classmate got an arsonist for her first client. 
    And I got a twentysomething who wanted to talk about boys. 
    This I thought I could handle. 
    But I didn't handle it. 
    With the funny stories that Alex would bring to session, it was easy for me just to nod my head while we kicked the can down the road.


    Als ich in meinen 20ern war, hatte ich meine erste Psychotherapie-Patientin. 
    Ich war Doktorandin und studierte Klinische Psychologie in Berkeley. 
    Sie war eine 26-jährige Frau namens Alex. 
    Als Alex in die erste Sitzung kam, trug sie Jeans und ein ausgebeultes Top. Sie fiel auf das Sofa in meinem Büro, schleuderte ihre Sandalen von sich und erzählte mir, sie wäre da, um über Männerprobleme zu reden. 
    Und als ich das hörte, war ich erleichtert. 
    Meine Kommilitonin bekam nämlich einen Brandstifter als ersten Patienten. 
    Und ich bekam eine Frau in den 20ern, die über Jungs reden wollte. 
    Das kriege ich hin, dachte ich mir. 
    Aber ich habe es nicht hingekriegt. 
    Mit den lustigen Geschichten, die Alex mit in die Sitzung brachte, war es leicht für mich, einfach mit dem Kopf zu nicken, während wir die Probleme vor uns herschoben. 


    import pickle
    from collections import Counter
    from operator import itemgetter
    def read_sentences(file_path):
        """Read a UTF-8 text file and return its lines as a list of sentences.

        Leading and trailing whitespace (including the newline) is stripped
        from every line. Blank lines are kept as empty strings so that line
        numbers stay aligned between parallel files.
        """
        with open(file_path, 'r', encoding='utf-8') as reader:
            return [line.strip() for line in reader]
    def create_dataset(en_sentences, de_sentences):
        """Build vocabularies and encoded sentence pairs from parallel text.

        Sentences are split on whitespace and surrounding punctuation is
        stripped from every word. Vocabularies are ordered by descending
        frequency. English ids start at 2 (0 = ``<ukn>``, 1 = ``<pad>``);
        German ids start at 4 (0 = ``<ukn>``, 1 = ``<go>``, 2 = ``<eos>``,
        3 = ``<pad>``).

        A pair is kept only when both sides have at most 15 tokens and their
        lengths differ by no more than 30% of the shorter side. Pairs are
        matched by position; surplus sentences on the longer side are ignored.

        Returns:
            ``(X, Y, en_word2idx, en_idx2word, en_vocab,
            de_word2idx, de_idx2word, de_vocab)``
        """
        strip_chars = ',." ;:)(][?!'
        max_len = 15

        def tokenize(sentence):
            return [word.strip(strip_chars) for word in sentence.split()]

        en_tokens = [tokenize(sentence) for sentence in en_sentences]
        de_tokens = [tokenize(sentence) for sentence in de_sentences]

        # most_common() sorts by descending count and keeps first-seen order on ties.
        en_counts = Counter(token for sentence in en_tokens for token in sentence)
        de_counts = Counter(token for sentence in de_tokens for token in sentence)
        en_vocab = [word for word, _ in en_counts.most_common()]
        de_vocab = [word for word, _ in de_counts.most_common()]
        # The vocabularies are not truncated; slice en_vocab / de_vocab here
        # if a size cap is wanted.

        en_word2idx = {word: idx for idx, word in enumerate(en_vocab, start=2)}
        en_word2idx['<ukn>'] = 0
        en_word2idx['<pad>'] = 1
        en_idx2word = {idx: word for word, idx in en_word2idx.items()}

        de_word2idx = {word: idx for idx, word in enumerate(de_vocab, start=4)}
        de_word2idx['<ukn>'] = 0
        de_word2idx['<go>'] = 1
        de_word2idx['<eos>'] = 2
        de_word2idx['<pad>'] = 3
        de_idx2word = {idx: word for word, idx in de_word2idx.items()}

        # Words missing from a vocabulary fall back to <ukn> (id 0).
        x = [[en_word2idx.get(token, 0) for token in sentence] for sentence in en_tokens]
        y = [[de_word2idx.get(token, 0) for token in sentence] for sentence in de_tokens]

        X = []
        Y = []
        for source, target in zip(x, y):
            n1 = len(source)
            n2 = len(target)
            similar_length = abs(n1 - n2) <= 0.3 * min(n1, n2)
            if similar_length and n1 <= max_len and n2 <= max_len:
                X.append(source)
                Y.append(target)
        return X, Y, en_word2idx, en_idx2word, en_vocab, de_word2idx, de_idx2word, de_vocab
    def save_dataset(file_path, obj):
        """Pickle ``obj`` to ``file_path`` using the highest available protocol."""
        with open(file_path, mode='wb') as sink:
            pickle.dump(obj, sink, protocol=pickle.HIGHEST_PROTOCOL)
    def read_dataset(file_path):
        """Load and return the object pickled at ``file_path``."""
        with open(file_path, mode='rb') as source:
            dataset = pickle.load(source)
        return dataset
    # Read the parallel corpus, build the dataset and store it as a pickle.
    en_sentences = read_sentences('data.en')
    de_sentences = read_sentences('data.de')
    dataset = create_dataset(en_sentences, de_sentences)
    save_dataset('demo_data.pkl', dataset)



