LSTM

Author: 凌霄文强 | Published 2019-05-25 22:42
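
The script below trains a binary sentiment classifier with Keras: it reads a labeled train set and an unlabeled test set, maps the Negative/Positive labels to 0/1, cleans the text with regexes, tokenizes and pads the sequences, runs them through an Embedding + LSTM model, and writes the predicted probabilities to a submission file.
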
import re
import numpy as np
import pandas as pd
from keras import Sequential
from keras.layers import Embedding, LSTM, Flatten, Dense
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer

# Read the data; map the string labels to 0/1 integers.
train_data = pd.read_csv('./data/train.csv', lineterminator='\n')
test_data = pd.read_csv('./data/test.csv', lineterminator='\n')
train_data['label'] = train_data['label'].map({'Negative': 0, 'Positive': 1})
# DataFrame.as_matrix() was removed in pandas 1.0; .values is the equivalent.
train_data = train_data.values
test_data = test_data.values


# Two common ways to clean the text data:
def cleaner(word):
    # Strip punctuation, digits, escape sequences, and underscores, then lowercase.
    word = re.sub(r'\#\.', '', word)
    word = re.sub(r'\n', '', word)
    word = re.sub(r',', '', word)
    word = re.sub(r'\-', ' ', word)
    word = re.sub(r'\.', '', word)
    word = re.sub(r'\\', ' ', word)
    word = re.sub(r'\\x\.+', '', word)
    word = re.sub(r'\d', '', word)
    word = re.sub(r'^_.', '', word)
    word = re.sub(r'_', ' ', word)
    word = re.sub(r'^ ', '', word)
    word = re.sub(r' $', '', word)
    word = re.sub(r'\?', '', word)

    return word.lower()

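# A quick check of what cleaner() does to a single token, traced through the
# regexes above:
#   cleaner('Well-Made,')  ->  'well made'   (hyphen -> space, comma stripped, lowercased)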

def hashing(word):
    # Crude phonetic normalization: collapse similar-sounding or repeated
    # letters so variant spellings map to one token.
    word = re.sub(r'ain$', r'ein', word)
    word = re.sub(r'ai', r'ae', word)
    word = re.sub(r'ay$', r'e', word)
    word = re.sub(r'ey$', r'e', word)
    word = re.sub(r'ie$', r'y', word)
    word = re.sub(r'^es', r'is', word)
    word = re.sub(r'a+', r'a', word)
    word = re.sub(r'j+', r'j', word)
    word = re.sub(r'd+', r'd', word)
    word = re.sub(r'u', r'o', word)
    word = re.sub(r'o+', r'o', word)
    word = re.sub(r'ee+', r'i', word)
    if not re.match(r'ar', word):
        word = re.sub(r'ar', r'r', word)
    word = re.sub(r'iy+', r'i', word)
    word = re.sub(r'ih+', r'eh', word)
    word = re.sub(r's+', r's', word)
    if re.search(r'[rst]y', word) and word[-1] != 'y':
        word = re.sub(r'y', r'i', word)
    if re.search(r'[bcdefghijklmnopqrtuvwxyz]i', word):
        word = re.sub(r'i$', r'y', word)
    if re.search(r'[acefghijlmnoqrstuvwxyz]h', word):
        word = re.sub(r'h', '', word)
    word = re.sub(r'k', r'q', word)
    return word

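# hashing() is never called in the pipeline below; it sketches a second,
# phonetic-style cleaning strategy. Traced through its rules:
#   hashing('cooool')  ->  'col'   (runs of 'o' collapse to one)
#   hashing('cool')    ->  'col'   (so both spellings share a single token)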

def array_cleaner(array):
    # Apply cleaner() to every word of every sentence.
    X = []
    for sentence in array:
        X.append(' '.join(cleaner(word) for word in sentence.split(' ')))
    return X


# Column 1 holds the review text; column 2 of the train set holds the 0/1 label.
X_test = test_data[:, 1]
X_train = train_data[:, 1]
X_train = array_cleaner(X_train)
X_test = array_cleaner(X_test)
y_train = np.array(train_data[:, 2], dtype=int)

# Fit one tokenizer on train + test so both share a single vocabulary
# (`nb_words` was renamed `num_words` in Keras 2), then pad every sequence
# to the length of the longest one.
X_all = X_train + X_test
tokenizer = Tokenizer(num_words=2000, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', lower=True, split=' ')
tokenizer.fit_on_texts(X_all)
X_all = tokenizer.texts_to_sequences(X_all)
X_all = pad_sequences(X_all)
# Split the padded matrix back into train and test parts.
X_train = X_all[:len(y_train)]
X_test = X_all[len(y_train):]
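# Optional sanity check: train and test now share one padded sequence length,
# e.g. (n_train, maxlen) and (n_test, maxlen).
print(X_train.shape, X_test.shape)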

embed_dim = 128
lstm_out = 256
batch_size = 32

# Embedding -> LSTM (full sequence) -> Flatten -> sigmoid for binary output.
# Embedding's `dropout` argument was removed in Keras 2; dropout is applied
# inside the LSTM layer instead.
model = Sequential()
model.add(Embedding(2000, embed_dim, input_length=X_train.shape[1]))
model.add(LSTM(lstm_out, dropout=0.2, return_sequences=True))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
model.fit(X_train, y_train, batch_size=batch_size, epochs=10)
# Predict probabilities for the test set and write the submission file.
y_pred = model.predict(X_test)
print(y_pred[:10])
result = pd.DataFrame.from_dict({
    'ID': range(1, len(y_pred) + 1),
    'Pred': y_pred.reshape(-1)
})
result.to_csv('./result/submission.csv', index=False)
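
If the submission format expects hard 0/1 labels rather than probabilities (an assumption; the script above writes raw sigmoid outputs), a minimal sketch that thresholds at 0.5:

labels = (y_pred.reshape(-1) > 0.5).astype(int)
pd.DataFrame({'ID': range(1, len(labels) + 1), 'Pred': labels}).to_csv(
    './result/submission_labels.csv', index=False)  # hypothetical output path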
