Source code for prediction

import argparse
import os
import sys
import numpy as np
import chainer
from chainer import optimizers
import chainer.functions as F
import chainer.links as L
import pickle
# import cupy

global vocab
global n_vocab
global inv_vocab

vocab = {'<$>':0, '<s>':1, '<eos>':2}
n_vocab = len(vocab)
inv_vocab = {0:'<$>', 1:'<s>', 2:'<eos>'} # reverse lookup dictionary (id -> word)

n_units = 512 # number of units in the hidden layer


# LSTM network definition
class LSTM(chainer.Chain):
    state = {}

    def __init__(self, n_vocab, n_units):
        print(n_vocab, n_units)
        super(LSTM, self).__init__(
            l1_embed = L.EmbedID(n_vocab, n_units),
            l1_x = L.Linear(n_units, 4 * n_units),
            l1_h = L.Linear(n_units, 4 * n_units),
            l2_embed = L.EmbedID(n_vocab, n_units),
            l2_x = L.Linear(n_units, 4 * n_units),
            l2_h = L.Linear(n_units, 4 * n_units),
            l3_embed = L.EmbedID(n_vocab, n_units),
            l3_x = L.Linear(n_units, 4 * n_units),
            l3_h = L.Linear(n_units, 4 * n_units),
            l4_embed = L.EmbedID(n_vocab, n_units),
            l4_x = L.Linear(n_units, 4 * n_units),
            l4_h = L.Linear(n_units, 4 * n_units),
            l5_embed = L.EmbedID(n_vocab, n_units),
            l5_x = L.Linear(n_units, 4 * n_units),
            l5_h = L.Linear(n_units, 4 * n_units),
            l_umembed = L.Linear(n_units, n_vocab)
        )

    def forward(self, x1, x2, x3, x4, x5, t, train=True, dropout_ratio=0.5):
        h1 = self.l1_embed(chainer.Variable(np.asarray([x1], dtype=np.int32)))
        c1, y1 = F.lstm(chainer.Variable(np.zeros((1, n_units), dtype=np.float32)),
                        F.dropout(self.l1_x(h1), ratio=dropout_ratio, train=train) + self.l1_h(self.state['y1']))
        h2 = self.l2_embed(chainer.Variable(np.asarray([x2], dtype=np.int32)))
        c2, y2 = F.lstm(self.state['c1'],
                        F.dropout(self.l2_x(h2), ratio=dropout_ratio, train=train) + self.l2_h(self.state['y2']))
        h3 = self.l3_embed(chainer.Variable(np.asarray([x3], dtype=np.int32)))
        c3, y3 = F.lstm(self.state['c2'],
                        F.dropout(self.l3_x(h3), ratio=dropout_ratio, train=train) + self.l3_h(self.state['y3']))
        h4 = self.l4_embed(chainer.Variable(np.asarray([x4], dtype=np.int32)))
        c4, y4 = F.lstm(self.state['c3'],
                        F.dropout(self.l4_x(h4), ratio=dropout_ratio, train=train) + self.l4_h(self.state['y4']))
        h5 = self.l5_embed(chainer.Variable(np.asarray([x5], dtype=np.int32)))
        c5, y5 = F.lstm(self.state['c4'],
                        F.dropout(self.l5_x(h5), ratio=dropout_ratio, train=train) + self.l5_h(self.state['y5']))
        self.state = {'c1': c1, 'y1': y1, 'h1': h1, 'c2': c2, 'y2': y2, 'h2': h2,
                      'c3': c3, 'y3': y3, 'h3': h3, 'c4': c4, 'y4': y4, 'h4': h4,
                      'c5': c5, 'y5': y5, 'h5': h5}
        y = self.l_umembed(y5)
        print('y:', vars(y))
        print('ans:', np.asarray([t]), 'pred:', np.argmax(y.data))
        if train:
            return F.softmax_cross_entropy(y, np.asarray([t], dtype=np.int32))
        else:
            # at prediction time, return the distribution over the vocabulary
            return F.softmax(y), y.data

    def initialize_state(self, n_units, batchsize=1, train=True):
        for name in ('c1', 'y1', 'h1', 'c2', 'y2', 'h2', 'c3', 'y3', 'h3', 'c4', 'y4', 'h4', 'c5', 'y5', 'h5'):
            self.state[name] = chainer.Variable(np.zeros((batchsize, n_units), dtype=np.float32), volatile=not train)


def load_data(filename):
    global vocab, inv_vocab
    # Replace every newline in the text with <eos> and split into words.
    # For Japanese text, put a space on both sides of <eos>: replace('\n', ' <eos> ')
    words = open(filename, encoding='utf-8').read().replace('\n', ' <eos> ').strip().split()
    dataset = np.ndarray((len(words),), dtype=np.int32)
    for i, word in enumerate(words):
        if word not in vocab:
            vocab[word] = len(vocab)
            inv_vocab[len(vocab)-1] = word
        dataset[i] = vocab[word]
    return dataset


def main():
    p = 5 # sequence length
    w = 2 # number of context words on each side
    epoch = 2 # number of passes over the data
    total_loss = 0 # accumulator for the loss value

    # Parse command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()


    # Load the training, validation and test data
    os.chdir('/Users/suguruoki/practice/chainer/examples/ptb')
    train_data = load_data('last4.dat')
    pickle.dump(vocab, open('vocab.bin', 'wb'))
    pickle.dump(inv_vocab, open('inv_vocab.bin', 'wb'))
    #train_data = load_data('jpn-train.dat')
    #train_data = load_data('ptb.train.txt')
    #valid_data = load_data('ptb.valid.txt')
    #test_data = load_data('ptb.test.txt')

    n_vocab = len(vocab)
    #print(train_data[0:1000])

    # Prepare the model
    # The input size is the vocabulary size; the hidden layer size is defined at the top of the file
    lstm = LSTM(n_vocab, n_units)
    lstm.initialize_state(n_units)
    model = L.Classifier(lstm)
    model.compute_accuracy = False
    # In a CUDA environment, switch the array module like this
    if args.gpu >= 0:
        xp = cupy
    else:
        xp = np

    # if args.gpu >= 0:
    #     cupy.get_device(args.gpu).use()
    #     model.to_gpu()

    # Load an existing model if there is one
    # model = pickle.load(open('LSTMmodel.pkl', 'rb'))

    # Set up the optimizer
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    seq = []
    seq_append = seq.append

    # Split the data into 5-word windows
    for i in range(epoch): # repeat over the same data for the given number of epochs
        print('Epoch =', i+1, file=sys.stderr)
        length = len(train_data)

        # Scan the words in order
        for t in range(length):

            print("{}/{}".format(t, length))
            # Handle the beginning and the end of the text

            for k in range(t-w, t+w+1):
                if k >= 0:
                    if k == t:
                        seq.append(vocab['<$>'])
                    elif k > len(train_data)-1:
                        seq.append(vocab['<s>'])
                    else:
                        seq.append(train_data[k])
                else:
                    seq.append(vocab['<s>'])
            seq.append(train_data[t])
            # print('t =', t, ', seq :', seq)
            tmp = np.array(seq, dtype='i')
            seq = [] # seq: list of surrounding words

            loss = lstm.forward(tmp[0], tmp[1], tmp[2], tmp[3], tmp[4], tmp[5])
            tmp = []

            # Accumulate the loss for reporting
            if i % epoch == 0:
                total_loss += loss.data
            # Run the optimization step
            model.cleargrads()
            loss.backward()
            optimizer.update()
        # Save the training result to files once per epoch
        # model.to_cpu()
        pickle.dump(model, open('LSTMmodel.pkl', 'wb'))
        pickle.dump(lstm, open('LSTMlstm.pkl', 'wb'))


if __name__ == '__main__':
    main()
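
As posted, the script above only runs the training loop; nothing in it actually queries the trained model. The snippet below is a minimal prediction sketch, not part of the original page: it assumes it is run in the same interpreter session as the script (or pasted at the bottom of the same file, after the main() call), so that pickle can find the LSTM class when reloading LSTMlstm.pkl, and it looks window tokens up against the saved vocabulary, falling back to '<s>' for unknown words.

import pickle
import numpy as np

vocab = pickle.load(open('vocab.bin', 'rb'))
inv_vocab = pickle.load(open('inv_vocab.bin', 'rb'))
lstm = pickle.load(open('LSTMlstm.pkl', 'rb'))

# Two context words on each side, the centre position marked with <$>, plus a dummy
# target id at the end, mirroring the 6-element windows built in the training loop.
window = ['<s>', '<s>', '<$>', 'word', '<eos>']  # 'word' is a placeholder token
ids = [vocab.get(w, vocab['<s>']) for w in window] + [0]
probs, _ = lstm.forward(*ids, train=False)
print('predicted centre word:', inv_vocab[int(np.argmax(probs.data))])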





===================================================

Source code for training



import argparse
import os
import sys
import numpy as np
import chainer
from chainer import optimizers
import chainer.functions as F
import chainer.links as L
import pickle
# import cupy

global vocab
global n_vocab
global inv_vocab

vocab = {'<$>':0, '<s>':1, '<eos>':2}
n_vocab = len(vocab)
inv_vocab = {0:'<$>', 1:'<s>', 2:'<eos>'} # reverse lookup dictionary (id -> word)

n_units = 512 # number of units in the hidden layer


# LSTM network definition
class LSTM(chainer.Chain):
    state = {}

    def __init__(self, n_vocab, n_units):
        print(n_vocab, n_units)
        super(LSTM, self).__init__(
            l1_embed = L.EmbedID(n_vocab, n_units),
            l1_x = L.Linear(n_units, 4 * n_units),
            l1_h = L.Linear(n_units, 4 * n_units),
            l2_embed = L.EmbedID(n_vocab, n_units),
            l2_x = L.Linear(n_units, 4 * n_units),
            l2_h = L.Linear(n_units, 4 * n_units),
            l3_embed = L.EmbedID(n_vocab, n_units),
            l3_x = L.Linear(n_units, 4 * n_units),
            l3_h = L.Linear(n_units, 4 * n_units),
            l4_embed = L.EmbedID(n_vocab, n_units),
            l4_x = L.Linear(n_units, 4 * n_units),
            l4_h = L.Linear(n_units, 4 * n_units),
            l5_embed = L.EmbedID(n_vocab, n_units),
            l5_x = L.Linear(n_units, 4 * n_units),
            l5_h = L.Linear(n_units, 4 * n_units),
            l_umembed = L.Linear(n_units, n_vocab)
        )

    def forward(self, x1, x2, x3, x4, x5, t, train=True, dropout_ratio=0.5):
        h1 = self.l1_embed(chainer.Variable(np.asarray([x1], dtype=np.int32)))
        c1, y1 = F.lstm(chainer.Variable(np.zeros((1, n_units), dtype=np.float32)),
                        F.dropout(self.l1_x(h1), ratio=dropout_ratio, train=train) + self.l1_h(self.state['y1']))
        h2 = self.l2_embed(chainer.Variable(np.asarray([x2], dtype=np.int32)))
        c2, y2 = F.lstm(self.state['c1'],
                        F.dropout(self.l2_x(h2), ratio=dropout_ratio, train=train) + self.l2_h(self.state['y2']))
        h3 = self.l3_embed(chainer.Variable(np.asarray([x3], dtype=np.int32)))
        c3, y3 = F.lstm(self.state['c2'],
                        F.dropout(self.l3_x(h3), ratio=dropout_ratio, train=train) + self.l3_h(self.state['y3']))
        h4 = self.l4_embed(chainer.Variable(np.asarray([x4], dtype=np.int32)))
        c4, y4 = F.lstm(self.state['c3'],
                        F.dropout(self.l4_x(h4), ratio=dropout_ratio, train=train) + self.l4_h(self.state['y4']))
        h5 = self.l5_embed(chainer.Variable(np.asarray([x5], dtype=np.int32)))
        c5, y5 = F.lstm(self.state['c4'],
                        F.dropout(self.l5_x(h5), ratio=dropout_ratio, train=train) + self.l5_h(self.state['y5']))
        self.state = {'c1': c1, 'y1': y1, 'h1': h1, 'c2': c2, 'y2': y2, 'h2': h2,
                      'c3': c3, 'y3': y3, 'h3': h3, 'c4': c4, 'y4': y4, 'h4': h4,
                      'c5': c5, 'y5': y5, 'h5': h5}
        y = self.l_umembed(y5)
        print('y:', vars(y))
        print('ans:', np.asarray([t]), 'pred:', np.argmax(y.data))
        if train:
            return F.softmax_cross_entropy(y, np.asarray([t], dtype=np.int32))
        else:
            # at prediction time, return the distribution over the vocabulary
            return F.softmax(y), y.data

    def initialize_state(self, n_units, batchsize=1, train=True):
        for name in ('c1', 'y1', 'h1', 'c2', 'y2', 'h2', 'c3', 'y3', 'h3', 'c4', 'y4', 'h4', 'c5', 'y5', 'h5'):
            self.state[name] = chainer.Variable(np.zeros((batchsize, n_units), dtype=np.float32), volatile=not train)


def load_data(filename):
    global vocab, inv_vocab
    # Replace every newline in the text with <eos> and split into words.
    # For Japanese text, put a space on both sides of <eos>: replace('\n', ' <eos> ')
    words = open(filename, encoding='utf-8').read().replace('\n', ' <eos> ').strip().split()
    dataset = np.ndarray((len(words),), dtype=np.int32)
    for i, word in enumerate(words):
        if word not in vocab:
            vocab[word] = len(vocab)
            inv_vocab[len(vocab)-1] = word
        dataset[i] = vocab[word]
    return dataset


def main():
    p = 5 # sequence length
    w = 2 # number of context words on each side
    epoch = 2 # number of passes over the data
    total_loss = 0 # accumulator for the loss value

    # Parse command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()


    # Load the training, validation and test data
    os.chdir('/Users/suguruoki/practice/chainer/examples/ptb')
    train_data = load_data('last4.dat')
    pickle.dump(vocab, open('vocab.bin', 'wb'))
    pickle.dump(inv_vocab, open('inv_vocab.bin', 'wb'))
    #train_data = load_data('jpn-train.dat')
    #train_data = load_data('ptb.train.txt')
    #valid_data = load_data('ptb.valid.txt')
    #test_data = load_data('ptb.test.txt')

    n_vocab = len(vocab)
    #print(train_data[0:1000])

    # Prepare the model
    # The input size is the vocabulary size; the hidden layer size is defined at the top of the file
    lstm = LSTM(n_vocab, n_units)
    lstm.initialize_state(n_units)
    model = L.Classifier(lstm)
    model.compute_accuracy = False
    # In a CUDA environment, switch the array module like this
    if args.gpu >= 0:
        xp = cupy
    else:
        xp = np

    # if args.gpu >= 0:
    #     cupy.get_device(args.gpu).use()
    #     model.to_gpu()

    # Load an existing model if there is one
    # model = pickle.load(open('LSTMmodel.pkl', 'rb'))

    # Set up the optimizer
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    seq = []
    seq_append = seq.append

    # Split the data into 5-word windows
    for i in range(epoch): # repeat over the same data for the given number of epochs
        print('Epoch =', i+1, file=sys.stderr)
        length = len(train_data)

        # Scan the words in order
        for t in range(length):

            print("{}/{}".format(t, length))
            # Handle the beginning and the end of the text

            for k in range(t-w, t+w+1):
                if k >= 0:
                    if k == t:
                        seq.append(vocab['<$>'])
                    elif k > len(train_data)-1:
                        seq.append(vocab['<s>'])
                    else:
                        seq.append(train_data[k])
                else:
                    seq.append(vocab['<s>'])
            seq.append(train_data[t])
            # print('t =', t, ', seq :', seq)
            tmp = np.array(seq, dtype='i')
            seq = [] # seq: list of surrounding words

            loss = lstm.forward(tmp[0], tmp[1], tmp[2], tmp[3], tmp[4], tmp[5])
            tmp = []

            # Accumulate the loss for reporting
            if i % epoch == 0:
                total_loss += loss.data
            # Run the optimization step
            model.cleargrads()
            loss.backward()
            optimizer.update()
        # Save the training result to files once per epoch
        # model.to_cpu()
        pickle.dump(model, open('LSTMmodel.pkl', 'wb'))
        pickle.dump(lstm, open('LSTMlstm.pkl', 'wb'))


if __name__ == '__main__':
    main()
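
The training script writes vocab.bin and inv_vocab.bin as pickled dictionaries that are meant to be inverses of each other (word to id, and id back to word). The snippet below is a short, self-contained sanity check, not part of the original page, that one might run after training to confirm the saved files line up; it assumes the two files were produced by a run of the script above.

import pickle

vocab = pickle.load(open('vocab.bin', 'rb'))         # word -> id
inv_vocab = pickle.load(open('inv_vocab.bin', 'rb'))  # id -> word
print('vocabulary size:', len(vocab))
# every (word, id) pair in vocab should map back to the same word in inv_vocab
assert all(inv_vocab[idx] == word for word, idx in vocab.items())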



