Skip to content

Commit 8996d65

Browse files
committed
Update code to TensorFlow 1.14 version
1 parent e01da7a commit 8996d65

10 files changed

Lines changed: 29 additions & 29 deletions

File tree

‎ANN/train_ann.py‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def train_ann():
3939
x_val, y_val = dh.pad_data(val_data, args.pad_seq_len)
4040

4141
# Build vocabulary
42-
VOCAB_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.embedding_dim, args.word2vec_file)
42+
VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.word2vec_file)
4343

4444
# Build a graph and ann object
4545
with tf.Graph().as_default():
@@ -53,7 +53,7 @@ def train_ann():
5353
sequence_length=args.pad_seq_len,
5454
vocab_size=VOCAB_SIZE,
5555
embedding_type=args.embedding_type,
56-
embedding_size=args.embedding_dim,
56+
embedding_size=EMBEDDING_SIZE,
5757
fc_hidden_size=args.fc_dim,
5858
num_classes=args.num_classes,
5959
l2_reg_lambda=args.l2_lambda,

‎CNN/train_cnn.py‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def train_cnn():
4040
x_val, y_val = dh.pad_data(val_data, args.pad_seq_len)
4141

4242
# Build vocabulary
43-
VOCAB_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.embedding_dim, args.word2vec_file)
43+
VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.word2vec_file)
4444

4545
# Build a graph and cnn object
4646
with tf.Graph().as_default():
@@ -54,7 +54,7 @@ def train_cnn():
5454
sequence_length=args.pad_seq_len,
5555
vocab_size=VOCAB_SIZE,
5656
embedding_type=args.embedding_type,
57-
embedding_size=args.embedding_dim,
57+
embedding_size=EMBEDDING_SIZE,
5858
filter_sizes=args.filter_sizes,
5959
num_filters=args.num_filters,
6060
fc_hidden_size=args.fc_dim,

‎CRNN/train_crnn.py‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def train_crnn():
4040
x_val, y_val = dh.pad_data(val_data, args.pad_seq_len)
4141

4242
# Build vocabulary
43-
VOCAB_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.embedding_dim, args.word2vec_file)
43+
VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.word2vec_file)
4444

4545
# Build a graph and crnn object
4646
with tf.Graph().as_default():
@@ -54,7 +54,7 @@ def train_crnn():
5454
sequence_length=args.pad_seq_len,
5555
vocab_size=VOCAB_SIZE,
5656
embedding_type=args.embedding_type,
57-
embedding_size=args.embedding_dim,
57+
embedding_size=EMBEDDING_SIZE,
5858
filter_sizes=args.filter_sizes,
5959
num_filters=args.num_filters,
6060
lstm_hidden_size=args.lstm_dim,

‎FastText/train_fast.py‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def train_fasttext():
4040
x_val, y_val = dh.pad_data(val_data, args.pad_seq_len)
4141

4242
# Build vocabulary
43-
VOCAB_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.embedding_dim, args.word2vec_file)
43+
VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.word2vec_file)
4444

4545
# Build a graph and fasttext object
4646
with tf.Graph().as_default():
@@ -54,7 +54,7 @@ def train_fasttext():
5454
sequence_length=args.pad_seq_len,
5555
vocab_size=VOCAB_SIZE,
5656
embedding_type=args.embedding_type,
57-
embedding_size=args.embedding_dim,
57+
embedding_size=EMBEDDING_SIZE,
5858
num_classes=args.num_classes,
5959
l2_reg_lambda=args.l2_lambda,
6060
pretrained_embedding=pretrained_word2vec_matrix)

‎HAN/train_han.py‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def train_han():
4040
x_val, y_val = dh.pad_data(val_data, args.pad_seq_len)
4141

4242
# Build vocabulary
43-
VOCAB_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.embedding_dim, args.word2vec_file)
43+
VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.word2vec_file)
4444

4545
# Build a graph and han object
4646
with tf.Graph().as_default():
@@ -54,7 +54,7 @@ def train_han():
5454
sequence_length=args.pad_seq_len,
5555
vocab_size=VOCAB_SIZE,
5656
embedding_type=args.embedding_type,
57-
embedding_size=args.embedding_dim,
57+
embedding_size=EMBEDDING_SIZE,
5858
lstm_hidden_size=args.lstm_dim,
5959
fc_hidden_size=args.fc_dim,
6060
num_classes=args.num_classes,

‎RCNN/train_rcnn.py‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def train_rcnn():
4040
x_val, y_val = dh.pad_data(val_data, args.pad_seq_len)
4141

4242
# Build vocabulary
43-
VOCAB_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.embedding_dim, args.word2vec_file)
43+
VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.word2vec_file)
4444

4545
# Build a graph and rcnn object
4646
with tf.Graph().as_default():
@@ -54,7 +54,7 @@ def train_rcnn():
5454
sequence_length=args.pad_seq_len,
5555
vocab_size=VOCAB_SIZE,
5656
embedding_type=args.embedding_type,
57-
embedding_size=args.embedding_dim,
57+
embedding_size=EMBEDDING_SIZE,
5858
lstm_hidden_size=args.lstm_dim,
5959
filter_sizes=args.filter_sizes,
6060
num_filters=args.num_filters,

‎RNN/train_rnn.py‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def train_rnn():
4040
x_val, y_val = dh.pad_data(val_data, args.pad_seq_len)
4141

4242
# Build vocabulary
43-
VOCAB_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.embedding_dim, args.word2vec_file)
43+
VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.word2vec_file)
4444

4545
# Build a graph and rnn object
4646
with tf.Graph().as_default():
@@ -54,7 +54,7 @@ def train_rnn():
5454
sequence_length=args.pad_seq_len,
5555
vocab_size=VOCAB_SIZE,
5656
embedding_type=args.embedding_type,
57-
embedding_size=args.embedding_dim,
57+
embedding_size=EMBEDDING_SIZE,
5858
lstm_hidden_size=args.lstm_dim,
5959
fc_hidden_size=args.fc_dim,
6060
num_classes=args.num_classes,

‎SANN/train_sann.py‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def train_sann():
4040
x_val, y_val = dh.pad_data(val_data, args.pad_seq_len)
4141

4242
# Build vocabulary
43-
VOCAB_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.embedding_dim, args.word2vec_file)
43+
VOCAB_SIZE, EMBEDDING_SIZE, pretrained_word2vec_matrix = dh.load_word2vec_matrix(args.word2vec_file)
4444

4545
# Build a graph and sann object
4646
with tf.Graph().as_default():
@@ -54,7 +54,7 @@ def train_sann():
5454
sequence_length=args.pad_seq_len,
5555
vocab_size=VOCAB_SIZE,
5656
embedding_type=args.embedding_type,
57-
embedding_size=args.embedding_dim,
57+
embedding_size=EMBEDDING_SIZE,
5858
lstm_hidden_size=args.lstm_dim,
5959
attention_unit_size=args.attention_dim,
6060
attention_hops_size=args.attention_hops_dim,

‎utils/data_helpers.py‎

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -281,12 +281,11 @@ def create_metadata_file(word2vec_file, output_file):
281281
fout.write(word[0] + '\n')
282282

283283

284-
def load_word2vec_matrix(embedding_size, word2vec_file):
284+
def load_word2vec_matrix(word2vec_file):
285285
"""
286286
Return the word2vec model matrix.
287287
288288
Args:
289-
embedding_size: The embedding size
290289
word2vec_file: The word2vec file
291290
Returns:
292291
The word2vec model matrix
@@ -297,13 +296,14 @@ def load_word2vec_matrix(embedding_size, word2vec_file):
297296
raise IOError("[Error] The word2vec file doesn't exist. ")
298297

299298
model = gensim.models.Word2Vec.load(word2vec_file)
300-
vocab_size = len(model.wv.vocab.items())
299+
vocab_size = model.wv.vectors.shape[0]
300+
embedding_size = model.vector_size
301301
vocab = dict([(k, v.index) for k, v in model.wv.vocab.items()])
302302
embedding_matrix = np.zeros([vocab_size, embedding_size])
303303
for key, value in vocab.items():
304304
if key is not None:
305305
embedding_matrix[value] = model[key]
306-
return vocab_size, embedding_matrix
306+
return vocab_size, embedding_size, embedding_matrix
307307

308308

309309
def data_word2vec(input_file, num_labels, word2vec_model):
@@ -475,7 +475,7 @@ def load_data_and_labels(data_file, num_labels, word2vec_file, data_aug_flag):
475475
Args:
476476
data_file: The research data
477477
num_labels: The number of classes
478-
word2vec_file: The embedding size
478+
word2vec_file: The word2vec model file
479479
data_aug_flag: The flag of data augmented
480480
Returns:
481481
The class Data

‎utils/param_parser.py‎

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def parameter_parser():
3838
parser.add_argument("--pad-seq-len",
3939
type=int,
4040
default=150,
41-
help="Padding Sequence length of data. (depends on the data)")
41+
help="Padding sequence length of data. (depends on the data)")
4242

4343
parser.add_argument("--embedding-type",
4444
type=int,
@@ -48,7 +48,7 @@ def parameter_parser():
4848
parser.add_argument("--embedding-dim",
4949
type=int,
5050
default=100,
51-
help="Dimensionality of character embedding. (default: 300)")
51+
help="Dimensionality of character embedding. (default: 100)")
5252

5353
parser.add_argument("--filter-sizes",
5454
type=list,
@@ -68,17 +68,17 @@ def parameter_parser():
6868
parser.add_argument("--lstm-dim",
6969
type=int,
7070
default=256,
71-
help="Dimensionality for LSTM Neurons. (default: 256)")
71+
help="Dimensionality of LSTM neurons. (default: 256)")
7272

7373
parser.add_argument("--lstm-layers",
7474
type=int,
7575
default=1,
76-
help="Number of LSTM Layers. (default: 1)")
76+
help="Number of LSTM layers. (default: 1)")
7777

7878
parser.add_argument("--attention-dim",
7979
type=int,
80-
default=350,
81-
help="Dimensionality of Attention Neurons. (default: 200)")
80+
default=200,
81+
help="Dimensionality of Attention neurons. (default: 200)")
8282

8383
parser.add_argument("--attention-hops-dim",
8484
type=int,
@@ -88,7 +88,7 @@ def parameter_parser():
8888
parser.add_argument("--fc-dim",
8989
type=int,
9090
default=512,
91-
help="Dimensionality for FC Neurons. (default: 512)")
91+
help="Dimensionality for FC neurons. (default: 512)")
9292

9393
parser.add_argument("--dropout-rate",
9494
type=float,
@@ -119,7 +119,7 @@ def parameter_parser():
119119
parser.add_argument("--batch-size",
120120
type=int,
121121
default=256,
122-
help="Batch Size. (default: 256)")
122+
help="Batch size. (default: 256)")
123123

124124
parser.add_argument("--learning-rate",
125125
type=float,

0 commit comments

Comments (0)