# Tensorflow.py - trains and evaluates a dense TensorFlow Estimator on TF-IDF e-mail features.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from sklearn.preprocessing import Normalizer
from SharedFunctions import get_current_time, fmt, find_accuracy
from datetime import datetime
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from ReadPreprocessData import read_preprocess
from Tokenize import tokenize
import numpy as np
import tensorflow as tf
import scipy.sparse as ss
# Show TensorFlow log messages at INFO level and above.
tf.logging.set_verbosity(tf.logging.INFO)

# Module-level TF-IDF vectorizer: uses the project's `tokenize` callable and
# scikit-learn's built-in English stop-word list. `main` fits it on the
# training e-mails and reuses it to transform the test e-mails.
vectorizer = TfidfVectorizer(
    stop_words='english',
    tokenizer=tokenize,
)
def convert_sparse_matrix_to_sparse_tensor(X):
    """Convert a SciPy sparse matrix into a ``tf.SparseTensor``.

    Args:
        X: A SciPy sparse matrix (any format that provides ``tocoo()``).

    Returns:
        A ``tf.SparseTensor`` with the same shape as ``X`` whose values are
        the explicitly stored entries of ``X``.
    """
    coo = X.tocoo()
    # One [row, col] pair per stored entry, shape (nnz, 2).
    # ``np.column_stack`` replaces the former ``np.mat(...).transpose()``:
    # ``np.mat`` was removed in NumPy 2.0 and produced the legacy matrix class.
    # The explicit int64 cast matches the index dtype of ``tf.SparseTensor``
    # instead of relying on an implicit conversion of SciPy's index arrays.
    indices = np.column_stack((coo.row, coo.col)).astype(np.int64)
    return tf.SparseTensor(indices, coo.data, coo.shape)
def cnn_model_fn(features, labels, mode):
    """Model function for ``tf.estimator.Estimator``: a fully connected 2-class classifier.

    Despite the name (kept so existing callers still work), the network has
    no convolutional layers: it is three ReLU dense layers, dropout, and a
    2-unit logits layer.

    Args:
        features: Input tensor fed to the first dense layer.
        labels: Class-index labels used for the sparse softmax cross-entropy
            loss and the accuracy metric. Not used in PREDICT mode.
        mode: One of ``tf.estimator.ModeKeys`` (TRAIN, EVAL or PREDICT).

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode:
        predictions only (PREDICT), loss + train op (TRAIN), or
        loss + accuracy metric (EVAL).
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Hidden layers.
    # The first layer used to be declared with units=0, which yields an empty
    # activation, so everything downstream was independent of the input.
    # NOTE(review): 2000 is a chosen width continuing the 1000 -> 200 taper;
    # confirm/tune it. Changing layer sizes also means checkpoints saved with
    # the old architecture in the estimator's model_dir will no longer load.
    dense = tf.layers.dense(inputs=features, units=2000, activation=tf.nn.relu)
    dense1 = tf.layers.dense(inputs=dense, units=1000, activation=tf.nn.relu)
    dense2 = tf.layers.dense(inputs=dense1, units=200, activation=tf.nn.relu)

    # Dropout with rate 0.4 (each element is kept with probability 0.6);
    # only active while training.
    dropout = tf.layers.dropout(inputs=dense2, rate=0.4, training=is_training)

    # Logits layer: 200 inputs -> 2 classes.
    # Fed from `dropout`; previously it read `dense2`, which left the dropout
    # op disconnected from the graph and therefore without any effect.
    logits = tf.layers.dense(inputs=dropout, units=2)

    predictions = {
        # Predicted class index (used in PREDICT and EVAL modes).
        "classes": tf.argmax(input=logits, axis=1),
        # Per-class probabilities; the op is named `softmax_tensor` so it can
        # be looked up by name (e.g. from a logging hook).
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Loss is needed for both TRAIN and EVAL.
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    if is_training:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # EVAL mode: report accuracy alongside the loss.
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"]),
    }
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
def main(unused_argv):
    """Train and evaluate the classifier, then write test predictions to ``result.csv``.

    Steps: read/preprocess the e-mails, TF-IDF vectorize them with the
    module-level ``vectorizer``, train an Estimator built on ``cnn_model_fn``
    for 500 steps, print the evaluation metrics, and save one ``ID,Label``
    row per test e-mail (IDs start at 1).

    Args:
        unused_argv: Command-line arguments forwarded by ``tf.app.run``; ignored.
    """
    # Upper bound on the number of rows used from each split. It was formerly
    # an inline magic number applied to the features only; it is now applied
    # to features and labels alike so the two can never disagree in length.
    max_rows = 23687

    # Reading and preprocessing data.
    started = get_current_time()
    train_emails, train_labels, test_emails, test_labels = read_preprocess()
    print("Time taken to Read and Preprocess Data:",
          datetime.strptime(get_current_time(), fmt) - datetime.strptime(started, fmt))

    # Vectorizing data: fit on the training set only, reuse for the test set.
    started = get_current_time()
    train_features = vectorizer.fit_transform(train_emails)
    test_features = vectorizer.transform(test_emails)
    print("Time taken to Vectorize:",
          datetime.strptime(get_current_time(), fmt) - datetime.strptime(started, fmt))
    # Number of features. `vocabulary_` is used instead of
    # `get_feature_names()`, which was removed in scikit-learn 1.2.
    print(len(vectorizer.vocabulary_))

    # Slice while still sparse, then densify, so rows beyond the bound are
    # never materialized as dense data.
    train_data = train_features[:max_rows].toarray()
    eval_data = test_features[:max_rows].toarray()
    train_targets = np.array(train_labels)[:max_rows]
    eval_targets = np.array(test_labels)[:max_rows]

    # Create the Estimator.
    classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn, model_dir="/tmp/mnist_convnet_model")

    # Train the model: one shuffled example per step, repeating indefinitely
    # until the step budget is exhausted.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x=train_data,
        y=train_targets,
        batch_size=1,
        num_epochs=None,
        shuffle=True)
    classifier.train(
        input_fn=train_input_fn,
        steps=500)

    # Evaluate the model and print results: a single ordered pass.
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x=eval_data,
        y=eval_targets,
        num_epochs=1,
        shuffle=False)
    eval_results = classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)

    # Predict on the same ordered pass and export as "ID,Label" rows.
    predicted = np.asarray(
        [p["classes"] for p in classifier.predict(eval_input_fn)])
    ids = np.arange(1, predicted.size + 1)
    np.savetxt("result.csv", np.column_stack((ids, predicted)), "%d,%d",
               header="ID,Label", comments="")
if __name__ == "__main__":
    # Script entry point: let TensorFlow's app runner parse flags and then
    # invoke this module's main() with the remaining argv.
    tf.app.run(main=main)