Tensorflow. Nonlinear regression
I have a feature and a label whose relationship is too nonlinear for a linear model to fit well. I already trained an SVR (kernel='rbf') model with sklearn, but now I need to do the same with TensorFlow, and it's hard to tell what needs to be written to achieve the same or a better result.
Do you see that lazy orange line there? It doesn't fill you with determination.
the code itself:
import pandas as pd
import numpy as np
import tensorflow as tf
import tqdm
import matplotlib.pyplot as plt
from omnicomm_data.test_data import get_model, clean_df
import os
from sklearn import preprocessing
# Fit a single-feature regression with the TF1 graph API in two variants
# (plain linear and tanh-wrapped), record the per-epoch loss of each, and
# plot both fits next to a pre-trained sklearn RBF model.
graph = tf.get_default_graph()

# tf variables
x_ = tf.placeholder(name="input", shape=[None, 1], dtype=np.float32)
y_ = tf.placeholder(name="output", shape=[None, 1], dtype=np.float32)
# Scalar weight and bias. NOTE: both models below read and update this same
# pair of variables, so the two train steps interfere with each other.
w = tf.Variable(tf.random_normal([]), name='weight')
b = tf.Variable(tf.random_normal([]), name='bias')
lin_model = tf.add(tf.multiply(x_, w), b)

# loss: mean squared error of the linear prediction
loss = tf.reduce_mean(tf.pow(lin_model - y_, 2), name='loss')
train_step = tf.train.GradientDescentOptimizer(0.000000025).minimize(loss)

# nonlinear part: tanh wraps the whole affine expression, so the prediction
# is confined to (-1, 1) regardless of w and b.
nonlin_model = tf.tanh(tf.add(tf.multiply(x_, w), b))
nonlin_loss = tf.reduce_mean(tf.pow(nonlin_model - y_, 2), name='cost')
train_step_nonlin = tf.train.GradientDescentOptimizer(0.000000025).minimize(nonlin_loss)

# pandas data: column 0 is fed as the target (y_), column 1 as the input (x_);
# both are reshaped to column vectors to match the [None, 1] placeholders.
df_train = pd.read_csv('me_rate.csv', header=None)
liters = df_train.iloc[:, 0].values.reshape(-1, 1)
parrots = df_train.iloc[:, 1].values.reshape(-1, 1)

# model for prediction
mms = preprocessing.MinMaxScaler()
# NOTE(review): path_to_model is not defined anywhere in this snippet; it has
# to be set before this line for the script to run.
rbf = get_model(path_to_model)

n_epochs = 200
train_errors = []
non_train_errors = []
test_errors = []

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _epoch in tqdm.tqdm(range(n_epochs)):
        # Full-batch gradient step for the linear model. No TensorBoard
        # summaries are fetched: the script defines neither summary ops nor a
        # writer, and the per-epoch loss is already kept in train_errors.
        _, train_err = sess.run([train_step, loss],
                                feed_dict={x_: parrots, y_: liters})
        train_errors.append(train_err)

        # Full-batch gradient step for the tanh model (same w and b).
        _, non_train_err = sess.run([train_step_nonlin, nonlin_loss],
                                    feed_dict={x_: parrots, y_: liters})
        non_train_errors.append(non_train_err)

    # Loss curves of both models over the epochs.
    plt.plot(list(range(n_epochs)), train_errors, label='train_lin')
    plt.plot(list(range(n_epochs)), non_train_errors, label='train_nonlin')
    plt.legend()
    print(train_errors[:10])
    print(non_train_errors[:10])
    plt.show()

    # Fitted curves against the raw data. These sess.run calls need the
    # session to still be open, so the plotting stays inside the with-block.
    plt.scatter(parrots, liters, label='actual data')
    plt.plot(parrots, sess.run(lin_model, feed_dict={x_: parrots}), label='linear (tf)')
    plt.plot(parrots, sess.run(nonlin_model, feed_dict={x_: parrots}), label='nonlinear (tf)')
    # The MinMaxScaler is fitted on the same data right at prediction time.
    plt.plot(parrots, rbf.predict(mms.fit_transform(parrots)), label='rbf (sklearn)')
    plt.legend()
    plt.show()
How can I motivate the orange line?
After the changes, the code looks like this:
import pandas as pd
import numpy as np
import tensorflow as tf
import tqdm
import matplotlib.pyplot as plt
from omnicomm_data.test_data import get_model
import os
from sklearn import preprocessing
# Fit liters ~ w * tanh(x) + b with plain gradient descent (TF1 graph API) on
# a max-abs-scaled feature, then plot the fit next to a pre-trained sklearn
# RBF model.
graph = tf.get_default_graph()

# Graph inputs: one feature column in, one target column out.
x_ = tf.placeholder(name="input", shape=[None, 1], dtype=np.float32)
y_ = tf.placeholder(name="output", shape=[None, 1], dtype=np.float32)

# Trainable scalars.
w = tf.Variable(tf.random_normal([]), name='weight')
b = tf.Variable(tf.random_normal([]), name='bias')

# tanh is applied to the input only; the output stays an unbounded affine
# function of tanh(x).
nonlin_model = tf.tanh(x_) * w + b
nonlin_loss = tf.reduce_mean((nonlin_model - y_) ** 2, name='cost')
optimizer = tf.train.GradientDescentOptimizer(0.01)
train_step_nonlin = optimizer.minimize(nonlin_loss)

# Column 0 is fed as the target (y_), column 1 as the input (x_); both are
# reshaped to column vectors to match the [None, 1] placeholders.
df_train = pd.read_csv('me_rate.csv', header=None)
liters = df_train.iloc[:, 0].values.reshape(-1, 1)
parrots = df_train.iloc[:, 1].values.reshape(-1, 1)

# Reference model used for comparison in the final plot.
mms = preprocessing.MinMaxScaler()
# NOTE(review): path_to_model is not defined anywhere in this snippet; it has
# to be set before this line for the script to run.
rbf = get_model(path_to_model)

# Scale the feature before it is passed through tanh.
nz = preprocessing.MaxAbsScaler()
norm_parrots = nz.fit_transform(parrots)
print(norm_parrots)

n_epochs = 20000
train_errors, non_train_errors, test_errors = [], [], []
weights, biases = [], []

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Every step uses the full data set, so the feed dict never changes.
    full_batch = {x_: norm_parrots, y_: liters}
    for i in tqdm.tqdm(range(n_epochs)):
        fetched = sess.run([train_step_nonlin, nonlin_loss, w, b],
                           feed_dict=full_batch)
        _, non_train_err, weight, bias = fetched
        # Keep the loss and parameter trajectory for later inspection.
        non_train_errors.append(non_train_err)
        weights.append(weight)
        biases.append(bias)

    # Evaluating the fit needs the open session, so plotting stays inside the
    # with-block.
    plt.scatter(norm_parrots, liters, label='actual data')
    tf_fit = sess.run(nonlin_model, feed_dict={x_: norm_parrots})
    plt.plot(norm_parrots, tf_fit, c='orange', label='nonlinear (tf)')
    # The MinMaxScaler is fitted on the raw feature right at prediction time.
    rbf_fit = rbf.predict(mms.fit_transform(parrots))
    plt.plot(norm_parrots, rbf_fit, label='rbf (sklearn)')
    plt.legend()
    plt.show()
As you can clearly see, we got some improvement in the orange line (it is not quite as good as rbf yet, but it just needs more work).
+3
source to share