Re-posted from: https://tensorflowjulia.blogspot.com/2018/09/improving-neural-net-performance.html

This is the last exercise that uses the California housing dataset. We investigate several ways to improve the performance of neural nets:

- Different loss minimization algorithms
- Linear scaling of features
- Logarithmic scaling of features
- Clipping of features
- Z-score normalization
- Thresholding of data

The Jupyter notebook can be downloaded here.

In [0]:

# Licensed under the Apache License, Version 2.0 (the "License");

# you may not use this file except in compliance with the License.

# You may obtain a copy of the License at

#

# https://www.apache.org/licenses/LICENSE-2.0

#

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License.

In [1]:

using Plots
using StatPlots
using Distributions
gr()
using DataFrames
using TensorFlow
import CSV
import StatsBase
using PyCall

# One TensorFlow session on a fresh graph, shared by every training run below.
sess = Session(Graph())

# Load the training data and put its rows into a random order, so that the later
# head/tail split into training and validation sets is not tied to the file order.
california_housing_dataframe = CSV.read("california_housing_train.csv", delim=",");
california_housing_dataframe = california_housing_dataframe[shuffle(1:size(california_housing_dataframe, 1)), :];

In [2]:

"""
    preprocess_features(california_housing_dataframe)

Prepare the input features from the California housing data set.

# Arguments
- `california_housing_dataframe`: a `DataFrame` expected to contain data from the
  California housing data set.

# Returns
A `DataFrame` that contains the features to be used for the model, including the
synthetic feature `rooms_per_person` (`total_rooms ./ population`).
"""
function preprocess_features(california_housing_dataframe)
    feature_names = [
        :latitude,
        :longitude,
        :housing_median_age,
        :total_rooms,
        :total_bedrooms,
        :population,
        :households,
        :median_income,
    ]
    processed_features = california_housing_dataframe[feature_names]

    # Create a synthetic feature.
    processed_features[:rooms_per_person] =
        california_housing_dataframe[:total_rooms] ./
        california_housing_dataframe[:population]

    return processed_features
end

"""
    preprocess_targets(california_housing_dataframe)

Prepare the target feature (i.e., the label) from the California housing data set.

# Arguments
- `california_housing_dataframe`: a `DataFrame` expected to contain data from the
  California housing data set.

# Returns
A `DataFrame` with the single column `median_house_value`.
"""
function preprocess_targets(california_housing_dataframe)
    # Scale the target to be in units of thousands of dollars.
    house_values = california_housing_dataframe[:median_house_value] ./ 1000.0

    output_targets = DataFrame()
    output_targets[:median_house_value] = house_values
    return output_targets
end

Out[2]:

In [3]:

# Choose the first 12000 (out of 17000) examples for training.
training_examples = preprocess_features(head(california_housing_dataframe, 12000))
training_targets = preprocess_targets(head(california_housing_dataframe, 12000))

# Choose the last 5000 (out of 17000) examples for validation.
validation_examples = preprocess_features(tail(california_housing_dataframe, 5000))
validation_targets = preprocess_targets(tail(california_housing_dataframe, 5000))

# Double-check that we've done the right thing.
println("Training examples summary:")
describe(training_examples)
println("Validation examples summary:")
describe(validation_examples)

println("Training targets summary:")
describe(training_targets)
println("Validation targets summary:")
describe(validation_targets)

Out[3]:

In [10]:

"""
    construct_columns(input_features)

Convert the numerical input features into a plain `Float64` array that can be fed to
the TensorFlow placeholders.

# Arguments
- `input_features`: `DataFrame` of the numerical input features to use.

# Returns
An array with the same rows and columns as `input_features` and element type `Float64`.
"""
function construct_columns(input_features)
    out = convert(Array, input_features[:, :])
    return convert.(Float64, out)
end

Out[10]:

In [4]:

"""
    create_batches(features, targets, steps, batch_size=5, num_epochs=0)

Build the pool of rows from which the training batches are drawn.

# Arguments
- `features`: input features.
- `targets`: target column.
- `steps`: number of training steps.
- `batch_size`: batch size.
- `num_epochs`: number of epochs; `0` means the number is computed here so that the
  pool holds at least `batch_size * steps` rows.

# Returns
A tuple `(features_batches, target_batches)`: for every epoch the rows of `features`
and `targets` are shuffled with the same permutation and appended, so row `k` of both
results always belongs to the same example.
"""
function create_batches(features, targets, steps, batch_size=5, num_epochs=0)
    if num_epochs == 0
        # ceil(Int, ...) keeps the epoch count an integer instead of a Float64.
        num_epochs = ceil(Int, batch_size * steps / size(features, 1))
    end

    # Returned as-is (unshuffled copies) only when no epoch is run at all.
    features_batches = copy(features)
    target_batches = copy(targets)

    for epoch in 1:num_epochs
        select = shuffle(1:size(features, 1))
        if epoch == 1
            features_batches = features[select, :]
            target_batches = targets[select, :]
        else
            append!(features_batches, features[select, :])
            append!(target_batches, targets[select, :])
        end
    end

    return features_batches, target_batches
end

"""
    next_batch(features_batches, targets_batches, batch_size, iter)

Extract the rows that make up batch number `iter`.

# Arguments
- `features_batches`: features batches from `create_batches`.
- `targets_batches`: target batches from `create_batches`.
- `batch_size`: batch size.
- `iter`: number of the current iteration (starting at 1).

# Returns
A tuple `(ds, target)` holding `batch_size` rows of the features and the matching rows
of the targets. Row positions wrap around to the first row once the end is reached.
"""
function next_batch(features_batches, targets_batches, batch_size, iter)
    total_rows = size(features_batches, 1)
    first_row = (iter - 1) * batch_size

    # mod1 maps into 1:total_rows. The former mod(start):mod(stop) range collapsed to
    # an empty range whenever the batch ended exactly on, or wrapped past, the last row.
    select = [mod1(first_row + offset, total_rows) for offset in 1:batch_size]

    ds = features_batches[select, :]
    target = targets_batches[select, :]
    return ds, target
end

Out[4]:

In [6]:

"""
    my_input_fn(features_batches, targets_batches, iter, batch_size=5, shuffle_flag=1)

Prepare a batch of features and labels for model training.

# Arguments
- `features_batches`: features batches from `create_batches`.
- `targets_batches`: target batches from `create_batches`.
- `iter`: number of the current iteration.
- `batch_size`: batch size.
- `shuffle_flag`: if equal to `1`, the rows of the batch are shuffled before being
  returned.

# Returns
Tuple of `(features, labels)` for the next data batch.
"""
function my_input_fn(features_batches, targets_batches, iter, batch_size=5, shuffle_flag=1)
    # Pick the rows belonging to this iteration.
    ds, target = next_batch(features_batches, targets_batches, batch_size, iter)

    # Shuffle the data, if specified. The same permutation is applied to features and
    # targets so that rows stay paired.
    if shuffle_flag == 1
        select = shuffle(1:size(ds, 1))
        ds = ds[select, :]
        target = target[select, :]
    end

    # Return the next batch of data.
    return ds, target
end

Out[6]:

In [14]:

"""
    train_nn_regression_model(my_optimizer, steps, batch_size, hidden_units,
                              keep_probability, training_examples, training_targets,
                              validation_examples, validation_targets)

Train a neural network regression model and report the RMSE on the training and
validation data after each of 10 periods.

# Arguments
- `my_optimizer`: optimizer used for the training step (it is constructed by the
  caller, e.g. `train.GradientDescentOptimizer(0.0007)`).
- `steps`: a non-zero integer, the total number of training steps. A training step
  consists of a forward and backward pass using a single batch.
- `batch_size`: a non-zero integer, the batch size.
- `hidden_units`: a vector describing the layout of the neural network (one entry per
  hidden layer). It is not modified.
- `keep_probability`: a float, the probability of keeping a node active during one
  training step.
- `training_examples`, `training_targets`: features and targets used for training.
- `validation_examples`, `validation_targets`: features and targets used for
  validation.

# Returns
- `p1`: plot of the RMSE for the different periods.
- `training_rmse`: training RMSE values for the different periods.
- `validation_rmse`: validation RMSE values for the different periods.
"""
function train_nn_regression_model(my_optimizer,
                                   steps,
                                   batch_size,
                                   hidden_units,
                                   keep_probability,
                                   training_examples,
                                   training_targets,
                                   validation_examples,
                                   validation_targets)
    periods = 10
    steps_per_period = steps / periods

    # Create feature columns.
    feature_columns = placeholder(Float32, shape=[-1, size(construct_columns(training_examples), 2)])
    target_columns = placeholder(Float32, shape=[-1, size(construct_columns(training_targets), 2)])

    # Network parameters: add an output layer that fits the size of the targets.
    # vcat builds a new vector, so the caller's hidden_units is left untouched
    # (push! used to grow the caller's array on every call).
    layer_sizes = vcat(hidden_units, size(training_targets, 2))
    hidden_activation = z -> nn.dropout(nn.relu(z), keep_probability)
    activation_functions = Function[hidden_activation for k in 1:(length(layer_sizes) - 1)]
    push!(activation_functions, identity) # Last function is identity as we need the logits

    # Create network - professional template
    Zs = [feature_columns]
    for (ii, (hlsize, actfun)) in enumerate(zip(layer_sizes, activation_functions))
        # The random suffix keeps variable names unique across repeated calls on the same graph.
        Wii = get_variable("W_$ii" * randstring(4), [get_shape(Zs[end], 2), hlsize], Float32)
        bii = get_variable("b_$ii" * randstring(4), [hlsize], Float32)
        Zii = actfun(Zs[end] * Wii + bii)
        push!(Zs, Zii)
    end

    y = Zs[end]
    loss = reduce_sum((target_columns - y).^2)

    features_batches, targets_batches = create_batches(training_examples, training_targets, steps, batch_size)

    # Optimizer setup with gradient clipping
    gvs = train.compute_gradients(my_optimizer, loss)
    capped_gvs = [(clip_by_norm(grad, 5.), var) for (grad, var) in gvs]
    train_op = train.apply_gradients(my_optimizer, capped_gvs)

    run(sess, global_variables_initializer())

    # Train the model, but do so inside a loop so that we can periodically assess
    # loss metrics.
    println("Training model...")
    println("RMSE (on training data):")
    training_rmse = Float64[]
    validation_rmse = Float64[]
    for period in 1:periods
        # Train the model, starting from the prior state.
        for i = 1:steps_per_period
            features, labels = my_input_fn(features_batches, targets_batches, convert(Int, (period - 1) * steps_per_period + i), batch_size)
            run(sess, train_op, Dict(feature_columns => construct_columns(features), target_columns => construct_columns(labels)))
        end

        # Take a break and compute predictions.
        training_predictions = run(sess, y, Dict(feature_columns => construct_columns(training_examples)))
        validation_predictions = run(sess, y, Dict(feature_columns => construct_columns(validation_examples)))

        # Compute loss.
        training_mean_squared_error = mean((training_predictions - construct_columns(training_targets)).^2)
        training_root_mean_squared_error = sqrt(training_mean_squared_error)
        validation_mean_squared_error = mean((validation_predictions - construct_columns(validation_targets)).^2)
        validation_root_mean_squared_error = sqrt(validation_mean_squared_error)

        # Occasionally print the current loss.
        println(" period ", period, ": ", training_root_mean_squared_error)

        # Add the loss metrics from this period to our list.
        push!(training_rmse, training_root_mean_squared_error)
        push!(validation_rmse, validation_root_mean_squared_error)
    end

    println("Model training finished.")

    # Output a graph of loss metrics over periods.
    p1 = plot(training_rmse, label="training", title="Root Mean Squared Error vs. Periods", ylabel="RMSE", xlabel="Periods")
    p1 = plot!(validation_rmse, label="validation")

    println("Final RMSE (on training data): ", training_rmse[end])
    println("Final RMSE (on validation data): ", validation_rmse[end])

    return p1, training_rmse, validation_rmse
end

Out[14]:

In [11]:

# Baseline run: plain gradient descent on the unscaled features.
p1, training_rmse, validation_rmse = train_nn_regression_model(
    train.GradientDescentOptimizer(0.0007), # optimizer & learning rate
    5000, # steps
    70, # batch_size
    [10, 10], # hidden_units
    1.0, # keep probability
    training_examples,
    training_targets,
    validation_examples,
    validation_targets)

Out[11]:

In [12]:

# Show the RMSE curves returned by the training run above.
plot(p1)

Out[12]:

In [13]:

"""
    linear_scale(series)

Rescale `series` linearly so that its minimum maps to `-1.0` and its maximum to `1.0`.

# Arguments
- `series`: collection of numbers.

# Returns
An array of the rescaled values. If all values of `series` are equal there is no range
to scale by; an array of zeros is returned instead of dividing by zero (which used to
yield `NaN` for every element).
"""
function linear_scale(series)
    min_val, max_val = extrema(series)
    scale = (max_val - min_val) / 2.0

    # isequal (rather than ==) stays a plain Bool even if scale is `missing`.
    if isequal(scale, 0)
        return zeros(size(series))
    end
    return (series .- min_val) ./ scale .- 1.0
end

Out[13]:

In [15]:

"""
    normalize_linear_scale(examples_dataframe)

Return a version of the input `DataFrame` that has all its features normalized
linearly with `linear_scale`.
"""
function normalize_linear_scale(examples_dataframe)
    feature_names = (
        :latitude,
        :longitude,
        :housing_median_age,
        :total_rooms,
        :total_bedrooms,
        :population,
        :households,
        :median_income,
        :rooms_per_person,
    )

    processed_features = DataFrame()
    # Columns are added in the order listed above.
    for name in feature_names
        processed_features[name] = linear_scale(examples_dataframe[name])
    end
    return processed_features
end

# Scale all features linearly over the whole data set, then split it the same way as
# the unscaled examples (first 12000 rows for training, last 5000 for validation).
normalized_dataframe = normalize_linear_scale(preprocess_features(california_housing_dataframe))
normalized_training_examples = head(normalized_dataframe, 12000)
normalized_validation_examples = tail(normalized_dataframe, 5000)

p1, graddescent_training_rmse, graddescent_validation_rmse = train_nn_regression_model(
    train.GradientDescentOptimizer(0.005), # optimizer & learning rate
    2000, # steps
    50, # batch_size
    [10, 10], # hidden_units
    1.0, # keep probability
    normalized_training_examples,
    training_targets,
    normalized_validation_examples,
    validation_targets)

Out[15]:

In [16]:

# Summary statistics of the linearly scaled features.
describe(normalized_dataframe)

Out[16]:

In [17]:

# Show the RMSE curves of the gradient descent run on the scaled features.
plot(p1)

Out[17]:

In [42]:

# Same network and data as the gradient descent run, trained with the momentum optimizer.
p1, momentum_training_rmse, momentum_validation_rmse = train_nn_regression_model(
    train.MomentumOptimizer(0.005, 0.05), # optimizer
    2000, # steps
    50, # batch_size
    [10, 10], # hidden_units
    1.0, # keep probability
    normalized_training_examples,
    training_targets,
    normalized_validation_examples,
    validation_targets)

Out[42]:

In [43]:

# Show the RMSE curves of the momentum run.
plot(p1)

Out[43]:

In [52]:

# Same network and data as the gradient descent run, trained with the Adam optimizer.
p1, adam_training_rmse, adam_validation_rmse = train_nn_regression_model(
    train.AdamOptimizer(0.2), # optimizer
    2000, # steps
    50, # batch_size
    [10, 10], # hidden_units
    1.0, # keep probability
    normalized_training_examples,
    training_targets,
    normalized_validation_examples,
    validation_targets)

Out[52]:

In [53]:

# Show the RMSE curves of the Adam run.
plot(p1)

Out[53]:

In [54]:

# Overlay the training and validation RMSE of all three optimizers in one plot.
p2 = plot(graddescent_training_rmse, label="Gradient descent training", ylabel="RMSE", xlabel="Periods", title="Root Mean Squared Error vs. Periods")
p2 = plot!(graddescent_validation_rmse, label="Gradient descent validation")
p2 = plot!(adam_training_rmse, label="Adam training")
p2 = plot!(adam_validation_rmse, label="Adam validation")
p2 = plot!(momentum_training_rmse, label="Momentum training")
p2 = plot!(momentum_validation_rmse, label="Momentum validation")

Out[54]:

In [22]:

# One histogram per feature, built in a loop over the column names instead of nine
# copy-pasted calls (no eval on quoted expressions is needed for this).
hist_columns = (
    :latitude,
    :longitude,
    :housing_median_age,
    :total_rooms,
    :total_bedrooms,
    :population,
    :households,
    :median_income,
    :rooms_per_person,
)
hists = [histogram(normalized_training_examples[col], bins=20, title=string(col)) for col in hist_columns]

# Keep the individual plots available under their previous names.
hist1, hist2, hist3, hist4, hist5, hist6, hist7, hist8, hist9 = hists

plot(hists..., layout=length(hists), legend=false)

Out[22]:

In [23]:

"""
    log_normalize(series)

Return `log(x + 1)` for every element `x` of `series`.
"""
function log_normalize(series)
    return log.(series .+ 1.0)
end

"""
    clip(series, clip_to_min, clip_to_max)

Limit every element of `series` to the interval from `clip_to_min` to `clip_to_max`:
smaller values are raised to `clip_to_min`, larger ones lowered to `clip_to_max`.
"""
function clip(series, clip_to_min, clip_to_max)
    # First lift everything below the lower bound, then cap at the upper bound.
    raised = max.(series, clip_to_min)
    return min.(raised, clip_to_max)
end

"""
    z_score_normalize(series)

Return the z-scores of `series`: every element minus the mean of `series`, divided by
the standard deviation of `series`.
"""
function z_score_normalize(series)
    mean_val = mean(series)
    std_dv = std(series, mean=mean_val)
    # Subtract the computed mean value (the code used to subtract the function `mean`).
    return (series .- mean_val) ./ std_dv
end

"""
    binary_threshold(series, threshold)

Return `1` for every element of `series` that is greater than `threshold` and `0` for
every other element.
"""
function binary_threshold(series, threshold)
    return map(x -> (x > threshold ? 1 : 0), series)
end

Out[23]:

In [24]:

# Histograms of three features after the log(x + 1) transformation.
hist10 = histogram(log_normalize(california_housing_dataframe[:households]), title="households")
hist11 = histogram(log_normalize(california_housing_dataframe[:total_rooms]), title="total_rooms")
hist12 = histogram(log_normalize(training_examples[:rooms_per_person]), title="rooms_per_person")
plot(hist10, hist11, hist12, layout=3, legend=false)

Out[24]:

In [46]:

"""
    normalize_df(examples_dataframe)

Return a version of the input `DataFrame` that has all its features normalized:

- `households`, `median_income`, `total_bedrooms`: `log_normalize`
- `latitude`, `longitude`, `housing_median_age`: `linear_scale`
- `population`, `rooms_per_person`, `total_rooms`: clipped to `0`–`5000`, `0`–`5` and
  `0`–`10000` respectively, then `linear_scale`
"""
function normalize_df(examples_dataframe)
    processed_features = DataFrame()

    # The columns are added in exactly this order.
    for name in (:households, :median_income, :total_bedrooms)
        processed_features[name] = log_normalize(examples_dataframe[name])
    end

    for name in (:latitude, :longitude, :housing_median_age)
        processed_features[name] = linear_scale(examples_dataframe[name])
    end

    # Cut off the long upper tails before scaling.
    for (name, upper) in ((:population, 5000), (:rooms_per_person, 5), (:total_rooms, 10000))
        processed_features[name] = linear_scale(clip(examples_dataframe[name], 0, upper))
    end

    return processed_features
end

# Normalize each feature with the transformation chosen in normalize_df, then split
# into the first 12000 rows for training and the last 5000 rows for validation.
normalized_dataframe = normalize_df(preprocess_features(california_housing_dataframe))
normalized_training_examples = head(normalized_dataframe, 12000)
normalized_validation_examples = tail(normalized_dataframe, 5000)

p1, adam_training_rmse, adam_validation_rmse = train_nn_regression_model(
    train.AdamOptimizer(0.15), # optimizer
    2000, # steps
    50, # batch_size
    [10, 10], # hidden_units
    1.0, # keep probability
    normalized_training_examples,
    training_targets,
    normalized_validation_examples,
    validation_targets)

Out[46]:

In [47]:

# Show the RMSE curves of the run on the individually normalized features.
plot(p1)

Out[47]:

In [35]:

"""
    location_location_location(examples_dataframe)

Return a version of the input `DataFrame` that keeps only the latitude and longitude,
both rescaled with `linear_scale`.
"""
function location_location_location(examples_dataframe)
    processed_features = DataFrame()
    for name in (:latitude, :longitude)
        processed_features[name] = linear_scale(examples_dataframe[name])
    end
    return processed_features
end

# Use only the scaled latitude and longitude as features, with the same split as
# before (first 12000 rows for training, last 5000 rows for validation).
lll_dataframe = location_location_location(preprocess_features(california_housing_dataframe))
lll_training_examples = head(lll_dataframe, 12000)
lll_validation_examples = tail(lll_dataframe, 5000)

p1, lll_training_rmse, lll_validation_rmse = train_nn_regression_model(
    train.AdamOptimizer(0.15), # optimizer
    500, # steps
    100, # batch_size
    [10, 10, 5, 5], # hidden_units
    1.0, # keep probability
    lll_training_examples,
    training_targets,
    lll_validation_examples,
    validation_targets)

Out[35]:

In [36]:

# Show the RMSE curves of the latitude/longitude-only run.
plot(p1)

Out[36]:

In [ ]:

`#EOF`