How to properly configure brain.js Neural Network

I am using the Auto MPG Training Kit from http://archive.ics.uci.edu/ml/datasets/Auto+MPG

My code:

'use strict';
var brain, fs, normalizeData, trainNetwork, _;

_ = require('lodash');

brain = require('brain');

fs = require('fs');

trainNetwork = function(trainNetworkCb) {
  // Train a fresh neural network on the first half of the Auto MPG
  // CSV and hand it to the callback as (err, net).
  var net;
  net = new brain.NeuralNetwork();
  return fs.readFile('./data/autodata.csv', function(err, fileData) {
    var fileString, lines, trainingData;
    if (err) {
      return trainNetworkCb(err);
    }
    fileString = fileData.toString();
    lines = fileString.split('\n');
    // The first half of the rows is used for training; the test code
    // elsewhere in this file uses the second half.
    trainingData = lines.splice(0, lines.length / 2);
    // BUG FIX: drop blank lines (e.g. from a trailing newline) before
    // normalizing. The test path already filters these out, but the
    // training path did not, so an empty row produced an all-NaN
    // sample that corrupts training.
    trainingData = _.filter(trainingData, function(point) {
      return point !== '';
    });
    trainingData = _.map(trainingData, function(dataPoint) {
      var normalizedData, obj;
      normalizedData = normalizeData(dataPoint);
      // Train features -> { continuous: mpg }. The target must not
      // also appear in the input, so it is deleted from the features.
      obj = {
        input: normalizedData,
        output: {
          continuous: normalizedData.continuous
        }
      };
      delete obj.input.continuous;
      return obj;
    });
    net.train(trainingData, {
      log: true,
      logPeriod: 100,
      errorThresh: 0.00005
    });
    return trainNetworkCb(null, net);
  });
};

trainNetwork(function(err, net) {
  // Evaluate the trained network on the second half of the CSV and
  // print predicted vs. expected values for each row.
  if (err) {
    throw err;
  }
  return fs.readFile('./data/autodata.csv', function(err, fileData) {
    var fileString, lines, testData;
    if (err) {
      // BUG FIX: this branch used to call trainNetworkCb(err), but
      // trainNetworkCb is a parameter of trainNetwork and is not in
      // scope here — a read failure raised a ReferenceError instead
      // of reporting the real error.
      throw err;
    }
    fileString = fileData.toString();
    lines = fileString.split('\n');
    // Second half of the rows is held out for testing.
    testData = lines.splice(lines.length / 2);
    // Skip blank lines (e.g. from a trailing newline).
    testData = _.filter(testData, function(point) {
      return point !== '';
    });
    testData = _.map(testData, function(dataPoint) {
      var normalizedData, obj;
      normalizedData = normalizeData(dataPoint);
      // Same shape as the training samples: features in, mpg out.
      obj = {
        output: {
          continuous: normalizedData.continuous
        },
        input: normalizedData
      };
      delete obj.input.continuous;
      return obj;
    });
    return _.each(testData, function(dataPoint) {
      var output;
      output = net.run(dataPoint.input);
      console.log(output);
      console.log(dataPoint);
      return console.log('');
    });
  });
});

var normalizeData = function(dataRow) {
  // Convert one CSV row of the Auto MPG dataset into a feature object
  // whose values are scaled to roughly [0, 1]. Categorical columns
  // (cylinders, model year, origin) are one-hot encoded; `continuous`
  // holds the scaled MPG target, which the callers split off into the
  // output and delete from the input.
  //
  // Expected column order: cylinders, displacement, horsepower,
  // weight, acceleration, model year, origin, mpg.
  //
  // NOTE(review): rows with missing values (the raw UCI file marks
  // missing horsepower as '?') produce NaN here — confirm the CSV
  // has been cleaned.
  //
  // Uses native Array methods instead of the lodash helpers the rest
  // of the file relies on; this function needs nothing from lodash.
  var dataSet = dataRow.split(',').map(Number);
  var row = {};
  // One-hot encode cylinder count (column 0).
  [5, 3, 6, 4, 8].forEach(function(cylinder) {
    row["cylinder" + cylinder] = cylinder === dataSet[0] ? 1 : 0;
  });
  // Scale continuous columns by constants above their maxima.
  row.displacement = dataSet[1] / 500;
  row.horsepower = dataSet[2] / 500;
  row.weight = dataSet[3] / 10000;
  row.acceleration = dataSet[4] / 100;
  // One-hot encode model year (column 5).
  [82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70].forEach(function(model_year) {
    row["model_year" + model_year] = model_year === dataSet[5] ? 1 : 0;
  });
  // One-hot encode origin (column 6).
  [2, 3, 1].forEach(function(origin) {
    row["origin" + origin] = origin === dataSet[6] ? 1 : 0;
  });
  // Scaled MPG target.
  row.continuous = dataSet[7] / 100;
  return row;
};

      

I believe I am normalizing everything correctly. I use half of the data for training and the other half for testing. The data is not ordered as far as I can tell, so it shouldn't matter which half is used for training and which for testing.

My prediction errors during testing are pretty large — usually around 10 MPG (about 30% error). What am I doing wrong?

Thanks!

+3


source to share


1 answer


The dataset you link is ordered by model year; perhaps radical changes in technology made engines more efficient over time? Neural networks depend on representative data during training. I would try training the network with all but the last few lines and then testing on those. Can you link the CSV file you are using? Your normalizeData function alone doesn't tell us how it lines up with the columns in the file ( http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data )

edit:

It seems that no matter what `errorThresh` you specify, brain won't run more than 20,000 iterations over the training data. There are several ways to get around this. You can specify a `learningRate` for your neural network. Raising the learning rate to 0.6 (the default is 0.3) helped me get more accurate results:

net.train(trainingData, {
  log: true,
  logPeriod: 100,
  errorThresh: 0.00005,
  learningRate: 0.6
});

      



A higher `learningRate` means more aggressive weight adjustment, which helps when you can't run as many iterations as you would like.

Alternatively, you can specify the total number of iterations in the options object (if not specified, it defaults to 20,000 - see here ).

net.train(trainingData, {
  log: true,
  logPeriod: 100,
  errorThresh: 0.00005,
  iterations: 100000
});

      

Brain stops training when `i < iterations && error > errorThresh` evaluates to false. So be sure to check the iteration count, to verify that the expression became false because the error dropped below your specified `errorThresh` rather than because the iteration limit was hit ( source ).

+2


source







All Articles