Thursday, 5 March 2009

K-means clustering in C++

The following program use in-line k-means algorithm to train the centroids of the K-means clustering.

Before using this code you have to change data loading function in loadTrainingData.

The beauty of this program is you do not need to worry about what is the best value of K. You could just run for range of K values (ExperimentBestKValue) and and choose the best perform K value to re-train and predict the your target (ExperimentBestK). A kind of Evolutionary algorithm... don't you agree?

// KMeanClustring.cpp : Defines the entry point for the console application.
// Author: Rudra Poudel

//include libraries
#include "stdafx.h"
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<float.h>
#include<conio.h>
#include<time.h>

//******************************
//** Setting input parameter
//******************************
#define EXPERIMENTAL_TRIALS 1000 //Numbers of trails to estimate the average value of K's success
#define EXPERIMENTAL_TRIALS_BEST_K 1 //10000 //Numbers of trails to estimate the best centroids with K centroids
#define EXPERIMENTAL_K_FROM 2 //Starting K value for experiment
#define EXPERIMENTAL_K_TO 100 //End K value for experiment
#define TESTING_SAMPLE_SIZE 50 //Testing sample size will be taken form the end of the input data
#define NUMBER_OF_TESTING_DATA 100 //Number of testing data
//#define K 20 // value of K
#define LEARNING_RATE 0.1 // Learning rate
#define MAX_TRAIN_ITERATION 50000 //Maximum nos of iteration to train the networks
#define INPUTS_COUNT 365 // Number of input patterns i.e. training sets
#define NN 6 // Number of inputs/neurons (6 Inputs )

#define PRINT_OPTION 2
//********************************

//Random generator parameters
#define IA 16807
#define IM 2147483647
#define AM (1. /IM)
#define IQ 127773
#define IR 2836
#define NTAB 32
#define NDIV (1+(IM-1)/NTAB)
#define EPS 1.2e-7
#define RNMX (1.0-EPS)
long idum;

enum PrintOptions
{
poNone = 0, //Dont print to DAT file
poTraining = 2 //Printing whole training process
};

//Function decleration
void ExperimentBestKValue();
void ExperimentBestK(int exp_k);
float getRand(long *idum);
bool loadTrainingData(double input[INPUTS_COUNT][NN], double targets[INPUTS_COUNT]);
void moveTowards(double centroid[NN], double datum[NN]);
int getClosestCentroid(double datum[NN], double** centroids, int maxK);
double getDistance(double datum[NN], double centroid[NN]);
bool isOldAndNewSame(double** oldCentroids, double** centroids, int maxK);
double assignClusterTarget(double inputs[INPUTS_COUNT][NN], double** centroids, double targets[INPUTS_COUNT], int maxK, int testingSampleSize);
double predictTarget(double inputs[INPUTS_COUNT][NN], double** centroids, double targets[INPUTS_COUNT], int maxK, int testingSampleSize);


///<FunctionName>_tmain</FunctionName>
///<arg name='argc'>Size of argv[]</arg>
///<arg name='argv[]'>Array inputs</arg>
///<Summary>Main entry point of the function. It will train the all possible target patterns for NN-1 inputs for 2-class problem.</Summary>
///<Output> 0 for success, 1 for fail.</Output>
int _tmain(int argc, _TCHAR* argv[])
{
//ExperimentBestKValue();
//ExperimentBestK(EXPERIMENTAL_TRIALS_BEST_K);
ExperimentBestK(23);
return 0;
}

///<FunctionName>ExperimentBestKValue</FunctionName>
///<Summary>Function to experiment.</Summary>
///<Output>SuccessPecentage.dat- which contains average success percent for different value of K</Output>
void ExperimentBestKValue(){
int i,j, epoch, higestSuccessPecntOfAllK=-1;
double sumTotalSuccessPecent,higestSuccessPecnt=-100, higestSuccessPecntForCurrentK=-100,tempSuccessPecent;
char filename[25];
FILE *pSuccessPec;

//Input patterns, last one is always 1 as it is input for bias
double inputs[INPUTS_COUNT][NN];
double targets[INPUTS_COUNT];
double **centroids;
double **oldCentroids;
//int centroidsTargets[K]; //0 or 1 which ever greater
//int clusterPoints[K][2]; //0= Numbers of points; i=miss classified

idum = -time(0);
if(!loadTrainingData(inputs,targets))
return;

if(PRINT_OPTION==poTraining){
sprintf(filename,"SuccessPecentage.dat");
pSuccessPec = fopen(filename,"w");
}

for(int exp_k=EXPERIMENTAL_K_FROM;exp_k<=EXPERIMENTAL_K_TO;exp_k++){
centroids = (double **) malloc(exp_k * sizeof(double *));
oldCentroids = (double **) malloc(exp_k * sizeof(double *) );
for(i=0;i<exp_k;i++) {
centroids[i] = (double *) malloc(NN * sizeof(double));
oldCentroids[i] = (double *) malloc(NN * sizeof(double));
}

sumTotalSuccessPecent = 0;
higestSuccessPecntForCurrentK = -100;
for(int exp_trials=0;exp_trials<EXPERIMENTAL_TRIALS;exp_trials++){

for(i=0;i<exp_k;i++){
for(j=0;j<NN;j++){
centroids[i][j] = getRand(&idum);
}
}

epoch=0;
do{//START: do ... while loop
for(i=0;i<exp_k;i++){
for(j=0;j<NN;j++){
oldCentroids[i][j] = centroids[i][j];
}
}

for (i=0;i<(INPUTS_COUNT-NUMBER_OF_TESTING_DATA-TESTING_SAMPLE_SIZE);i++) {
moveTowards(centroids[getClosestCentroid(inputs[i],centroids,exp_k)], inputs[i]);
}
epoch++;
//printf("epoch: %d\n",epoch);

}while(!isOldAndNewSame(oldCentroids,centroids,exp_k) && epoch<MAX_TRAIN_ITERATION);
//END: do ... while loop
tempSuccessPecent = assignClusterTarget(inputs, centroids, targets,exp_k, TESTING_SAMPLE_SIZE);
if(higestSuccessPecntForCurrentK<tempSuccessPecent){
higestSuccessPecntForCurrentK = tempSuccessPecent;
}
if(higestSuccessPecnt<tempSuccessPecent){
higestSuccessPecnt = tempSuccessPecent;
higestSuccessPecntOfAllK = exp_k;
}
sumTotalSuccessPecent += tempSuccessPecent;
}//END: for ... EXPERIMENTAL_TRIALS
if(PRINT_OPTION==poTraining){
fprintf(pSuccessPec,"%d\t%f\t%f\n",exp_k, sumTotalSuccessPecent/EXPERIMENTAL_TRIALS,higestSuccessPecntForCurrentK);
}
for(i=0;i<exp_k;i++) {
free(centroids[i]);
free(oldCentroids[i]);
}
free(centroids);
free(oldCentroids);

printf("K = %d finished.\n", exp_k);
}//END: for ... EXPERIMENTAL_K_FROM


if(PRINT_OPTION==poTraining){
fclose(pSuccessPec);

sprintf(filename,"BestKandPecentage.dat");
FILE* pFile = fopen(filename,"w");
fprintf(pFile,"%d %f\n",higestSuccessPecntOfAllK,higestSuccessPecnt);
fclose(pFile);
}

/*printf("\n");
printf("\n\nPress any to to close the program...");
getche();*/
}




///<FunctionName>ExperimentBestK</FunctionName>
///<arg name='exp_K'>Value of K</arg>
///<Summary>Function to experiment with multi starting point of K and choose the best predicted K.</Summary>
///<Output>Training_Function.dat- Best K for clustring</Output>
void ExperimentBestK(int exp_k){
int i,j, epoch;
double successPecent, bestSuccessPercent=-100;
char filename[25];
FILE *pFile;

//Input patterns, last one is always 1 as it is input for bias
double inputs[INPUTS_COUNT][NN];
double targets[INPUTS_COUNT];
double **centroids;
double **oldCentroids;
double **bestCentroids;
//int centroidsTargets[K]; //0 or 1 which ever greater
//int clusterPoints[K][2]; //0= Numbers of points; i=miss classified

idum = -time(0);
if(!loadTrainingData(inputs,targets))
return;

if(PRINT_OPTION==poTraining){
sprintf(filename,"KMeanCentroids.dat");
pFile = fopen(filename,"w");
}

centroids = (double **) malloc(exp_k * sizeof(double *));
oldCentroids = (double **) malloc(exp_k * sizeof(double *) );
bestCentroids = (double **) malloc(exp_k * sizeof(double *) );
for(i=0;i<exp_k;i++) {
centroids[i] = (double *) malloc(NN * sizeof(double));
oldCentroids[i] = (double *) malloc(NN * sizeof(double));
bestCentroids[i] = (double *) malloc(NN * sizeof(double));
}

for(int exp_trials=0;exp_trials<EXPERIMENTAL_TRIALS_BEST_K;exp_trials++){

for(i=0;i<exp_k;i++){
for(j=0;j<NN;j++){
centroids[i][j] = getRand(&idum);
}
}

epoch=0;
do{//START: do ... while loop
for(i=0;i<exp_k;i++){
for(j=0;j<NN;j++){
oldCentroids[i][j] = centroids[i][j];
}
}

for (i=0;i<(INPUTS_COUNT-NUMBER_OF_TESTING_DATA);i++) {
moveTowards(centroids[getClosestCentroid(inputs[i],centroids,exp_k)], inputs[i]);
}
epoch++;
//printf("epoch: %d\n",epoch);

}while(!isOldAndNewSame(oldCentroids,centroids,exp_k) && epoch<MAX_TRAIN_ITERATION);
//END: do ... while loop
successPecent = assignClusterTarget(inputs, centroids, targets,exp_k,INPUTS_COUNT-NUMBER_OF_TESTING_DATA);
if(bestSuccessPercent<successPecent){
bestSuccessPercent = successPecent;
for(i=0;i<exp_k;i++){
for(j=0;j<NN;j++){
bestCentroids[i][j] = centroids[i][j];
}
}

}
}//END: for ... EXPERIMENTAL_TRIALS

if(PRINT_OPTION==poTraining){

fprintf(pFile,"%d %f\n\n",exp_k, bestSuccessPercent);
//Printing Centroids
for(i=0;i<exp_k;i++){
for(j=0;j<NN;j++){
fprintf(pFile,"%f\t",bestCentroids[i][j]);
}
printf("\n");
}
}

successPecent = predictTarget(inputs, bestCentroids, targets,exp_k, INPUTS_COUNT-NUMBER_OF_TESTING_DATA);
if(PRINT_OPTION==poTraining){

fprintf(pFile,"\n%d %f",exp_k, successPecent);
printf("\n");
fclose(pFile);
}
for(i=0;i<exp_k;i++) {
free(centroids[i]);
free(oldCentroids[i]);
free(bestCentroids[i]);
}
free(centroids);
free(oldCentroids);
free(bestCentroids);

printf("\n");
printf("\n\nPress any to to close the program...");
getche();
}


///<FunctionName>getRand</FunctionName>
///<arg name='*idum'>Seed for random number generator</arg>
///<Summary>Function follows the Random Number Generators from Numerical Recipes in C, v2.0 (C) Copr. 1986-92 Numerical Recipes Software.</Summary>
///<Output>Random number between 0-1 range.</Output>
float getRand(long *idum)
{
int j;
long k;
static long iy=0;
static long iv[NTAB];
float temp;

if (*idum <= 0 || !iy) {
if (-(*idum) < 1) *idum = 1;
else *idum = -(*idum);
for (j=NTAB+7; j>=0; j--) {
k = (*idum)/IQ;
*idum=IA*(*idum-k*IQ)-IR*k;
if (*idum <0) *idum += IM;
if (j < NTAB) iv[j] = *idum;
}
iy = iv[0];
}
k = (*idum)/IQ;
*idum=IA*(*idum-k*IQ)-IR*k;
if (*idum < 0) *idum += IM;
j = iy/NDIV;
iy=iv[j];
iv[j] = *idum;
if ((temp=AM*iy) > RNMX) return RNMX;
else return temp;
}


///<FunctionName>loadTrainingData</FunctionName>
///<arg name='input[INPUTS_COUNT][NN]'>Training data</arg>
///<arg name='targets[INPUTS_COUNT]'>Target class for each input with same index number</arg>
///<Summary>Load the training data form txt file.</Summary>
///<Output>ture for success, false for fail.</Output>
bool loadTrainingData(double input[INPUTS_COUNT][NN], double targets[INPUTS_COUNT]){
int tempInt;
float tempFloat;
FILE *pData;

pData = fopen("ex1-data.txt","r");
if(pData==NULL) {
printf("Error: can't access data file.\n");
return false; // terminate with error
}

for(int i=0;i<INPUTS_COUNT;i++){
for(int j=0;j<NN;j++){
fscanf (pData, "%f", &tempFloat);
input[i][j]= tempFloat;
}
fscanf (pData, "%d", &tempInt);
targets[i]= tempInt;
}
fclose(pData);

return true;
}

///<FunctionName>getClosestCentroid</FunctionName>
///<arg name='datum[NN]'>One training data</arg>
///<arg name='** centroids'>K Centroids</arg>
///<Summary>Retrun the closest centroids of the given training data among K-centroids.</Summary>
///<Output>Index of closest centroids.</Output>
int getClosestCentroid(double datum[NN], double** centroids, int maxK) {
double distance;
double min = 99999999999;
int k = -1;
for (int i=0;i<maxK;i++) {
distance = getDistance(datum, centroids[i]);
if (distance < min) { k = i; min = distance; }
}
return(k);
}

///<FunctionName>getDistance</FunctionName>
///<arg name='datum[NN]'>One training data</arg>
///<arg name='centroid[NN]'>One Centroid</arg>
///<Summary>Retrun the distance betweent given training data and centroid.</Summary>
///<Output>Distance betweent given training data and centroid.</Output>
double getDistance(double datum[NN], double centroid[NN]) {
double distance = 0.0;
for (int i=0;i<NN;i++) {
distance += pow(datum[i] - centroid[i], 2);
}
return(sqrt(distance));
}


///<FunctionName>moveTowards</FunctionName>
///<arg name='centroid[NN]'>One Centroid</arg>
///<arg name='datum[NN]'>One training data</arg>
///<Summary>Change centroid postion according to distance between points and centroid and global learning rate.</Summary>
///<Output>Void. Node centroid changed by reference</Output>
void moveTowards(double centroid[NN], double datum[NN]) {
for (int i=0;i<NN;i++) {
centroid[i] += ((datum[i] - centroid[i]) * LEARNING_RATE); }
}

///<FunctionName>isOldAndNewSame</FunctionName>
///<arg name='** oldCentroids'>Old centroids</arg>
///<arg name='** centroids'>New centroids</arg>
///<arg name='maxK'>Number of K</arg>
///<Summary>Compare whether all old centroids and new centroids are same or not.</Summary>
///<Output>True if both centroids are same else false.</Output>
bool isOldAndNewSame(double** oldCentroids, double** centroids, int maxK) {
for(int i=0;i<maxK;i++){
for(int j=0;j<NN;j++){
if(oldCentroids[i][j]!=centroids[i][j])
return false;
}
}
return true;
}


///<FunctionName>assignClusterTarget</FunctionName>
///<arg name='inputs[INPUTS_COUNT][NN]'>Input values</arg>
///<arg name='** centroids'>Centroids</arg>
///<arg name='targets[INPUTS_COUNT]'>Targets</arg>
///<arg name='maxK'>Number of K</arg>
///<arg name='testingSampleSize'>Size of testing sample</arg>
///<Summary>Calculate the centroids targets.</Summary>
///<Output>Return the correct classified percent of the last TESTING_SAMPLE_SIZE data.</Output>
double assignClusterTarget(double inputs[INPUTS_COUNT][NN], double** centroids, double targets[INPUTS_COUNT], int maxK,int testingSampleSize){
int totalCorrectClassified=0, totalMissClassified=0;
int** centroidsTargetCounter; //0= Target 0; 1= Target 1
int* centroidsTarget; //Target as eighter 0 or 1

centroidsTargetCounter = (int**) malloc(sizeof(int *) * maxK);
centroidsTarget = (int*) malloc(sizeof(int) * maxK);
for(int i=0;i<maxK;i++) {
centroidsTargetCounter[i] = (int*) malloc(sizeof(double) * 2);
}

for(int i=0;i<maxK;i++){
for(int j=0;j<2;j++){
centroidsTargetCounter[i][j] = 0;
}
}
int testingAdjust;
if((INPUTS_COUNT-NUMBER_OF_TESTING_DATA)==testingSampleSize)
testingAdjust = 0;
else
testingAdjust = testingSampleSize;

for (int i=0;i<(INPUTS_COUNT-NUMBER_OF_TESTING_DATA-testingAdjust);i++) {
int nearestCentroid = getClosestCentroid(inputs[i],centroids,maxK);
if(targets[i]==0)
centroidsTargetCounter[nearestCentroid][0]++;
else
centroidsTargetCounter[nearestCentroid][1]++;
}

for (int i=0;i<maxK;i++) {
if(centroidsTargetCounter[i][0]>centroidsTargetCounter[i][1]){
centroidsTarget[i]=0;
}
else{
centroidsTarget[i]=1;
}
}

for (int i=(INPUTS_COUNT-NUMBER_OF_TESTING_DATA-testingSampleSize);i<(INPUTS_COUNT-NUMBER_OF_TESTING_DATA);i++) {
int nearestCentroid = getClosestCentroid(inputs[i],centroids, maxK);
if(targets[i]==centroidsTarget[nearestCentroid])
totalCorrectClassified++;
else
totalMissClassified++;
}

for(int i=0;i<maxK;i++) {
free(centroidsTargetCounter[i]);
}
free(centroidsTargetCounter);
free(centroidsTarget);

//printf("%f\n",( ((double)totalCorrectClassified/testingSampleSize) * 100));

return ( ( (((double)totalCorrectClassified)/testingSampleSize) * 100));
}

///<FunctionName>predictTarget</FunctionName>
///<arg name='inputs[INPUTS_COUNT][NN]'>Input values</arg>
///<arg name='** centroids'>Centroids</arg>
///<arg name='targets[INPUTS_COUNT]'>Targets</arg>
///<arg name='maxK'>Number of K</arg>
///<arg name='testingSampleSize'>Size of testing sample</arg>
///<Summary>Calculate the centroids targets.</Summary>
///<Output>Void.</Output>
double predictTarget(double inputs[INPUTS_COUNT][NN], double** centroids, double targets[INPUTS_COUNT], int maxK, int testingSampleSize){
int totalCorrectClassified=0, totalMissClassified=0;
int** centroidsTargetCounter; //0= Target 0; 1= Target 1
int* centroidsTarget; //Target as eighter 0 or 1

centroidsTargetCounter = (int**) malloc(sizeof(int *) * maxK);
centroidsTarget = (int*) malloc(sizeof(int) * maxK);
for(int i=0;i<maxK;i++) {
centroidsTargetCounter[i] = (int*) malloc(sizeof(double) * 2);
}

for(int i=0;i<maxK;i++){
for(int j=0;j<2;j++){
centroidsTargetCounter[i][j] = 0;
}
}

int testingAdjust;
if(INPUTS_COUNT-NUMBER_OF_TESTING_DATA==testingSampleSize)
testingAdjust = 0;
else
testingAdjust = testingSampleSize;

for (int i=0;i<INPUTS_COUNT-NUMBER_OF_TESTING_DATA-testingAdjust;i++) {
int nearestCentroid = getClosestCentroid(inputs[i],centroids,maxK);
if(targets[i]==0)
centroidsTargetCounter[nearestCentroid][0]++;
else
centroidsTargetCounter[nearestCentroid][1]++;
}

for (int i=0;i<maxK;i++) {
if(centroidsTargetCounter[i][0]>centroidsTargetCounter[i][1]){
centroidsTarget[i]=0;
totalCorrectClassified +=centroidsTargetCounter[i][0];
}
else{
centroidsTarget[i]=1;
totalCorrectClassified +=centroidsTargetCounter[i][1];
}
}


char filename[20];
FILE* pFile;
sprintf(filename,"Prediction.txt");
pFile = fopen(filename,"w");
for (int i=(INPUTS_COUNT-NUMBER_OF_TESTING_DATA);i<INPUTS_COUNT;i++) {
fprintf(pFile,"%d\n", centroidsTarget[getClosestCentroid(inputs[i],centroids, maxK)]);
}
fclose(pFile);

for(int i=0;i<maxK;i++) {
free(centroidsTargetCounter[i]);
}
free(centroidsTargetCounter);
free(centroidsTarget);

//printf("%f\n",( ((double)totalCorrectClassified/testingSampleSize) * 100));

return ( ((double)totalCorrectClassified/testingSampleSize) * 100);
}

Backpropagation in Matlab

Following code is written in Matlab using back propagation algorithm for error propagation to hidden layers. It used batch update mode and has ability to work with N numbers of hidden layers also as many as required numbers of nodes in each hidden layer.

%References: Patterson, D. [Online]Available from: http://www.csee.umbc.edu/~dpatte3/nn/backprop.html [accessed 24 February 2009]
function MLPUsingBackpropagation()
[x1,x2,x3,x4,x5,x6,t] = textread('ex1-data.txt','%f %f %f %f %f %f %f');

trainingIndex=0;
testingIndex=0;
for i=1:265
if (rand > 0.2)
trainingIndex = trainingIndex + 1;
trainingInputs(trainingIndex,1) = x1(i);
trainingInputs(trainingIndex,2) = x2(i);
trainingInputs(trainingIndex,3) = x3(i);
trainingInputs(trainingIndex,4) = x4(i);
trainingInputs(trainingIndex,5) = x5(i);
trainingInputs(trainingIndex,6) = x6(i);

trainingOutputs(trainingIndex,1) = t(i);

else
testingIndex = testingIndex + 1;
testingInputs(testingIndex,1) = x1(i);
testingInputs(testingIndex,2) = x2(i);
testingInputs(testingIndex,3) = x3(i);
testingInputs(testingIndex,4) = x4(i);
testingInputs(testingIndex,5) = x5(i);
testingInputs(testingIndex,6) = x6(i);

testingOutput(testingIndex,1) = t(i);

end

end
%***** Memory Clean up **********
clear x1
clear x2
clear x3
clear x4
clear x5
clear x6
clear t
%********************************
layers = [6 23 23 1];

for i=1:50

Output = BackpropagationTraining(trainingInputs,trainingOutputs,0.1,0.05,0.01,layers);

OutputTesting = BackpropagationTesting(testingInputs, testingOutput, layers, Output.weights);
OutputTesting = BackpropagationTesting(testingInputs, testingOutput, layers, Output.bestweights);
end

%***** Memory Clean up **********
clear trainingInputs
clear testingInputs
clear layers
%********************************

end

function Results = BackpropagationTraining(inputs,targets,learningRate,momentumWeight,minimumMSE,layers)

[trainingDataCount,inputNodesCount] = size(inputs);
[targetDataCount,targetNodesCount] = size(targets);

if trainingDataCount ~= targetDataCount
error('BackpropagationTraining:TrainingAndTargetDataLengthMismatch', 'The number of input vectors and desired ouput vectors do not match');
end

if length(layers) < 3
error('BackpropagationTraining:InvalidNetworkStructure','The network must have at least 3 layers');
end

if inputNodesCount ~= layers(1)
msg = sprintf('Dimensions of input nodes (%d) does not match with numbers of input layer (%d).',inputNodesCount,layers(1));
error('BackpropagationTraining:InvalidInputLayerSize', msg);
end

if targetNodesCount ~= layers(end)
msg = sprintf('Dimensions of output nodes (%d) does not match with numbers of output layer (%d)',targetNodesCount,layers(end));
error('BackpropagationTraining:InvalidOutLayerSize', msg);
end

leayersLength = length(layers);

weights = cell(leayersLength-1,1);
for i=1:leayersLength-2
weights{i} = [-15 + 30 .* rand(layers(i+1),layers(i)+1); zeros(1,layers(i)+1)];
end
weights{end} = -15 + 30 .* rand(layers(end),layers(end-1)+1);

MSE = Inf;
epochs = 0;

activation = cell(leayersLength,1);
activation{1} = [inputs ones(trainingDataCount,1)]; % activation{1} is the input + 1 for the bias node activation
% activation{1} remains the same throught the computation
for i=2:leayersLength-1
activation{i} = ones(trainingDataCount,layers(i)+1); % inner layers include a bias node (trainingDataCount-by-Nodes+1)
end
activation{end} = ones(trainingDataCount,layers(end)); % no bias node at output layer

net = cell(leayersLength-1,1); % one net matrix for each layer exclusive input
for i=1:leayersLength-2;
net{i} = ones(trainingDataCount,layers(i+1)+1); % affix bias node
end
net{end} = ones(trainingDataCount,layers(end));

previousDeltaW = cell(leayersLength-1,1);
sumDeltaW = cell(leayersLength-1,1);
for i=1:leayersLength-1
previousDeltaW{i} = zeros(size(weights{i})); % previousDeltaW starts at 0
sumDeltaW{i} = zeros(size(weights{i}));
end

lowestsse = 999999;
bestweights = weights;

while MSE > minimumMSE && epochs < 3000

for i=1:leayersLength-1
net{i} = activation{i} * weights{i}'; % compute inputs to current layer

if i < leayersLength-1 % inner layers
%a{i+1} = [2./(1+exp(-net{i}(:,1:end-1)))-1
%ones(trainingDataCount,1)]; % for bi-polar
activation{i+1} = [1./(1+exp(-net{i}(:,1:end-1))) ones(trainingDataCount,1)]; % for sigmoid
%activation{i+1} = [(net{i}(:,1:end-1)) ones(trainingDataCount,1)]; %without sigmoid i.e for linear activation function
else % output layers
%a{i+1} = 2 ./ (1 + exp(-net{i})) - 1;
%activation{i+1} = 1 ./ (1 + exp(-net{i}));
%a{i+1} = net{i};
activation{end} = net{i} > 0;
end
end

% calculate sum squared error of all samples
err = (targets-activation{end}); % save this for later
sse = sum(sum(err.^2)); % sum of the error for all samples, and all nodes
if(lowestsse>sse)
lowestsse = sse;
bestweights = weights;
end

%delta = err .* (1 + a{end}) .* (1 - a{end});
%delta = err .* a{end} .* (1 - a{end});
%delta = err;
for
delta = err + weights{i}' weights{i};
for i=leayersLength-1:-1:1
sumDeltaW{i} = learningRate * delta' * activation{i};
if i > 1
%delta = (1+a{i}) .* (1-a{i}) .* (delta*weights{i});
delta = activation{i} .* (1-activation{i}) .* (delta*weights{i});
%delta = (delta*weights{i}); % where there is no activation
%function
end
end

% update the prev_w, weight matrices, epoch count and MSE
for i=1:leayersLength-1
previousDeltaW{i} = (sumDeltaW{i} ./ trainingDataCount) + (momentumWeight * previousDeltaW{i});
weights{i} = weights{i} + previousDeltaW{i};
end
epochs = epochs + 1;
MSE = sse/(trainingDataCount*targetNodesCount); % MSE = 1/trainingDataCount * 1/targetNodesCount * summed squared error
%MSEPerEpoch(epochs) = MSE;

end

%printing error
% hold on
% axis ( [0 3000 0 1]);
% x = 1:1:3000;
% plot(x,MSEPerEpoch);
% hold off
% s=input('Press enter to continue, enter 0 to stop \n');

% return the trained network
Results.structure = layers;
Results.weights = weights;
Results.bestweights = bestweights;
Results.epochs = epochs;
Results.MSE = MSE;
sse
lowestsse

end

function Results = BackpropagationTesting(inputs,targets,layers, weights)

[trainingDataCount,inputNodesCount] = size(inputs);
[targetDataCount,targetNodesCount] = size(targets);

if trainingDataCount ~= targetDataCount
error('BackpropagationTraining:TrainingAndTargetDataLengthMismatch', 'The number of input vectors and desired ouput vectors do not match');
end

leayersLength = length(layers);

activation = cell(leayersLength,1);
activation{1} = [inputs ones(trainingDataCount,1)]; % activation{1} is the input + 1 for the bias node activation
% activation{1} remains the same throught the computation
for i=2:leayersLength-1
activation{i} = ones(trainingDataCount,layers(i)+1); % inner layers include a bias node (trainingDataCount-by-Nodes+1)
end
activation{end} = ones(trainingDataCount,layers(end)); % no bias node at output layer

net = cell(leayersLength-1,1); % one net matrix for each layer exclusive input
for i=1:leayersLength-2;
net{i} = ones(trainingDataCount,layers(i+1)+1); % affix bias node
end
net{end} = ones(trainingDataCount,layers(end));

for i=1:leayersLength-1
net{i} = activation{i} * weights{i}'; % compute inputs to current layer

if i < leayersLength-1 % inner layers
%a{i+1} = [2./(1+exp(-net{i}(:,1:end-1)))-1 ones(trainingDataCount,1)];
activation{i+1} = [1./(1+exp(-net{i}(:,1:end-1))) ones(trainingDataCount,1)];
%a{i+1} = [(-net{i}(:,1:end-1)) ones(trainingDataCount,1)];
else % output layers
%a{i+1} = 2 ./ (1 + exp(-net{i})) - 1;
%activation{i+1} = 1 ./ (1 + exp(-net{i}));
activation{end} = net{i} > 0;
%a{i+1} = net{i};
end
end

%activation{end} = activation{end} > 0.5;
% calculate sum squared error of all samples
err = (targets-activation{end}); % save this for later
sse = sum(sum(err.^2)); % sum of the error for all samples, and all nodes

MSE = sse/(trainingDataCount*targetNodesCount); % MSE = 1/trainingDataCount * 1/targetNodesCount * summed squared error

% return the trained network
Results.MSE = MSE;
Results.sse = (sse/trainingDataCount)*100;
100-((sse/trainingDataCount)*100)

end