Update: My Neural Network Posts
- Basic Auto Differentiation: Post
- Basic Auto Differentiation with Weight Sharing: Post
- Auto Differentiation Code from Book: Post
- Refactoring The Ideas: Post
```scala
import NeuralNetork._
import breeze.linalg._

trait Computation {

  def fwd(requestID: Int, parents: Array[Int], results: ComputationTable): DenseMatrix[Double]

  def bwd(requestID: Int, parents: Array[Int], gradient: DenseMatrix[Double], inputs: ComputationTable): DenseMatrix[Double]

}

class NeuralNetork(val children: Graph, val parents: Graph, cmp: Computations) {

  def fwd(requestID: Int, results: ComputationTable): Unit = {
    if (results(requestID) == None) {
      // evaluate all parents first, then this node, memoising results along the way
      for (p <- parents(requestID)) {
        fwd(p, results)
      }
      results(requestID) = Some(cmp(requestID).fwd(requestID, parents(requestID), results))
    }
  }

  def bwd(requestID: Int, gradients: ComputationTable, inputs: ComputationTable): DenseMatrix[Double] = {
    if (gradients(requestID) != None) gradients(requestID).get
    else {
      // sum the gradient contributions flowing back from all children
      val g = for (c <- children(requestID)) yield {
        val gradient = bwd(c, gradients, inputs)
        cmp(c).bwd(requestID, parents(c), gradient, inputs)
      }
      gradients(requestID) = Some(g.reduce(_ + _))
      gradients(requestID).get
    }
  }

}

object NeuralNetork {

  type ComputationTable = Array[Option[DenseMatrix[Double]]]
  type Graph = Array[Array[Int]]
  type Computations = Map[Int, Computation]

}
```

In the end this computation does the same thing as the previous code; it is just another way of writing things down. The Computations can then be implemented in the same way as earlier. For more details check out Chapter 6 of the book. Now we can actually implement our nodes. In this example I am only using Add and Mul.
```scala
class Add extends Computation {

  def fwd(requestID: Int, parents: Array[Int], results: ComputationTable): DenseMatrix[Double] = {
    results(parents(0)).get + results(parents(1)).get
  }

  def bwd(requestID: Int, parents: Array[Int], gradient: DenseMatrix[Double], inputs: ComputationTable): DenseMatrix[Double] = gradient

}

class Mul extends Computation {

  def fwd(requestID: Int, parents: Array[Int], results: ComputationTable): DenseMatrix[Double] = {
    results(parents(0)).get :* results(parents(1)).get
  }

  def bwd(requestID: Int, parents: Array[Int], gradient: DenseMatrix[Double], inputs: ComputationTable): DenseMatrix[Double] = {
    if (parents(0) == requestID) inputs(parents(1)).get :* gradient
    else inputs(parents(0)).get :* gradient
  }

}
```

And then we can set up a simple computation:
- a := -2
- b := 5
- c := -4
- d := 2
- (a + b) * c
- (a + b) * d
```scala
// nodes: 0 = a, 1 = b, 2 = c, 3 = d, 4 = a + b, 5 = (a + b) * c, 6 = (a + b) * d
val in = Array.fill[Option[DenseMatrix[Double]]](7)(None)
in(0) = Some(DenseMatrix.ones[Double](1, 2) * -2.0) // a
in(1) = Some(DenseMatrix.ones[Double](1, 2) * 5.0)  // b
in(2) = Some(DenseMatrix.ones[Double](1, 2) * -4.0) // c
in(3) = Some(DenseMatrix.ones[Double](1, 2) * 2.0)  // d

val cmp = Map(
  4 -> new Add,
  5 -> new Mul,
  6 -> new Mul
)

val children = Array(
  Array(4), Array(4), Array(5), Array(6),
  Array(5, 6),
  Array.empty[Int], Array.empty[Int]
)

val parents = Array(
  Array.empty[Int], Array.empty[Int], Array.empty[Int], Array.empty[Int],
  Array(0, 1),
  Array(2, 4),
  Array(3, 4)
)

val nn = new NeuralNetork(children, parents, cmp)

// seed the two output nodes with gradient 1 before running the backward pass
val gradients = Array.fill[Option[DenseMatrix[Double]]](7)(None)
gradients(5) = Some(DenseMatrix.ones[Double](1, 2) * 1.0)
gradients(6) = Some(DenseMatrix.ones[Double](1, 2) * 1.0)

nn.fwd(5, in)
nn.fwd(6, in)
println(in(5))
println(in(6))
println("---")

nn.bwd(0, gradients, in)
nn.bwd(1, gradients, in)
nn.bwd(2, gradients, in)
nn.bwd(3, gradients, in)
gradients.zipWithIndex.foreach(println)
```

The computation for the forward pass results in:
- (a + b) * c: Some(-12.0  -12.0)
- (a + b) * d: Some(6.0  6.0)
And the backward pass produces the following gradients:

- a: Some(-2.0  -2.0)
- b: Some(-2.0  -2.0)
- c: Some(3.0  3.0)
- d: Some(3.0  3.0)
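
These numbers agree with a quick hand derivation. The following scalar check is my own addition, not part of the original code; it just applies the chain rule directly, given that both outputs are seeded with a gradient of one:

```scala
// Plain doubles, no graph: a sanity check of the forward and backward results.
val (a, b, c, d) = (-2.0, 5.0, -4.0, 2.0)
val out1 = (a + b) * c // -12.0, the value printed for node 5
val out2 = (a + b) * d //   6.0, the value printed for node 6
// a and b feed into both outputs (each seeded with gradient 1), so by the
// chain rule their gradient is c + d; c and d each see the shared term a + b.
val gradA = c + d      // -2.0 (same for b)
val gradC = a + b      //  3.0 (same for d)
```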
I also implemented a feed-forward neural network with two layers:
```scala
import NeuralNetork._
import breeze.linalg._
import breeze.numerics.sigmoid

class Add extends Computation {

  def fwd(requestID: Int, parents: Array[Int], results: ComputationTable): DenseMatrix[Double] = {
    results(parents(0)).get + results(parents(1)).get
  }

  def bwd(requestID: Int, parents: Array[Int], gradient: DenseMatrix[Double], inputs: ComputationTable): DenseMatrix[Double] = gradient

}

class Mul extends Computation {

  // this version uses the matrix product instead of the elementwise product
  def fwd(requestID: Int, parents: Array[Int], results: ComputationTable): DenseMatrix[Double] = {
    results(parents(0)).get * results(parents(1)).get
  }

  def bwd(requestID: Int, parents: Array[Int], gradient: DenseMatrix[Double], inputs: ComputationTable): DenseMatrix[Double] =
    if (parents(0) == requestID) gradient * inputs(parents(1)).get.t
    else inputs(parents(0)).get.t * gradient

}

class Sigmoid extends Computation {

  def fwd(requestID: Int, parents: Array[Int], results: ComputationTable): DenseMatrix[Double] =
    sigmoid(results(parents(0)).get)

  // sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))
  def bwd(requestID: Int, parents: Array[Int], gradient: DenseMatrix[Double], inputs: ComputationTable): DenseMatrix[Double] = {
    val s = fwd(requestID, parents, inputs)
    (1.0 - s) :* s :* gradient
  }

}

def twoLayerNet = {
  // nodes: 0 = input, 1 = W1, 2 = b1, 3 = W2, 4 = b2,
  //        5 = input * W1, 6 = + b1, 7 = sigmoid, 8 = * W2, 9 = + b2, 10 = sigmoid (output)
  val update = Array(1, 2, 3, 4) // the weight and bias nodes to train
  val output = 10
  val cmp = Map(
    5  -> new Mul,
    6  -> new Add,
    7  -> new Sigmoid,
    8  -> new Mul,
    9  -> new Add,
    10 -> new Sigmoid
  )
  val children = Array(
    Array(5), Array(5), Array(6), Array(8), Array(9),
    Array(6), Array(7), Array(8), Array(9), Array(10),
    Array.empty[Int]
  )
  val parents = Array(
    Array.empty[Int], Array.empty[Int], Array.empty[Int], Array.empty[Int], Array.empty[Int],
    Array(0, 1),
    Array(5, 2),
    Array(6),
    Array(7, 3),
    Array(8, 4),
    Array(9)
  )
  val nn = new NeuralNetork(children, parents, cmp)
  (nn, output, update)
}
```
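
The post stops at the network definition, so here is a rough sketch of how a single training step could be driven with it. This is my own addition, not from the original post: the names step, params, outputGradient and learningRate are made up, and it assumes the input x is a single row so that the pass-through bias gradients keep their shape.

```scala
import breeze.linalg.DenseMatrix

val (nn, output, update) = twoLayerNet

// One gradient-descent step: run the forward pass, seed the output gradient
// (e.g. the derivative of the loss at node 10), backpropagate to each
// parameter node and return the updated parameters.
def step(x: DenseMatrix[Double],
         params: Map[Int, DenseMatrix[Double]],
         outputGradient: DenseMatrix[Double],
         learningRate: Double): Map[Int, DenseMatrix[Double]] = {
  val in = Array.fill[Option[DenseMatrix[Double]]](11)(None)
  in(0) = Some(x)
  params.foreach { case (i, w) => in(i) = Some(w) }

  nn.fwd(output, in)

  val gradients = Array.fill[Option[DenseMatrix[Double]]](11)(None)
  gradients(output) = Some(outputGradient)

  update.map { i =>
    val g = nn.bwd(i, gradients, in)
    i -> (params(i) - g * learningRate)
  }.toMap
}
```

A full training loop would repeat this step over the dataset, recomputing the loss gradient at the output node each time.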
On the spiral dataset and on the MNIST dataset I got around 98% accuracy.