import copy
import numpy as np

np.random.seed(0)

# compute sigmoid nonlinearity
def sigmoid(x):
    output = 1 / (1 + np.exp(-x))
    return output

# convert output of sigmoid function to its derivative
def sigmoid_output_to_derivative(output):
    return output * (1 - output)
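
# Note on the identity above: for s = sigmoid(x), ds/dx = s * (1 - s), so the
# derivative can be computed from a layer's output alone. For example,
# sigmoid(0) = 0.5 gives a slope of 0.5 * (1 - 0.5) = 0.25 at x = 0.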

# training dataset generation
int2binary = {}
binary_dim = 8

largest_number = pow(2, binary_dim)
binary = np.unpackbits(
    np.array([range(largest_number)], dtype=np.uint8).T, axis=1)
for i in range(largest_number):
    int2binary[i] = binary[i]
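
# For illustration: int2binary[3] is the 8-bit, most-significant-bit-first
# vector [0 0 0 0 0 0 1 1], and int2binary[255] is all ones.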

# input variables
alpha = 0.1
input_dim = 2
hidden_dim = 16
output_dim = 1


# initialize neural network weights
synapse_0 = 2 * np.random.random((input_dim, hidden_dim)) - 1
synapse_1 = 2 * np.random.random((hidden_dim, output_dim)) - 1
synapse_h = 2 * np.random.random((hidden_dim, hidden_dim)) - 1

synapse_0_update = np.zeros_like(synapse_0)
synapse_1_update = np.zeros_like(synapse_1)
synapse_h_update = np.zeros_like(synapse_h)
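
# synapse_0 (input_dim x hidden_dim) maps the two input bits to the hidden
# layer, synapse_1 (hidden_dim x output_dim) maps the hidden layer to the
# single output bit, and synapse_h (hidden_dim x hidden_dim) carries the
# hidden state from one timestep to the next. The *_update buffers accumulate
# weight changes across a full sequence before they are applied.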

# training logic
for j in range(10000):

    # generate a simple addition problem (a + b = c)
    a_int = np.random.randint(largest_number // 2)  # int version
    a = int2binary[a_int]  # binary encoding

    b_int = np.random.randint(largest_number // 2)  # int version
    b = int2binary[b_int]  # binary encoding

    # true answer
    c_int = a_int + b_int
    c = int2binary[c_int]

    # where we'll store our best guess (binary encoded)
    d = np.zeros_like(c)

    overallError = 0

    layer_2_deltas = list()
    layer_1_values = list()
    layer_1_values.append(np.zeros(hidden_dim))
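
    # layer_2_deltas will collect the output-layer error terms for each of the
    # 8 bit positions, and layer_1_values will collect the hidden states; the
    # all-zero vector appended here acts as the hidden state "before" the
    # first timestep.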

    # moving along the positions in the binary encoding
    for position in range(binary_dim):

        # generate input and output
        X = np.array([[a[binary_dim - position - 1], b[binary_dim - position - 1]]])
        y = np.array([[c[binary_dim - position - 1]]]).T

        # hidden layer (input + prev_hidden)
        layer_1 = sigmoid(np.dot(X, synapse_0) + np.dot(layer_1_values[-1], synapse_h))

        # output layer (new binary representation)
        layer_2 = sigmoid(np.dot(layer_1, synapse_1))
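
        # In equation form (matching the code above): the hidden state is
        # h_t = sigmoid(X_t . synapse_0 + h_{t-1} . synapse_h) and the
        # predicted bit is layer_2 = sigmoid(h_t . synapse_1).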

        # did we miss?... if so, by how much?
        layer_2_error = y - layer_2
        layer_2_deltas.append((layer_2_error) * sigmoid_output_to_derivative(layer_2))
        overallError += np.abs(layer_2_error[0])

        # decode estimate so we can print it out
        d[binary_dim - position - 1] = np.round(layer_2[0][0])

        # store hidden layer so we can use it in the next timestep
        layer_1_values.append(copy.deepcopy(layer_1))

    future_layer_1_delta = np.zeros(hidden_dim)

    for position in range(binary_dim):

        X = np.array([[a[position], b[position]]])
        layer_1 = layer_1_values[-position - 1]
        prev_layer_1 = layer_1_values[-position - 2]

        # error at output layer
        layer_2_delta = layer_2_deltas[-position - 1]
        # error at hidden layer
        layer_1_delta = (future_layer_1_delta.dot(synapse_h.T) + layer_2_delta.dot(synapse_1.T)) * sigmoid_output_to_derivative(layer_1)
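
        # Backpropagation through time: the hidden-layer error combines the
        # error arriving from the next timestep's hidden layer (through
        # synapse_h) with the error from this timestep's output (through
        # synapse_1), scaled by the sigmoid derivative at layer_1.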

        # let's update all our weights so we can try again
        synapse_1_update += np.atleast_2d(layer_1).T.dot(layer_2_delta)
        synapse_h_update += np.atleast_2d(prev_layer_1).T.dot(layer_1_delta)
        synapse_0_update += X.T.dot(layer_1_delta)

        future_layer_1_delta = layer_1_delta
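
    # The gradients accumulated over all 8 bit positions are applied in one
    # step per training example, scaled by the learning rate alpha, and the
    # accumulators are then zeroed in place for the next example.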
    synapse_0 += synapse_0_update * alpha
    synapse_1 += synapse_1_update * alpha
    synapse_h += synapse_h_update * alpha

    synapse_0_update *= 0
    synapse_1_update *= 0
    synapse_h_update *= 0

    # print out progress
    if (j % 1000 == 0):
        print("Error:" + str(overallError))
        print("Pred:" + str(d))
        print("True:" + str(c))
        out = 0
        for index, x in enumerate(reversed(d)):
            out += x * pow(2, index)
        print(str(a_int) + " + " + str(b_int) + " = " + str(out))
        print("------------")