MNIST Example
This is an example of a very simple neural network trained on the MNIST dataset. The network is a small feedforward model with two linear layers and a single hidden layer of 128 units.
Download the data
Begin by downloading the four MNIST archive files (train-images-idx3-ubyte.gz, train-labels-idx1-ubyte.gz, t10k-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz) and placing them in the same directory as your script.
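If you'd rather fetch the files from Python, here is a minimal sketch. The mirror URL is an assumption (any MNIST mirror that serves the standard file names will do) and is not part of the original tutorial; run it from the script's directory so the files land where fetch_mnist below expects them.

import os
import urllib.request

# assumed mirror; swap in whichever MNIST mirror you prefer
BASE_URL = "https://storage.googleapis.com/cvdf-datasets/mnist/"
for name in ["train-images-idx3-ubyte.gz", "train-labels-idx1-ubyte.gz",
             "t10k-images-idx3-ubyte.gz", "t10k-labels-idx1-ubyte.gz"]:
  if not os.path.exists(name):
    urllib.request.urlretrieve(BASE_URL + name, name)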
Create main.py file
Create your main.py file to begin writing code.
Import Libraries
import os
import gzip
import numpy as np
from tinygrad.tensor import Tensor
from tinygrad.nn import Linear
from tinygrad.nn.optim import SGD
from tinygrad.helpers import Timing
Fetch MNIST Data
def fetch_mnist():
  # raw IDX files: 16-byte header for images, 8-byte header for labels
  parse = lambda file: np.frombuffer(gzip.open(file).read(), dtype=np.uint8).copy()
  X_train = parse(os.path.dirname(__file__)+"/train-images-idx3-ubyte.gz")[0x10:].reshape((-1, 28*28)).astype(np.float32)
  Y_train = parse(os.path.dirname(__file__)+"/train-labels-idx1-ubyte.gz")[8:]
  X_test = parse(os.path.dirname(__file__)+"/t10k-images-idx3-ubyte.gz")[0x10:].reshape((-1, 28*28)).astype(np.float32)
  Y_test = parse(os.path.dirname(__file__)+"/t10k-labels-idx1-ubyte.gz")[8:]
  return X_train, Y_train, X_test, Y_test
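A quick sanity check on the loader; these are the standard MNIST shapes you should see:

X_train, Y_train, X_test, Y_test = fetch_mnist()
print(X_train.shape, Y_train.shape)  # (60000, 784) (60000,)
print(X_test.shape, Y_test.shape)    # (10000, 784) (10000,)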
Define the model
This is just going to be a simple model with two linear layers: 784 inputs down to a 128-unit hidden layer, then 10 outputs (one per digit class), with a LeakyReLU in between.
class TinyNet:
  def __init__(self):
    self.l1 = Linear(784, 128, bias=False)
    self.l2 = Linear(128, 10, bias=False)

  def __call__(self, x):
    x = self.l1(x)
    x = x.leakyrelu()
    x = self.l2(x)
    return x
net = TinyNet()
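Before training, it can help to smoke-test the wiring with a random batch; the values are meaningless, only the output shape matters:

out = net(Tensor.randn(4, 784))  # 4 fake flattened 28x28 images
print(out.shape)  # (4, 10): one logit per class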
Training
# Training
Tensor.training = True
opt = SGD([net.l1.weight, net.l2.weight], lr=3e-4)
X_train, Y_train, X_test, Y_test = fetch_mnist()

for step in range(20000):
  # randomly sample a batch of 64 training images
  samp = np.random.randint(0, X_train.shape[0], size=64)
  batch = Tensor(X_train[samp], requires_grad=False)
  # get the corresponding labels
  labels = Tensor(Y_train[samp])

  # forward pass
  out = net(batch)

  # compute loss
  loss = out.sparse_categorical_crossentropy(labels)

  # zero gradients, backpropagate, and update the weights
  opt.zero_grad()
  loss.backward()
  opt.step()

  # compute accuracy on this batch
  pred = out.argmax(axis=-1)
  acc = (pred == labels).mean()

  if step % 100 == 0:
    print(f"Step {step} - Loss {loss.numpy()} - Accuracy {acc.numpy()}")
Evaluation
Check the trained model's accuracy on the held-out test set.
# Evaluation
with Timing("Time: "):
  avg_acc = 0
  for step in range(2000):
    # randomly sample a batch of 64 test images
    samp = np.random.randint(0, X_test.shape[0], size=64)
    batch = Tensor(X_test[samp], requires_grad=False)
    labels = Tensor(Y_test[samp])
    out = net(batch)
    pred = out.argmax(axis=-1).numpy()
    avg_acc += (pred == labels.numpy()).mean()
  print(f"Accuracy {avg_acc / 2000}")
Entire Code
import os
import gzip
import numpy as np
from tinygrad.tensor import Tensor
from tinygrad.nn import Linear
from tinygrad.nn.optim import SGD
from tinygrad.helpers import Timing
def fetch_mnist():
  # raw IDX files: 16-byte header for images, 8-byte header for labels
  parse = lambda file: np.frombuffer(gzip.open(file).read(), dtype=np.uint8).copy()
  X_train = parse(os.path.dirname(__file__)+"/train-images-idx3-ubyte.gz")[0x10:].reshape((-1, 28*28)).astype(np.float32)
  Y_train = parse(os.path.dirname(__file__)+"/train-labels-idx1-ubyte.gz")[8:]
  X_test = parse(os.path.dirname(__file__)+"/t10k-images-idx3-ubyte.gz")[0x10:].reshape((-1, 28*28)).astype(np.float32)
  Y_test = parse(os.path.dirname(__file__)+"/t10k-labels-idx1-ubyte.gz")[8:]
  return X_train, Y_train, X_test, Y_test
class TinyNet:
  def __init__(self):
    self.l1 = Linear(784, 128, bias=False)
    self.l2 = Linear(128, 10, bias=False)

  def __call__(self, x):
    x = self.l1(x)
    x = x.leakyrelu()
    x = self.l2(x)
    return x
net = TinyNet()
# Training
Tensor.training = True
opt = SGD([net.l1.weight, net.l2.weight], lr=3e-4)
X_train, Y_train, X_test, Y_test = fetch_mnist()

for step in range(20000):
  # randomly sample a batch of 64 training images
  samp = np.random.randint(0, X_train.shape[0], size=64)
  batch = Tensor(X_train[samp], requires_grad=False)
  # get the corresponding labels
  labels = Tensor(Y_train[samp])

  # forward pass
  out = net(batch)

  # compute loss
  loss = out.sparse_categorical_crossentropy(labels)

  # zero gradients, backpropagate, and update the weights
  opt.zero_grad()
  loss.backward()
  opt.step()

  # compute accuracy on this batch
  pred = out.argmax(axis=-1)
  acc = (pred == labels).mean()

  if step % 100 == 0:
    print(f"Step {step} - Loss {loss.numpy()} - Accuracy {acc.numpy()}")
# Evaluation
with Timing("Time: "):
  avg_acc = 0
  for step in range(2000):
    # randomly sample a batch of 64 test images
    samp = np.random.randint(0, X_test.shape[0], size=64)
    batch = Tensor(X_test[samp], requires_grad=False)
    labels = Tensor(Y_test[samp])
    out = net(batch)
    pred = out.argmax(axis=-1).numpy()
    avg_acc += (pred == labels.numpy()).mean()
  print(f"Accuracy {avg_acc / 2000}")