
MNIST Example

This is an example of a very simple neural network trained on the MNIST dataset. The network is a small feedforward net with two linear layers and a leaky ReLU in between.

Download the data

Begin by downloading the four gzipped IDX files that make up the MNIST dataset (a download sketch follows the list):

  • t10k-images-idx3-ubyte.gz
  • t10k-labels-idx1-ubyte.gz
  • train-images-idx3-ubyte.gz
  • train-labels-idx1-ubyte.gz
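
If you prefer to fetch them from a script, here is a minimal sketch; the mirror URL is an assumption (any MNIST mirror hosting these four files will do):

    # hedged sketch: download the four MNIST files next to this script
    import os
    import urllib.request

    BASE = "https://storage.googleapis.com/cvdf-datasets/mnist/"  # assumed mirror
    FILES = [
        "train-images-idx3-ubyte.gz", "train-labels-idx1-ubyte.gz",
        "t10k-images-idx3-ubyte.gz", "t10k-labels-idx1-ubyte.gz",
    ]
    for name in FILES:
        if not os.path.exists(name):
            urllib.request.urlretrieve(BASE + name, name)
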
Create main.py file

Create your main.py file to begin writing code. Your directory should now look like this:

  • t10k-images-idx3-ubyte.gz
  • t10k-labels-idx1-ubyte.gz
  • train-images-idx3-ubyte.gz
  • train-labels-idx1-ubyte.gz
  • main.py

Import Libraries

    import os
    import gzip
    import numpy as np
    from tinygrad.tensor import Tensor
    from tinygrad.nn import Linear
    from tinygrad.nn.optim import SGD
    from tinygrad.helpers import Timing

Fetch MNIST Data

    def fetch_mnist():
      # decompress a gzipped IDX file into a flat uint8 array
      parse = lambda file: np.frombuffer(gzip.open(file).read(), dtype=np.uint8).copy()
      # images: skip the 16-byte IDX header, flatten each 28x28 image to 784 floats
      X_train = parse(os.path.dirname(__file__)+"/train-images-idx3-ubyte.gz")[0x10:].reshape((-1, 28*28)).astype(np.float32)
      # labels: skip the 8-byte IDX header
      Y_train = parse(os.path.dirname(__file__)+"/train-labels-idx1-ubyte.gz")[8:]
      X_test = parse(os.path.dirname(__file__)+"/t10k-images-idx3-ubyte.gz")[0x10:].reshape((-1, 28*28)).astype(np.float32)
      Y_test = parse(os.path.dirname(__file__)+"/t10k-labels-idx1-ubyte.gz")[8:]
      return X_train, Y_train, X_test, Y_test
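
As an optional sanity check (assuming the .gz files sit next to main.py), the arrays come back with MNIST's standard 60,000/10,000 train/test split:

    # optional: verify the expected MNIST shapes
    X_train, Y_train, X_test, Y_test = fetch_mnist()
    print(X_train.shape, Y_train.shape)  # (60000, 784) (60000,)
    print(X_test.shape, Y_test.shape)    # (10000, 784) (10000,)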

Define the model

This is just going to be a small model: two linear layers with a leaky ReLU between them.

    class TinyNet:
        def __init__(self):
            self.l1 = Linear(784, 128, bias=False)  # 28*28 pixels -> 128 hidden units
            self.l2 = Linear(128, 10, bias=False)   # 128 hidden units -> 10 digit classes
     
        def __call__(self, x):
            x = self.l1(x)
            x = x.leakyrelu()
            x = self.l2(x)
            return x
     
     
    net = TinyNet()
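
A quick shape check of the untrained network; Tensor.randn here just stands in for a batch of four flattened images:

    # sanity check: four random "images" in, four rows of 10 logits out
    x = Tensor.randn(4, 784)
    print(net(x).shape)  # (4, 10): one logit per digit class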

Training

    # Training
    Tensor.training = True  # tinygrad's optimizer step requires training mode
     
    # plain SGD over both layers' weights
    opt = SGD([net.l1.weight, net.l2.weight], lr=3e-4)
     
    X_train, Y_train, X_test, Y_test = fetch_mnist()
     
    for step in range(20000):
        # random sample a batch
        samp = np.random.randint(0, X_train.shape[0], size=(64))
        batch = Tensor(X_train[samp], requires_grad=False)
        # get the corresponding labels
        labels = Tensor(Y_train[samp])
     
        # forward pass
        out = net(batch)
     
        # compute loss
        loss = Tensor.sparse_categorical_crossentropy(out, labels)
     
        # reset gradients, backpropagate, and take an SGD step
        opt.zero_grad()
        loss.backward()
        opt.step()
     
        # accuracy on the current training batch, for monitoring
        pred = out.argmax(axis=-1)
        acc = (pred == labels).mean()
     
        if step % 100 == 0:
            print(f"Step {step} - Loss {loss.numpy()} - Accuracy {acc.numpy()}")

Evaluation

Evaluate the trained model on the test set.

    # Evaluation
    with Timing("Time: "):
        avg_acc = 0
     
        for step in range(2000):
            samp = np.random.randint(0, X_test.shape[0], size=(64))
            batch = Tensor(X_test[samp], requires_grad=False)
            labels = Tensor(Y_test[samp])
     
            out = net(batch)
     
            pred = out.argmax(axis=-1).numpy()
            avg_acc += (pred == labels.numpy()).mean()
     
        print(f"Accuracy {avg_acc / 2000}")

Entire Code

    import os
    import gzip
    import numpy as np
    from tinygrad.tensor import Tensor
    from tinygrad.nn import Linear
    from tinygrad.nn.optim import SGD
    from tinygrad.helpers import Timing
     
    def fetch_mnist():
      # decompress a gzipped IDX file into a flat uint8 array
      parse = lambda file: np.frombuffer(gzip.open(file).read(), dtype=np.uint8).copy()
      # images: skip the 16-byte IDX header, flatten each 28x28 image to 784 floats
      X_train = parse(os.path.dirname(__file__)+"/train-images-idx3-ubyte.gz")[0x10:].reshape((-1, 28*28)).astype(np.float32)
      # labels: skip the 8-byte IDX header
      Y_train = parse(os.path.dirname(__file__)+"/train-labels-idx1-ubyte.gz")[8:]
      X_test = parse(os.path.dirname(__file__)+"/t10k-images-idx3-ubyte.gz")[0x10:].reshape((-1, 28*28)).astype(np.float32)
      Y_test = parse(os.path.dirname(__file__)+"/t10k-labels-idx1-ubyte.gz")[8:]
      return X_train, Y_train, X_test, Y_test
     
    class TinyNet:
        def __init__(self):
            self.l1 = Linear(784, 128, bias=False)  # 28*28 pixels -> 128 hidden units
            self.l2 = Linear(128, 10, bias=False)   # 128 hidden units -> 10 digit classes
     
        def __call__(self, x):
            x = self.l1(x)
            x = x.leakyrelu()
            x = self.l2(x)
            return x
     
     
    net = TinyNet()
     
    # Training
    Tensor.training = True  # tinygrad's optimizer step requires training mode
     
    # plain SGD over both layers' weights
    opt = SGD([net.l1.weight, net.l2.weight], lr=3e-4)
     
    X_train, Y_train, X_test, Y_test = fetch_mnist()
     
    for step in range(20000):
        # random sample a batch
        samp = np.random.randint(0, X_train.shape[0], size=(64))
        batch = Tensor(X_train[samp], requires_grad=False)
        # get the corresponding labels
        labels = Tensor(Y_train[samp])
     
        # forward pass
        out = net(batch)
     
        # compute loss
        loss = Tensor.sparse_categorical_crossentropy(out, labels)
     
        # reset gradients, backpropagate, and take an SGD step
        opt.zero_grad()
        loss.backward()
        opt.step()
     
        # accuracy on the current training batch, for monitoring
        pred = out.argmax(axis=-1)
        acc = (pred == labels).mean()
     
        if step % 100 == 0:
            print(f"Step {step} - Loss {loss.numpy()} - Accuracy {acc.numpy()}")
     
    # Evaluation
     
    with Timing("Time: "):
        avg_acc = 0
     
        for step in range(2000):
            samp = np.random.randint(0, X_test.shape[0], size=(64))
            batch = Tensor(X_test[samp], requires_grad=False)
            labels = Tensor(Y_test[samp])
     
            out = net(batch)
     
            pred = out.argmax(axis=-1).numpy()
            avg_acc += (pred == labels.numpy()).mean()
     
        print(f"Accuracy {avg_acc / 2000}")