# Waste classification using Deep Learning

Khatuna Kakhiani\
HLRS, UniversitÃ¤t Stuttgart
Online course: Deep Learning and GPU programming using OpenACC\
17.07.2020

Waste sorting is a more complex task than just assigning the material labels: Plastic, Glass, Metal, Paper or Composite. 
In this example we will learn litter clasification using a small dataset[1]. Litter consists of waste products that have been discarded incorrectly, without consent, at an unsuitable location.


In [1]:
#Import libraries and modules 

import warnings
warnings.filterwarnings('ignore',category=FutureWarning)    # ignores warnings about future version of numpy

#For JSON data
import json

#For interacting with operating system 
import os

#For copying files
import shutil

#For modeling
import tensorflow as tf
from tensorflow import keras
#from keras.callbacks import TensorBoard
#import pandas as pd 

#For vector/array operations
import numpy as np
from numpy import asarray
from random import sample
import random 
from random import shuffle
import math

#Load the TensorBoard notebook extension
%load_ext tensorboard
#%reload_ext tensorboard

import tensorboard
print('Tensorboard', tensorboard.__version__)

#For loading and preprocessing images
from PIL import Image 
import matplotlib.pyplot as plt


import datetime

#For fine-grained evaluation metrics
import sklearn
print('Sklearn version', sklearn.__file__)

print('Tensorflow version', tf.__version__)
print('Numpy version', np.__version__)

Tensorboard 1.14.0
Sklearn version /home/khat/.local/lib/python3.6/site-packages/sklearn/__init__.py
Tensorflow version 1.14.0
Numpy version 1.18.4


## 1. Dataset exploration 

1.0 Identify current working directories and clear logs from previous runs

In [2]:
#Current directory 
print(os.getcwd())
!rm -rf ./logs/

/home/khat/TACO/TACO/Notebooks


1.1 Load annotations: annotations.json[1] contains annotations in COCO[2] format 

In [3]:
data_dir = os.path.join(os.getcwd(), os.pardir, "data")     #your job: define data_dir
anno_file = os.path.join(data_dir, "annotations.json")      #your job: path for *.json file

In [4]:
with open(anno_file, "r") as f: # annotations.json is a nested dictionary (keys are mapped to another dictionary within original dictionary)                     
    annotations = json.load(f)  # your job: parse JSON string        

1.2 Explore annotations dictionary

Exercise: explore each annotation key

In [5]:
#Number of images
len(annotations["images"])

1500

In [6]:
#Number of annotations 
len(annotations["annotations"]) 

4784

1.2.1 Question to audience: Explain why there are more annotations than images ?

In [None]:
#Image information
annotations["images"][320]

In [None]:
annotations["images"][1210]  #your job: explore the annotation

In [None]:
#1.2.2 Exercise: explore annotation for image_id 6
for anno in annotations ["annotations"]:
    if anno["image_id"]==6:
        print(anno)

In [None]:
annotations["categories"][11]

In [None]:
for anno in annotations ["scene_annotations"]:
    if anno["image_id"]==6:
        print(anno)


1.2.3 Exercise for advanced participants: explore nested dictionary in details.

# 2. Preprocessing   

For simplicity, we create a simplified dictionary w.r.t to each image and its associated categories. We store only a subset of information, e.g. 'image_id', 'file_name', 'height', 'width', 'category_ids', 'category_names', and 'super_categories'

In [None]:
# 2.1 Open annotation file and read into memory
with open(anno_file, "r") as f:
    annotations = json.load(f)

In [None]:
# 2.2 Prepare category id to name mappings. Items are ordered by category_id, so you can get the
# category name of a category_id via the category_id, e.g.
# via annotations["categories"][category_id]
categories = annotations["categories"]

In [None]:
#2.3 Create new python dictionary with subset of relevant information (e.g. image -> category data)
data = {}
for i, item in enumerate(annotations["annotations"]):
    #Map image_id to image filename using the "images" part of the dataset.
    image_id = item["image_id"]
    image_info = annotations["images"][image_id]
    file_name = image_info["file_name"]
    height,width = image_info["height"], image_info["width"]
    
    #Map category_id of instance to category name
    category_id = item["category_id"]
    category_info = categories[category_id]
    category_name = category_info["name"]
    super_category = category_info["supercategory"]
    
    #A labeled image can have multiple categories, so check if we have already added to the dictionary (e.g. if it's in the keys)
    if image_id in data.keys():
        data[image_id]["category_ids"].add(category_id)
        data[image_id]["category_names"].add(category_name)
        data[image_id]["super_categories"].add(super_category)
    else:
        data[image_id] = {"file_name": file_name, "category_ids": {category_id}, "image_id": image_id, "height": height, "width": width, "category_names": {category_name}, "super_categories": {super_category}}


In [None]:
len(data)

In [None]:
data.keys()   

In [None]:
data[320] #labels in particular instance in our dataset (image_id = 320)

# 3. Binary Classification

In [None]:
#We can construct a binary classification problem in a one vs all setting, e.g. does this image contain a specific 
# supercategory or not. Let's create the numpy arrays corresponding to the images and labels that we can use for training.

#3.1 Split data into training, validation, and test

data_ids = list(data.keys())
random.shuffle(data_ids) #Shuffle list of keys and store

#Configure proportion of training, validation, and test data
train_perc = 0.8
val_perc = 0.1
test_perc = 0.1
train_size=int(len(data_ids)*train_perc)
val_size=int(len(data_ids)*val_perc)
train_ids, val_ids, test_ids = (
    data_ids[:train_size],
    data_ids[train_size : train_size + val_size],
    data_ids[train_size + val_size :],
    )

print("Number of images in training dataset:", len(train_ids))
print("Training image_ids:", train_ids)

print("Number of images in validation dataset:", len(val_ids))
print("Validation image_ids:", val_ids)

print("Number of images in  dataset:", len(test_ids))
print("test image_ids:", test_ids)

print(len(test_ids))

In [None]:
#3.2 Define helper function for loading data and converting to numpy arrrays

def load_data(ids, data, supercategory):
    num_instances = len(ids)
    max_height, max_width =  , 1000 #TBD revisit max heigh and width
    labels = np.zeros((num_instances,))
    images = np.zeros((num_instances,max_height,max_width))

    for i, image_id in enumerate(ids):
        #Convert labels into a binary classification problem (e.g. 0 or 1 depending on the super_category)
        if supercategory in data[image_id]["super_categories"]:
            labels[i] = 1

        #Load images into numpy arrays
        try: 
            image = Image.open(os.path.join(data_dir,data[image_id]["file_name"])).convert("L")  # Grayscale
            image = image.resize(size=(max_height, max_width))
            images[i] = asarray(image) #Convert and store as numpy array

        except Exception as e:
            print(e) #Use this to catch and print exceptions
    return images, labels

In [None]:
#3.3 Define supercategory of interest (in this case 'Bottle') and load training, validation, and test data 
supercategory = "Bottle"
train_images, train_labels = load_data(train_ids, data, supercategory)
val_images, val_labels = load_data(val_ids, data, supercategory)
test_images, test_labels = load_data(test_ids, data, supercategory)

In [None]:
#LECTURE ONLY: save numpy arrays for faster loading during lecture
work_dir = os.path.join(os.getcwd(), "work_data")  # directory: combination of current directory and saved model string
arr_file_trimg = os.path.join(work_dir, "train_images")        # path to the trained model to save  
arr_file_bl = os.path.join(work_dir, "train_labels")        # path to the trained model to save

#Check if directory exists. If not, create it
try:
    os.stat(work_dir)
except:
    os.mkdir(work_dir)

np.save(arr_file_trimg, train_images, allow_pickle=False, fix_imports=False)
np.save(arr_file_bl, train_labels, allow_pickle=False, fix_imports=False)

In [None]:
#LECTURE ONLY: load saved numpy arrays instead of computed ones

train_images=np.load(os.path.join(work_dir, "train_images.npy"), mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII')
train_labels=np.load(os.path.join(work_dir, "train_labels.npy"), mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII')

In [None]:
#3.4 Summarize training, validation, and test data.
#Notice the datasets are not balanced (e.g. proportion in supercategory <50%).
#This is especially important for what evaluation metrics to use.
print('Training Number of instances: {:d}, Proportion in supercategory: {:f}%'.format(len(train_labels), 100*sum(train_labels)/len(train_labels)))
print('Validation Number of instances: {:d}, Proportion in supercategory: {:f}%'.format(len(val_labels), 100*sum(val_labels)/len(val_labels)))
print('Test Number of instances: {:d}, Proportion in supercategory: {:f}%'.format(len(test_labels), 100*sum(test_labels)/len(test_labels)))

In [None]:
#3.5 We scale these values to a range of 0 to 1 before feeding to the neural network model !  
train_images = train_images / 255.0
val_images = val_images / 255.0
test_images = test_images / 255.0

In [None]:
#3.6 End-to-end Exercise: Verify that the data is in the correct format and ready to build and train the network.
#3.6.1 View colored image
example_index = 0
example_id = train_ids[example_index]
img_path = data[example_id]["file_name"]
image = Image.open(os.path.join(data_dir, img_path))
image

In [None]:
#3.6.2 Compare to grayscaled and resized version
image = image.convert("L")
image = image.resize(size=(500, 500))
image

In [None]:
#3.6.3 View corresponding pixel representation (before normalization)
asarray(image)

In [None]:
#3.6.4 View corresponding pixel representation (after normalization)
print(train_images[example_index])

In [None]:
#3.6.5 Annotations corresponding to this image
print(data[example_id])

In [None]:
#3.6.6 Label corresponding to image
print(train_labels[example_index])

# 4. Model Training

In [None]:
#4.1 CNN overview and visualizing an example
from IPython.display import YouTubeVideo
#Convolutional Neural Networks (CNNs) 
#Video credit: Deeplizard
YouTubeVideo('YRhxdVk_sIs')

In [None]:
%load_ext tensorboard
%reload_ext tensorboard

In [None]:
#4.2 Configure model using built-in keras functionality
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(1000, 1000)),
    keras.layers.Dense(128, activation=tf.nn.relu),
  #  keras.layers.Dropout(0.1),
#    keras.layers.Dense(128, activation=tf.nn.relu), # one can add additional layers
    keras.layers.Dense(2, activation=tf.nn.softmax)
])

Building the neural network requires configuring the layers of the model, then compiling the model.

The first layer in this network, tf.keras.layers.Flatten, transforms the format of the images from a 2d-array (of 1000 by 1000 pixels), to a 1d-array of 28 * 28 = 784 pixels. Think of this layer as unstacking rows of pixels in the image and lining them up. This layer has no parameters to learn; it only reformats the data.

After the pixels are flattened, the network consists of a sequence of two tf.keras.layers.Dense layers. These are densely-connected, or fully-connected, neural layers. The first and second Dense layer has 128 nodes (or neurons). The last layer is a 2-node softmax layer, this returns an array of 2 probability scores that sum to 1.

Each node contains a score that indicates the probability that the current image belongs to one of the 2 classes.

Before the model is ready for training, it needs a few more settings. These are added during the model's compile step:

    Loss function; This measures how accurate the model is during training. We want to minimize this function to "steer" the model in the right direction.

    Optimizer; This is how the model is updated based on the data it sees and its loss function.

    Metrics; Used to monitor the training and testing steps. The following example uses accuracy, the fraction of the images that are correctly classified.

In [None]:
#4.3 Overview of model training and optimization
from IPython.display import YouTubeVideo
#Gradient descent, how neural networks learn  
#Video credit: 3BLUE1BROWN SERIES  S3  E2
YouTubeVideo('IHZwWFHWa-w')

In [None]:
#4.4 Configure loss function using built-in keras functionality
optimizer = keras.optimizers.Adam(lr=0.001) #learning rate is a hyper-parameter that we are fixing for now
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)


#Print a summary of the model to understand model complexity
print(model.summary())

Train the model

Training the neural network model requires the following steps:

    Feed the training data to the model - in this example, the train_images and train_labels arrays.
    The model learns to associate images and labels.
    We ask the model to make predictions about a test set in this example, the test_images array. We verify that the predictions match the labels from the bottle_labels_test array.

#To start training, call the model.fit method; the model is "fit" to the training data:



In [None]:
#4.5 Train model based on previously defined parameters

model.fit(train_images,
          train_labels,
          epochs=20, #Number of epochs is a hyper-parameter we are fixing for now
          callbacks=[tensorboard_callback]) #Default batch size is 32 (when not specified)

#Define directory for storing models
model_dir = os.path.join(os.getcwd(), "saved_model")  # directory: combination of current directory and saved model string
mod_file = os.path.join(model_dir, "my_model")        # path to the trained model to save

#Create directory if it doesn't already exist
try:
    os.stat(model_dir)
except:
    os.mkdir(model_dir)
print(mod_file)

#Save trained model
model.save(mod_file) 

In [None]:
# Reload saved model 
#new_model = tf.keras.models.load_model(mod_file)

# Check its architecture
#new_model.summary()



# 5. Model Evaluation

In [None]:
#5.2 Determine how much performance varies between training and unseen (e.g. validation or test) data.
#Notice the large difference between training and unseen data, indicating the need for regularization.
model.evaluate(train_images, train_labels)
model.evaluate(val_images, val_labels)

In [None]:
#5.3 Determine appropriate evaluation metrics
#In section 3.4, we saw the dataset is imbalanced. This means accuracy is not an approriate evaluation measure.
#Let's use precision, recall, and f1 to measure the model's performance on the supercategory of interest.
# Precision = TP / TP + FP ... i.e. how many selected items are relevant
# Recall = TP / TP + FN ... i.e. how many relevant items are selected

#5.3.1 Get predicted model scores
#A prediction is an array of 2 numbers. These describe the "confidence" of the model that the image corresponds
#to bottle or no bottle (i.e each different litter).
#We can see which label has the highest confidence value using maxium value. Alternative approaches involve 
#using thresholds for predictions (depending on whether precision or recall is preferred).

prediction_scores = model.predict(val_images)

prediction_scores[80]

In [None]:
#5.3.2 Convert predicted scores to class labels
# argmax returns the index of the maximum value in a list
# e.g. a = [1,5,10] ... a.argmax() will return the index 2, as 10 is the highest value at index position 2
# axis determines the axis in the a matrix or tensor
# e.g. a = [[1,5,10], [11,5,1]] ... a.argmax() = 3 because argmax flattens the matrix into a 1-D vector if 
# no axis is provided. a.argmax(axis=1) will result in a list of indices for each sublist like so [2,0] as
# 10 with index 2 has the highest value in the first list, and 11 with index 0 is highest in the other list
predictions = prediction_scores.argmax(axis=1)
predictions[80]

In [None]:
#5.3.3 Generate detailed report with desired evaluation metrics
#Notice the lower performance on the minority class (supercategory) and how this was not captured in the accuracy metric.
from sklearn.metrics import classification_report
print(classification_report(val_labels, predictions))

# 6. Regularization and Hyper-Parameter Tuning

In [None]:
# 6.1 Define the search space for the hyper-parameters
parameters = {"learning_rate": [0.001, 0.005, 0.01, 0.02],
              "batch_size": [1, 10, 50, 100],
              "layer_dimension": [8, 32, 64, 128],
              "dropout_rate": [0, 0.05, 0.1, 0.2]
             }
# 6.2 Create helper function for model training based on hyper-parameters
def train_iteration(learning_rate, batch_size, layer_dimension, dropout_rate, train_images, train_labels):
    model = keras.Sequential([
        keras.layers.Flatten(input_shape=(1000, 1000)),
        keras.layers.Dense(layer_dimension, activation=tf.nn.relu),
        keras.layers.Dropout(dropout_rate),
      #  keras.layers.Dropout(0.1),
    #    keras.layers.Dense(128, activation=tf.nn.relu), # one can add additional layers
        keras.layers.Dense(2, activation=tf.nn.softmax)
    ])
    optimizer = keras.optimizers.Adam(lr=learning_rate) #learning rate is a hyper-parameter that we are fixing for now
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(train_images,
          train_labels,
          epochs=20, #Number of epochs is a hyper-parameter we are fixing for now
          callbacks=[tensorboard_callback]) #Default batch size is 32 (when not specified)
    model.save(os.path.join(model_dir, "model_lr_"+str(learning_rate)+"_batch_size_"+str(batch_size)+"_layer_dimension_"+str(layer_dimension)+"_dropout_rate_"+str(dropout_rate)))
    return model

#6.3 Iteratively explore the search space
for lr in parameters["learning_rate"]:
    for batch_size in parameters["batch_size"]:
        for layer_dimension in parameters["layer_dimension"]:
            for dropout in parameters["dropout_rate"]:
                model = train_iteration(lr, batch_size, layer_dimension, dropout, train_images, train_labels)
                prediction_scores = model.predict(val_images)
                predictions = prediction_scores.argmax(axis=1)
                print(classification_report(val_labels, predictions))

## References 

In [None]:
[1] http://tacodataset.org dataset