Download the Dataset and Glove Embeddings¶

!wget http://saifmohammad.com/WebDocs/EmoInt%20Train%20Data/sadness-ratings-0to1.train.txt
!wget http://saifmohammad.com/WebDocs/EmoInt%20Train%20Data/anger-ratings-0to1.train.txt
!wget http://saifmohammad.com/WebDocs/EmoInt%20Train%20Data/fear-ratings-0to1.train.txt
!wget http://saifmohammad.com/WebDocs/EmoInt%20Train%20Data/joy-ratings-0to1.train.txt

!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip /content/glove.6B.zip

Import Libraries¶

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

Load and Prepocess the Dataset¶

Joy = open("/content/joy-ratings-0to1.train.txt", "r+")
Joy = Joy.read()

Fear = open("/content/fear-ratings-0to1.train.txt", "r+")
Fear = Fear.read()

Anger = open("/content/anger-ratings-0to1.train.txt", "r+")
Anger = Anger.read()

Sadness = open("/content/sadness-ratings-0to1.train.txt", "r+")
Sadness = Sadness.read()

def preprocessing(File):
  X = []
  Y = []
  List = list(File.split("\n"))
  for i in range(len(List)):
    try:
      List[i] = list(List[i].split("\t"))
      if float(List[i][3]) >= 0.4:
        X.append(List[i][1])
        if List[i][2] == 'joy':
          Y.append(0)
        elif List[i][2] == 'fear':
          Y.append(1)
        elif List[i][2] == 'anger':
          Y.append(2)
        elif List[i][2] == 'sadness':
          Y.append(3)
      elif float(List[i][3]) >= 0.2:
        X.append(List[i][1])
        Y.append(4)
    except:
      pass
  
  return X, Y

class_names = {0: 'joy', 1: 'fear', 2: 'anger', 3: 'sadness', 4: 'neutral'}

X_Joy, Y_Joy = preprocessing(Joy)
X_Fear, Y_Fear = preprocessing(Fear)
X_Anger, Y_Anger = preprocessing(Anger)
X_Sadness, Y_Sadness = preprocessing(Sadness)

X = []
Y = []

X = X_Joy + X_Fear + X_Anger + X_Sadness
Y = Y_Joy + Y_Fear + Y_Anger + Y_Sadness

embeddings = open("/content/glove.6B.200d.txt")

embeddings_index = {}
for line in embeddings:
  word, coefs = line.split(maxsplit = 1)
  coefs = np.fromstring(coefs, "f", sep = " ")
  embeddings_index[word] = coefs

MAX_LENGTH = 200
vectorizer = TextVectorization(max_tokens = 20000, output_sequence_length = MAX_LENGTH)
vectorizer.adapt(X)

vocablary = vectorizer.get_vocabulary()
word_index = dict(zip(vocablary, range(len(vocablary))))

Build Weight Matrix

num_tokens = len(vocablary) + 2
EMBEDDING_DIM = 200
hits = 0
misses = 0

weight_matrix = np.zeros((num_tokens, EMBEDDING_DIM))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        weight_matrix[i] = embedding_vector
        hits += 1
    else:
        misses += 1
print("Converted %d words (%d misses)" % (hits, misses))

Converted 6962 words (3422 misses)

X = vectorizer(np.array([[s] for s in X])).numpy()
Y = np.array(Y)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

Build the Model using Subclassing API of TensorFlow¶

class Model(tf.keras.Model):
  
  def __init__(self, weight_matrix, EMBEDDING_DIM, MAX_LENGTH):
    super(Model, self).__init__()
    self.Embedding = tf.keras.layers.Embedding(input_dim = len(weight_matrix),
                                               output_dim = EMBEDDING_DIM,
                                               embeddings_initializer = tf.keras.initializers.Constant(weight_matrix),
                                               trainable = False,
                                               input_length = MAX_LENGTH)
    self.Bidirectional = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, dropout = 0.2, recurrent_dropout = 0.2))
    self.Dense = tf.keras.layers.Dense(512, activation = 'relu')
    self.Dropout = tf.keras.layers.Dropout(0.5)
    self.Output = tf.keras.layers.Dense(5, activation = 'softmax')
  
  def call(self, inputs):
    x = self.Embedding(inputs)
    x = self.Bidirectional(x)
    x = self.Dense(x)
    x = self.Dropout(x)
    x = self.Output(x)
    return x

model = Model(weight_matrix, EMBEDDING_DIM, MAX_LENGTH)

model.compile(optimizer = 'adam', loss = tf.keras.losses.SparseCategoricalCrossentropy(), metrics = ['accuracy'])

history = model.fit(X_train, Y_train, epochs = 10, validation_data = (X_test, Y_test), batch_size = 32)

Epoch 1/10
86/86 [==============================] - 105s 1s/step - loss: 1.5472 - accuracy: 0.3050 - val_loss: 1.3282 - val_accuracy: 0.4217
Epoch 2/10
86/86 [==============================] - 101s 1s/step - loss: 1.2521 - accuracy: 0.4631 - val_loss: 1.2490 - val_accuracy: 0.4729
Epoch 3/10
86/86 [==============================] - 95s 1s/step - loss: 1.1026 - accuracy: 0.5464 - val_loss: 1.0529 - val_accuracy: 0.5842
Epoch 4/10
86/86 [==============================] - 95s 1s/step - loss: 0.9473 - accuracy: 0.6154 - val_loss: 1.0031 - val_accuracy: 0.5900
Epoch 5/10
86/86 [==============================] - 96s 1s/step - loss: 0.7860 - accuracy: 0.6903 - val_loss: 0.9672 - val_accuracy: 0.6164
Epoch 6/10
86/86 [==============================] - 94s 1s/step - loss: 0.6980 - accuracy: 0.7323 - val_loss: 0.9578 - val_accuracy: 0.6237
Epoch 7/10
86/86 [==============================] - 95s 1s/step - loss: 0.6499 - accuracy: 0.7409 - val_loss: 0.9831 - val_accuracy: 0.6252
Epoch 8/10
86/86 [==============================] - 95s 1s/step - loss: 0.5846 - accuracy: 0.7766 - val_loss: 1.0090 - val_accuracy: 0.6369
Epoch 9/10
86/86 [==============================] - 95s 1s/step - loss: 0.4988 - accuracy: 0.8098 - val_loss: 1.0110 - val_accuracy: 0.6428
Epoch 10/10
86/86 [==============================] - 94s 1s/step - loss: 0.4528 - accuracy: 0.8195 - val_loss: 1.1210 - val_accuracy: 0.6457

Create a plot of accuracy and loss over time¶

history_dict = history.history

loss = history_dict["loss"]
accuracy = history_dict["accuracy"]
val_loss = history_dict["val_loss"]
val_accuracy = history_dict["val_accuracy"]

epochs = range(1, len(loss) + 1)

plt.figure(figsize = (20, 5))
plt.subplot(1, 2, 1)
plt.plot(epochs, loss, label = "Training Loss")
plt.plot(epochs, val_loss, label = "Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(epochs, accuracy, label = "Training Accuracy")
plt.plot(epochs, val_accuracy, label = "Validation Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()

plt.show()

Make Predictions¶

def prediction(sentence, class_names):
  prediction = vectorizer(np.array([[s] for s in sentence])).numpy()
  prediction = model.predict(prediction)
  prediction = np.argmax(prediction, axis = 1).tolist()
  for i in range(len(prediction)):
    prediction[i] = class_names[prediction[i]]
  return prediction

Test = ['this is such an amazing movie!',
        'The movie was meh.',
        'The movie was okish.',
        'The movie was terrible...',
        'Be at the end of your rope',
        'Pumped up',
        'Afraid of your own shadow',
        'Black mood']

prediction = prediction(Test, class_names)
print(prediction)

['joy', 'neutral', 'neutral', 'fear', 'neutral', 'neutral', 'fear', 'sadness']