# NOTE: lines starting with "!" are IPython/notebook shell escapes, not Python
# statements; they only work when this file is run as notebook cells.
# The later open() calls read these files from /content/, so the commands are
# presumably run with /content as the working directory (e.g. Colab) — confirm.

# Fetch the four EmoInt training files (one per emotion, ratings in 0..1).
!wget http://saifmohammad.com/WebDocs/EmoInt%20Train%20Data/sadness-ratings-0to1.train.txt
!wget http://saifmohammad.com/WebDocs/EmoInt%20Train%20Data/anger-ratings-0to1.train.txt
!wget http://saifmohammad.com/WebDocs/EmoInt%20Train%20Data/fear-ratings-0to1.train.txt
!wget http://saifmohammad.com/WebDocs/EmoInt%20Train%20Data/joy-ratings-0to1.train.txt
# Fetch and unpack the GloVe 6B archive; glove.6B.200d.txt from it is read below.
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip /content/glove.6B.zip
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
# Read each emotion's training file fully into memory as one string.
# The files are opened read-only ("r" instead of "r+", since nothing is
# written) and inside a context manager so every handle is closed right
# after the read instead of being leaked.
# NOTE(review): encoding is pinned to UTF-8 so the result does not depend on
# the platform default — confirm the downloaded files are UTF-8.
with open("/content/joy-ratings-0to1.train.txt", "r", encoding="utf-8") as _handle:
    Joy = _handle.read()
with open("/content/fear-ratings-0to1.train.txt", "r", encoding="utf-8") as _handle:
    Fear = _handle.read()
with open("/content/anger-ratings-0to1.train.txt", "r", encoding="utf-8") as _handle:
    Anger = _handle.read()
with open("/content/sadness-ratings-0to1.train.txt", "r", encoding="utf-8") as _handle:
    Sadness = _handle.read()
def preprocessing(File):
    """Turn the raw text of one ratings file into parallel text/label lists.

    Each line of ``File`` is split on tabs; field 1 is used as the text,
    field 2 as the emotion name and field 3 as the intensity score.

    Labelling rules:
      * intensity >= 0.4        -> the emotion's class id
                                   (joy=0, fear=1, anger=2, sadness=3)
      * 0.2 <= intensity < 0.4  -> class id 4
      * intensity < 0.2         -> row is dropped

    Rows that cannot be parsed (fewer than four fields, non-numeric
    intensity) are skipped. Rows with intensity >= 0.4 but an unrecognised
    emotion name are skipped as well, so the two returned lists always have
    the same length.

    Args:
        File: Full contents of a ratings file as a single string.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is a list of texts and ``Y`` the list
        of integer class ids, aligned index by index.
    """
    label_ids = {'joy': 0, 'fear': 1, 'anger': 2, 'sadness': 3}
    X = []
    Y = []
    for line in File.split("\n"):
        fields = line.split("\t")
        try:
            text = fields[1]
            emotion = fields[2]
            intensity = float(fields[3])
        except (IndexError, ValueError):
            # Blank or malformed line: best-effort parsing, just skip it.
            continue

        if intensity >= 0.4:
            if emotion not in label_ids:
                # Appending the text without a label would desynchronise
                # X and Y, so drop the row entirely.
                continue
            X.append(text)
            Y.append(label_ids[emotion])
        elif intensity >= 0.2:
            X.append(text)
            Y.append(4)
    return X, Y
# Class id -> human-readable label, used when decoding model predictions.
class_names = {
    0: 'joy',
    1: 'fear',
    2: 'anger',
    3: 'sadness',
    4: 'neutral',
}

# Parse every emotion file into its own (texts, labels) pair.
X_Joy, Y_Joy = preprocessing(Joy)
X_Fear, Y_Fear = preprocessing(Fear)
X_Anger, Y_Anger = preprocessing(Anger)
X_Sadness, Y_Sadness = preprocessing(Sadness)

# Concatenate the per-emotion lists, in the same order for texts and labels
# so that X[k] and Y[k] still describe the same example.
X = [*X_Joy, *X_Fear, *X_Anger, *X_Sadness]
Y = [*Y_Joy, *Y_Fear, *Y_Anger, *Y_Sadness]
# Parse the GloVe text file into a {token: vector} dictionary. Each line is
# split once on whitespace: the first piece is the token, the remainder is a
# space-separated list of floats.
# The file is opened in a context manager so the handle is closed once the
# loop finishes instead of being leaked.
# NOTE(review): encoding is pinned to UTF-8 so parsing does not depend on the
# platform default — confirm against the downloaded GloVe file.
embeddings_index = {}
with open("/content/glove.6B.200d.txt", encoding="utf-8") as embeddings:
    for line in embeddings:
        word, coefs = line.split(maxsplit=1)
        coefs = np.fromstring(coefs, "f", sep=" ")
        embeddings_index[word] = coefs
# Length passed to the vectorizer as output_sequence_length.
MAX_LENGTH = 200

# Text -> integer-id layer, fitted on the collected texts.
vectorizer = TextVectorization(
    max_tokens=20000,
    output_sequence_length=MAX_LENGTH,
)
vectorizer.adapt(X)

# Map each vocabulary entry to its position in the vocabulary list.
vocablary = vectorizer.get_vocabulary()
word_index = {token: position for position, token in enumerate(vocablary)}
# Build Weight Matrix
EMBEDDING_DIM = 200
# Two more rows than there are vocabulary entries.
# NOTE(review): presumably headroom for reserved tokens — confirm.
num_tokens = len(vocablary) + 2

# Row i holds the pre-trained vector of the word with index i; rows for
# words that have no pre-trained vector stay all-zero.
weight_matrix = np.zeros((num_tokens, EMBEDDING_DIM))
hits = misses = 0
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is None:
        misses += 1
        continue
    weight_matrix[i] = embedding_vector
    hits += 1
print("Converted %d words (%d misses)" % (hits, misses))
# Labels become a NumPy array; texts are wrapped one-per-row, run through the
# fitted vectorizer, and converted back to a NumPy array.
Y = np.array(Y)
X = vectorizer(np.array([[sentence] for sentence in X])).numpy()

# Hold out 20% of the examples.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
class Model(tf.keras.Model):
    """Text classifier: frozen embedding -> BiLSTM -> dense -> 5-way softmax."""

    def __init__(self, weight_matrix, EMBEDDING_DIM, MAX_LENGTH):
        """Create the layers.

        Args:
            weight_matrix: Matrix used as the constant initial value of the
                embedding table; its length is the embedding's input_dim.
            EMBEDDING_DIM: Output dimension of the embedding layer.
            MAX_LENGTH: Value passed to the embedding as input_length.
        """
        super().__init__()
        layers = tf.keras.layers

        # The embedding table starts from weight_matrix and is not trained.
        self.Embedding = layers.Embedding(
            input_dim=len(weight_matrix),
            output_dim=EMBEDDING_DIM,
            embeddings_initializer=tf.keras.initializers.Constant(weight_matrix),
            trainable=False,
            input_length=MAX_LENGTH,
        )
        self.Bidirectional = layers.Bidirectional(
            layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2)
        )
        self.Dense = layers.Dense(512, activation='relu')
        self.Dropout = layers.Dropout(0.5)
        self.Output = layers.Dense(5, activation='softmax')

    def call(self, inputs):
        """Apply the layers in order and return the softmax output."""
        pipeline = (
            self.Embedding,
            self.Bidirectional,
            self.Dense,
            self.Dropout,
            self.Output,
        )
        x = inputs
        for layer in pipeline:
            x = layer(x)
        return x
# Instantiate the classifier with the prepared embedding weights.
model = Model(weight_matrix, EMBEDDING_DIM, MAX_LENGTH)

# Labels are integer class ids, hence the sparse categorical loss.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Train for 10 epochs; the held-out split is passed as validation data.
history = model.fit(
    X_train,
    Y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, Y_test),
)
# Pull the per-epoch curves out of the training history.
history_dict = history.history
loss, accuracy, val_loss, val_accuracy = (
    history_dict[key]
    for key in ("loss", "accuracy", "val_loss", "val_accuracy")
)
epochs = range(1, len(loss) + 1)


def _plot_panel(position, train_curve, val_curve, train_label, val_label, y_label):
    """Draw one training-vs-validation panel in slot ``position`` of a 1x2 grid."""
    plt.subplot(1, 2, position)
    plt.plot(epochs, train_curve, label=train_label)
    plt.plot(epochs, val_curve, label=val_label)
    plt.xlabel("Epochs")
    plt.ylabel(y_label)
    plt.legend()


# Left panel: loss; right panel: accuracy.
plt.figure(figsize=(20, 5))
_plot_panel(1, loss, val_loss, "Training Loss", "Validation Loss", "Loss")
_plot_panel(2, accuracy, val_accuracy, "Training Accuracy", "Validation Accuracy", "Accuracy")
plt.show()
def prediction(sentence, class_names):
    """Return the predicted label for every text in ``sentence``.

    The texts are vectorized with the module-level ``vectorizer``, passed to
    the module-level ``model``, and the argmax along axis 1 of the model
    output is looked up in ``class_names``.

    Args:
        sentence: Iterable of input strings.
        class_names: Mapping from class id to label.

    Returns:
        A list of labels, one per input text, in input order.
    """
    batch = np.array([[text] for text in sentence])
    token_ids = vectorizer(batch).numpy()
    scores = model.predict(token_ids)
    winners = np.argmax(scores, axis=1).tolist()
    return [class_names[class_id] for class_id in winners]
# A few hand-written sentences to try the trained model on.
Test = ['this is such an amazing movie!',
        'The movie was meh.',
        'The movie was okish.',
        'The movie was terrible...',
        'Be at the end of your rope',
        'Pumped up',
        'Afraid of your own shadow',
        'Black mood']

# Store the result under its own name. Assigning it back to `prediction`
# would replace the function with a list, so running this cell a second time
# would fail with "'list' object is not callable".
predictions = prediction(Test, class_names)
print(predictions)