Plotting the Training and Validation Loss Curves for the Transformer Model


from tensorflow.keras.optimizers import Adam

from tensorflow.keras.optimizers.schedules import LearningRateSchedule

from tensorflow.keras.metrics import Mean

from tensorflow import data, train, math, reduce_sum, cast, equal, argmax, float32, GradientTape, function

from keras.losses import sparse_categorical_crossentropy

from model import TransformerModel

from prepare_dataset import PrepareDataset

from time import time

from pickle import dump

 

 

# Define the model parameters

h = 8  # Number of self-attention heads

d_k = 64  # Dimensionality of the linearly projected queries and keys

d_v = 64  # Dimensionality of the linearly projected values

d_model = 512  # Dimensionality of the model layers' outputs

d_ff = 2048  # Dimensionality of the inner fully connected layer

n = 6  # Number of layers in the encoder stack

 

# Define the training parameters

epochs = 20

batch_size = 64

beta_1 = 0.9

beta_2 = 0.98

epsilon = 1e-9

dropout_rate = 0.1

 

 

# Implementing a learning rate scheduler

class LRScheduler(LearningRateSchedule):

    def __init__(self, d_model, warmup_steps=4000, **kwargs):

        super(LRScheduler, self).__init__(**kwargs)

 

        self.d_model = cast(d_model, float32)

        self.warmup_steps = warmup_steps

 

    def __call__(self, step_num):

 

        # Linearly increasing the learning rate for the first warmup_steps, and decreasing it thereafter

        arg1 = step_num ** -0.5

        arg2 = step_num * (self.warmup_steps ** -1.5)

        return (self.d_model ** -0.5) * math.minimum(arg1, arg2)

 

 

# Instantiate an Adam optimizer

optimizer = Adam(LRScheduler(d_model), beta_1, beta_2, epsilon)
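# Note: the scheduler above implements the learning rate formula from "Attention Is All You Need"
# (Vaswani et al., 2017): lrate = d_model^(-0.5) * min(step_num^(-0.5), step_num * warmup_steps^(-1.5)).
# The Adam hyperparameters used here (beta_1 = 0.9, beta_2 = 0.98, epsilon = 1e-9) also follow that paper.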

 

# Prepare the training dataset

dataset = PrepareDataset()

trainX, trainY, valX, valY, train_orig, val_orig, enc_seq_length, dec_seq_length, enc_vocab_size, dec_vocab_size = dataset('english-german.pkl')

 

print(enc_seq_length, dec_seq_length, enc_vocab_size, dec_vocab_size)

 

# Prepare the training dataset batches

train_dataset = data.Dataset.from_tensor_slices((trainX, trainY))

train_dataset = train_dataset.batch(batch_size)

 

# Prepare the validation dataset batches

val_dataset = data.Dataset.from_tensor_slices((valX, valY))

val_dataset = val_dataset.batch(batch_size)

 

# Create model

training_model = TransformerModel(enc_vocab_size, dec_vocab_size, enc_seq_length, dec_seq_length, h, d_k, d_v, d_model, d_ff, n, dropout_rate)

 

 

# Defining the loss function

def loss_fcn(target, prediction):

    # Create a mask so that the zero padding values are not included in the computation of loss

    padding_mask = math.logical_not(equal(target, 0))

    padding_mask = cast(padding_mask, float32)

    # Compute a sparse categorical cross-entropy loss on the unmasked values

    loss = sparse_categorical_crossentropy(target, prediction, from_logits=True) * padding_mask

    # Compute the mean loss over the unmasked values

    return reduce_sum(loss) / reduce_sum(padding_mask)

 

 

# Defining the accuracy function

def accuracy_fcn(target, prediction):

    # Create a mask so that the zero padding values are not included in the computation of accuracy

    padding_mask = math.logical_not(equal(target, 0))

    # Find equal prediction and target values, and apply the padding mask

    accuracy = equal(target, argmax(prediction, axis=2))

    accuracy = math.logical_and(padding_mask, accuracy)

    # Cast the True/False values to 32-bit-precision floating-point numbers

    padding_mask = cast(padding_mask, float32)

    accuracy = cast(accuracy, float32)

    # Compute the mean accuracy over the unmasked values

    return reduce_sum(accuracy) / reduce_sum(padding_mask)

 

 

# Include metrics monitoring

train_loss = Mean(name='train_loss')

train_accuracy = Mean(name='train_accuracy')

val_loss = Mean(name='val_loss')

 

# Create a checkpoint object and manager to manage multiple checkpoints

ckpt = train.Checkpoint(model=training_model, optimizer=optimizer)

ckpt_manager = train.CheckpointManager(ckpt, "./checkpoints", max_to_keep=None)

 

# Initialise dictionaries to store the training and validation losses

train_loss_dict = {}

val_loss_dict = {}

 

# Speeding up the training process

@function

def train_step(encoder_input, decoder_input, decoder_output):

    with GradientTape() as tape:

 

        # Run the forward pass of the model to generate a prediction

        prediction = training_model(encoder_input, decoder_input, training=True)

 

        # Compute the training loss

        loss = loss_fcn(decoder_output, prediction)

 

        # Compute the training accuracy

        accuracy = accuracy_fcn(decoder_output, prediction)

 

    # Retrieve gradients of the trainable variables with respect to the training loss

    gradients = tape.gradient(loss, training_model.trainable_weights)

 

    # Update the values of the trainable variables by gradient descent

    optimizer.apply_gradients(zip(gradients, training_model.trainable_weights))

 

    train_loss(loss)

    train_accuracy(accuracy)
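# Note: the @function decorator above traces train_step into a static TensorFlow graph on its first call,
# so subsequent calls run the compiled graph rather than eager Python code, which speeds up training.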

 

 

for epoch in range(epochs):

 

    train_loss.reset_states()

    train_accuracy.reset_states()

    val_loss.reset_states()

 

    print("\nStart of epoch %d" % (epoch + 1))

 

    start_time = time()

 

    # Iterate over the dataset batches

    for step, (train_batchX, train_batchY) in enumerate(train_dataset):

 

        # Define the encoder and decoder inputs, and the decoder output

        encoder_input = train_batchX[:, 1:]

        decoder_input = train_batchY[:, :-1]

        decoder_output = train_batchY[:, 1:]

 

        train_step(encoder_input, decoder_input, decoder_output)

 

        if step % 50 == 0:

            print(f'Epoch {epoch + 1} Step {step} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')

 

    # Run a validation step after every epoch of training

    for val_batchX, val_batchY in val_dataset:

 

        # Define the encoder and decoder inputs, and the decoder output

        encoder_input = val_batchX[:, 1:]

        decoder_input = val_batchY[:, :-1]

        decoder_output = val_batchY[:, 1:]

 

        # Generate a prediction

        prediction = training_model(encoder_input, decoder_input, training=False)

 

        # Compute the validation loss

        loss = loss_fcn(decoder_output, prediction)

        val_loss(loss)

 

    # Print epoch number and the accuracy and loss values at the end of every epoch

    print("Epoch %d: Training Loss %.4f, Training Accuracy %.4f, Validation Loss %.4f" % (epoch + 1, train_loss.result(), train_accuracy.result(), val_loss.result()))

 

    # Save a checkpoint after each epoch

    if (epoch + 1) % 1 == 0:

 

        save_path = ckpt_manager.save()

        print("Saved checkpoint at epoch %d" % (epoch + 1))

 

        # Save the trained model weights

        training_model.save_weights("weights/wghts" + str(epoch + 1) + ".ckpt")

 

        train_loss_dict[epoch] = train_loss.result()

        val_loss_dict[epoch] = val_loss.result()

 

# Save the training loss values

with open('./train_loss.pkl', 'wb') as file:

    dump(train_loss_dict, file)

 

# Save the validation loss values

with open('./val_loss.pkl', 'wb') as file:

    dump(val_loss_dict, file)

 

print("Total time taken: %.2fs" % (time() - start_time))
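Once training completes, the two pickle files can be read back in order to plot the training and validation loss curves that give this post its title. The following is a minimal sketch under the assumption that matplotlib is installed and that each pickled dictionary maps a zero-based epoch index to the loss value saved above; the variable names are only for illustration:

from pickle import load
import matplotlib.pyplot as plt

# Load the pickled training and validation loss dictionaries
with open('./train_loss.pkl', 'rb') as file:
    train_losses = load(file)

with open('./val_loss.pkl', 'rb') as file:
    val_losses = load(file)

# Plot both loss curves against the epoch number
epoch_numbers = [epoch + 1 for epoch in train_losses.keys()]
plt.plot(epoch_numbers, [float(loss) for loss in train_losses.values()], label='Training loss')
plt.plot(epoch_numbers, [float(loss) for loss in val_losses.values()], label='Validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()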

