# It is position-wise!
# https://stackoverflow.com/questions/74979359/how-is-position-wise-feed-forward-neural-network-implemented-for-transformers
# Why do we need a fixed size?
# https://ai.stackexchange.com/questions/37624/why-do-transformers-have-a-fixed-input-length

import torch.nn as nn


class FeedForwardNetwork(nn.Module):
    def __init__(self, embedding_size: int, feed_forward_hidden_layer_dimension: int):
        super().__init__()
        self.__fully_connected_1 = nn.Linear(
            embedding_size, feed_forward_hidden_layer_dimension
        )  # expand into the higher hidden dimension
        self.__relu = nn.ReLU()
        self.__dropout = nn.Dropout(
            0.1
        )  # active during training; model.eval() deactivates it
        self.__fully_connected_2 = nn.Linear(
            feed_forward_hidden_layer_dimension, embedding_size
        )  # project back into the model dimension

    def forward(self, x):
        # -> Linear 1 -> ReLU -> (Dropout during training) -> Linear 2 ->
        # 1) Linear layer
        x = self.__fully_connected_1(x)
        # 2) ReLU
        x = self.__relu(x)
        # 3) Dropout
        x = self.__dropout(x)
        # 4) Linear layer
        x = self.__fully_connected_2(x)
        return x
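

# Minimal usage sketch (my own addition, not part of the original module).
# The hyperparameters below are assumptions: embedding_size=512 and
# feed_forward_hidden_layer_dimension=2048 mirror the defaults from
# "Attention Is All You Need"; substitute whatever your model actually uses.
if __name__ == "__main__":
    import torch

    feed_forward = FeedForwardNetwork(
        embedding_size=512, feed_forward_hidden_layer_dimension=2048
    )
    # The layer is applied position-wise: the same weights act on every token,
    # so the input can be shaped (batch_size, sequence_length, embedding_size).
    dummy_input = torch.randn(2, 10, 512)
    output = feed_forward(dummy_input)
    print(output.shape)  # torch.Size([2, 10, 512]) -- back in the model dimension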