# It is position-wise: the same two-layer network is applied to every token
# position independently; only the last (embedding) dimension is transformed.
# See the demo at the bottom of this file.
# https://stackoverflow.com/questions/74979359/how-is-position-wise-feed-forward-neural-network-implemented-for-transformers

# Why do we need a fixed input size? See the padding sketch at the bottom of this file.
# https://ai.stackexchange.com/questions/37624/why-do-transformers-have-a-fixed-input-length

import torch.nn as nn


class FeedForwardNetwork(nn.Module):
    """Position-wise feed-forward block: Linear -> ReLU -> (Dropout) -> Linear."""

    def __init__(self, embedding_size: int, feed_forward_hidden_layer_dimension: int):
        super().__init__()
        self.__fully_connected_1 = nn.Linear(
            embedding_size, feed_forward_hidden_layer_dimension
        )  # expand into the higher hidden dimension
        self.__relu = nn.ReLU()
        self.__dropout = nn.Dropout(
            0.1
        )  # only active during training; model.eval() deactivates it
        self.__fully_connected_2 = nn.Linear(
            feed_forward_hidden_layer_dimension, embedding_size
        )  # project back down to the model (embedding) dimension

    def forward(self, x):
        # -> Linear 1 -> ReLU -> (Dropout during training) -> Linear 2 ->

        # 1) Linear layer
        x = self.__fully_connected_1(x)

        # 2) ReLU
        x = self.__relu(x)

        # 3) Dropout (currently disabled)
        # x = self.__dropout(x)

        # 4) Linear layer
        x = self.__fully_connected_2(x)

        return x
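

if __name__ == "__main__":
    # Minimal sketch of the position-wise property referenced at the top of this
    # file. All sizes below (embedding 8, hidden 32, batch 2, sequence length 5)
    # are arbitrary demo values, not anything prescribed by the model.
    import torch

    torch.manual_seed(0)
    ffn = FeedForwardNetwork(embedding_size=8, feed_forward_hidden_layer_dimension=32)
    ffn.eval()  # ensure dropout (if re-enabled above) is inactive for the comparison

    x = torch.randn(2, 5, 8)  # (batch, sequence_length, embedding_size)
    full = ffn(x)  # nn.Linear only transforms the last dimension
    print(full.shape)  # torch.Size([2, 5, 8])

    # Passing a single position through the same network matches the corresponding
    # slice of the full output: positions do not interact inside the FFN.
    single = ffn(x[:, 2, :])
    print(torch.allclose(full[:, 2, :], single, atol=1e-6))  # True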
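

if __name__ == "__main__":
    # Minimal sketch of why a "fixed" input size shows up in practice (second link
    # at the top of this file): sequences in a batch are padded to one common
    # length, and a mask records which positions are real tokens. The tensors and
    # the pad_id value here are made up for illustration.
    import torch
    from torch.nn.utils.rnn import pad_sequence

    pad_id = 0
    sequences = [  # three token-id sequences of different lengths
        torch.tensor([5, 7, 2]),
        torch.tensor([4, 9]),
        torch.tensor([3, 8, 6, 1]),
    ]
    batch = pad_sequence(sequences, batch_first=True, padding_value=pad_id)
    padding_mask = batch != pad_id  # True where the token is real, False where padded

    print(batch)         # shape (3, 4): every row padded to the longest sequence
    print(padding_mask)  # later used so attention ignores the padded positions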