Source code for easy_torch.losses

import torch
import torch.nn.functional as F
import math
import numpy as np

class PatriniLoss(torch.nn.Module):
    """Shared base for the transition-matrix losses defined in this module.

    On construction a ``num_classes x num_classes`` matrix is built with
    ``1 - noise_level`` on the diagonal and ``noise_level / (num_classes - 1)``
    in every other cell, and stored as the buffer ``matrix``.
    Subclasses are expected to override :meth:`forward`.

    Args:
        noise_level: Value placed (as ``1 - noise_level``) on the diagonal and
            spread evenly over the off-diagonal cells; stored as ``noise_rate``.
        num_classes: Side length of the square matrix.
    """

    def __init__(self, noise_level, num_classes):
        super().__init__()
        self.noise_rate = noise_level
        self.num_classes = num_classes
        # Registered as a buffer so that the tensor follows the module when it
        # is moved to another device.
        self.register_buffer(
            "matrix",
            self._construct_matrix(self.noise_rate, self.num_classes),
        )

    def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Always raises: this class only provides the matrix for subclasses."""
        raise NotImplementedError("PatriniLoss class is meant to be inherited")

    def _construct_matrix(self, noise_rate, num_classes):
        """Return the symmetric matrix described in the class docstring."""
        keep_prob = 1 - noise_rate
        flip_prob = noise_rate / (num_classes - 1)
        transition = torch.full((num_classes, num_classes), flip_prob)
        # fill_diagonal_ works in place and returns the same tensor.
        return transition.fill_diagonal_(keep_prob)
#https://openaccess.thecvf.com/content_cvpr_2017/papers/Patrini_Making_Deep_Neural_CVPR_2017_paper.pdf
class ForwardNRL(PatriniLoss):
    """Loss that pushes the predicted distribution through the base-class matrix.

    The softmax of the logits is multiplied by the transpose of the ``matrix``
    buffer before the log is taken and a cross-entropy against ``target`` is
    computed.
    """

    def __init__(self, noise_level, num_classes):
        super().__init__(noise_level, num_classes)

    def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Return the batch-mean loss.

        Args:
            input: Raw scores; softmax is taken over dim 1.
            target: Multiplied element-wise with the log-probabilities and
                summed over dim 1 -- presumably one-hot (or soft) labels with
                the same shape as ``input``; confirm against the caller.

        Returns:
            Scalar tensor: mean over the batch of the per-sample loss.
        """
        clean_probs = F.softmax(input, dim=1)
        noisy_log_probs = torch.matmul(clean_probs, self.matrix.t()).log()
        per_sample = -(noisy_log_probs * target).sum(dim=1)
        return per_sample.mean()
#https://openaccess.thecvf.com/content_cvpr_2017/papers/Patrini_Making_Deep_Neural_CVPR_2017_paper.pdf
class BackwardNRL(PatriniLoss):
    """Loss that corrects per-class losses with the inverse of the base matrix.

    The ``matrix`` buffer built by :class:`PatriniLoss` is replaced by its
    inverse at construction time; :meth:`forward` multiplies the per-class
    negative log-probabilities by that inverse before selecting entries with
    ``target``.
    """

    def __init__(self, noise_level, num_classes):
        super().__init__(noise_level, num_classes)
        # The parent registered ``matrix`` as a buffer; assigning a tensor to
        # the same attribute name replaces the buffer contents. The inverse
        # must be taken of ``self.matrix`` -- there is no local ``matrix`` here.
        self.matrix = torch.inverse(self.matrix)  # matrix used here is the inverse

    def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Return the batch-mean corrected loss.

        Args:
            input: Raw scores; log-softmax is taken over dim 1.
            target: Multiplied element-wise with the corrected losses and
                summed over dim 1 -- presumably one-hot labels with the same
                shape as ``input``; confirm against the caller.

        Returns:
            Scalar tensor: mean over the batch of the per-sample loss.
        """
        # The sign is applied exactly once, here, so the sum below carries no
        # extra minus.
        neg_log_probs = -1 * F.log_softmax(input, dim=1)
        corrected = torch.matmul(self.matrix, neg_log_probs.t()).t()
        loss = torch.sum(corrected * target, dim=1)
        return torch.mean(loss)
#https://github.com/dmizr/phuber/blob/master/phuber/loss.py
class GCELoss(torch.nn.Module):
    """Generalized Cross Entropy (GCE) loss for training with noisy labels.

    Reference: "Generalized Cross Entropy Loss for Training Deep Neural
    Networks with Noisy Labels" <https://arxiv.org/abs/1805.07836>

    Attributes:
        q (float): Box-Cox transformation parameter. Must be in (0, 1].
        epsilon (float): Small constant added to the selected probability.
        softmax (nn.Softmax): Softmax over dim 1, turning scores into
            probabilities.
    """

    def __init__(self, q: float = 0.7) -> None:
        """Store the Box-Cox parameter and set up the helpers.

        Args:
            q (float): Box-Cox transformation parameter. Default is 0.7.
        """
        super().__init__()
        self.q = q
        # Keeps the selected probability strictly positive.
        self.epsilon = 1e-9
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Compute the mean GCE loss for a batch.

        Args:
            input: Model predictions before softmax,
                shape (batch_size, num_classes).
            target: One-hot encoded labels, shape (batch_size, num_classes).

        Returns:
            torch.Tensor: The mean GCE loss.
        """
        probs = self.softmax(input)
        # Weighting by the one-hot target and summing over classes picks out
        # the probability assigned to the labelled class.
        target_prob = (probs * target).sum(dim=1) + self.epsilon
        per_sample = (1 - target_prob ** self.q) / self.q
        return per_sample.mean()
# NCODLoss requires manual optimization (see https://lightning.ai/docs/pytorch/stable/model/manual_optimization.html),
# following the paper's code: https://github.com/RSTLess-research/NCOD-Learning-with-noisy-labels/tree/main
class NCODLoss(torch.nn.Module):
    """NCOD loss for learning with noisy labels.

    Keeps one learnable scalar ``u`` per training example, a per-example cache
    of encoder features (``prevSimilarity``) and one "master" feature vector
    per class (``masterVector``). The loss combines a similarity-weighted
    cross-entropy with an MSE term on ``u``, plus optional class-balance and
    consistency regularisers.

    Args:
        sample_labels: Label of every training example; compared with each
            class id via ``np.where`` to build the per-class index lists in
            ``bins``. With the default ``None`` every bin is empty.
        num_examp: Number of training examples (rows of ``u`` and
            ``prevSimilarity``).
        num_classes: Number of classes.
        ratio_consistency: Weight of the consistency term; it is only used
            when ``outputs`` holds more rows than ``index``.
        ratio_balance: Weight of the class-balance term (skipped when not > 0).
        total_epochs: Length of the schedule that shrinks the share of samples
            used for the master vectors.
        encoder_features: Width of the encoder feature vectors.
    """

    def __init__(self, sample_labels=None, num_examp=50000, num_classes=100,
                 ratio_consistency=0, ratio_balance=0, total_epochs=4000,
                 encoder_features=512):
        super().__init__()
        self.mean = 1e-8
        self.std = 1e-9
        self.num_classes = num_classes
        self.num_examp = num_examp
        self.encoder_features = encoder_features
        self.total_epochs = total_epochs
        self.ratio_consistency = ratio_consistency
        self.ratio_balance = ratio_balance

        self.u = torch.nn.Parameter(torch.empty(num_examp, 1, dtype=torch.float32))
        self.init_param(mean=self.mean, std=self.std)

        self.beginning = True
        # Buffers (rather than plain attributes) so that both tensors follow
        # the module -- and therefore ``u`` and the incoming features -- across
        # devices. ``persistent=False`` keeps them out of ``state_dict``, so
        # existing checkpoints still load.
        self.register_buffer(
            "prevSimilarity",
            torch.rand((num_examp, encoder_features)),
            persistent=False,
        )
        self.register_buffer(
            "masterVector",
            torch.rand((num_classes, encoder_features)),
            persistent=False,
        )

        self.sample_labels = sample_labels
        # bins[c] holds the indices of the examples whose label equals c.
        self.bins = [
            np.where(self.sample_labels == class_id)[0]
            for class_id in range(num_classes)
        ]

    def init_param(self, mean=1e-8, std=1e-9):
        """Re-initialise ``u`` in place from a normal distribution.

        Args:
            mean: Mean of the normal distribution.
            std: Standard deviation of the normal distribution.
        """
        torch.nn.init.normal_(self.u, mean=mean, std=std)

    def forward(self, index, outputs, label, out, flag, epoch):
        """Compute the NCOD loss for one batch.

        Args:
            index: Indices of the batch examples; used to index ``u`` and
                ``prevSimilarity``.
            outputs: Class scores (softmax is applied over dim 1). When it has
                more rows than ``index`` it is split into two equal chunks and
                the second chunk is used only for the consistency term.
            label: Multiplied element-wise with the (batch, num_classes)
                similarity matrix and used as the MSE target -- presumably
                one-hot labels; confirm against the caller.
            out: Encoder features, chunked the same way as ``outputs``.
            flag: When equal to 0 the class master vectors are recomputed
                before the loss is evaluated. ``masterVector_transpose`` only
                exists after at least one such call.
            epoch: Current epoch, used by the sample-selection schedule.

        Returns:
            Scalar loss tensor.
        """
        has_second_view = len(outputs) > len(index)
        if has_second_view:
            output, output2 = torch.chunk(outputs, 2)
            out1, _ = torch.chunk(out, 2)
        else:
            output = outputs
            out1 = out

        eps = 1e-4
        u = self.u[index]

        if flag == 0:
            if self.beginning:
                # Share (in percent) of each class used for its master vector:
                # 100 at epoch 0, decreasing linearly to 50 at total_epochs.
                percent = math.ceil((50 - (50 / self.total_epochs) * epoch) + 50)
                for class_id, members in enumerate(self.bins):
                    class_u = self.u.detach()[members]
                    bottom_k = int((len(class_u) / 100) * percent)
                    # Indices of the examples with the smallest u in this class.
                    lowest_u = torch.topk(class_u, bottom_k, largest=False, dim=0)[1]
                    self.masterVector[class_id] = torch.mean(
                        self.prevSimilarity[members][lowest_u.view(-1)], dim=0
                    )
            master_norm = self.masterVector.norm(p=2, dim=1, keepdim=True)
            master_normalized = self.masterVector.div(master_norm)
            self.masterVector_transpose = torch.transpose(master_normalized, 0, 1)
            # NOTE(review): the flag is re-set to True and never cleared, so
            # the refresh above runs on every flag == 0 call -- confirm intent.
            self.beginning = True

        # Cache the current features for the next master-vector refresh.
        self.prevSimilarity[index] = out1.detach()

        prediction = F.softmax(output, dim=1)

        out_norm = out1.detach().norm(p=2, dim=1, keepdim=True)
        out_normalized = out1.detach().div(out_norm)

        # Similarity to each class master vector, kept only at the labelled
        # class and only where it is positive.
        similarity = torch.mm(out_normalized, self.masterVector_transpose)
        similarity = similarity * label
        sim_mask = (similarity > 0.000).type(torch.float32)
        similarity = similarity * sim_mask

        u = u * label

        # u is detached here: the cross-entropy term does not train u.
        prediction = torch.clamp((prediction + u.detach()), min=eps, max=1.0)
        loss = torch.mean(-torch.sum(similarity * torch.log(prediction), dim=1))

        # The MSE term is the one through which u receives gradients.
        label_one_hot = self.soft_to_hard(output.detach())
        mse_loss = F.mse_loss((label_one_hot + u), label, reduction="sum") / len(label)
        loss = loss + mse_loss

        if self.ratio_balance > 0:
            avg_prediction = torch.mean(prediction, dim=0)
            prior_distr = 1.0 / self.num_classes * torch.ones_like(avg_prediction)
            avg_prediction = torch.clamp(avg_prediction, min=eps, max=1.0)
            balance_kl = torch.mean(-(prior_distr * torch.log(avg_prediction)).sum(dim=0))
            loss = loss + self.ratio_balance * balance_kl

        if has_second_view and (self.ratio_consistency > 0):
            consistency_loss = self.consistency_loss(output, output2)
            loss = loss + self.ratio_consistency * torch.mean(consistency_loss)

        return loss

    def consistency_loss(self, output1, output2):
        """Return the per-sample KL term between the two sets of scores.

        The softmax of ``output1`` is detached and used as the target
        distribution; ``output2`` contributes its log-softmax. The element-wise
        ``F.kl_div`` values are summed over dim 1.
        """
        preds1 = F.softmax(output1, dim=1).detach()
        preds2 = F.log_softmax(output2, dim=1)
        loss_kldiv = F.kl_div(preds2, preds1, reduction="none")
        return torch.sum(loss_kldiv, dim=1)

    def soft_to_hard(self, x):
        """Return a one-hot tensor marking the arg-max of each row of ``x``.

        The result is allocated on the device of ``x`` and has shape
        ``(len(x), num_classes)``.
        """
        with torch.no_grad():
            one_hot = torch.zeros(len(x), self.num_classes, device=x.device)
            return one_hot.scatter_(1, x.argmax(dim=1).view(-1, 1), 1)
# class SequentialCrossEntropyLoss(torch.nn.CrossEntropyLoss): # def __init__(self, padding_value=0, *args, **kwargs): # super().__init__(*args, **kwargs) # self.padding_value = padding_value # def forward(self, input, target, out_data): # # use in_data timeinstants that match the target # #is_not_padding = in_data[:, -input.shape[1]:] != self.padding_value # is_not_padding = out_data != self.padding_value #do not use where the output was padded # output = super().forward(input[is_not_padding], target[is_not_padding]) # return output