pybind11_ke.module.strategy.NegativeSampling 源代码

# coding:utf-8
#
# pybind11_ke/module/strategy/NegativeSampling.py
#
# git pull from OpenKE-PyTorch by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on May 7, 2023
# updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on Jan 4, 2023
#
# 该脚本定义了 KGE 模型的训练策略.

"""
NegativeSampling - 训练策略类，包含损失函数。
"""

import torch
import typing
from ..loss import Loss
from ..model import Model
from .Strategy import Strategy

class NegativeSampling(Strategy):

	"""
	Bundle a KGE model with a loss function so the pair can be trained as one module.

	Example::

		from pybind11_ke.config import Trainer
		from pybind11_ke.module.model import TransE
		from pybind11_ke.module.loss import MarginLoss
		from pybind11_ke.module.strategy import NegativeSampling
		
		# define the model
		transe = TransE(
			ent_tol = train_dataloader.get_ent_tol(),
			rel_tol = train_dataloader.get_rel_tol(),
			dim = 50, 
			p_norm = 1, 
			norm_flag = True)
		
		# define the loss function
		model = NegativeSampling(
			model = transe, 
			loss = MarginLoss(margin = 1.0),
			batch_size = train_dataloader.get_batch_size()
		)
		
		# train the model
		trainer = Trainer(model = model, data_loader = train_dataloader,
			train_times = 1000, lr = 0.01, use_gpu = True, device = 'cuda:1',
			tester = tester, test = True, valid_interval = 100,
			log_interval = 100, save_interval = 100, save_path = '../../checkpoint/transe.pth')
		trainer.run()
	"""

	def __init__(
		self,
		model: typing.Optional[Model] = None,
		loss: typing.Optional[Loss] = None,
		batch_size: int = 256,
		regul_rate: float = 0.0,
		l3_regul_rate: float = 0.0):

		"""Create a NegativeSampling object.

		:param model: the KGE model
		:type model: :py:class:`pybind11_ke.module.model.Model`
		:param loss: the loss function
		:type loss: :py:class:`pybind11_ke.module.loss.Loss`
		:param batch_size: batch size (number of positive samples per batch)
		:type batch_size: int
		:param regul_rate: coefficient applied to ``model.regularization(data)``
			(described as the weight-decay coefficient)
		:type regul_rate: float
		:param l3_regul_rate: coefficient applied to ``model.l3_regularization()``
		:type l3_regul_rate: float
		"""

		super().__init__()
		#: The KGE model, i.e. :py:class:`pybind11_ke.module.model.Model`
		self.model: typing.Optional[Model] = model
		#: The loss function, i.e. :py:class:`pybind11_ke.module.loss.Loss`
		self.loss: typing.Optional[Loss] = loss
		#: batch size
		self.batch_size: int = batch_size
		#: Weight-decay (regularization) coefficient
		self.regul_rate: float = regul_rate
		#: L3 regularization coefficient
		self.l3_regul_rate: float = l3_regul_rate

	def _get_positive_score(self, score: torch.Tensor) -> torch.Tensor:

		"""
		Extract the scores of the positive samples.

		Because of how the underlying C++ data module lays out a batch, the
		positive scores occupy the first ``batch_size`` positions of ``score``.

		:param score: scores of all samples (treated as a flat tensor here)
		:type score: torch.Tensor
		:returns: scores of the positive samples; for a 1-D ``score`` the
			result has shape ``(batch_size, 1)``
		:rtype: torch.Tensor
		"""

		positive_score = score[:self.batch_size]
		# (batch_size,) -> (1, batch_size) -> (batch_size, 1)
		positive_score = positive_score.view(-1, self.batch_size).permute(1, 0)
		return positive_score

	def _get_negative_score(self, score: torch.Tensor) -> torch.Tensor:

		"""
		Extract the scores of the negative samples.

		Because of how the underlying C++ data module lays out a batch, the
		positive scores occupy the first ``batch_size`` positions of ``score``
		and the negative scores follow them.

		:param score: scores of all samples (treated as a flat tensor here)
		:type score: torch.Tensor
		:returns: scores of the negative samples, one row per positive sample
		:rtype: torch.Tensor
		"""

		negative_score = score[self.batch_size:]
		# The remainder is read as consecutive groups of batch_size scores;
		# transposing puts every score belonging to one batch position on one row.
		negative_score = negative_score.view(-1, self.batch_size).permute(1, 0)
		return negative_score

	def forward(self, data: dict[str, typing.Union[torch.Tensor,str]]) -> torch.Tensor:

		"""Compute the final loss value.

		Subclasses of :py:class:`torch.nn.Module` must override
		:py:meth:`torch.nn.Module.forward`; this is the computation run on
		every call.

		:param data: the batch data, passed unchanged to the model
		:type data: dict[str, typing.Union[torch.Tensor,str]]
		:returns: the loss value, plus the regularization terms whose
			coefficients are non-zero
		:rtype: torch.Tensor
		"""

		score = self.model(data)
		p_score = self._get_positive_score(score)
		n_score = self._get_negative_score(score)
		loss_res = self.loss(p_score, n_score)
		# Add out-of-place: the tensor returned by the loss module is part of
		# the autograd graph, and mutating it in place is discouraged.
		if self.regul_rate != 0:
			loss_res = loss_res + self.regul_rate * self.model.regularization(data)
		if self.l3_regul_rate != 0:
			loss_res = loss_res + self.l3_regul_rate * self.model.l3_regularization()
		return loss_res

def get_negative_sampling_hpo_config() -> dict[str, dict[str, typing.Any]]:

	"""Return the default hyper-parameter optimization config of :py:class:`NegativeSampling`.

	The default configuration is::

		parameters_dict = {
			'strategy': {
				'value': 'NegativeSampling'
			},
			'regul_rate': {
				'value': 0.0
			},
			'l3_regul_rate': {
				'value': 0.0
			}
		}

	A new dictionary is built on every call, so callers may modify the
	result freely.

	:returns: the default hyper-parameter optimization config of :py:class:`NegativeSampling`
	:rtype: dict[str, dict[str, typing.Any]]
	"""

	return {
		'strategy': {
			'value': 'NegativeSampling'
		},
		'regul_rate': {
			'value': 0.0
		},
		'l3_regul_rate': {
			'value': 0.0
		}
	}
pybind11_ke.module.strategy.NegativeSampling 源代码

Docs