Module likelihood.models.deep.bandit
Classes
class MultiBanditNet (state_dim: int,
num_options: int,
num_actions_per_option: int | List[int],
num_neurons: int = 128,
num_layers: int = 1,
activation: torch.nn.modules.module.Module = ReLU())-
Expand source code
class MultiBanditNet(nn.Module): def __init__( self, state_dim: int, num_options: int, num_actions_per_option: int | List[int], num_neurons: int = 128, num_layers: int = 1, activation: nn.Module = nn.ReLU(), ): super(MultiBanditNet, self).__init__() self.state_dim = state_dim self.num_options = num_options self.num_actions_per_option = num_actions_per_option self.num_neurons = num_neurons self.num_layers = num_layers self.activation = activation self.option_network = nn.Sequential( nn.Linear(state_dim, self.num_neurons), nn.ReLU(), nn.Linear( self.num_neurons, num_options ), # Output a probability distribution over options ) # Low-level (action) Q-networks for each option with additional linear layers self.action_networks = nn.ModuleList() for i in range(num_options): action_network_layers = [nn.Linear(state_dim, self.num_neurons), self.activation] for _ in range(self.num_layers - 1): action_network_layers.extend( [nn.Linear(self.num_neurons, self.num_neurons), self.activation] ) num_actions = ( num_actions_per_option if not isinstance(num_actions_per_option, list) else num_actions_per_option[i] ) # Output Q-values for each action in this option action_network_layers.append(nn.Linear(self.num_neurons, num_actions)) self.action_networks.append(nn.Sequential(*action_network_layers)) # Option termination network self.termination_network = nn.Sequential( nn.Linear(state_dim, self.num_neurons), nn.ReLU(), nn.Linear(self.num_neurons, 1), # Single output for termination probability (0-1) nn.Sigmoid(), ) def forward(self, state): if state.dim() == 1: state = state.unsqueeze(0) batch_size = state.shape[0] option_probs = torch.softmax(self.option_network(state), dim=-1) action_probs = [] selected_actions = [] for i in range(batch_size): selected_option = torch.multinomial(option_probs[i], 1).item() # Get Q-values for this option q_values = self.action_networks[selected_option](state[i].unsqueeze(0)) action_prob = torch.softmax(q_values, dim=-1) 
action_probs.append(action_prob) selected_action = torch.argmax(action_prob, dim=-1) selected_actions.append(selected_action) if len(action_probs) > 0: action_probs = torch.cat(action_probs, dim=0).squeeze(1) selected_actions = torch.stack(selected_actions, dim=0).squeeze(1) else: warnings.warn( "The list of action probabilities is empty, initializing with default values.", UserWarning, ) action_probs = torch.empty((batch_size, 1)) selected_actions = torch.zeros(batch_size, dtype=torch.long) termination_prob = self.termination_network(state) return ( option_probs, action_probs, termination_prob, torch.argmax(option_probs, dim=-1), # selected_options selected_actions, )Base class for all neural network modules.
Your models should also subclass this class.
Modules can also contain other Modules, allowing them to be nested in a tree structure. You can assign the submodules as regular attributes::
    import torch.nn as nn
    import torch.nn.functional as F

    class Model(nn.Module):
        def __init__(self) -> None:
            super().__init__()
            self.conv1 = nn.Conv2d(1, 20, 5)
            self.conv2 = nn.Conv2d(20, 20, 5)

        def forward(self, x):
            x = F.relu(self.conv1(x))
            return F.relu(self.conv2(x))

Submodules assigned in this way will be registered, and will also have their parameters converted when you call `to()`, etc.

Note
As per the example above, an `__init__()` call to the parent class must be made before assignment on the child.

Variables
training (bool): Boolean represents whether this module is in training or evaluation mode.
Initialize internal Module state, shared by both nn.Module and ScriptModule.
Ancestors
- torch.nn.modules.module.Module
Methods
def forward(self, state) ‑> Callable[..., Any]-
Expand source code
def forward(self, state):
    """Sample an option per state and evaluate that option's action head.

    Args:
        state: Tensor whose last dimension is the state features. A 1-D
            tensor is treated as a batch of one.

    Returns:
        A 5-tuple ``(option_probs, action_probs, termination_prob,
        selected_options, selected_actions)``:

        * ``option_probs``: softmax of the option logits, one row per state.
        * ``action_probs``: softmax of the Q-values of the head chosen for
          each state. When heads have different action counts, rows are
          zero-padded on the right to the widest head.
        * ``termination_prob``: output of the termination network.
        * ``selected_options``: the option sampled for each state, i.e. the
          index of the head that produced that row of ``action_probs``.
        * ``selected_actions``: argmax of each row of ``action_probs``.
    """
    if state.dim() == 1:
        state = state.unsqueeze(0)
    batch_size = state.shape[0]

    option_probs = torch.softmax(self.option_network(state), dim=-1)
    termination_prob = self.termination_network(state)

    if batch_size == 0:
        # Nothing to sample from; keep the original fallback shapes, but
        # allocate on the same device as the input.
        warnings.warn(
            "The list of action probabilities is empty, initializing with default values.",
            UserWarning,
        )
        action_probs = option_probs.new_empty((batch_size, 1))
        selected_options = torch.zeros(
            batch_size, dtype=torch.long, device=state.device
        )
        selected_actions = torch.zeros(
            batch_size, dtype=torch.long, device=state.device
        )
        return (
            option_probs,
            action_probs,
            termination_prob,
            selected_options,
            selected_actions,
        )

    # One categorical draw per row.
    selected_options = torch.multinomial(option_probs, 1).squeeze(1)

    counts = self.num_actions_per_option
    if isinstance(counts, (list, tuple)):
        max_actions = max(counts[: len(self.action_networks)])
    else:
        max_actions = counts

    action_probs = option_probs.new_zeros((batch_size, max_actions))
    selected_actions = torch.zeros(
        batch_size, dtype=torch.long, device=state.device
    )
    # Evaluate each sampled head once on all rows that picked it.
    for option_idx in torch.unique(selected_options).tolist():
        rows = (selected_options == option_idx).nonzero(as_tuple=True)[0]
        q_values = self.action_networks[option_idx](state[rows])
        probs = torch.softmax(q_values, dim=-1)
        # Narrower heads leave the trailing columns at zero probability.
        action_probs[rows, : probs.shape[-1]] = probs
        selected_actions[rows] = torch.argmax(probs, dim=-1)

    # Kept from the original: a single-action output collapses to 1-D.
    action_probs = action_probs.squeeze(1)

    return (
        option_probs,
        action_probs,
        termination_prob,
        selected_options,
        selected_actions,
    )


# Define the computation performed at every call.
Should be overridden by all subclasses.
Note
Although the recipe for forward pass needs to be defined within this function, one should call the `Module` instance afterwards instead of this, since the former takes care of running the registered hooks while the latter silently ignores them.