Module likelihood.models.deep.bandit

Classes

class MultiBanditNet (state_dim: int,
num_options: int,
num_actions_per_option: int | List[int],
num_neurons: int = 128,
num_layers: int = 1,
activation: torch.nn.modules.module.Module = ReLU())
Expand source code
class MultiBanditNet(nn.Module):
    def __init__(
        self,
        state_dim: int,
        num_options: int,
        num_actions_per_option: int | List[int],
        num_neurons: int = 128,
        num_layers: int = 1,
        activation: nn.Module = nn.ReLU(),
    ):
        super(MultiBanditNet, self).__init__()
        self.state_dim = state_dim
        self.num_options = num_options
        self.num_actions_per_option = num_actions_per_option
        self.num_neurons = num_neurons
        self.num_layers = num_layers
        self.activation = activation

        self.option_network = nn.Sequential(
            nn.Linear(state_dim, self.num_neurons),
            nn.ReLU(),
            nn.Linear(
                self.num_neurons, num_options
            ),  # Output a probability distribution over options
        )

        # Low-level (action) Q-networks for each option with additional linear layers
        self.action_networks = nn.ModuleList()
        for i in range(num_options):
            action_network_layers = [nn.Linear(state_dim, self.num_neurons), self.activation]
            for _ in range(self.num_layers - 1):
                action_network_layers.extend(
                    [nn.Linear(self.num_neurons, self.num_neurons), self.activation]
                )
            num_actions = (
                num_actions_per_option
                if not isinstance(num_actions_per_option, list)
                else num_actions_per_option[i]
            )  # Output Q-values for each action in this option
            action_network_layers.append(nn.Linear(self.num_neurons, num_actions))
            self.action_networks.append(nn.Sequential(*action_network_layers))

        # Option termination network
        self.termination_network = nn.Sequential(
            nn.Linear(state_dim, self.num_neurons),
            nn.ReLU(),
            nn.Linear(self.num_neurons, 1),  # Single output for termination probability (0-1)
            nn.Sigmoid(),
        )

    def forward(self, state):
        if state.dim() == 1:
            state = state.unsqueeze(0)

        batch_size = state.shape[0]
        option_probs = torch.softmax(self.option_network(state), dim=-1)

        action_probs = []
        selected_actions = []

        for i in range(batch_size):
            selected_option = torch.multinomial(option_probs[i], 1).item()

            # Get Q-values for this option
            q_values = self.action_networks[selected_option](state[i].unsqueeze(0))
            action_prob = torch.softmax(q_values, dim=-1)
            action_probs.append(action_prob)
            selected_action = torch.argmax(action_prob, dim=-1)
            selected_actions.append(selected_action)

        if len(action_probs) > 0:
            action_probs = torch.cat(action_probs, dim=0).squeeze(1)
            selected_actions = torch.stack(selected_actions, dim=0).squeeze(1)
        else:
            warnings.warn(
                "The list of action probabilities is empty, initializing with default values.",
                UserWarning,
            )
            action_probs = torch.empty((batch_size, 1))
            selected_actions = torch.zeros(batch_size, dtype=torch.long)

        termination_prob = self.termination_network(state)

        return (
            option_probs,
            action_probs,
            termination_prob,
            torch.argmax(option_probs, dim=-1),  # selected_options
            selected_actions,
        )

Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing them to be nested in a tree structure. You can assign the submodules as regular attributes:

import torch.nn as nn
import torch.nn.functional as F


class Model(nn.Module):
    """Minimal example module: two stacked 5x5 convolutions, each followed by ReLU."""

    def __init__(self) -> None:
        super().__init__()
        # Assigning modules as attributes registers them as submodules.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=20, kernel_size=5)

    def forward(self, x):
        """Apply ``conv1`` then ``conv2``, with a ReLU after each convolution."""
        hidden = F.relu(self.conv1(x))
        activated = F.relu(self.conv2(hidden))
        return activated

Submodules assigned in this way will be registered, and will also have their parameters converted when you call `to()`, etc.

Note

As per the example above, an __init__() call to the parent class must be made before assignment on the child.

Attribute `training` (bool): whether this module is in training or evaluation mode.

Initialize internal Module state, shared by both nn.Module and ScriptModule.

Ancestors

  • torch.nn.modules.module.Module

Methods

def forward(self, state) ‑> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]
Expand source code
def forward(self, state):
    """Sample an option per state and evaluate that option's action head.

    Parameters
    ----------
    state : torch.Tensor
        A single state of shape ``(state_dim,)`` (promoted to a batch of one)
        or a batch of shape ``(batch, state_dim)``.

    Returns
    -------
    tuple
        ``(option_probs, action_probs, termination_prob, selected_options,
        selected_actions)`` where

        * ``option_probs`` is the softmax over options, ``(batch, num_options)``;
        * ``action_probs`` is the softmax of the sampled option's action
          scores, ``(batch, max_actions)``. Rows belonging to an option with
          fewer actions are zero-padded on the right. When ``max_actions`` is
          1 that dimension is squeezed away;
        * ``termination_prob`` is the termination network output for each state;
        * ``selected_options`` is the option sampled for each state, i.e. the
          one that actually produced ``action_probs``, ``(batch,)``;
        * ``selected_actions`` is the arg-max action of that option, ``(batch,)``.
    """
    if state.dim() == 1:
        state = state.unsqueeze(0)

    batch_size = state.shape[0]
    option_probs = torch.softmax(self.option_network(state), dim=-1)
    termination_prob = self.termination_network(state)

    if batch_size == 0:
        # Nothing to sample from: keep the historical placeholder outputs.
        warnings.warn(
            "The list of action probabilities is empty, initializing with default values.",
            UserWarning,
        )
        return (
            option_probs,
            torch.empty((batch_size, 1), device=state.device),
            termination_prob,
            torch.argmax(option_probs, dim=-1),
            torch.zeros(batch_size, dtype=torch.long, device=state.device),
        )

    # One categorical draw per row; this is the option whose action head is
    # evaluated below, so it is also the option reported to the caller.
    selected_options = torch.multinomial(option_probs, 1).squeeze(1)

    # Pad to the widest action head so ragged action counts share one tensor.
    max_actions = max(net[-1].out_features for net in self.action_networks)
    action_probs = option_probs.new_zeros((batch_size, max_actions))
    selected_actions = selected_options.new_zeros(batch_size)

    # Evaluate each action head once, on all the rows that sampled it.
    for option in torch.unique(selected_options).tolist():
        rows = torch.nonzero(selected_options == option, as_tuple=True)[0]
        probs = torch.softmax(self.action_networks[option](state[rows]), dim=-1)
        action_probs[rows, : probs.shape[-1]] = probs
        selected_actions[rows] = torch.argmax(probs, dim=-1)

    # Kept for backward compatibility: a single-action head yields ``(batch,)``.
    action_probs = action_probs.squeeze(1)

    return (
        option_probs,
        action_probs,
        termination_prob,
        selected_options,
        selected_actions,
    )

Define the computation performed at every call.

Should be overridden by all subclasses.

Note

Although the recipe for the forward pass needs to be defined within this function, one should call the `Module` instance afterwards instead of this function, since the former takes care of running the registered hooks while the latter silently ignores them.