Module `likelihood.models.deep.predictor`

Classes

class GetInsights (model: AutoClassifier, inputs: numpy.ndarray)

Expand source code

class GetInsights:
    """
    A class to analyze the output of a neural network model, including visualizations
    of the weights, t-SNE representation, and feature statistics.

    Parameters
    ----------
    model : `AutoClassifier`
        The trained model to analyze.
    inputs : `np.ndarray`
        The input data for analysis.
    """

    def __init__(self, model: AutoClassifier, inputs: np.ndarray) -> None:
        """
        Initializes the GetInsights class.

        Stores the model and inputs, caches the first encoder layer (skipping a
        leading `InputLayer`) and the first decoder layer together with the first
        array returned by their `get_weights()`, and builds the color palette.

        Parameters
        ----------
        model : `AutoClassifier`
            The trained model to analyze.
        inputs : `np.ndarray`
            The input data for analysis.
        """
        self.inputs = inputs
        self.model = model

        # These are filled in later by `predictor_analyzer` / `_viz_tsne_repr`.
        # Defining them here lets the public `viz_*` methods be called on a fresh
        # instance without raising AttributeError.
        self.y_labels = None
        self.colors = None

        # An InputLayer in first position is skipped in favour of the next layer.
        self.encoder_layer = (
            self.model.encoder.layers[1]
            if isinstance(self.model.encoder.layers[0], InputLayer)
            else self.model.encoder.layers[0]
        )
        self.decoder_layer = self.model.decoder.layers[0]

        # First weight array of each layer (presumably the kernel matrix -- TODO confirm).
        self.encoder_weights = self.encoder_layer.get_weights()[0]
        self.decoder_weights = self.decoder_layer.get_weights()[0]

        self.sorted_names = self._generate_sorted_color_names()

    def _generate_sorted_color_names(self) -> list:
        """
        Build the palette of named colors used by the plots.

        Matplotlib's base and CSS4 colors are ordered by their HSV tuple, names
        whose saturation is not above 0.4 or whose value is below 0.4 are dropped,
        and the remaining names are shuffled.

        Parameters
        ----------
        `None`

        Returns
        -------
        `list` : Color names, in shuffled order.
        """
        palette = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)

        hsv_name_pairs = []
        for name, color in palette.items():
            hsv = tuple(mcolors.rgb_to_hsv(mcolors.to_rgba(color)[:3]))
            hsv_name_pairs.append((hsv, name))
        hsv_name_pairs.sort()

        kept_names = []
        for hsv, name in hsv_name_pairs:
            saturation, value = hsv[1], hsv[2]
            if saturation > 0.4 and value >= 0.4:
                kept_names.append(name)

        random.shuffle(kept_names)
        return kept_names

    def render_html_report(
        self,
        frac: float = 0.2,
        top_k: int = 5,
        threshold_factor: float = 1.0,
        max_rows: int = 5,
        **kwargs,
    ) -> None:
        """
        Generate and display an embedded HTML report in a Jupyter Notebook cell.

        Parameters
        ----------
        frac : `float`, optional
            Fraction of the input data forwarded to `predictor_analyzer`
            (default is `0.2`).
        top_k : `int`, optional
            Forwarded to the graph visualizations (default is `5`).
        threshold_factor : `float`, optional
            Forwarded to the graph visualizations (default is `1.0`).
        max_rows : `int`, optional
            Number of rows of the statistics table to display; `None` or a
            non-positive value displays the whole table (default is `5`).
        **kwargs : `dict`, optional
            Additional keyword arguments forwarded to `predictor_analyzer`.

        Returns
        -------
        `None`
        """
        display(HTML("<h2 style='margin-top:20px;'>📊 Predictor Analysis</h2>"))
        display(
            HTML(
                "<p>This section visualizes how the model predicts the data. "
                "You will see original inputs, reconstructed outputs, and analyses such as t-SNE "
                "that reduce dimensionality to visualize latent space clustering.</p>"
            )
        )
        stats_df = self.predictor_analyzer(frac=frac, **kwargs)

        # The encoder/decoder graph is not drawn for encoders whose name starts with
        # "vae", so its heading is only emitted together with the graph.
        if not self.model.encoder.name.startswith("vae"):
            display(HTML("<h2 style='margin-top:30px;'>🔁 Encoder-Decoder Graph</h2>"))
            display(
                HTML(
                    "<p>This visualization displays the connections between layers in the encoder and decoder. "
                    "Edges with the strongest weights are highlighted to emphasize influential features "
                    "in the model's transformation.</p>"
                )
            )
            self.viz_encoder_decoder_graphs(threshold_factor=threshold_factor, top_k=top_k)

        # The classifier graph is always drawn, so its heading must always precede it.
        display(HTML("<h2 style='margin-top:30px;'>🧠 Classifier Layer Graphs</h2>"))
        display(
            HTML(
                "<p>This visualization shows how features propagate through each dense layer in the classifier. "
                "Only the strongest weighted connections are shown to highlight influential paths through the network.</p>"
            )
        )
        self.viz_classifier_graphs(threshold_factor=threshold_factor, top_k=top_k)

        display(HTML("<h2 style='margin-top:30px;'>📈 Statistical Summary</h2>"))
        display(
            HTML(
                "<p>This table summarizes feature statistics grouped by predicted classes, "
                "including means, standard deviations, and modes, providing insight into "
                "feature distributions across different classes.</p>"
            )
        )

        if max_rows is not None and max_rows > 0:
            stats_to_display = stats_df.head(max_rows)
        else:
            stats_to_display = stats_df

        display(
            stats_to_display.style.set_table_attributes(
                "style='display:inline;border-collapse:collapse;'"
            )
            .set_caption("Feature Summary per Class")
            .set_properties(
                **{
                    "border": "1px solid #ddd",
                    "padding": "8px",
                    "text-align": "center",
                }
            )
        )

        display(
            HTML(
                "<p style='color: gray; margin-top:30px;'>Report generated with "
                "<code>GetInsights</code> class. For detailed customization, extend "
                "<code>render_html_report</code>.</p>"
            )
        )

    def viz_classifier_graphs(self, threshold_factor=1.0, top_k=5, save_path=None):
        """
        Visualize all Dense layers in self.model.classifier as a single directed graph,
        connecting each Dense layer to the next.

        Parameters
        ----------
        threshold_factor : `float`, optional
            An edge is drawn only if its absolute weight exceeds `threshold_factor`
            times the mean absolute weight of its layer (default is `1.0`).
        top_k : `int`, optional
            Number of largest-magnitude weights per layer whose edges are stored
            with `highlight=True` (default is `5`).
        save_path : `str`, optional
            If given, the figure is saved to this path before being shown.

        Returns
        -------
        `None`
        """

        def get_top_k_edges(weights, src_prefix, dst_prefix, k):
            # Return the k largest-|weight| connections as (src, dst, weight) tuples,
            # ordered from strongest to weakest.
            flat_weights = np.abs(weights).ravel()
            # Clamp k: argpartition raises when k exceeds the number of weights, and
            # a `[-0:]` slice would select every weight instead of none.
            k = min(int(k), flat_weights.size)
            if k <= 0:
                return []
            indices = np.argpartition(flat_weights, -k)[-k:]
            top_k_flat_indices = indices[np.argsort(-flat_weights[indices])]

            top_k_edges = []
            for flat_index in top_k_flat_indices:
                i, j = np.unravel_index(flat_index, weights.shape)
                top_k_edges.append((f"{src_prefix}_{i}", f"{dst_prefix}_{j}", weights[i, j]))
            return top_k_edges

        def add_dense_layer_edges(G, weights, layer_idx, threshold_factor, top_k):
            # Add the nodes of one Dense layer and every edge above the threshold.
            src_prefix = f"L{layer_idx}"
            dst_prefix = f"L{layer_idx + 1}"
            input_nodes = [f"{src_prefix}_{i}" for i in range(weights.shape[0])]
            output_nodes = [f"{dst_prefix}_{j}" for j in range(weights.shape[1])]

            G.add_nodes_from(input_nodes + output_nodes)

            threshold = threshold_factor * np.mean(np.abs(weights))
            top_k_edges = get_top_k_edges(weights, src_prefix, dst_prefix, top_k)
            top_k_set = {(u, v) for u, v, _ in top_k_edges}

            for i, src in enumerate(input_nodes):
                for j, dst in enumerate(output_nodes):
                    w = weights[i, j]
                    if abs(w) > threshold:
                        G.add_edge(src, dst, weight=w, highlight=(src, dst) in top_k_set)

        def compute_layout(G):
            # Place each layer in its own column; the layer index is parsed from the
            # "L<idx>_<unit>" node name.
            pos = {}
            layer_nodes = {}

            for node in G.nodes():
                layer_idx = int(node.split("_")[0][1:])
                layer_nodes.setdefault(layer_idx, []).append(node)

            for layer_idx, nodes in sorted(layer_nodes.items()):
                y_positions = np.linspace(1, -1, len(nodes))
                for y, node in zip(y_positions, nodes):
                    pos[node] = (layer_idx * 2, y)

            return pos

        def draw_graph(G, pos, title, save_path=None):
            signed_weights = [G[u][v]["weight"] for u, v in G.edges()]
            if not signed_weights:
                print("No edges to draw.")
                return

            magnitudes = np.abs(signed_weights)
            max_abs = float(magnitudes.max())
            # Colors use a zero-centered norm so the diverging colormap shows both
            # sign and magnitude; widths are scaled by magnitude only.
            color_norm = Normalize(vmin=-max_abs, vmax=max_abs)
            width_norm = Normalize(vmin=float(magnitudes.min()), vmax=max_abs)
            # `cm.get_cmap` was removed in Matplotlib 3.9; `plt.get_cmap` remains.
            cmap = plt.get_cmap("coolwarm")

            edge_colors = [cmap(color_norm(w)) for w in signed_weights]
            edge_widths = [1.0 + 2.0 * width_norm(abs(w)) for w in signed_weights]

            fig, ax = plt.subplots(figsize=(12, 8))

            nx.draw(
                G,
                pos,
                ax=ax,
                with_labels=True,
                node_color="lightgray",
                node_size=1000,
                font_size=8,
                edge_color=edge_colors,
                width=edge_widths,
                arrows=True,
            )

            ax.set_title(title, fontsize=14)

            sm = plt.cm.ScalarMappable(cmap=cmap, norm=color_norm)
            sm.set_array([])
            plt.colorbar(sm, ax=ax, orientation="vertical", label="Edge Weight")

            plt.tight_layout()
            if save_path:
                plt.savefig(save_path)
            plt.show()

        dense_layers = [
            layer
            for layer in self.model.classifier.layers
            if isinstance(layer, tf.keras.layers.Dense)
        ]

        if not dense_layers:
            print("No Dense layers found in classifier.")
            return

        G = nx.DiGraph()
        for idx, layer in enumerate(dense_layers):
            weights = layer.get_weights()[0]
            add_dense_layer_edges(G, weights, idx, threshold_factor, top_k)

        pos = compute_layout(G)
        draw_graph(G, pos, "Classifier Dense Layers Graph", save_path)

    def viz_encoder_decoder_graphs(self, threshold_factor=1.0, top_k=5, save_path=None):
        """
        Visualize Dense layers in self.model.encoder and self.model.decoder as directed graphs.

        One subplot is drawn for each of the encoder and decoder that contains Dense
        layers. When the encoder has Dense layers, a bar chart of the mean absolute
        weight per input of its first Dense layer is shown afterwards.

        Parameters
        ----------
        threshold_factor : `float`, optional
            An edge is drawn only if its absolute weight exceeds `threshold_factor`
            times the mean absolute weight of its layer (default is `1.0`).
        top_k : `int`, optional
            Number of largest-magnitude weights per layer whose edges are stored
            with `highlight=True` (default is `5`).
        save_path : `str`, optional
            If given, the graph figure is saved to this path before being shown.

        Returns
        -------
        `None`
        """
        # `y_labels` is only assigned by `predictor_analyzer`; fall back to None so
        # this method also works when called on its own.
        feature_labels = getattr(self, "y_labels", None)

        def labels_for(n_inputs):
            # Use the supplied feature names only when there is exactly one per input.
            # `is not None` avoids the ambiguous truth value of array-like labels.
            if feature_labels is not None and len(feature_labels) == n_inputs:
                return list(feature_labels)
            return None

        def get_top_k_edges(weights, labels_src, labels_dst_prefix, k):
            # Return the k largest-|weight| connections as (src, dst, weight) tuples.
            flat_weights = np.abs(weights).ravel()
            # Clamp k: argpartition raises when k exceeds the number of weights, and
            # a `[-0:]` slice would select every weight instead of none.
            k = min(int(k), flat_weights.size)
            if k <= 0:
                return []
            indices = np.argpartition(flat_weights, -k)[-k:]
            top_k_flat_indices = indices[np.argsort(-flat_weights[indices])]

            top_k_edges = []
            for flat_index in top_k_flat_indices:
                i, j = np.unravel_index(flat_index, weights.shape)
                src_label = labels_src[i] if isinstance(labels_src, list) else f"{labels_src}_{i}"
                dst_label = f"{labels_dst_prefix}_{j}"
                top_k_edges.append((src_label, dst_label, weights[i, j]))
            return top_k_edges

        def add_layer_to_graph(
            G, weights, labels_src, labels_dst_prefix, x_offset, top_k_set, threshold
        ):
            # Add one layer's nodes (tagged with their column `x`) and its edges;
            # returns the output node names so they can feed the next layer.
            labels_src = list(labels_src)
            src_lookup = set(labels_src)
            output_nodes = [f"{labels_dst_prefix}_{j}" for j in range(weights.shape[1])]

            for node in labels_src + output_nodes:
                if node not in G:
                    G.add_node(node, x=x_offset if node in src_lookup else x_offset + 1)

            for i, src in enumerate(labels_src):
                for j, dst in enumerate(output_nodes):
                    w = weights[i, j]
                    if abs(w) > threshold:
                        G.add_edge(src, dst, weight=w, highlight=(src, dst) in top_k_set)
            return output_nodes

        def layout_graph(G):
            # Spread the nodes of each column evenly between y=1 and y=-1.
            pos = {}
            layers = {}
            for node, data in G.nodes(data=True):
                layers.setdefault(data["x"], []).append(node)

            for x in sorted(layers):
                nodes = layers[x]
                y_positions = np.linspace(1, -1, len(nodes))
                for y, node in zip(y_positions, nodes):
                    pos[node] = (x, y)
            return pos

        def draw_graph(G, title, ax):
            signed_weights = [G[u][v]["weight"] for u, v in G.edges()]
            if not signed_weights:
                return

            magnitudes = np.abs(signed_weights)
            max_abs = float(magnitudes.max())
            # Colors use a zero-centered norm so the diverging colormap shows both
            # sign and magnitude; widths are scaled by magnitude only.
            color_norm = Normalize(vmin=-max_abs, vmax=max_abs)
            width_norm = Normalize(vmin=float(magnitudes.min()), vmax=max_abs)
            # `cm.get_cmap` was removed in Matplotlib 3.9; `plt.get_cmap` remains.
            cmap = plt.get_cmap("coolwarm")

            edge_colors = [cmap(color_norm(w)) for w in signed_weights]
            edge_widths = [1.0 + 2.0 * width_norm(abs(w)) for w in signed_weights]

            pos = layout_graph(G)
            nx.draw(
                G,
                pos,
                ax=ax,
                with_labels=True,
                node_color="lightgray",
                node_size=1000,
                font_size=8,
                edge_color=edge_colors,
                width=edge_widths,
                arrows=True,
            )

            ax.set_title(title, fontsize=12)
            sm = plt.cm.ScalarMappable(cmap=cmap, norm=color_norm)
            sm.set_array([])
            plt.colorbar(sm, ax=ax, orientation="vertical", label="Edge Weight")

        def build_graph(layers, label_prefix, input_labels=None):
            # Chain the Dense layers into one graph; each layer's outputs become the
            # source nodes of the next layer.
            G = nx.DiGraph()
            x_offset = 0
            if input_labels:
                prev_labels = list(input_labels)
            else:
                n_inputs = layers[0].get_weights()[0].shape[0]
                prev_labels = [f"{label_prefix}0_{i}" for i in range(n_inputs)]

            for idx, layer in enumerate(layers):
                weights = layer.get_weights()[0]
                label = f"{label_prefix}{idx+1}"
                threshold = threshold_factor * np.mean(np.abs(weights))
                top_k_edges = get_top_k_edges(weights, prev_labels, label, top_k)
                top_k_set = {(src, dst) for src, dst, _ in top_k_edges}

                prev_labels = add_layer_to_graph(
                    G, weights, prev_labels, label, x_offset, top_k_set, threshold
                )
                x_offset += 2

            return G

        encoder_layers = [
            l for l in self.model.encoder.layers if isinstance(l, tf.keras.layers.Dense)
        ]
        decoder_layers = [
            l for l in self.model.decoder.layers if isinstance(l, tf.keras.layers.Dense)
        ]

        if not encoder_layers and not decoder_layers:
            print("No Dense layers found in encoder or decoder.")
            return

        n_graphs = int(bool(encoder_layers)) + int(bool(decoder_layers))
        fig, axes = plt.subplots(1, n_graphs, figsize=(7 * n_graphs, 6), squeeze=False)

        col = 0
        if encoder_layers:
            input_labels = labels_for(encoder_layers[0].get_weights()[0].shape[0])
            encoder_graph = build_graph(encoder_layers, "E", input_labels)
            draw_graph(encoder_graph, "Encoder", axes[0][col])
            col += 1

        if decoder_layers:
            decoder_graph = build_graph(decoder_layers, "D")
            draw_graph(decoder_graph, "Decoder", axes[0][col])

        fig.suptitle("Encoder & Decoder Dense Layer Graphs", fontsize=15)
        plt.tight_layout(rect=[0, 0, 1, 0.95])

        if save_path:
            plt.savefig(save_path)
        plt.show()

        if encoder_layers:
            # Rank inputs by the mean absolute weight leaving them in the first layer.
            weights = encoder_layers[0].get_weights()[0]
            importances = np.abs(weights).mean(axis=1)
            sorted_idx = np.argsort(-importances)
            names = labels_for(weights.shape[0])
            xticks = [names[i] if names is not None else f"Input_{i}" for i in sorted_idx]

            plt.figure(figsize=(10, 4))
            plt.bar(range(len(importances)), importances[sorted_idx], color="skyblue")
            plt.xticks(range(len(importances)), xticks, rotation=45, ha="right")
            plt.title("Feature Importances (Encoder Input Layer)", fontsize=13)
            plt.ylabel("Mean |Weight|")
            plt.tight_layout()
            plt.show()

    def predictor_analyzer(
        self,
        frac: float = None,
        cmap: str = "viridis",
        aspect: str = "auto",
        highlight: bool = True,
        **kwargs,
    ) -> DataFrame:
        """
        Analyze the model's predictions and visualize data.

        Plots the encoder weights, the (optionally subsampled) inputs next to their
        reconstruction, a t-SNE projection and two RadViz plots, then returns
        per-class statistics of the inputs.

        Parameters
        ----------
        frac : `float`, optional
            Fraction of data to use for analysis (default is `None`).
        cmap : `str`, optional
            The colormap for visualization (default is `"viridis"`).
        aspect : `str`, optional
            Aspect ratio for the visualization (default is `"auto"`).
        highlight : `bool`, optional
            Whether to highlight the maximum weights (default is `True`).
        **kwargs : `dict`, optional
            Additional keyword arguments for customization. `y_labels` is read here
            as the input feature names; all of them are forwarded to `_viz_weights`.

        Returns
        -------
        `DataFrame` : The statistical summary of the input data.
        """
        self._viz_weights(cmap=cmap, aspect=aspect, highlight=highlight, **kwargs)
        # Work on a copy so that subsampling / NaN replacement never touch self.inputs.
        inputs = self.inputs.copy()
        inputs = self._prepare_inputs(inputs, frac)
        self.y_labels = kwargs.get("y_labels", None)
        encoded, reconstructed = self._encode_decode(inputs)
        self._visualize_data(inputs, reconstructed, cmap, aspect)
        self._prepare_data_for_analysis(inputs, reconstructed, encoded, self.y_labels)

        try:
            self._get_tsne_repr(inputs, frac)
            self._viz_tsne_repr(c=self.classification)

            self._viz_radviz(self.data, "class", "Radviz Visualization of Latent Space")
            self._viz_radviz(self.data_input, "class", "Radviz Visualization of Input Data")
        except ValueError:
            # A ValueError from any of the plots above skips the remaining ones.
            warnings.warn(
                "Some functions or processes will not be executed for regression problems.",
                UserWarning,
            )

        return self._statistics(self.data_input)

    def _prepare_inputs(self, inputs: np.ndarray, frac: float) -> np.ndarray:
        """
        Prepare the input data, possibly selecting a fraction of it.

        Parameters
        ----------
        inputs : `np.ndarray`
            The input data.
        frac : `float`
            Fraction of data to use.

        Returns
        -------
        `np.ndarray` : The prepared input data.
        """
        if frac:
            n = int(frac * self.inputs.shape[0])
            indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
            inputs = inputs[indexes]
        inputs[np.isnan(inputs)] = 0.0
        return inputs

    def _encode_decode(self, inputs: np.ndarray) -> tuple:
        """
        Perform encoding and decoding on the input data.

        Parameters
        ----------
        inputs : `np.ndarray`
            The input data.

        Returns
        -------
        `tuple` : The encoded and reconstructed data.
        """
        try:
            mean, log_var = self.model.encoder(inputs)
            encoded = sampling(mean, log_var)
        except:
            encoded = self.model.encoder(inputs)
        reconstructed = self.model.decoder(encoded)
        return encoded, reconstructed

    def _visualize_data(
        self, inputs: np.ndarray, reconstructed: np.ndarray, cmap: str, aspect: str
    ) -> None:
        """
        Show the inputs and their reconstruction side by side as images.

        Both panels share their x and y axes and each gets its own colorbar.

        Parameters
        ----------
        inputs : `np.ndarray`
            The input data.
        reconstructed : `np.ndarray`
            The reconstructed data.
        cmap : `str`
            The colormap for visualization.
        aspect : `str`
            Aspect ratio for the visualization.

        Returns
        -------
        `None`
        """
        panels = (
            (inputs, "Original Data"),
            (reconstructed, "Decoder Layer Reconstruction"),
        )

        shared_ax = None
        for position, (image, heading) in enumerate(panels, start=1):
            if shared_ax is None:
                # The first panel owns the axes that the second one shares.
                shared_ax = plt.subplot(1, 2, position)
            else:
                plt.subplot(1, 2, position, sharex=shared_ax, sharey=shared_ax)
            plt.imshow(image, cmap=cmap, aspect=aspect)
            plt.colorbar()
            plt.title(heading)
        plt.show()

    def _prepare_data_for_analysis(
        self,
        inputs: np.ndarray,
        reconstructed: np.ndarray,
        encoded: np.ndarray,
        y_labels: List[str],
    ) -> None:
        """
        Build the class predictions and the data frames used by later analyses.

        Sets `self.classification` (argmax of the classifier output for the
        concatenated reconstruction and encoding), `self.data` (encoded features)
        and `self.data_input` (input features); both frames get a `"class"` column.

        Parameters
        ----------
        inputs : `np.ndarray`
            The input data.
        reconstructed : `np.ndarray`
            The reconstructed data.
        encoded : `np.ndarray`
            The encoded data.
        y_labels : `List[str]`
            The labels of features; `None` selects generated `"Feature i"` names.

        Returns
        -------
        `None`
        """
        classifier_input = tf.concat([reconstructed, encoded], axis=1)
        scores = self.model.classifier(classifier_input).numpy()
        self.classification = scores.argmax(axis=1)

        latent_columns = [f"Feature {i}" for i in range(encoded.shape[1])]
        if y_labels is None:
            input_columns = [f"Feature {i}" for i in range(inputs.shape[1])]
        else:
            input_columns = y_labels

        self.data = pd.DataFrame(encoded, columns=latent_columns)
        self.data_input = pd.DataFrame(inputs, columns=input_columns)

        for frame in (self.data, self.data_input):
            frame["class"] = self.classification

    def _get_tsne_repr(self, inputs: np.ndarray = None, frac: float = None) -> None:
        """
        Perform t-SNE dimensionality reduction on the input data.

        The inputs are multiplied by the cached encoder weights and the result is
        embedded in two dimensions. Sets `self.latent_representations` and
        `self.reduced_data_tsne`.

        Parameters
        ----------
        inputs : `np.ndarray`
            The input data. When `None`, a copy of `self.inputs` is prepared with
            `_prepare_inputs` (subsampled by `frac`, NaN replaced by zero).
        frac : `float`
            Fraction of data to use; only applied when `inputs` is `None`.

        Returns
        -------
        `None`
        """
        if inputs is None:
            # Reuse the shared preparation step instead of duplicating it here.
            inputs = self._prepare_inputs(self.inputs.copy(), frac)
        self.latent_representations = inputs @ self.encoder_weights

        # t-SNE requires perplexity < n_samples; keep the library default of 30
        # when there is enough data and shrink it for small samples.
        n_samples = self.latent_representations.shape[0]
        perplexity = min(30.0, max(n_samples - 1, 1))

        tsne = TSNE(n_components=2, perplexity=perplexity)
        self.reduced_data_tsne = tsne.fit_transform(self.latent_representations)

    def _viz_tsne_repr(self, **kwargs) -> None:
        """
        Visualize the t-SNE representation of the latent space.

        Parameters
        ----------
        **kwargs : `dict`
            Additional keyword arguments for customization. `c` holds one class
            value per point; `colors` optionally overrides the palette, which
            otherwise is the first entries of `self.sorted_names` (one per class).

        Returns
        -------
        `None`
        """
        c = kwargs.get("c", None)
        x = self.reduced_data_tsne[:, 0]
        y = self.reduced_data_tsne[:, 1]

        if c is None:
            self.colors = None
            plt.scatter(x, y, cmap=None, c=None)
        else:
            # Map every class to its rank 0..k-1 so that each class gets exactly one
            # palette entry and one colorbar tick, even when class ids are not
            # contiguous or the only class is 0.
            classes, class_index = np.unique(np.asarray(c).ravel(), return_inverse=True)
            n_classes = len(classes)
            self.colors = kwargs.get("colors", self.sorted_names[:n_classes])

            plt.scatter(
                x,
                y,
                c=class_index,
                cmap=matplotlib.colors.ListedColormap(self.colors),
                vmin=-0.5,
                vmax=n_classes - 0.5,
            )
            cb = plt.colorbar()
            cb.set_ticks(np.arange(n_classes))
            cb.set_ticklabels(classes)

        plt.title("t-SNE Visualization of Latent Space")
        plt.xlabel("t-SNE 1")
        plt.ylabel("t-SNE 2")
        plt.show()

    def _viz_radviz(self, data: pd.DataFrame, color_column: str, title: str) -> None:
        """
        Visualize the data using RadViz.

        Every feature column (all columns except `color_column`) is rescaled to
        the range [-1, 1] before plotting. Columns holding a single value cannot
        be rescaled and are left out of the plot.

        Parameters
        ----------
        data : `pd.DataFrame`
            The data to visualize.
        color_column : `str`
            The column to use for coloring.
        title : `str`
            The title of the plot.

        Returns
        -------
        `None`
        """
        feature_columns = [col for col in data.columns if col != color_column]
        features = data[feature_columns].astype(float)

        lowest = features.min()
        span = features.max() - lowest
        # A zero span would divide by zero and fill the column with NaN.
        # NOTE(review): assumes `radviz` is pandas.plotting.radviz, where a NaN
        # feature makes every point NaN -- confirm against the file's import.
        usable = span[span != 0].index
        features = features[usable]

        data_normalized = 2.0 * (features - lowest[usable]) / span[usable] - 1
        data_normalized[color_column] = data[color_column]

        # `self.colors` is only set by `_viz_tsne_repr`; fall back to the default palette.
        radviz(data_normalized, color_column, color=getattr(self, "colors", None))
        plt.title(title)
        plt.show()

    def _viz_weights(
        self, cmap: str = "viridis", aspect: str = "auto", highlight: bool = True, **kwargs
    ) -> None:
        """
        Show the cached encoder weights as an image.

        When `highlight` is set, the largest weight of every row is overlaid with
        a second, semi-transparent colormap.

        Parameters
        ----------
        cmap : `str`, optional
            The colormap for visualization (default is `"viridis"`).
        aspect : `str`, optional
            Aspect ratio for the visualization (default is `"auto"`).
        highlight : `bool`, optional
            Whether to highlight the maximum weights (default is `True`).
        **kwargs : `dict`, optional
            Additional keyword arguments for customization: `title`, `y_labels`
            (row tick labels) and `cmap_highlight` (overlay colormap).

        Returns
        -------
        `None`
        """
        heading = kwargs.get("title", "Encoder Layer Weights (Dense Layer)")
        row_labels = kwargs.get("y_labels", None)
        overlay_cmap = kwargs.get("cmap_highlight", "Pastel1")

        weights = self.encoder_weights
        n_rows = weights.shape[0]
        row_max_mask = np.zeros_like(weights, dtype=bool)

        plt.imshow(weights, cmap=cmap, aspect=aspect)
        plt.colorbar()
        plt.title(heading)
        if row_labels is not None:
            plt.yticks(ticks=np.arange(n_rows), labels=row_labels)

        if highlight:
            # Mark the column of the maximum weight in each row.
            row_max_mask[np.arange(n_rows), weights.argmax(axis=1)] = True
            overlay = np.ma.masked_where(~row_max_mask, weights)
            plt.imshow(overlay, cmap=overlay_cmap, alpha=0.5, aspect=aspect)
        plt.show()

    def _statistics(self, data_input: DataFrame) -> DataFrame:
        """
        Compute statistical summaries of the input data.

        Parameters
        ----------
        data_input : `DataFrame`
            The data to compute statistics for.

        Returns
        -------
        `DataFrame` : The statistical summary of the input data.
        """
        data = data_input.copy(deep=True)

        if not pd.api.types.is_string_dtype(data["class"]):
            data["class"] = data["class"].astype(str)

        data.ffill(inplace=True)
        grouped_data = data.groupby("class")

        numerical_stats = grouped_data.agg(["mean", "min", "max", "std", "median"])
        numerical_stats.columns = ["_".join(col).strip() for col in numerical_stats.columns.values]

        def get_mode(x):
            mode_series = x.mode()
            return mode_series.iloc[0] if not mode_series.empty else None

        mode_stats = grouped_data.apply(get_mode, include_groups=False)
        mode_stats.columns = [f"{col}_mode" for col in mode_stats.columns]
        combined_stats = pd.concat([numerical_stats, mode_stats], axis=1)

        return combined_stats.T

A class to analyze the output of a neural network model, including visualizations of the weights, t-SNE representation, and feature statistics.

Parameters

model : AutoClassifier: The trained model to analyze.
inputs : np.ndarray: The input data for analysis.

Initializes the GetInsights class.

Parameters

model : AutoClassifier: The trained model to analyze.
inputs : np.ndarray: The input data for analysis.

Methods

def predictor_analyzer(self, frac: float = None, cmap: str = 'viridis', aspect: str = 'auto', highlight: bool = True, **kwargs) ‑> DataFrame

Expand source code

def predictor_analyzer(
    self,
    frac: float = None,
    cmap: str = "viridis",
    aspect: str = "auto",
    highlight: bool = True,
    **kwargs,
) -> DataFrame:
    """
    Analyze the model's predictions and visualize data.

    Plots the encoder weights, the (optionally subsampled) inputs next to their
    reconstruction, a t-SNE projection and two RadViz plots, then returns
    per-class statistics of the inputs.

    Parameters
    ----------
    frac : `float`, optional
        Fraction of data to use for analysis (default is `None`).
    cmap : `str`, optional
        The colormap for visualization (default is `"viridis"`).
    aspect : `str`, optional
        Aspect ratio for the visualization (default is `"auto"`).
    highlight : `bool`, optional
        Whether to highlight the maximum weights (default is `True`).
    **kwargs : `dict`, optional
        Additional keyword arguments for customization. `y_labels` is read here
        as the input feature names; all of them are forwarded to `_viz_weights`.

    Returns
    -------
    `DataFrame` : The statistical summary of the input data.
    """
    self._viz_weights(cmap=cmap, aspect=aspect, highlight=highlight, **kwargs)
    # Work on a copy so that subsampling / NaN replacement never touch self.inputs.
    inputs = self.inputs.copy()
    inputs = self._prepare_inputs(inputs, frac)
    self.y_labels = kwargs.get("y_labels", None)
    encoded, reconstructed = self._encode_decode(inputs)
    self._visualize_data(inputs, reconstructed, cmap, aspect)
    self._prepare_data_for_analysis(inputs, reconstructed, encoded, self.y_labels)

    try:
        self._get_tsne_repr(inputs, frac)
        self._viz_tsne_repr(c=self.classification)

        self._viz_radviz(self.data, "class", "Radviz Visualization of Latent Space")
        self._viz_radviz(self.data_input, "class", "Radviz Visualization of Input Data")
    except ValueError:
        # A ValueError from any of the plots above skips the remaining ones.
        warnings.warn(
            "Some functions or processes will not be executed for regression problems.",
            UserWarning,
        )

    return self._statistics(self.data_input)

Analyze the model's predictions and visualize data.

Parameters

frac : float, optional: Fraction of data to use for analysis (default is None).
cmap : str, optional: The colormap for visualization (default is "viridis").
aspect : str, optional: Aspect ratio for the visualization (default is "auto").
highlight : bool, optional: Whether to highlight the maximum weights (default is True).
**kwargs : dict, optional: Additional keyword arguments for customization.

Returns

DataFrame : The statistical summary of the input data.

def render_html_report(self, frac: float = 0.2, top_k: int = 5, threshold_factor: float = 1.0, max_rows: int = 5, **kwargs) ‑> None

Expand source code

def render_html_report(
    self,
    frac: float = 0.2,
    top_k: int = 5,
    threshold_factor: float = 1.0,
    max_rows: int = 5,
    **kwargs,
) -> None:
    """
    Generate and display an embedded HTML report in a Jupyter Notebook cell.

    Parameters
    ----------
    frac : `float`, optional
        Fraction of the input data forwarded to `predictor_analyzer`
        (default is `0.2`).
    top_k : `int`, optional
        Forwarded to the graph visualizations (default is `5`).
    threshold_factor : `float`, optional
        Forwarded to the graph visualizations (default is `1.0`).
    max_rows : `int`, optional
        Number of rows of the statistics table to display; `None` or a
        non-positive value displays the whole table (default is `5`).
    **kwargs : `dict`, optional
        Additional keyword arguments forwarded to `predictor_analyzer`.

    Returns
    -------
    `None`
    """
    display(HTML("<h2 style='margin-top:20px;'>📊 Predictor Analysis</h2>"))
    display(
        HTML(
            "<p>This section visualizes how the model predicts the data. "
            "You will see original inputs, reconstructed outputs, and analyses such as t-SNE "
            "that reduce dimensionality to visualize latent space clustering.</p>"
        )
    )
    stats_df = self.predictor_analyzer(frac=frac, **kwargs)

    # The encoder/decoder graph is not drawn for encoders whose name starts with
    # "vae", so its heading is only emitted together with the graph.
    if not self.model.encoder.name.startswith("vae"):
        display(HTML("<h2 style='margin-top:30px;'>🔁 Encoder-Decoder Graph</h2>"))
        display(
            HTML(
                "<p>This visualization displays the connections between layers in the encoder and decoder. "
                "Edges with the strongest weights are highlighted to emphasize influential features "
                "in the model's transformation.</p>"
            )
        )
        self.viz_encoder_decoder_graphs(threshold_factor=threshold_factor, top_k=top_k)

    # The classifier graph is always drawn, so its heading must always precede it.
    display(HTML("<h2 style='margin-top:30px;'>🧠 Classifier Layer Graphs</h2>"))
    display(
        HTML(
            "<p>This visualization shows how features propagate through each dense layer in the classifier. "
            "Only the strongest weighted connections are shown to highlight influential paths through the network.</p>"
        )
    )
    self.viz_classifier_graphs(threshold_factor=threshold_factor, top_k=top_k)

    display(HTML("<h2 style='margin-top:30px;'>📈 Statistical Summary</h2>"))
    display(
        HTML(
            "<p>This table summarizes feature statistics grouped by predicted classes, "
            "including means, standard deviations, and modes, providing insight into "
            "feature distributions across different classes.</p>"
        )
    )

    if max_rows is not None and max_rows > 0:
        stats_to_display = stats_df.head(max_rows)
    else:
        stats_to_display = stats_df

    display(
        stats_to_display.style.set_table_attributes(
            "style='display:inline;border-collapse:collapse;'"
        )
        .set_caption("Feature Summary per Class")
        .set_properties(
            **{
                "border": "1px solid #ddd",
                "padding": "8px",
                "text-align": "center",
            }
        )
    )

    display(
        HTML(
            "<p style='color: gray; margin-top:30px;'>Report generated with "
            "<code>GetInsights</code> class. For detailed customization, extend "
            "<code>render_html_report</code>.</p>"
        )
    )

Generate and display an embedded HTML report in a Jupyter Notebook cell.

def viz_classifier_graphs(self, threshold_factor=1.0, top_k=5, save_path=None)

Expand source code

def viz_classifier_graphs(self, threshold_factor=1.0, top_k=5, save_path=None):
    """
    Visualize all Dense layers in self.model.classifier as a single directed graph,
    connecting each Dense layer to the next.

    Nodes are named ``L{layer}_{unit}``; the outputs of Dense layer ``n`` share
    their names with the inputs of Dense layer ``n + 1``, which is what chains
    the layers together in one graph.

    Parameters
    ----------
    threshold_factor : `float`
        Multiplier applied to the mean absolute weight of each layer. Only
        connections whose absolute weight is strictly greater than that product
        become edges.
    top_k : `int`
        Number of largest-magnitude connections per layer that are flagged with
        the edge attribute ``highlight=True``. Values larger than the number of
        connections are clamped; zero or negative values flag nothing.
    save_path : `str` or `None`
        When truthy, the figure is saved to this path before it is shown.

    Returns
    -------
    `None`

    Notes
    -----
    The ``highlight`` flag is stored on the edges only; the drawing code in this
    method does not read it.
    """

    def get_top_k_edges(weights, src_prefix, dst_prefix, k):
        # Clamp k: np.argpartition raises when k exceeds the number of elements,
        # and k == 0 would turn the ``[-k:]`` slice into "select everything".
        k = min(int(k), weights.size)
        if k <= 0:
            return []

        flat_weights = np.abs(weights).ravel()
        candidates = np.argpartition(flat_weights, -k)[-k:]
        # argpartition does not order the k winners; sort them by magnitude.
        ordered = candidates[np.argsort(-flat_weights[candidates])]
        rows, cols = np.unravel_index(ordered, weights.shape)
        return [
            (f"{src_prefix}_{i}", f"{dst_prefix}_{j}", weights[i, j])
            for i, j in zip(rows, cols)
        ]

    def add_dense_layer_edges(G, weights, layer_idx, threshold_factor, top_k):
        src_prefix = f"L{layer_idx}"
        dst_prefix = f"L{layer_idx + 1}"
        input_nodes = [f"{src_prefix}_{i}" for i in range(weights.shape[0])]
        output_nodes = [f"{dst_prefix}_{j}" for j in range(weights.shape[1])]

        G.add_nodes_from(input_nodes + output_nodes)

        abs_weights = np.abs(weights)
        threshold = threshold_factor * np.mean(abs_weights)
        top_k_set = {
            (u, v) for u, v, _ in get_top_k_edges(weights, src_prefix, dst_prefix, top_k)
        }

        # argwhere walks the matrix in row-major order, i.e. the same order a
        # nested "for i / for j" loop would add the edges in.
        for i, j in np.argwhere(abs_weights > threshold):
            src, dst = input_nodes[i], output_nodes[j]
            G.add_edge(src, dst, weight=weights[i, j], highlight=(src, dst) in top_k_set)

    def compute_layout(G):
        # Group nodes by the layer index encoded in their "L{idx}_{unit}" name.
        layer_nodes = {}
        for node in G.nodes():
            layer_idx = int(node.split("_")[0][1:])
            layer_nodes.setdefault(layer_idx, []).append(node)

        # One column per layer, nodes spread evenly from top (1) to bottom (-1).
        pos = {}
        for layer_idx, nodes in sorted(layer_nodes.items()):
            y_positions = np.linspace(1, -1, len(nodes))
            for y, node in zip(y_positions, nodes):
                pos[node] = (layer_idx * 2, y)

        return pos

    def draw_graph(G, pos, title, save_path=None):
        edges = list(G.edges())
        if not edges:
            print("No edges to draw.")
            return

        signed_weights = [G[u][v]["weight"] for u, v in edges]
        magnitudes = [abs(w) for w in signed_weights]
        max_abs = max(magnitudes)

        # Widths encode magnitude; colours encode the signed weight. The colour
        # scale is symmetric around zero so negative weights are not all clipped
        # to the lowest colour and the colorbar matches what is drawn.
        width_norm = Normalize(vmin=min(magnitudes), vmax=max_abs)
        color_norm = Normalize(vmin=-max_abs, vmax=max_abs)
        # pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in
        # Matplotlib 3.9.
        cmap = plt.get_cmap("coolwarm")

        edge_colors = [cmap(color_norm(w)) for w in signed_weights]
        edge_widths = [1.0 + 2.0 * width_norm(m) for m in magnitudes]

        fig, ax = plt.subplots(figsize=(12, 8))

        nx.draw(
            G,
            pos,
            ax=ax,
            with_labels=True,
            node_color="lightgray",
            node_size=1000,
            font_size=8,
            edge_color=edge_colors,
            width=edge_widths,
            arrows=True,
        )

        ax.set_title(title, fontsize=14)

        sm = plt.cm.ScalarMappable(cmap=cmap, norm=color_norm)
        sm.set_array([])
        fig.colorbar(sm, ax=ax, orientation="vertical", label="Edge Weight")

        fig.tight_layout()
        if save_path:
            fig.savefig(save_path)
        plt.show()

    dense_layers = [
        layer
        for layer in self.model.classifier.layers
        if isinstance(layer, tf.keras.layers.Dense)
    ]

    if not dense_layers:
        print("No Dense layers found in classifier.")
        return

    G = nx.DiGraph()
    for idx, layer in enumerate(dense_layers):
        # get_weights()[0] is taken as the layer's weight matrix, indexed
        # [input_unit, output_unit] by the helpers above.
        weights = layer.get_weights()[0]
        add_dense_layer_edges(G, weights, idx, threshold_factor, top_k)

    pos = compute_layout(G)
    draw_graph(G, pos, "Classifier Dense Layers Graph", save_path)

Visualize all Dense layers in self.model.classifier as a single directed graph, connecting each Dense layer to the next.

def viz_encoder_decoder_graphs(self, threshold_factor=1.0, top_k=5, save_path=None)

Expand source code

def viz_encoder_decoder_graphs(self, threshold_factor=1.0, top_k=5, save_path=None):
    """
    Visualize Dense layers in self.model.encoder and self.model.decoder as directed graphs.

    One subplot is drawn per sub-model that contains at least one Dense layer.
    If the encoder has Dense layers, a second figure follows with a bar chart of
    the mean absolute weight per input of the first encoder Dense layer.

    Parameters
    ----------
    threshold_factor : `float`
        Multiplier applied to the mean absolute weight of each layer. Only
        connections whose absolute weight is strictly greater than that product
        become edges.
    top_k : `int`
        Number of largest-magnitude connections per layer that are flagged with
        the edge attribute ``highlight=True``. Values larger than the number of
        connections are clamped; zero or negative values flag nothing.
    save_path : `str` or `None`
        When truthy, the encoder/decoder graph figure is saved to this path
        before it is shown. The feature-importance bar chart is created
        afterwards and is not saved.

    Returns
    -------
    `None`

    Notes
    -----
    Encoder input nodes are named after ``self.y_labels`` when that attribute
    exists and its length matches the number of encoder inputs; otherwise
    generic ``E0_{i}`` names are used. The ``highlight`` flag is stored on the
    edges only; the drawing code in this method does not read it.
    """

    def get_top_k_edges(weights, labels_src, labels_dst_prefix, k):
        # Clamp k: np.argpartition raises when k exceeds the number of elements,
        # and k == 0 would turn the ``[-k:]`` slice into "select everything".
        k = min(int(k), weights.size)
        if k <= 0:
            return []

        flat_weights = np.abs(weights).ravel()
        candidates = np.argpartition(flat_weights, -k)[-k:]
        # argpartition does not order the k winners; sort them by magnitude.
        ordered = candidates[np.argsort(-flat_weights[candidates])]
        rows, cols = np.unravel_index(ordered, weights.shape)

        explicit_labels = isinstance(labels_src, list)
        return [
            (
                labels_src[i] if explicit_labels else f"{labels_src}_{i}",
                f"{labels_dst_prefix}_{j}",
                weights[i, j],
            )
            for i, j in zip(rows, cols)
        ]

    def add_layer_to_graph(
        G, weights, labels_src, labels_dst_prefix, x_offset, top_k_set, threshold
    ):
        output_nodes = [f"{labels_dst_prefix}_{j}" for j in range(weights.shape[1])]

        # Sources that already exist (outputs of the previous layer) keep the
        # column they were given earlier; only new nodes receive an "x".
        for node in labels_src:
            if node not in G:
                G.add_node(node, x=x_offset)
        for node in output_nodes:
            if node not in G:
                G.add_node(node, x=x_offset + 1)

        # argwhere walks the matrix in row-major order, i.e. the same order a
        # nested "for i / for j" loop would add the edges in.
        for i, j in np.argwhere(np.abs(weights) > threshold):
            src, dst = labels_src[i], output_nodes[j]
            G.add_edge(src, dst, weight=weights[i, j], highlight=(src, dst) in top_k_set)
        return output_nodes

    def layout_graph(G):
        # Bucket nodes by their stored column, then spread each column evenly
        # from top (1) to bottom (-1).
        columns = {}
        for node, data in G.nodes(data=True):
            columns.setdefault(data["x"], []).append(node)

        pos = {}
        for x in sorted(columns):
            nodes = columns[x]
            y_positions = np.linspace(1, -1, len(nodes))
            for y, node in zip(y_positions, nodes):
                pos[node] = (x, y)
        return pos

    def draw_graph(G, title, ax):
        edges = list(G.edges())
        if not edges:
            return

        signed_weights = [G[u][v]["weight"] for u, v in edges]
        magnitudes = [abs(w) for w in signed_weights]
        max_abs = max(magnitudes)

        # Widths encode magnitude; colours encode the signed weight. The colour
        # scale is symmetric around zero so negative weights are not all clipped
        # to the lowest colour and the colorbar matches what is drawn.
        width_norm = Normalize(vmin=min(magnitudes), vmax=max_abs)
        color_norm = Normalize(vmin=-max_abs, vmax=max_abs)
        # pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in
        # Matplotlib 3.9.
        cmap = plt.get_cmap("coolwarm")

        edge_colors = [cmap(color_norm(w)) for w in signed_weights]
        edge_widths = [1.0 + 2.0 * width_norm(m) for m in magnitudes]

        nx.draw(
            G,
            layout_graph(G),
            ax=ax,
            with_labels=True,
            node_color="lightgray",
            node_size=1000,
            font_size=8,
            edge_color=edge_colors,
            width=edge_widths,
            arrows=True,
        )

        ax.set_title(title, fontsize=12)
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=color_norm)
        sm.set_array([])
        ax.figure.colorbar(sm, ax=ax, orientation="vertical", label="Edge Weight")

    def build_graph(layers, label_prefix, input_labels=None):
        G = nx.DiGraph()
        x_offset = 0
        if input_labels:
            prev_labels = input_labels
        else:
            n_inputs = layers[0].get_weights()[0].shape[0]
            prev_labels = [f"{label_prefix}0_{i}" for i in range(n_inputs)]

        for idx, layer in enumerate(layers):
            weights = layer.get_weights()[0]
            label = f"{label_prefix}{idx+1}"
            threshold = threshold_factor * np.mean(np.abs(weights))
            top_k_set = {
                (src, dst)
                for src, dst, _ in get_top_k_edges(weights, prev_labels, label, top_k)
            }

            # The outputs of this layer are the sources of the next one.
            prev_labels = add_layer_to_graph(
                G, weights, prev_labels, label, x_offset, top_k_set, threshold
            )
            x_offset += 2

        return G

    # y_labels is not assigned in __init__, so fall back to None when no other
    # method has set it yet instead of raising AttributeError.
    raw_labels = getattr(self, "y_labels", None)

    def resolve_input_labels(n_inputs):
        # "is not None" + len() also works for array-like labels, whose
        # truthiness is ambiguous; list() lets them be concatenated and indexed.
        if raw_labels is not None and len(raw_labels) == n_inputs:
            return list(raw_labels)
        return None

    encoder_layers = [
        l for l in self.model.encoder.layers if isinstance(l, tf.keras.layers.Dense)
    ]
    decoder_layers = [
        l for l in self.model.decoder.layers if isinstance(l, tf.keras.layers.Dense)
    ]

    if not encoder_layers and not decoder_layers:
        print("No Dense layers found in encoder or decoder.")
        return

    n_graphs = int(bool(encoder_layers)) + int(bool(decoder_layers))
    # squeeze=False keeps `axes` two-dimensional even for a single subplot.
    fig, axes = plt.subplots(1, n_graphs, figsize=(7 * n_graphs, 6), squeeze=False)

    col = 0
    if encoder_layers:
        input_labels = resolve_input_labels(encoder_layers[0].get_weights()[0].shape[0])
        encoder_graph = build_graph(encoder_layers, "E", input_labels)
        draw_graph(encoder_graph, "Encoder", axes[0][col])
        col += 1

    if decoder_layers:
        decoder_graph = build_graph(decoder_layers, "D")
        draw_graph(decoder_graph, "Decoder", axes[0][col])

    fig.suptitle("Encoder & Decoder Dense Layer Graphs", fontsize=15)
    # Leave the top 5% of the figure free for the suptitle.
    fig.tight_layout(rect=[0, 0, 1, 0.95])

    if save_path:
        fig.savefig(save_path)
    plt.show()

    if encoder_layers:
        weights = encoder_layers[0].get_weights()[0]
        # Importance of an input = mean absolute weight over its outgoing links.
        importances = np.abs(weights).mean(axis=1)
        sorted_idx = np.argsort(-importances)
        tick_labels = resolve_input_labels(weights.shape[0])
        xticks = [tick_labels[i] if tick_labels else f"Input_{i}" for i in sorted_idx]

        plt.figure(figsize=(10, 4))
        plt.bar(range(len(importances)), importances[sorted_idx], color="skyblue")
        plt.xticks(range(len(importances)), xticks, rotation=45, ha="right")
        plt.title("Feature Importances (Encoder Input Layer)", fontsize=13)
        plt.ylabel("Mean |Weight|")
        plt.tight_layout()
        plt.show()

Visualize Dense layers in self.model.encoder and self.model.decoder as directed graphs.