Module likelihood.tools.numeric_tools

Functions

def ecprint(A: numpy.ndarray) ‑> None
Expand source code
def ecprint(A: ndarray) -> None:
    """Pretty-print an n x (n+1) augmented matrix.

    A "| " separator is inserted before the last column to mark the
    right-hand-side vector of the system.

    Parameters
    ----------
    A : `np.array`
        The augmented matrix (n rows, n + 1 columns).

    Returns
    -------
    `None`
        Prints the matrix to console.
    """

    n = len(A)
    for i in range(0, n):
        # Build the text for one row. The inner loop must be nested here:
        # the original dedented it, so only the last row was formatted, and
        # it printed once per column instead of once per row.
        line = ""
        for j in range(0, n + 1):
            line += str(format(round(A[i][j], 2))) + "\t"
            if j == n - 1:
                line += "| "
        print(line)
    print()

Function that prints the augmented matrix.

Parameters

A : np.array
The augmented matrix.

Returns

None Prints the matrix to console.

def find_multiples(target: int) ‑> tuple[int, int] | None
Expand source code
def find_multiples(target: int) -> tuple[int, int] | None:
    """Find two factors of a given target number.

    Parameters
    ----------
    target : int
        The target number to find factors for.

    Returns
    -------
    tuple[int, int] | None
        If i and i+1 both divide target, returns (i, i+1).
        Otherwise, returns (i, target // i).
        Returns None if no factors are found.
    """
    for i in range(2, target + 1):
        if target % i == 0:
            if (i + 1) <= target and target % (i + 1) == 0:
                return i + 1, target // (i + 1)
            else:
                return i, target // i
    return None

Find two factors of a given target number.

Parameters

target : int
The target number to find factors for.

Returns

tuple[int, int] | None
If both i and i + 1 divide target, returns (i + 1, target // (i + 1)). Otherwise, returns (i, target // i) for the smallest divisor i >= 2. Returns None if target has no divisor greater than 1.
def gauss_elimination(A: numpy.ndarray | list, pr: int = 2) ‑> numpy.ndarray
Expand source code
def gauss_elimination(A: ndarray | list, pr: int = 2) -> list:
    """Computes the Gauss elimination algorithm.

    Parameters
    ----------
    A : `np.array` or `list`
        The n x (n+1) augmented matrix containing the parameters of the
        $n$ equations with the equalities. The input is not modified.

    pr : `int`
        significant numbers of decimals used when printing the solution.

    Returns
    -------
    X : `list`
        The solution of the system of $n$ equations, or `None` when the
        system has no unique solution (a message is printed in that case).
    """

    # Work on a list-of-lists copy. The tuple row swap below
    # (`A[p], A[i] = A[i], A[p]`) silently corrupts numpy arrays because
    # numpy rows are views into the same buffer; copying also avoids
    # mutating the caller's matrix.
    A = [list(row) for row in A]
    n = len(A)
    X = [0 for _ in range(n)]

    for i in range(n - 1):
        # Find a row at or below i with a nonzero entry in column i.
        for p in range(i, n):
            if A[p][i] != 0:
                if p != i:
                    A[p], A[i] = A[i], A[p]
                break
            elif p == (n - 1):
                print("There is no single solution")
                return None

        # Partial pivoting: prefer the first lower row whose entry in
        # column i exceeds the current pivot.
        for j in range(i + 1, n):
            if A[j][i] != 0:
                if A[i][i] < A[j][i]:
                    A[j], A[i] = A[i], A[j]
                break

        # Eliminate column i from every row below the pivot.
        for j in range(i + 1, n):
            if A[i][i] == 0:
                print("There is no single solution")
                return None
            factor = A[j][i] / A[i][i]
            A[j] = [A[j][k] - factor * A[i][k] for k in range(n + 1)]

    if A[n - 1][n - 1] == 0:
        print("There is no single solution")
        return None

    # Back substitution on the upper-triangular system.
    X[n - 1] = A[n - 1][n] / A[n - 1][n - 1]
    for i in range(n - 2, -1, -1):
        s = sum(A[i][j] * X[j] for j in range(i + 1, n))
        X[i] = (A[i][n] - s) / A[i][i]

    ecprint(A)
    print("The solution is:")
    for i in range(n):
        print(f"\tX{i} = {round(X[i], pr)}")

    return X

Computes the Gauss elimination algorithm.

Parameters

A : np.array or list
An array containing the parameters of the $n$ equations with the equalities.
pr : int
significant numbers of decimals.

Returns

X : np.array
The solution of the system of $n$ equations
def get_metrics(dataset, actual_column_name, predicted_column_name, verbose=False)
Expand source code
def get_metrics(dataset, actual_column_name, predicted_column_name, verbose=False):
    """Compute binary-classification metrics from labeled predictions.

    Classes are assumed to be encoded as 0 (negative) and 1 (positive).

    Parameters
    ----------
    dataset : `DataFrame`
        One row per prediction.
    actual_column_name : str
        Name of the column holding the ground-truth class.
    predicted_column_name : str
        Name of the column holding the predicted class.
    verbose : bool
        When True, the metrics are also printed to console.

    Returns
    -------
    dict
        Keys "accuracy", "precision", "recall" (percentages),
        "f1_score" and "kappa" (Cohen's kappa).
    """
    total_predictions = len(dataset)
    # Guard the empty dataset: every ratio below would divide by zero.
    if total_predictions == 0:
        return {"accuracy": 0, "precision": 0, "recall": 0, "f1_score": 0, "kappa": 0}

    actual = dataset[actual_column_name]
    predicted = dataset[predicted_column_name]

    # Confusion-matrix counts, computed vectorized instead of iterrows().
    true_positives = int(((actual == 1) & (predicted == 1)).sum())
    true_negatives = int(((actual == 0) & (predicted == 0)).sum())
    false_positives = int(((actual == 0) & (predicted == 1)).sum())
    false_negatives = int(((actual == 1) & (predicted == 0)).sum())

    accuracy = (true_positives + true_negatives) / total_predictions * 100

    # Precision, recall and F1 fall back to 0 instead of dividing by zero.
    if true_positives + false_positives > 0:
        precision = true_positives / (true_positives + false_positives) * 100
    else:
        precision = 0
    if true_positives + false_negatives > 0:
        recall = true_positives / (true_positives + false_negatives) * 100
    else:
        recall = 0
    if precision + recall > 0:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        f1_score = 0

    # Cohen's kappa. The original left `kappa` undefined (NameError) when
    # both coefficients were zero — default it to 0 instead.
    coeff_1 = (true_positives + false_positives) * (false_positives + true_negatives)
    coeff_2 = (true_positives + false_negatives) * (false_negatives + true_negatives)
    if coeff_1 + coeff_2 > 0:
        kappa = (
            2
            * (true_positives * true_negatives - false_negatives * false_positives)
            / (coeff_1 + coeff_2)
        )
    else:
        kappa = 0

    metrics = {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1_score,
        "kappa": kappa,
    }

    if verbose:
        print(f"Accuracy: {accuracy:.2f}%")
        print(f"Precision: {precision:.2f}%")
        print(f"Recall: {recall:.2f}%")
        print(f"F1-Score: {f1_score:.2f}")
        print(f"Cohen's Kappa: {kappa:.4f}")

    # The original returned None when verbose=True — always return metrics.
    return metrics
def sor_elimination(A: numpy.ndarray,
b: numpy.ndarray,
n: int,
max_iterations: int,
w: float,
error: float = 0.001,
verbose: bool = True) ‑> numpy.ndarray
Expand source code
def sor_elimination(
    A: ndarray,
    b: ndarray,
    n: int,
    max_iterations: int,
    w: float,
    error: float = 1e-3,
    verbose: bool = True,
) -> ndarray:
    """Computes the Successive Over-Relaxation algorithm.

    Parameters
    ----------
    A : `np.array`
        Coefficient matrix of the system of equations.
    b : `np.array`
        Right-hand side vector of the system of equations.
    n : `int`
        Dimension of the system of equations.
    max_iterations : `int`
        Maximum number of iterations allowed.
    w : `float`
        Relaxation parameter (w = 1 reduces to Gauss-Seidel).
    error : `float`, optional
        Desired level of accuracy, default is 1e-3.
    verbose : `bool`, optional
        Whether to print intermediate results, default is True.

    Returns
    -------
    xi : `np.array`
        The solution of the system of equations.

    Raises
    ------
    RuntimeError
        If convergence is not reached within `max_iterations`.
    """
    xin = np.zeros(n)
    for k in range(max_iterations):
        xi = np.zeros(n)
        for i in range(n):
            # True SOR uses the entries already updated in this sweep
            # (xi[:i]); the original used xin[:i], which is Jacobi
            # over-relaxation, not SOR.
            s1 = np.dot(A[i, :i], xi[:i])
            s2 = np.dot(A[i, i + 1 :], xin[i + 1 :])
            xi[i] = (w / A[i, i]) * (b[i] - s1 - s2) + (1.0 - w) * xin[i]

        # Convergence test on the infinity norm of the update.
        difference = np.max(np.abs(xi - xin))
        if verbose:
            print(f"Iteration {k + 1}: xi = {xi}, error = {difference}")
        if difference <= error:
            if verbose:
                print(f"Converged after {k + 1} iterations.")
            return xi
        xin = np.copy(xi)

    raise RuntimeError("Convergence not achieved within the maximum number of iterations.")

Computes the Successive Over-Relaxation algorithm.

Parameters

A : np.array
Coefficient matrix of the system of equations.
b : np.array
Right-hand side vector of the system of equations.
n : int
Dimension of the system of equations.
max_iterations : int
Maximum number of iterations allowed.
w : float
Relaxation parameter.
error : float, optional
Desired level of accuracy, default is 1e-3.
verbose : bool, optional
Whether to print intermediate results, default is True.

Returns

xi : np.array
The solution of the system of equations.
def xi_corr(df: pandas.core.frame.DataFrame) ‑> pandas.core.frame.DataFrame
Expand source code
def xi_corr(df: pd.DataFrame) -> pd.DataFrame:
    """Calculate new coefficient of correlation for all pairs of columns in a `DataFrame`.

    Only numeric columns are considered; the diagonal is fixed at 1.0.

    Parameters
    ----------
    df : `DataFrame`
        Input data containing the variables to be correlated.

    Returns
    -------
    `DataFrame`
        A square dataframe with variable names as both index and columns,
        containing their corresponding correlation coefficients.
    """

    columns = df.select_dtypes(include="number").columns

    # Initialize a square matrix for the correlations; 1.0 on the diagonal.
    correlations = pd.DataFrame(1.0, index=columns, columns=columns)

    # Compute only the upper triangle and mirror it, halving the work.
    # NOTE(review): xicor is not symmetric in general; mirroring keeps the
    # original behavior of reporting xicor(col1, col2) for both entries.
    for i, col1 in enumerate(columns):
        for j, col2 in enumerate(columns):
            if i < j:
                correlation = xicor(df[col1].values, df[col2].values)
                value = round(correlation, 8)
                correlations.loc[col1, col2] = value
                correlations.loc[col2, col1] = value

    return correlations

Calculate new coefficient of correlation for all pairs of columns in a DataFrame.

Parameters

df : DataFrame
Input data containing the variables to be correlated.

Returns

DataFrame A square dataframe with variable names as both index and columns, containing their corresponding correlation coefficients.

def xicor(X: numpy.ndarray, Y: numpy.ndarray, ties: bool = True, random_seed: int = None) ‑> float
Expand source code
def xicor(X: np.ndarray, Y: np.ndarray, ties: bool = True, random_seed: int = None) -> float:
    """
    Calculate Chatterjee's xi coefficient of correlation between two variables.

    Measures how well Y can be expressed as a (not necessarily monotone)
    function of X; it is not symmetric in X and Y.

    Parameters
    ----------
    X : `np.ndarray`
        The first variable to be correlated. Must have at least one dimension.
    Y : `np.ndarray`
        The second variable to be correlated. Must have at least one dimension.
    ties : bool
        Whether to handle ties in X using randomization.
    random_seed : int, optional
        Seed for the random number generator for reproducibility.

    Returns
    -------
    xi : `float`
        The estimated value of the new coefficient of correlation.
        Returns 1.0 for identical arrays, 0.0 for fewer than 2 elements
        or when Y is constant.
    """

    # Early return for identical arrays.
    if np.array_equal(X, Y):
        return 1.0

    # Flatten BEFORE measuring length so multidimensional inputs are
    # counted per element (the original read len(X) of the unflattened array).
    X = np.asarray(X).flatten()
    Y = np.asarray(Y).flatten()
    n = len(X)

    # Early return for cases with fewer than 2 elements.
    if n < 2:
        return 0.0

    if ties:
        np.random.seed(random_seed)  # Reproducibility of the tie-breaking.
        # Break ties in X uniformly at random via a random secondary sort
        # key (Chatterjee, 2021). The original shuffled a fancy-indexed
        # COPY of the rank array, which is a no-op, and double-argsort
        # ranks never contain ties anyway.
        order = np.lexsort((np.random.random(n), X))
        y_sorted = Y[order]

        # r[i] = #{j : Y_j <= Y_i}, l[i] = #{j : Y_j >= Y_i} over the
        # X-sorted sequence; ties in Y are handled by the l-denominator.
        r_counts = np.array([np.sum(y_sorted <= y) for y in y_sorted])
        l_counts = np.array([np.sum(y_sorted >= y) for y in y_sorted])

        denominator = 2.0 * np.sum(l_counts * (n - l_counts))
        if denominator == 0:
            # Y is constant: no dependence can be measured
            # (the original divided by zero here).
            return 0.0
        return 1 - n * np.sum(np.abs(np.diff(r_counts))) / denominator
    else:
        order = np.argsort(X)
        ranks = np.argsort(np.argsort(Y[order]))  # Ranks without randomization.
        return 1 - 3 * np.sum(np.abs(np.diff(ranks))) / (n**2 - 1)

Calculate a generalized coefficient of correlation between two variables.

This coefficient is an extension of Pearson's correlation, accounting for ties with optional randomization.

Parameters

X : np.ndarray
The first variable to be correlated. Must have at least one dimension.
Y : np.ndarray
The second variable to be correlated. Must have at least one dimension.
ties : bool
Whether to handle ties using randomization.
random_seed : int, optional
Seed for the random number generator for reproducibility.

Returns

xi : float
The estimated value of the new coefficient of correlation.