Module nkcs.plot

Plots experimental results.

Expand source code
#!/usr/bin/python3
#
# Copyright (C) 2019--2024 Richard Preen <rpreen@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

"""Plots experimental results."""

import logging
import os
from typing import Final, List

import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

from .constants import Constants as Cons
from .perf import read_data

# Enable INFO-level output; stat_summary() and stat() report via logger.info.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("plot")

# Data sets plotted when this module is executed as a script.
FILE_LIST: List[str] = []  # add data file names here (without .dat)

PLOT_BESTS: Final[bool] = True  #: whether to plot the best fitnesses
PLOT_AVERAGES: Final[bool] = False  #: whether to plot the mean fitnesses
USE_TEX: Final[bool] = False  #: whether to use texlive for plot font
CONF: Final[float] = 1  #: 1.96 = 95% confidence; 1 = standard error
ALPHA: Final[float] = 0.3  #: transparency for shading confidence bounds
MS: Final[int] = 5  #: marker size
ME: Final[int] = 2  #: mark every
LW: Final[int] = 1  #: line width
NUM_COLORS: Final[int] = 10  #: number of line colours

if USE_TEX:
    # Use a Palatino serif face for all figure text.
    plt.rc("font", family="serif", serif=["Palatino"])
    # Route text rendering through LaTeX and load amstext in its preamble.
    params = {}
    params["text.usetex"] = True
    params["text.latex.preamble"] = r"\usepackage{amstext}"
    plt.rcParams.update(params)


def get_title() -> str:
    """Build the plot title from the configured constants.

    The title always shows ``Cons.N`` and ``Cons.K``; ``Cons.C`` and
    ``Cons.S`` are appended only when ``Cons.S`` is greater than one.
    When ``USE_TEX`` is enabled the parameter names are wrapped in ``$``
    so that LaTeX typesets them in math mode.
    """
    if not USE_TEX:
        plain = f"N={Cons.N} K={Cons.K}"
        return (plain + f" C={Cons.C} S={Cons.S}") if Cons.S > 1 else plain
    tex = f"$N$={Cons.N} $K$={Cons.K}"
    return (tex + f" $C$={Cons.C} $S$={Cons.S}") if Cons.S > 1 else tex


def get_label(text: str) -> str:
    """Compose a legend label for one plotted series.

    The label starts with the upper-cased ``Cons.ACQUISITION``; unless the
    acquisition is ``"ea"``, the upper-cased ``Cons.MODEL`` is joined to it
    with a hyphen. *text* is then appended after a single space.
    """
    pieces = [Cons.ACQUISITION.upper()]
    if Cons.ACQUISITION != "ea":
        pieces.append(Cons.MODEL.upper())
    return "-".join(pieces) + " " + text


def _plot_series(ax, evals, perf, label: str) -> None:
    """Draw the mean of one fitness series together with its error band.

    Args:
        ax: Matplotlib axes to draw on.
        evals: X-axis values, as returned by ``read_data``.
        perf: Fitness values, as returned by ``read_data``. They are
            averaged over axis 0 (presumably one row per run -- confirm
            against ``read_data``).
        label: Legend entry for the mean line.
    """
    mean = np.mean(perf, axis=0)
    # Half-width of the shaded band: CONF standard errors of the mean.
    # Computed once and reused for both the lower and the upper edge.
    margin = CONF * stats.sem(perf, axis=0)
    ax.plot(
        evals,
        mean,
        linewidth=LW,
        markersize=MS,
        markevery=ME,
        label=label,
    )
    ax.fill_between(evals, mean - margin, mean + margin, alpha=ALPHA)


def plot(filenames: List[str], plotname: str) -> None:
    """Plot performance from multiple sets of runs.

    For every data set the mean best and/or mean average fitness is drawn
    (as selected by ``PLOT_BESTS`` and ``PLOT_AVERAGES``) with a shaded
    band of ``CONF`` standard errors. The figure is saved as
    ``res/<plotname>.pdf``; the output directory is created if missing.

    Args:
        filenames: Data set names, each passed unchanged to ``read_data``.
        plotname: Base name (without extension) of the PDF to write.
    """
    fig = plt.figure(figsize=(6, 3))
    ax = fig.add_subplot(1, 1, 1)
    cm = plt.get_cmap("tab10")
    # NOTE(review): the colour cycle has NUM_COLORS entries but only seven
    # markers are listed -- confirm the combined cycle length is intended.
    cycler = plt.cycler(color=[cm(i / NUM_COLORS) for i in range(NUM_COLORS)])
    cycler += plt.cycler(marker=["s", "o", "^", "x", "*", "+", "X"])
    ax.set_prop_cycle(cycler)
    for data in filenames:
        evals, perf_best, perf_avg = read_data(data)
        if PLOT_BESTS:
            _plot_series(ax, evals, perf_best, get_label("best"))
        if PLOT_AVERAGES:
            _plot_series(ax, evals, perf_avg, get_label("avg"))
    ax.grid(linestyle="dotted", linewidth=1)
    ax.set_xlim(xmin=0)
    ax.legend(loc="best", prop={"size": 10})
    # Title the axes explicitly instead of relying on pyplot's current axes.
    ax.set_title(get_title(), fontsize=14)
    ax.set_xlabel("Evaluations", fontsize=12)
    ax.set_ylabel("Fitness", fontsize=12)
    path: Final[str] = os.path.normpath(f"res/{plotname}.pdf")
    # savefig does not create missing directories, so make sure it exists.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    fig.savefig(path, bbox_inches="tight")


def stat_summary(name: str, array: np.ndarray) -> None:
    """Log descriptive statistics of *array* at INFO level.

    Reports the mean, standard deviation, standard error, number of
    entries, minimum and median, all taken along axis 0, prefixed by *name*.

    NOTE(review): ``np.std`` defaults to ``ddof=0`` whereas ``stats.sem``
    defaults to ``ddof=1``, so SD and SE are based on different variance
    estimators -- confirm this is intended.
    """
    fields = (
        f"MEAN={np.mean(array, axis=0)}",
        f"SD={np.std(array, axis=0)}",
        f"SE={stats.sem(array, axis=0)}",
        f"N={len(array)}",
        f"MIN={np.min(array, axis=0)}",
        f"MEDIAN={np.median(array, axis=0)}",
    )
    logger.info(f"{name}: " + ",".join(fields))


def stat(filename1: str, filename2: str, generation: int) -> None:
    """Compare the best fitnesses of two data sets at one generation.

    Column *generation* of each data set's best-fitness values is
    summarised with ``stat_summary`` (as "A" and "B"), then the two samples
    are compared with ``scipy.stats.ranksums`` and the result is logged.
    """
    # Load both data sets before slicing either of them.
    bests = []
    for source in (filename1, filename2):
        _, perf_best, _ = read_data(source)
        bests.append(perf_best)
    sample_a, sample_b = (best[:, generation] for best in bests)
    for tag, sample in (("A", sample_a), ("B", sample_b)):
        stat_summary(tag, sample)
    statistic, p_value = stats.ranksums(sample_a, sample_b)
    logger.info(
        f"Wilcoxon rank-sums: A vs. B: stat = {statistic:.5f}, p <= {p_value:.5f}\n"
    )


# When run as a script, plot every data set named in FILE_LIST (if any).
if __name__ == "__main__" and FILE_LIST:
    plot(FILE_LIST, "plot")

Global variables

var ALPHA : Final[float]

transparency for shading confidence bounds

var CONF : Final[float]

1.96 = 95% confidence; 1 = standard error

var LW : Final[int]

line width

var ME : Final[int]

mark every

var MS : Final[int]

marker size

var NUM_COLORS : Final[int]

number of line colours

var PLOT_AVERAGES : Final[bool]

whether to plot the mean fitnesses

var PLOT_BESTS : Final[bool]

whether to plot the best fitnesses

var USE_TEX : Final[bool]

whether to use texlive for plot font

Functions

def get_label(text: str) ‑> str

Return the plot label.

Expand source code
def get_label(text: str) -> str:
    """Compose a legend label for one plotted series.

    The label starts with the upper-cased ``Cons.ACQUISITION``; unless the
    acquisition is ``"ea"``, the upper-cased ``Cons.MODEL`` is joined to it
    with a hyphen. *text* is then appended after a single space.
    """
    pieces = [Cons.ACQUISITION.upper()]
    if Cons.ACQUISITION != "ea":
        pieces.append(Cons.MODEL.upper())
    return "-".join(pieces) + " " + text
def get_title() ‑> str

Return the title.

Expand source code
def get_title() -> str:
    """Build the plot title from the configured constants.

    The title always shows ``Cons.N`` and ``Cons.K``; ``Cons.C`` and
    ``Cons.S`` are appended only when ``Cons.S`` is greater than one.
    When ``USE_TEX`` is enabled the parameter names are wrapped in ``$``
    so that LaTeX typesets them in math mode.
    """
    if not USE_TEX:
        plain = f"N={Cons.N} K={Cons.K}"
        return (plain + f" C={Cons.C} S={Cons.S}") if Cons.S > 1 else plain
    tex = f"$N$={Cons.N} $K$={Cons.K}"
    return (tex + f" $C$={Cons.C} $S$={Cons.S}") if Cons.S > 1 else tex
def plot(filenames: List[str], plotname: str) ‑> None

Plot performance from multiple sets of runs.

Expand source code
def _plot_series(ax, evals, perf, label: str) -> None:
    """Draw the mean of one fitness series together with its error band.

    Args:
        ax: Matplotlib axes to draw on.
        evals: X-axis values, as returned by ``read_data``.
        perf: Fitness values, as returned by ``read_data``. They are
            averaged over axis 0 (presumably one row per run -- confirm
            against ``read_data``).
        label: Legend entry for the mean line.
    """
    mean = np.mean(perf, axis=0)
    # Half-width of the shaded band: CONF standard errors of the mean.
    # Computed once and reused for both the lower and the upper edge.
    margin = CONF * stats.sem(perf, axis=0)
    ax.plot(
        evals,
        mean,
        linewidth=LW,
        markersize=MS,
        markevery=ME,
        label=label,
    )
    ax.fill_between(evals, mean - margin, mean + margin, alpha=ALPHA)


def plot(filenames: List[str], plotname: str) -> None:
    """Plot performance from multiple sets of runs.

    For every data set the mean best and/or mean average fitness is drawn
    (as selected by ``PLOT_BESTS`` and ``PLOT_AVERAGES``) with a shaded
    band of ``CONF`` standard errors. The figure is saved as
    ``res/<plotname>.pdf``; the output directory is created if missing.

    Args:
        filenames: Data set names, each passed unchanged to ``read_data``.
        plotname: Base name (without extension) of the PDF to write.
    """
    fig = plt.figure(figsize=(6, 3))
    ax = fig.add_subplot(1, 1, 1)
    cm = plt.get_cmap("tab10")
    # NOTE(review): the colour cycle has NUM_COLORS entries but only seven
    # markers are listed -- confirm the combined cycle length is intended.
    cycler = plt.cycler(color=[cm(i / NUM_COLORS) for i in range(NUM_COLORS)])
    cycler += plt.cycler(marker=["s", "o", "^", "x", "*", "+", "X"])
    ax.set_prop_cycle(cycler)
    for data in filenames:
        evals, perf_best, perf_avg = read_data(data)
        if PLOT_BESTS:
            _plot_series(ax, evals, perf_best, get_label("best"))
        if PLOT_AVERAGES:
            _plot_series(ax, evals, perf_avg, get_label("avg"))
    ax.grid(linestyle="dotted", linewidth=1)
    ax.set_xlim(xmin=0)
    ax.legend(loc="best", prop={"size": 10})
    # Title the axes explicitly instead of relying on pyplot's current axes.
    ax.set_title(get_title(), fontsize=14)
    ax.set_xlabel("Evaluations", fontsize=12)
    ax.set_ylabel("Fitness", fontsize=12)
    path: Final[str] = os.path.normpath(f"res/{plotname}.pdf")
    # savefig does not create missing directories, so make sure it exists.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    fig.savefig(path, bbox_inches="tight")
def stat(filename1: str, filename2: str, generation: int) ‑> None

Compare the best individuals at a specified generation.

Expand source code
def stat(filename1: str, filename2: str, generation: int) -> None:
    """Compare the best fitnesses of two data sets at one generation.

    Column *generation* of each data set's best-fitness values is
    summarised with ``stat_summary`` (as "A" and "B"), then the two samples
    are compared with ``scipy.stats.ranksums`` and the result is logged.
    """
    # Load both data sets before slicing either of them.
    bests = []
    for source in (filename1, filename2):
        _, perf_best, _ = read_data(source)
        bests.append(perf_best)
    sample_a, sample_b = (best[:, generation] for best in bests)
    for tag, sample in (("A", sample_a), ("B", sample_b)):
        stat_summary(tag, sample)
    statistic, p_value = stats.ranksums(sample_a, sample_b)
    logger.info(
        f"Wilcoxon rank-sums: A vs. B: stat = {statistic:.5f}, p <= {p_value:.5f}\n"
    )
def stat_summary(name: str, array: numpy.ndarray) ‑> None

Print descriptive statistics summary of an array.

Expand source code
def stat_summary(name: str, array: np.ndarray) -> None:
    """Log descriptive statistics of *array* at INFO level.

    Reports the mean, standard deviation, standard error, number of
    entries, minimum and median, all taken along axis 0, prefixed by *name*.

    NOTE(review): ``np.std`` defaults to ``ddof=0`` whereas ``stats.sem``
    defaults to ``ddof=1``, so SD and SE are based on different variance
    estimators -- confirm this is intended.
    """
    fields = (
        f"MEAN={np.mean(array, axis=0)}",
        f"SD={np.std(array, axis=0)}",
        f"SE={stats.sem(array, axis=0)}",
        f"N={len(array)}",
        f"MIN={np.min(array, axis=0)}",
        f"MEDIAN={np.median(array, axis=0)}",
    )
    logger.info(f"{name}: " + ",".join(fields))