Source code for passengersim.summaries.pathlegs

from __future__ import annotations

from typing import TYPE_CHECKING, Literal

import pandas as pd

from .generic import GenericSimulationTables, SimulationTableItem
from .pathclasses import SimTabPathClasses
from .tools import aggregate_by_first_dataframe

if TYPE_CHECKING:
    from numpy.typing import ArrayLike

    from passengersim import Simulation



[docs]
def extract_path_legs(sim: Simulation) -> pd.DataFrame:
    """
    Build the path-to-leg membership table for a Simulation.

    Every path in ``sim.eng.paths`` contributes one row per leg it uses,
    in the order the legs appear in the path's ``leg_ids``.

    Parameters
    ----------
    sim : Simulation
        The Simulation object whose engine paths are read.

    Returns
    -------
    pd.DataFrame
        One row per (path, leg) pair, with columns "path_id", "leg_id",
        and "path_seq".  "path_seq" is the zero-based position of the leg
        within its path.
    """
    rows = [
        (path.path_id, leg_id, position)
        for path in sim.eng.paths
        for position, leg_id in enumerate(path.leg_ids)
    ]
    return pd.DataFrame(rows, columns=["path_id", "leg_id", "path_seq"])




[docs]
class SimTabPathLegs(GenericSimulationTables):
    """Container for summary tables and figures extracted from a Simulation.

    This class is a subclass of GenericSimulationTables, which is defined in
    the generic module.  It lists the items that are available in the
    SimulationTables class, and provides type hints and (optionally, but
    ideally) documentation for the data that is stored in each item.
    """

    path_legs: pd.DataFrame = SimulationTableItem(
        aggregation_func=aggregate_by_first_dataframe("path_legs"),
        extraction_func=extract_path_legs,
        doc="Legs on each path.",
    )

    # Re-uses the table item declared on SimTabPathClasses, so the methods
    # below can read path-class results through `self.pathclasses`.
    pathclasses: pd.DataFrame = SimTabPathClasses.pathclasses

    def select_leg_analysis(self, leg_id: int | ArrayLike[int]) -> dict[str, pd.DataFrame]:
        """
        Summarize sales and revenue on the paths that use the selected leg(s).

        Paths are selected when any of their rows in `path_legs` carries one
        of the requested leg ids.  The `pathclasses` rows for those paths are
        then summed three ways: by "orig", by "dest", and by "booking_class".

        Parameters
        ----------
        leg_id : int | ArrayLike[int]
            The leg_id(s) to select.  A single id may be a Python int or a
            NumPy integer scalar; anything else is treated as a collection
            of ids.

        Returns
        -------
        dict[str, pd.DataFrame]
            Keys are "orig", "dest", and "booking_class".  Each value is a
            DataFrame with columns "gt_sold" and "gt_revenue" (sums over the
            selected paths) plus "sold" and "revenue" (those sums divided by
            `n_total_samples`).  Groups with no sales (``gt_sold <= 0``) are
            dropped.
        """
        leg_col = self.path_legs.leg_id
        # NumPy integer scalars (e.g. an id read out of a DataFrame) are not
        # `int` instances; route them to the scalar branch as well, because
        # `Series.isin` rejects non-list-like arguments.
        if isinstance(leg_id, int) or pd.api.types.is_integer(leg_id):
            uses_leg = leg_col == leg_id
        else:
            uses_leg = leg_col.isin(leg_id)
        path_ids = self.path_legs.path_id[uses_leg]

        which_path = self.pathclasses.index.get_level_values("path_id").isin(path_ids)
        df = self.pathclasses.loc[which_path]

        result = {}
        for k in ["orig", "dest", "booking_class"]:
            totals = df.groupby(k)[["gt_sold", "gt_revenue"]].sum()
            totals["sold"] = totals["gt_sold"] / self.n_total_samples
            totals["revenue"] = totals["gt_revenue"] / self.n_total_samples
            result[k] = totals.query("gt_sold > 0")
        return result

    def fig_select_leg_analysis(
        self,
        leg_id: int | ArrayLike[int],
        metric: Literal["bookings", "revenue"] = "bookings",
        *,
        raw_input: dict[str, pd.DataFrame] | None = None,
        width: int = 300,
    ):
        """
        Origins, destinations, and booking classes for passengers on leg(s).

        The origin and destination panels are stacked vertically, and the
        booking class panel is placed beside them.  Each panel has its own
        independent color scale.

        Parameters
        ----------
        leg_id : int | ArrayLike[int]
            The leg_id(s) to select.  A single id may be a Python int or a
            NumPy integer scalar.
        metric : {"bookings", "revenue"}, default "bookings"
            The metric to display.
        raw_input : dict[str, pd.DataFrame], optional
            Precomputed raw input data from the select leg analysis method.
            If not provided, that method will be called to get the data.
        width : int, default 300
            The width of each chart panel.

        Returns
        -------
        alt.HConcatChart or list
            The combined Altair chart.  If combining the panels fails, the
            error is printed to stderr and a two-item list holding the
            origin/destination chart and the booking class chart is returned
            instead.

        Raises
        ------
        ValueError
            If `metric` is not "bookings" or "revenue".
        """
        # Reject a bad metric before doing any data work.
        if metric == "bookings":
            x_field = "sold"
        elif metric == "revenue":
            x_field = "revenue"
        else:
            raise ValueError(f"Unknown metric: {metric}")

        if isinstance(leg_id, int) or pd.api.types.is_integer(leg_id):
            leg_descrip = f"Leg Id {leg_id}"
        else:
            # Materialize once so that counting and positional access work
            # for any collection (a pandas Series indexes by label, and a
            # set does not support indexing at all).
            leg_id = list(leg_id)
            if len(leg_id) == 1:
                leg_descrip = f"Leg Id {leg_id[0]}"
            elif len(leg_id) > 4:
                leg_descrip = f"{len(leg_id)} Selected Leg Ids"
            else:
                leg_descrip = f"Leg Ids {leg_id}"

        if isinstance(raw_input, dict):
            data = raw_input
        else:
            data = self.select_leg_analysis(leg_id)

        import altair as alt

        x = alt.X(x_field)

        charts = []
        for k in ["orig", "dest"]:
            label = k.replace("_", " ").title()
            chart = (
                alt.Chart(data[k].reset_index(), width=width)
                .mark_bar()
                .encode(
                    x=x.title(label),
                    color=alt.Color(k),
                    tooltip=[
                        alt.Tooltip(k, title=label),
                        alt.Tooltip("sold", title="Bookings", format=".4s"),
                        alt.Tooltip("revenue", title="Revenue", format=".4s"),
                    ],
                )
            )
            charts.append(chart)

        orig_dest_chart = alt.vconcat(*charts)

        booking_class_chart = (
            alt.Chart(data["booking_class"].reset_index(), width=width)
            .mark_bar()
            .encode(
                x=x.title("Booking Class"),
                color=alt.Color("booking_class"),
                tooltip=[
                    alt.Tooltip("booking_class", title="Booking Class"),
                    alt.Tooltip("sold", title="Bookings", format=".4s"),
                    alt.Tooltip("revenue", title="Revenue", format=".4s"),
                ],
            )
        )

        try:
            return (
                alt.hconcat(orig_dest_chart, booking_class_chart)
                .resolve_scale(color="independent")
                .properties(
                    title={
                        "text": [f"{metric.title()} on {leg_descrip}"],
                    }
                )
            )
        except Exception as e:
            # Deliberate best-effort fallback: report the problem and hand
            # back the two panels separately rather than failing outright.
            import sys

            print(e, file=sys.stderr)
            return [orig_dest_chart, booking_class_chart]

    def connecting_paths_by_place(self) -> pd.Series:
        """
        Get the number of paths that connect in each place.

        The index of the result are the places that are layovers on one or more
        connecting paths. The values are the number of paths that connect in that
        place (i.e. the number of paths that have a leg with that place as the origin,
        but are not the first leg of the path).  The series is sorted in descending
        order of the number of connecting paths.

        Returns
        -------
        pandas.Series
            Named "n_connecting_paths", with an index named "place".
        """
        if "path_seq" not in self.path_legs.columns:
            # Rebuild the within-path leg position from row order when the
            # table does not carry it.
            path_seq = self.path_legs.groupby("path_id").cumcount().rename("path_seq")
            df = pd.concat([self.path_legs, path_seq], axis=1)
        else:
            df = self.path_legs

        # Legs after the first one on a path depart from a connecting point.
        later_legs = df[df.path_seq > 0]
        # NOTE(review): `legs` is not declared on this class; presumably it is
        # supplied by another summary-table class and is keyed by leg_id --
        # confirm against the combined SimulationTables class.
        with_orig = later_legs.join(self.legs["orig"], on="leg_id", rsuffix="_leg")
        result = with_orig.orig.value_counts()
        return result.rename_axis(index="place").rename("n_connecting_paths")