from __future__ import annotations
from collections import defaultdict
from typing import TYPE_CHECKING, Literal
import pandas as pd
from passengersim.database import common_queries
from passengersim.reporting import report_figure
from .generic import (
DatabaseTableItem,
GenericSimulationTables,
SimulationTableItem,
)
from .tools import aggregate_by_concat_dataframe
if TYPE_CHECKING:
import altair as alt
from passengersim import Simulation
from . import SimulationTables
[docs]
def aggregate_carriers(summaries: list[SimulationTables]) -> pd.DataFrame | None:
    """Average the raw carrier-level summary tables of several summaries.

    Parameters
    ----------
    summaries : list[SimulationTables]
        Objects exposing a ``_raw_carriers`` attribute.  Entries whose
        ``_raw_carriers`` is None are skipped and do not count toward the
        divisor of the average.

    Returns
    -------
    pd.DataFrame or None
        The element-wise mean of the numeric columns across all available
        tables, with the ``control``, ``truncation_rule`` and ``rm_system``
        attributes restored as regular columns.  A row missing from some of
        the tables contributes zero for those tables.  None is returned when
        no summary has a carriers table.
    """
    # These keys represent qualitative string attributes that should be
    # consistent across all the tables we are collecting to aggregate.  They
    # are moved into the index so that only the numeric values are summed and
    # averaged within each group, because getting the "average" of these
    # string attributes doesn't make sense numerically.  But conceptually the
    # average of strings that are all the same is just that string, so they
    # are moved back to columns at the end.
    key_columns = ["control", "truncation_rule", "rm_system"]

    raw_frames = (s._raw_carriers for s in summaries)
    frames = [
        frame.set_index(key_columns, append=True)
        for frame in raw_frames
        if frame is not None
    ]
    if not frames:
        return None

    # Sum with fill_value=0 so rows absent from one table do not turn the
    # running total into NaN, then divide by the number of tables.
    total = frames[0]
    for frame in frames[1:]:
        total = total.add(frame, fill_value=0)
    return (total / len(frames)).reset_index(key_columns)
[docs]
class SimTabCarriers(GenericSimulationTables):
    """Container for carrier-level tables and figures extracted from a Simulation.

    This class is a subclass of GenericSimulationTables, which is defined in
    the generic module.  It lists the items that are available in the
    SimulationTables class, and provides type hints and (optionally, but
    ideally) documentation for the data that is stored in each item.
    """

    # Carrier-level summary table.  The computed fields are ratios derived
    # from the stored columns of the same table.
    carriers: pd.DataFrame = SimulationTableItem(
        aggregation_func=aggregate_carriers,
        extraction_func=extract_carriers,
        computed_fields={
            "avg_price": "avg_rev / avg_sold",
            "yield": "avg_rev / rpm",
            "rasm": "avg_rev / asm",
            "sys_lf": "100.0 * rpm / asm",
            "local_pct_leg_pax": "100.0 * avg_local_leg_pax / avg_total_leg_pax",
            "local_pct_bookings": "100.0 * avg_local_leg_pax / avg_sold",
        },
        doc="Carrier-level summary data.",
    )

    # Per-sample carrier history, loaded through a database query.
    carrier_history: pd.DataFrame | None = DatabaseTableItem(
        aggregation_func=aggregate_by_concat_dataframe("carrier_history"),
        query_func=common_queries.carrier_history,
        doc="Carrier-level summary data from each sample.",
    )

    # Per-sample carrier history, extracted from the simulation itself.
    carrier_history2: pd.DataFrame | None = SimulationTableItem(
        aggregation_func=aggregate_by_concat_dataframe("carrier_history2"),
        extraction_func=extract_carrier_history2,
        doc="Carrier-level summary data from each sample, new version with counters in CoreCarrier.",
    )

    forecast_accuracy: pd.DataFrame | None = SimulationTableItem(
        aggregation_func=aggregate_by_concat_dataframe("forecast_accuracy"),
        extraction_func=extract_forecast_accuracy,
        doc="Summary of forecast history, based on UA's EDGAR approach",
    )

    def _fig_carrier_attribute(
        self,
        raw_df: bool,
        load_measure: str,
        measure_name: str,
        measure_format: str = ".2f",
        orient: Literal["h", "v"] = "h",
        title: str | None = None,
        also_df: bool = False,
    ) -> alt.Chart | pd.DataFrame | tuple[alt.Chart, pd.DataFrame]:
        """Build a bar chart of one column of the ``carriers`` table.

        This is the shared implementation behind the public single-measure
        ``fig_carrier_*`` methods.

        Parameters
        ----------
        raw_df : bool
            Return the data for the figure as a pandas DataFrame instead of
            generating the figure itself.
        load_measure : str
            Name of the column of ``self.carriers`` to plot.
        measure_name : str
            Human-readable label used for the measure's axis and tooltip.
        measure_format : str, default ".2f"
            Format specifier passed to the ``format`` option of the axis,
            tooltip and bar labels.
        orient : {"h", "v"}, default "h"
            "v" draws vertical bars (carriers on the x-axis); any other value
            draws horizontal bars (carriers on the y-axis).
        title : str, optional
            Title for the figure.  No title is set when this is None or empty.
        also_df : bool, default False
            Return a ``(figure, DataFrame)`` tuple instead of just the figure.

        Returns
        -------
        alt.Chart | pd.DataFrame | tuple[alt.Chart, pd.DataFrame]
        """
        carriers = self.carriers
        if "rm_system" in carriers.columns:
            df = carriers.reset_index()[["carrier", load_measure, "rm_system"]]
        else:
            # TODO: remove this once no longer using older cached data
            df = carriers.reset_index()[["carrier", load_measure]].assign(rm_system="Unknown")
        if raw_df:
            return df

        # Deferred import: altair is only needed when a chart is drawn.
        import altair as alt

        chart = alt.Chart(df)
        # Colour and tooltip encodings do not depend on the orientation.
        color = alt.Color("rm_system:N", title="RM System")
        tooltip = [
            alt.Tooltip("carrier", title="Carrier"),
            alt.Tooltip("rm_system", title="RM System"),
            alt.Tooltip(f"{load_measure}:Q", title=measure_name, format=measure_format),
        ]
        if orient == "v":
            bars = chart.mark_bar().encode(
                x=alt.X("carrier:N", title="Carrier"),
                y=alt.Y(f"{load_measure}:Q", title=measure_name, axis=alt.Axis(format=measure_format)).stack("zero"),
                color=color,
                tooltip=tooltip,
            )
            # Value labels drawn just inside the top of each bar.
            text = chart.mark_text(dx=0, dy=3, color="white", baseline="top").encode(
                x=alt.X("carrier:N", title="Carrier"),
                y=alt.Y(f"{load_measure}:Q", title=measure_name, axis=alt.Axis(format=measure_format)).stack("zero"),
                text=alt.Text(f"{load_measure}:Q", format=measure_format),
            )
        else:
            bars = chart.mark_bar().encode(
                y=alt.Y("carrier:N", title="Carrier"),
                x=alt.X(f"{load_measure}:Q", title=measure_name, axis=alt.Axis(format=measure_format)).stack("zero"),
                color=color,
                tooltip=tooltip,
            )
            # Value labels drawn just inside the right-hand end of each bar.
            text = chart.mark_text(dx=-5, dy=0, color="white", baseline="middle", align="right").encode(
                y=alt.Y("carrier:N", title="Carrier"),
                x=alt.X(f"{load_measure}:Q", title=measure_name, axis=alt.Axis(format=measure_format)).stack("zero"),
                text=alt.Text(f"{load_measure}:Q", format=measure_format),
            )
        fig = (
            (bars + text)
            .properties(
                width=500,
                # The height grows with the number of rows being plotted.
                height=10 + 20 * len(df),
            )
            .configure_axis(
                labelFontSize=12,
                titleFontSize=12,
            )
            .configure_legend(
                titleFontSize=12,
                labelFontSize=15,
            )
        )
        if title:
            fig.title = title
        if also_df:
            return fig, df
        return fig

    @report_figure
    def fig_carrier_load_factors(
        self,
        load_measure: Literal["sys_lf", "avg_leg_lf"] = "sys_lf",
        *,
        raw_df: bool = False,
        also_df: bool = False,
        title: str | None = "_default_",
    ):
        """Generate a figure showing carrier load factors.

        Parameters
        ----------
        load_measure : {"sys_lf", "avg_leg_lf"}, default "sys_lf"
            Column of the ``carriers`` table to plot.  "sys_lf" is labelled
            "System Load Factor"; any other value is labelled "Leg Load Factor".
        raw_df : bool, default False
            Return the raw data for this figure as a pandas DataFrame, instead
            of generating the figure itself.
        also_df : bool, default False
            Return the raw data in addition to the figure, as a
            ``(figure, DataFrame)`` tuple.
        title : str, optional
            Title for the figure.  The default sentinel "_default_" yields a
            title derived from the selected measure.
        """
        measure_name = "System Load Factor" if load_measure == "sys_lf" else "Leg Load Factor"
        return self._fig_carrier_attribute(
            raw_df,
            load_measure,
            measure_name,
            title=f"Carrier {measure_name}s" if title == "_default_" else title,
            also_df=also_df,
        )

    @report_figure
    def fig_carrier_revenues(
        self, *, raw_df: bool = False, also_df: bool = False, title: str | None = "Carrier Revenues"
    ):
        """Generate a figure showing the ``avg_rev`` column for each carrier.

        Parameters
        ----------
        raw_df : bool, default False
            Return the raw data for this figure as a pandas DataFrame, instead
            of generating the figure itself.
        also_df : bool, default False
            Return the raw data in addition to the figure, as a
            ``(figure, DataFrame)`` tuple.
        title : str, optional
            Title for the figure.
        """
        return self._fig_carrier_attribute(raw_df, "avg_rev", "Average Revenue", "$.4s", title=title, also_df=also_df)

    @report_figure
    def fig_carrier_yields(self, *, raw_df: bool = False, also_df: bool = False, title: str | None = "Carrier Yields"):
        """Generate a figure showing carrier yields.

        Parameters
        ----------
        raw_df : bool, default False
            Return the raw data for this figure as a pandas DataFrame, instead
            of generating the figure itself.
        also_df : bool, default False
            Return the raw data in addition to the figure, as a
            ``(figure, DataFrame)`` tuple.
        title : str, optional
            Title for the figure.

        Notes
        -----
        Yield is defined as revenue per revenue passenger-mile. It differs from
        RASM (revenue per available seat mile) in that it only considers revenue
        and miles from paying passengers. If a seat is flown empty, it does not
        generate revenue or contribute to RPM, so it does not affect yield, but it
        does reduce RASM since it contributes to ASM. Yield is often considered a
        better measure of the price level that a carrier is achieving, while RASM
        is a better measure of overall revenue efficiency. Both measures are useful
        for understanding carrier performance, and they can sometimes move in
        different directions, so it's helpful to look at both.
        """
        return self._fig_carrier_attribute(raw_df, "yield", "Average Yield", "$.4f", title=title, also_df=also_df)

    @report_figure
    def fig_carrier_rasm(
        self,
        *,
        raw_df: bool = False,
        also_df: bool = False,
        title: str | None = "Carrier Revenue per Available Seat Mile (RASM)",
    ):
        """Generate a figure showing carrier revenue per available seat mile.

        RASM is the ``rasm`` computed field of the ``carriers`` table, i.e.
        ``avg_rev / asm``.

        Parameters
        ----------
        raw_df : bool, default False
            Return the raw data for this figure as a pandas DataFrame, instead
            of generating the figure itself.
        also_df : bool, default False
            Return the raw data in addition to the figure, as a
            ``(figure, DataFrame)`` tuple.
        title : str, optional
            Title for the figure.
        """
        return self._fig_carrier_attribute(
            raw_df,
            "rasm",
            "Revenue per Available Seat Mile",
            "$.4f",
            title=title,
            also_df=also_df,
        )

    @report_figure
    def fig_carrier_total_bookings(
        self: SimulationTables,
        *,
        raw_df: bool = False,
        also_df: bool = False,
        title: str | None = "Carrier Total Bookings",
    ):
        """Generate a figure showing the ``avg_sold`` column for each carrier.

        Parameters
        ----------
        raw_df : bool, default False
            Return the raw data for this figure as a pandas DataFrame, instead
            of generating the figure itself.
        also_df : bool, default False
            Return the raw data in addition to the figure, as a
            ``(figure, DataFrame)`` tuple.
        title : str, optional
            Title for the figure.
        """
        return self._fig_carrier_attribute(raw_df, "avg_sold", "Total Bookings", ".4s", title=title, also_df=also_df)

    @report_figure
    def fig_carrier_local_share(
        self,
        load_measure: Literal["bookings", "leg_pax"] = "bookings",
        *,
        raw_df: bool = False,
        also_df: bool = False,
        title: str | None = "_default_",
    ):
        """Generate a figure showing the local share of each carrier's traffic.

        Parameters
        ----------
        load_measure : {"bookings", "leg_pax"}, default "bookings"
            "bookings" plots the ``local_pct_bookings`` column; any other
            value plots the ``local_pct_leg_pax`` column.
        raw_df : bool, default False
            Return the raw data for this figure as a pandas DataFrame, instead
            of generating the figure itself.
        also_df : bool, default False
            Return the raw data in addition to the figure, as a
            ``(figure, DataFrame)`` tuple.
        title : str, optional
            Title for the figure.  The default sentinel "_default_" yields a
            title derived from the selected measure.
        """
        by_bookings = load_measure == "bookings"
        measure_name = "Local Percent of Bookings" if by_bookings else "Local Percent of Leg Passengers"
        m = "local_pct_bookings" if by_bookings else "local_pct_leg_pax"
        if title == "_default_":
            title = f"Carrier {measure_name}"
        return self._fig_carrier_attribute(raw_df, m, measure_name, title=title, also_df=also_df)

    @report_figure
    def fig_carrier_mileage(
        self, *, raw_df: bool = False, also_df: bool = False
    ) -> alt.Chart | pd.DataFrame | tuple[alt.Chart, pd.DataFrame]:
        """
        Figure showing mileage by carrier.

        ASM is available seat miles, and RPM is revenue passenger miles. Both
        measures are reported as the average across all non-burned samples.

        Parameters
        ----------
        raw_df : bool, default False
            Return the raw data for this figure as a pandas DataFrame, instead
            of generating the figure itself.
        also_df : bool, default False
            Return the raw data in addition to the figure, as a
            ``(figure, DataFrame)`` tuple.
        report : xmle.Reporter, optional
            Also append this figure to the given report.
        trace : pd.ExcelWriter, optional
            Also write the data from this figure to the given Excel file.
        """
        # Reshape to long form: one row per (measure, carrier) with a single
        # "value" column, so that the measure can drive the bar colour.
        df = (
            self.carriers.reset_index()[["carrier", "asm", "rpm"]]
            .set_index("carrier")
            .rename_axis(columns="measure")
            .unstack()
            .to_frame("value")
            .reset_index()
        )
        if raw_df:
            return df

        # Deferred import: altair is only needed when a chart is drawn.
        import altair as alt

        chart = alt.Chart(df, title="Carrier Loads")
        # stack=None overlays the two measures instead of stacking them.
        bars = chart.mark_bar().encode(
            x=alt.X("carrier:N", title="Carrier"),
            y=alt.Y("value", stack=None, title="miles"),
            color="measure",
            tooltip=["carrier", "measure", alt.Tooltip("value", format=".4s")],
        )
        text = chart.mark_text(
            dx=0,
            dy=5,
            color="white",
            baseline="top",
        ).encode(
            x=alt.X("carrier:N"),
            y=alt.Y("value").stack(None),
            text=alt.Text("value:Q", format=".4s"),
        )
        fig = (
            (bars + text)
            .properties(
                width=400,
                height=300,
            )
            .configure_axis(
                labelFontSize=12,
                titleFontSize=12,
            )
            .configure_legend(
                titleFontSize=12,
                labelFontSize=15,
            )
        )
        if also_df:
            return fig, df
        return fig

    def fig_carrier_revenue_distribution(self, *, raw_df=False, also_df=False):
        """Figure showing the distribution of carrier revenues.

        The density of the ``revenue`` column of ``carrier_history2`` is
        estimated separately for each carrier and drawn as one facet per
        carrier.

        Parameters
        ----------
        raw_df : bool, default False
            Return the raw data for this figure as a pandas DataFrame, instead
            of generating the figure itself. This is not implemented yet and will
            raise an error if set.
        also_df: bool, default False
            Return the raw data for this figure as a pandas DataFrame, in addition
            to the figure itself. This is not implemented yet, and will be silently
            ignored if set.

        Raises
        ------
        NotImplementedError
            If `raw_df` is set.
        """
        if raw_df:
            raise NotImplementedError("Raw data not available for this figure.")

        # Deferred import: altair is only needed when a chart is drawn.
        import altair as alt

        fig = (
            alt.Chart(self.carrier_history2.reset_index())
            .transform_density(
                "revenue",
                groupby=["carrier"],
                as_=["revenue", "density"],
            )
            .mark_area()
            .encode(
                x=alt.X("revenue:Q", axis=alt.Axis(title="Revenue", format="$.3s")),
                y=alt.Y("density:Q", title="Density", axis=alt.Axis(labels=False)),
                color="carrier:N",
            )
            .facet(
                "carrier:N",
                title="Revenue Distribution by Carrier",
            )
        )
        return fig

    def fig_carrier_head_to_head_revenue(
        self, x_carrier: str, y_carrier: str, *, raw_df=False, mean_adjusted: bool = True
    ):
        """
        Figure comparing carrier revenues head-to-head.

        Parameters
        ----------
        x_carrier, y_carrier : str
            The carrier to plot on the x- and y-axis, respectively.
        raw_df : bool, default False
            Return the raw data for this figure as a pandas DataFrame, instead
            of generating the figure itself.
        mean_adjusted : bool, default True
            If True, adjust revenues by dividing by the mean revenue for each carrier,
            so that the plot shows percentage of mean revenue. If False, use raw
            revenues, which is generally only useful for analyzing symmetric networks,
            such as 3MKT.

        Returns
        -------
        alt.Chart | pd.DataFrame
            The Altair chart object, or the raw data as a pandas DataFrame
        """
        history = self.carrier_history2
        # Reference the carrier codes as local variables (``@name``) rather
        # than pasting them into the expression text, so that a code
        # containing a quote character cannot break the query.
        df1 = history.query("carrier == @x_carrier")
        df2 = history.query("carrier == @y_carrier")

        if mean_adjusted:
            df = pd.concat(
                [
                    df1["revenue"] / df1["revenue"].mean(),
                    df2["revenue"] / df2["revenue"].mean(),
                ]
            )
            axis_label_text = "Percentage of Mean Revenue"
            axis_format = "%"
        else:
            df = pd.concat(
                [
                    df1["revenue"],
                    df2["revenue"],
                ]
            )
            axis_label_text = "Revenue"
            axis_format = "$.2s"

        # Overall range across both carriers: the end points of the diagonal.
        rng = df.min(), df.max()
        # One column per carrier, so each row pairs the two carriers' values.
        df = df.unstack("carrier").reset_index()
        if raw_df:
            return df

        # Deferred until a chart is actually drawn, as in the other figure
        # methods, so that raw data can be returned without altair.
        import altair as alt

        # Reference line where both carriers have the same value.
        diag = (
            alt.Chart(pd.DataFrame({x_carrier: rng, y_carrier: rng}))
            .mark_line(color="red", opacity=0.3)
            .encode(
                x=x_carrier,
                y=y_carrier,
            )
        )
        fig = (
            alt.Chart(df)
            .mark_circle(opacity=0.3)
            .encode(
                x=alt.X(f"{x_carrier}:Q")
                .axis(format=axis_format)
                .scale(zero=False)
                .title(f"{x_carrier} {axis_label_text}"),
                y=alt.Y(f"{y_carrier}:Q")
                .axis(format=axis_format)
                .scale(zero=False)
                .title(f"{y_carrier} {axis_label_text}"),
            )
            + diag
        )
        return fig.interactive()