Source code for passengersim.summaries.segmentation_detail

from __future__ import annotations

from typing import TYPE_CHECKING

import pandas as pd

from passengersim.database import Database
from passengersim.reporting import report_figure

from .generic import DatabaseTableItem, GenericSimulationTables
from .tools import aggregate_by_averaging_dataframe

if TYPE_CHECKING:
    import altair as alt

    from passengersim.contrast import Contrast


[docs] def detailed_bookings_by_timeframe( cnx: Database, *, scenario: str | None = None, burn_samples: int = 100, ) -> pd.DataFrame: """ Average bookings by carrier, orig, dest, booking class, and timeframe. This query requires that the simulation was run while recording supporting details (i.e. with the `bookings` or `fare` flags set on `Config.db.write_items`). Parameters ---------- cnx : Database scenario : str burn_samples : int, default 100 The bookings will be computed ignoring this many samples from the beginning of each trial. This argument is nominally ignored by this query unless `from_fare_detail` is true, although the simulator will have already ignored the burned samples when storing the data in the bookings table. Returns ------- pandas.DataFrame The resulting dataframe is indexed by `trial`, `carrier`, `orig`, `dest`, `booking_class`, and `days_prior`, and has these columns: - `avg_sold`: Average number of sales. - `avg_business`: Average number of sales to passengers in the business segment. - `avg_leisure`: Average number of sales to leisure passengers. """ qry = """ SELECT trial, carrier, orig, dest, booking_class, days_prior, (AVG(sold)) AS avg_sold, (AVG(sold_business)) AS avg_business, (AVG(sold_leisure)) AS avg_leisure FROM (SELECT trial, scenario, carrier, orig, dest, booking_class, days_prior, SUM(sold) AS sold, SUM(sold_business) AS sold_business, SUM(sold - sold_business) AS sold_leisure FROM fare_detail LEFT JOIN fare_defs USING (fare_id) WHERE sample >= ?1 AND scenario = ?2 GROUP BY trial, sample, carrier, orig, dest, booking_class, days_prior) a GROUP BY carrier, orig, dest, booking_class, days_prior, trial ORDER BY carrier, orig, dest, booking_class, days_prior, trial; """ if scenario is None: qry = qry.replace("AND scenario = ?2", "") params = (burn_samples,) else: params = (burn_samples, scenario) return cnx.dataframe(qry, params).set_index(["trial", "carrier", "orig", "dest", "booking_class", "days_prior"])
[docs] class SimTabSegmentationDetail(GenericSimulationTables): """Container for summary tables and figures extracted from a Simulation. This class is a subclass of _Generic """ segmentation_detail: pd.DataFrame = DatabaseTableItem( aggregation_func=aggregate_by_averaging_dataframe("segmentation_detail"), query_func=detailed_bookings_by_timeframe, doc="Segmentation detail.", )
[docs] @report_figure def fig_segmentation_detail( self, *, by_carrier: bool | str = True, by_class: bool | str = False, orig: str | None = None, dest: str | None = None, raw_df: bool = False, also_df: bool = False, ) -> alt.Chart | pd.DataFrame | tuple[alt.Chart, pd.DataFrame]: """ Plot the segmentation detail data. Parameters ---------- by_carrier : bool or str, default True If True, group by carrier. If a string, filter by carrier. by_class : bool or str, default False If True, group by booking class. If a string, filter by booking class. orig : str, optional Filter by origin. dest : str, optional Filter by destination. raw_df : bool, default False If True, return the raw dataframe instead of the figure. also_df : bool, default False If True, return the dataframe as well as the figure. Returns ------- alt.Chart or pd.DataFrame or tuple[alt.Chart, pd.DataFrame] The segmentation detail figure or dataframe. """ if self.segmentation_detail is None: raise ValueError("segmentation_detail not found") metric = "bookings" df = self.segmentation_detail # convert to sales by timeframe df = df.groupby(df.index.names[:-1])[["avg_business", "avg_leisure"]].diff(-1) df = df.groupby(df.index.names[:-1])[["avg_business", "avg_leisure"]].shift() df = df.query("days_prior != 0") # rename columns df = df.rename(columns={"avg_business": "business", "avg_leisure": "leisure"}) df.columns.name = "segment" df = df.stack().rename("bookings").to_frame() if orig is not None: df = df.query("orig == @orig") df.index = df.index.droplevel("orig") else: gb = [i for i in df.index.names if i != "orig"] df = df.groupby(gb).sum() if dest is not None: df = df.query("dest == @dest") df.index = df.index.droplevel("dest") else: gb = [i for i in df.index.names if i != "dest"] df = df.groupby(gb).sum() idxs = list(df.index.names) if "trial" in idxs: idxs.remove("trial") df = df.groupby(idxs).mean() df = df.reset_index() title = "Detailed Segmentation by Timeframe" if orig and dest: title = f"{title} ({orig}~{dest})" elif orig and not dest: title = f"{title} (Orig={orig})" elif not orig and dest: title = f"{title} (Dest={dest})" title_annot = [] if not by_carrier: g = ["days_prior", "segment"] if by_class: g += ["booking_class"] df = df.groupby(g, observed=False)[[metric]].sum().reset_index() if by_carrier and not by_class: df = df.groupby(["carrier", "days_prior", "segment"], observed=False)[[metric]].sum().reset_index() if isinstance(by_carrier, str): df = df[df["carrier"] == by_carrier] df = df.drop(columns=["carrier"]) title_annot.append(by_carrier) by_carrier = False if isinstance(by_class, str): df = df[df["booking_class"] == by_class] df = df.drop(columns=["booking_class"]) title_annot.append(f"Class {by_class}") by_class = False if title_annot: title = f"{title} ({', '.join(title_annot)})" if raw_df: return df import altair as alt if by_carrier: color = "carrier:N" color_title = "Carrier" elif by_class: color = "booking_class:N" color_title = "Booking Class" else: color = "segment:N" color_title = "Passenger Type" if metric == "revenue": metric_fmt = "$,.0f" else: metric_fmt = ",.2f" chart = ( alt.Chart(df) .mark_bar() .encode( color=alt.Color(color).title(color_title), x=alt.X("days_prior:O").scale(reverse=True).title("Days Prior to Departure"), y=alt.Y(metric), tooltip=([alt.Tooltip("carrier").title("Carrier")] if by_carrier else []) + ([alt.Tooltip("booking_class").title("Booking Class")] if by_class else []) + [ alt.Tooltip("segment", title="Passenger Type"), alt.Tooltip("days_prior", title="Days Prior"), alt.Tooltip(metric, format=metric_fmt, title=metric.title()), ], ) .properties( width=500, height=200, ) ) if by_carrier or by_class: chart = chart.facet( row=alt.Row("segment:N", title="Passenger Type"), title=title, ) else: chart = chart.properties(title=title) if also_df: return chart, df return chart
# Figure for Contrast
[docs] def fig_segmentation_detail( summaries: Contrast, *, by_carrier: bool | str = True, by_class: bool | str = False, orig: str | None = None, dest: str | None = None, raw_df: bool = False, also_df: bool = False, width: int | None = 400, height: int | None = 180, ) -> alt.Chart | pd.DataFrame | tuple[alt.Chart, pd.DataFrame]: """ Plot the segmentation detail data. Parameters ---------- summaries : Contrast The contrast object containing the segmentation detail data. by_carrier : bool or str, default True If True, group by carrier. If a string, filter by carrier. by_class : bool or str, default False If True, group by booking class. If a string, filter by booking class. orig : str, optional Filter by origin. dest : str, optional Filter by destination. raw_df : bool, default False If True, return the raw dataframe instead of the figure. also_df : bool, default False If True, return the dataframe as well as the figure. width : int, optional The width of the figure. Default is 400. height : int, optional The height of the figure. Default is 180. Returns ------- alt.Chart or pd.DataFrame or tuple[alt.Chart, pd.DataFrame] The segmentation detail figure or dataframe. """ dfs = { k: v.fig_segmentation_detail( by_carrier=by_carrier, by_class=by_class, orig=orig, dest=dest, raw_df=True, ) for k, v in summaries.items() } df = pd.concat(dfs, names=["source"]).reset_index() if raw_df: return df metric = "bookings" if metric == "revenue": metric_fmt = "$,.0f" else: metric_fmt = ",.2f" title = "Detailed Segmentation by Timeframe" title_annot = [] if orig and dest: title_annot.append(f"{orig}~{dest}") elif orig and not dest: title_annot.append(f"Orig={orig}") elif not orig and dest: title_annot.append(f"Dest={dest}") if isinstance(by_carrier, str): # this filter is already applied in the query against each source # and can now be removed title_annot.append(by_carrier) by_carrier = False title = f"{title} ({', '.join(title_annot)})" if by_carrier: color = "carrier:N" color_title = "Carrier" elif by_class: color = "booking_class:N" color_title = "Booking Class" else: color = "segment:N" color_title = "Passenger Type" import altair as alt fig = ( alt.Chart(df) .mark_bar() .encode( xOffset=alt.XOffset("source:N"), x=alt.X("days_prior:O").scale(reverse=True).title("Days Prior to Departure"), y=alt.Y("bookings:Q").title("Bookings"), color=alt.Color(color).title(color_title), tooltip=[ alt.Tooltip("source", title="Source"), ] + ([alt.Tooltip("carrier").title("Carrier")] if by_carrier else []) + ([alt.Tooltip("booking_class").title("Booking Class")] if by_class else []) + [ alt.Tooltip("segment", title="Passenger Type"), alt.Tooltip("days_prior", title="Days Prior"), alt.Tooltip(metric, format=metric_fmt, title=metric.title()), ], ) ) if by_carrier or by_class: fig = fig.facet( row=alt.Row("segment:N", title="Passenger Type"), title=title, ) if width: fig.spec.width = width if height: fig.spec.height = height else: fig = fig.properties(title=title, width=width, height=height) if also_df: return fig, df return fig