Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,25 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.2.2] - 2026-05-09

### Fixed

- `mark_line`, `mark_trail`, and `mark_area` now connect points in tree
tip order regardless of other encodings on the chart. Previously, a
user chart with an explicit categorical color-scale `domain` rendered
with crisscrossing line segments because Vega-Lite's default
connection-order heuristic ignored the strain-axis sort. The package
now attaches a `calculate` transform that derives a per-row tip rank
and points the `order` channel at that field on these marks. Any
user-supplied `order` is left in place.
- When `color_tree_by` is used together with a user chart that has its
own `color` encoding (e.g. a titer plot colored by `cell_line`), the
user chart's marks no longer disappear. The concat container now
resolves the `color` scale as `independent` so the tree's
`color_value:N` scale (with its tree-specific domain) is not merged
with the user chart's color scale.

## [0.2.1] - 2026-05-06

### Fixed
Expand Down
17 changes: 13 additions & 4 deletions docs/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,12 @@ edge.

The H3N2 example above is rendered with `color_tree_by="subclade"`,
which colors the tree's branches and tip circles by the
`node_attrs.subclade` value at each node and adds a categorical legend
below the plot. See "Color the tree" below for the full set of options.
`node_attrs.subclade` value at each node. The package's default places
the categorical legend at the bottom of the combined plot; here we pass
`tree_color_legend_format={"orient": "left"}` to push it to the left
side instead, so it doesn't compete with the cohort-selection legend
already sitting below the chart. See "Color the tree" below for the
full set of options.

### Optional: connect leaders all the way to the labels

Expand Down Expand Up @@ -156,8 +160,11 @@ Colors match what you'd see on the Nextstrain view of the same tree —
either from the JSON's palette information when the build provides it,
or from the same default palette Auspice uses when it doesn't.
Categories are ordered by descending frequency in both cases. Missing
values render in gray, and the legend is drawn at the bottom of the
combined plot.
values render in gray. By default the legend is drawn at the bottom of
the combined plot; the H3N2 examples in this section pass
`tree_color_legend_format={"orient": "left"}` to move it to the left
instead, since the bottom edge already carries the cohort-selection
legend.

The example below colors the same H3N2 chart by genotype at HA1
site 158, which has two mutations in the tree (`N158K`, `N158D`) and
Expand Down Expand Up @@ -277,6 +284,7 @@ tree-annotated-plot \
--scale-bar \
--branch-length-units substitutions \
--color-tree-by subclade \
--tree-color-legend-format '{"orient":"left"}' \
--output examples/data/h3n2_combined.json
```

Expand All @@ -295,6 +303,7 @@ out = tree_annotated_plot.plot(
scale_bar=True,
branch_length_units="substitutions",
color_tree_by="subclade",
tree_color_legend_format={"orient": "left"},
)
```

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "tree-annotated-plot"
version = "0.2.1"
version = "0.2.2"
description = "Annotate the axis of an Altair / Vega-Lite plot with a phylogenetic tree."
readme = "README.md"
requires-python = ">=3.13"
Expand Down
6 changes: 6 additions & 0 deletions scripts/generate_docs_assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,11 @@ def _render_kikawa() -> None:
# Color H3N2 by subclade so the docs SVG matches what users see
# on Nextstrain. The Auspice JSON's meta.colorings.subclade has
# no `scale` defined, so colors come from the default palette.
# Place the subclade legend on the left so it doesn't compete
# with the cohort-selection legend that already sits at the
# bottom of the chart.
plot_kwargs["color_tree_by"] = "subclade"
plot_kwargs["tree_color_legend_format"] = {"orient": "left"}
out = tree_annotated_plot.plot(
DATA_DIR / f"flu-seqneut-2025to2026_{subtype}.json",
chart,
Expand Down Expand Up @@ -155,6 +159,7 @@ def _render_kikawa() -> None:
strain_label_font_size=9,
shift_tree_loc=60,
color_tree_by="subclade",
tree_color_legend_format={"orient": "left"},
)
_save_pair(out, "h3n2_combined_label_connect")

Expand All @@ -180,6 +185,7 @@ def _render_kikawa() -> None:
scale_bar=True,
branch_length_units="substitutions",
color_tree_by="genotype:HA1:158",
tree_color_legend_format={"orient": "left"},
)
_save_pair(out, "h3n2_combined_genotype_158")

Expand Down
101 changes: 96 additions & 5 deletions src/tree_annotated_plot/_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,28 @@ def _build(
ch.axis = alt.Axis(labels=False, ticks=False, domain=False, title=None)
n_hits += 1
_check_walker_hits("strain-axis update", n_hits, len(axis_hits), axis)

# Pin line/trail/area connection order to tip order via Vega-Lite's
# `order` channel. Without this, an explicit categorical `color` scale
# `domain` (or other encoding choices) can shift Vega-Lite's
# connection-order heuristic away from the strain-axis sort, causing
# lines to crisscross. The order channel's `sort` only accepts
# ascending/descending, so we attach a `calculate` transform that
# computes a per-row tip rank and point `order` at that derived
# quantitative field. User-supplied `order` always wins.
rank_expr = (
f"indexof({json.dumps(list(tip_names))}, "
f"datum[{json.dumps(config.chart_strain_field)}])"
)
for node in _iter_connection_order_nodes(new_chart, config.chart_strain_field):
enc = _live_attr(node, "encoding")
if _live_attr(enc, "order") is not None:
continue
existing = _live_attr(node, "transform") or []
node.transform = list(existing) + [
{"calculate": rank_expr, "as": _TIP_ORDER_RANK_FIELD}
]
enc.order = alt.Order(f"{_TIP_ORDER_RANK_FIELD}:Q")
hoisted_config, hoisted_other = _pop_toplevel_only_attrs(new_chart)

combined = _concat_for_location(
Expand Down Expand Up @@ -276,22 +298,32 @@ def _concat_for_location(
user_chart: alt.TopLevelMixin,
location: TreeLocation,
) -> alt.HConcatChart | alt.VConcatChart:
"""Concat tree and chart in the order implied by the tree's location."""
"""Concat tree and chart in the order implied by the tree's location.

The strain axis is resolved independent so the tree and chart can use
different scales on that axis (the tree's branch length vs. the chart's
measurement value), while still sharing the orthogonal strain axis. The
`color` scale is also resolved independent: when ``color_tree_by`` is set
the tree panel emits a `color_value:N` color scale with a tree-specific
domain, and Vega-Lite's default of sharing color across concat views
would merge it with any color encoding on the user's chart, hiding
user-chart marks whose color values aren't in the tree's domain.
"""
if location == "left":
return alt.hconcat(tree_chart, user_chart, spacing=0).resolve_scale(
y="independent"
y="independent", color="independent"
)
if location == "right":
return alt.hconcat(user_chart, tree_chart, spacing=0).resolve_scale(
y="independent"
y="independent", color="independent"
)
if location == "top":
return alt.vconcat(tree_chart, user_chart, spacing=0).resolve_scale(
x="independent"
x="independent", color="independent"
)
if location == "bottom":
return alt.vconcat(user_chart, tree_chart, spacing=0).resolve_scale(
x="independent"
x="independent", color="independent"
)
raise ValueError(f"unreachable: tree_location={location!r}")

Expand Down Expand Up @@ -999,6 +1031,65 @@ def _live_attr(obj: Any, name: str) -> Any:
return v


# Mark types that draw by joining consecutive data points. For these the
# Vega-Lite `order` channel controls which point connects to which, so
# pointing `order` at a per-row tip-order rank keeps the line / trail / area
# running through the strains in tip order even when another encoding (for
# example an explicit color-scale `domain`) would otherwise change the
# connection order.
_CONNECTION_ORDER_MARKS = frozenset(("line", "trail", "area"))

# Name of the derived rank field added to the transform pipeline of each
# connection-order mark. The `_tap_` prefix keeps it unlikely to clash with
# a column in user data and signals where the field came from to anyone
# reading the rendered spec.
_TIP_ORDER_RANK_FIELD = "_tap_strain_order_idx"


def _mark_type(node: Any) -> str | None:
    """Return the name of ``node``'s mark as a string, or None if it has none.

    The ``mark`` attribute (read through ``_live_attr``) can take two shapes:

    * a bare string such as ``"line"`` (what ``chart.mark_line()`` produces),
      which is returned unchanged;
    * a mark-definition object (what ``chart.mark_line(point=True)``
      produces), in which case its ``type`` attribute is returned when that
      is a string.

    Nodes without a mark -- containers such as LayerChart or HConcatChart --
    yield None, as does a mark object whose ``type`` is not a string.
    """
    mark = _live_attr(node, "mark")
    # Both "no mark" and "already a string" can be handed straight back.
    if mark is None or isinstance(mark, str):
        return mark
    kind = _live_attr(mark, "type")
    if isinstance(kind, str):
        return kind
    return None


def _iter_connection_order_nodes(node: Any, chart_strain_field: str) -> Iterator[Any]:
    """Walk a chart tree and yield each node that needs its order pinned.

    A node qualifies when its mark type is one of ``_CONNECTION_ORDER_MARKS``
    and either its ``x`` or its ``y`` encoding channel refers to
    ``chart_strain_field``. Each qualifying node is yielded once, before any
    of its descendants.

    The walk descends into the ``hconcat`` / ``vconcat`` / ``concat`` /
    ``layer`` child lists and then into ``spec``, in that order. The node
    itself is yielded (rather than only its encoding) so that the caller can
    attach both a ``calculate`` transform and an ``order`` channel to it.
    """
    # Only look at the encoding when the mark is a connection-order mark.
    encoding = (
        _live_attr(node, "encoding")
        if _mark_type(node) in _CONNECTION_ORDER_MARKS
        else None
    )
    if encoding is not None:
        # Lazy generator + any(): `y` is only inspected when `x` is not the
        # strain channel, and the node is yielded at most once.
        positional = (_live_attr(encoding, axis) for axis in ("x", "y"))
        if any(
            channel is not None and _channel_field(channel) == chart_strain_field
            for channel in positional
        ):
            yield node

    for container in ("hconcat", "vconcat", "concat", "layer"):
        children = _live_attr(node, container)
        if not isinstance(children, list):
            continue
        for child in children:
            yield from _iter_connection_order_nodes(child, chart_strain_field)

    inner = _live_attr(node, "spec")
    if inner is not None:
        yield from _iter_connection_order_nodes(inner, chart_strain_field)


def _pop_toplevel_only_attrs(
chart: alt.TopLevelMixin,
) -> tuple[Any, dict]:
Expand Down
34 changes: 34 additions & 0 deletions tests/test_color_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,40 @@ def test_color_tree_by_legend_hides_internal_only_unknown():
assert "unknown" in enc["scale"]["domain"]


def test_color_tree_by_color_scale_independent_from_user_chart():
    """A colored tree must not share its color scale with the user chart.

    With ``color_tree_by`` set, the combined chart has to resolve the
    ``color`` scale as ``independent``. Otherwise the tree's ``color_value:N``
    scale would merge with the user chart's own color encoding and hide user
    marks whose color values are missing from the tree's domain.
    """
    # Two cell lines measured over the same four strains, so the user chart
    # carries a color encoding of its own.
    titers = pd.DataFrame(
        {
            "strain": ["A", "B", "C", "D"] * 2,
            "titer": [1.0, 2.0, 4.0, 8.0, 1.5, 2.5, 4.5, 8.5],
            "cell_line": ["X"] * 4 + ["Y"] * 4,
        }
    )
    line_chart = alt.Chart(titers).mark_line(point=True)
    line_chart = line_chart.encode(
        x="titer:Q",
        y=alt.Y("strain:N"),
        color=alt.Color("cell_line:N"),
    )
    line_chart = line_chart.properties(width=200, height=200)

    combined = tree_annotated_plot.plot(
        _attr_auspice(),
        line_chart,
        **_kw(),
        color_tree_by="subclade",
    )

    # Tolerate a missing or empty `resolve` / `scale` entry so that a failure
    # surfaces as the assertion below rather than as a KeyError.
    resolved_scales = (combined.to_dict().get("resolve") or {}).get("scale") or {}
    assert resolved_scales.get("color") == "independent"


# -----------------------------------------------------------------------------
# CLI
# -----------------------------------------------------------------------------
Expand Down
Loading