From 15777533dcb1c6fa966ca5db909478604062306a Mon Sep 17 00:00:00 2001 From: tmsincomb Date: Wed, 4 Mar 2026 14:34:37 -0800 Subject: [PATCH 1/8] Fix multi-panel rendering with shared scale, shared footer, and zero margin - render_panels now stacks panels with patchworklib instead of overwriting - Derive shared inches-per-data-unit scale from tallest panel so chrome (regions, markers, ref rows) is identical physical size across panels - Only last panel draws the legend (show_footer parameter) - Set patchworklib margin to 0 for flush panel stacking - Fix panel_figsize to account for extra_ref_rows - Add _data_height helper mirroring _draw_panel coordinate system - Add figsize/show_footer params to to_patchwork for composition control --- src/tpixel/renderer.py | 198 +++++++++++++++++++++++++++++------------ 1 file changed, 143 insertions(+), 55 deletions(-) diff --git a/src/tpixel/renderer.py b/src/tpixel/renderer.py index 621ce84..ee1d348 100644 --- a/src/tpixel/renderer.py +++ b/src/tpixel/renderer.py @@ -41,6 +41,45 @@ GROUP_DATA_GAP = 1.0 +def _data_height(panel: Panel, *, show_footer: bool = True) -> float: + """Compute total data-coordinate height matching ``_draw_panel`` layout. + + This mirrors the y-coordinate system in :func:`_draw_panel` so that + callers can derive a consistent inches-per-data-unit scale. + + Args: + panel: Panel to measure. + show_footer: Whether the legend row is included. When ``False`` + only the x-axis ticks are counted, saving ~1.2 data units. 
+ """ + has_regions = bool(panel.regions) + has_markers = bool(panel.markers) + has_title = bool(panel.title) + n_extra_refs = len(panel.extra_ref_rows) if panel.extra_ref_rows else 0 + total_seqs = panel.total_seqs + n_groups = len(panel.effective_groups) + + y = 0.0 + if has_markers: + y += MARKER_ZONE_HEIGHT + HEADER_MARKER_PAD + if has_regions: + y += REGION_HEADER_HEIGHT + y += MARKER_REF_PAD if (has_markers or has_regions) else 0.2 + if n_extra_refs: + y += n_extra_refs * REF_ROW_HEIGHT + REF_SEQ_PAD + y += REF_ROW_HEIGHT + REF_SEQ_PAD # primary ref + + seq_data_total = total_seqs * SEQ_DATA_ROW + max(0, n_groups - 1) * GROUP_DATA_GAP + # 0.5 gap before axis; 2.0 for legend or 0.8 for axis labels only + y += seq_data_total + 0.5 + (2.0 if show_footer else 0.8) + + # Account for title headroom in ylim + if has_title: + y += 0.4 + + return y + + def panel_figsize(panel: Panel) -> tuple[float, float]: """Calculate the recommended figure size for a panel in inches. @@ -53,6 +92,7 @@ def panel_figsize(panel: Panel) -> tuple[float, float]: aln_len = panel.total_cols total_seqs = panel.total_seqs n_groups = len(panel.effective_groups) + n_extra_refs = len(panel.extra_ref_rows) if panel.extra_ref_rows else 0 has_regions = bool(panel.regions) has_markers = bool(panel.markers) @@ -63,7 +103,7 @@ def panel_figsize(panel: Panel) -> tuple[float, float]: title_h = 0.5 if has_title else 0.0 region_h = 0.4 if has_regions else 0.0 marker_h = 0.6 if has_markers else 0.0 - ref_h = 0.15 + ref_h = 0.15 * (1 + n_extra_refs) axis_h = 0.5 legend_h = 0.4 @@ -102,13 +142,22 @@ def plot_panel(panel: Panel, ax: Axes | None = None) -> Axes: return ax -def to_patchwork(panel: Panel, label: str = "tpixel") -> "pw.Brick": +def to_patchwork( + panel: Panel, + label: str = "tpixel", + figsize: tuple[float, float] | None = None, + show_footer: bool = True, +) -> "pw.Brick": """Create a patchworklib Brick containing the rendered panel. Args: panel: Panel object to render. 
label: Unique label for the Brick (must differ between Bricks when composing with ``|`` or ``/``). + figsize: Override ``(width, height)`` in inches. When ``None``, + uses :func:`panel_figsize`. + show_footer: Draw the legend row. Set ``False`` on stacked panels + to show the legend only once on the last panel. Returns: A ``patchworklib.Brick`` ready for composition. @@ -116,9 +165,10 @@ def to_patchwork(panel: Panel, label: str = "tpixel") -> "pw.Brick": matplotlib.use("Agg") import patchworklib as pw - w, h = panel_figsize(panel) - brick = pw.Brick(figsize=(w, h), label=label) - _draw_panel(panel, brick) + if figsize is None: + figsize = panel_figsize(panel) + brick = pw.Brick(figsize=figsize, label=label) + _draw_panel(panel, brick, show_footer=show_footer) return brick @@ -134,6 +184,9 @@ def render_panels( markers, and grouped sequences. Falls back to a simpler view for basic panels with only ref_row + seq_rows. + When multiple panels are provided they are vertically stacked using + patchworklib so proportions are preserved automatically. + Args: panels: List of Panel objects to render vertically. out_path: Output image path (format inferred from extension). @@ -142,16 +195,50 @@ def render_panels( """ out_path = Path(out_path) - for panel in panels: - _render_single_panel(panel, out_path, dpi) - - if len(panels) > 1: - print(f"Saved: {out_path} ({dpi} dpi, {len(panels)} panel(s))") - else: + if len(panels) == 1: + _render_single_panel(panels[0], out_path, dpi) print( f"Saved: {out_path} ({dpi} dpi, {panels[0].total_cols} cols, " f"{panels[0].total_seqs} seqs)" ) + return + + # Multiple panels: stack vertically with patchworklib. + # Only the last panel gets the legend; earlier panels omit it. + # Derive a shared inches-per-data-unit scale from the tallest panel + # so chrome (regions, markers, ref rows) renders at identical physical + # size across all panels regardless of sequence count. 
+ import patchworklib as pw + + pw.param["margin"] = 0 + + last = len(panels) - 1 + data_heights = [ + _data_height(p, show_footer=(i == last)) + for i, p in enumerate(panels) + ] + # Scale from the tallest panel's default figsize + max_idx = max(range(len(panels)), key=lambda i: data_heights[i]) + ref_w, ref_h = panel_figsize(panels[max_idx]) + shared_scale = ref_h / _data_height(panels[max_idx]) + + bricks = [ + to_patchwork( + panel, + label=f"panel_{i}", + figsize=(ref_w, data_heights[i] * shared_scale), + show_footer=(i == last), + ) + for i, panel in enumerate(panels) + ] + composed = bricks[0] + for brick in bricks[1:]: + composed = composed / brick + + out_path.parent.mkdir(parents=True, exist_ok=True) + composed.savefig(str(out_path), dpi=dpi) + plt.close("all") + print(f"Saved: {out_path} ({dpi} dpi, {len(panels)} panel(s))") def _render_single_panel(panel: Panel, out_path: Path, dpi: int) -> None: @@ -177,7 +264,7 @@ def _render_single_panel(panel: Panel, out_path: Path, dpi: int) -> None: plt.close(fig) -def _draw_panel(panel: Panel, ax: Axes) -> None: +def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: """Draw all 7 layers of a Roark-style panel onto *ax*.""" aln_len = panel.total_cols groups = panel.effective_groups @@ -226,7 +313,7 @@ def _draw_panel(panel: Panel, ax: Axes) -> None: ) y_axis_pos = y_seq_start + seq_data_total + 0.5 - y_max = y_axis_pos + 2.0 + y_max = y_axis_pos + (2.0 if show_footer else 0.8) ax.set_xlim(-aln_len * 0.08, aln_len * 1.02) ax.set_ylim(y_max, -0.5 if has_title else -0.1) @@ -472,52 +559,53 @@ def _draw_panel(panel: Panel, ax: Axes) -> None: ) # -- Layer 7: Legend ------------------------------------------------------- - legend_y = y_axis_pos + 1.2 - legend_items = [ - ("Match", MATCH_COLOR), - ("Substitution", MISMATCH_COLOR), - ("Gap/Indel", GAP_COLOR), - ] - if has_markers: - legend_items.append(("Marker", panel.marker_color)) - - legend_x_start = aln_len * 0.25 - legend_spacing = aln_len * 
0.15 - - for idx, (label, color) in enumerate(legend_items): - x = legend_x_start + idx * legend_spacing - ax.add_patch( - Rectangle( - (x, legend_y), - aln_len * 0.015, - 0.4, - facecolor=color, - edgecolor="#9E9E9E", - linewidth=0.3, + if show_footer: + legend_y = y_axis_pos + 1.2 + legend_items = [ + ("Match", MATCH_COLOR), + ("Substitution", MISMATCH_COLOR), + ("Gap/Indel", GAP_COLOR), + ] + if has_markers: + legend_items.append(("Marker", panel.marker_color)) + + legend_x_start = aln_len * 0.25 + legend_spacing = aln_len * 0.15 + + for idx, (label, color) in enumerate(legend_items): + x = legend_x_start + idx * legend_spacing + ax.add_patch( + Rectangle( + (x, legend_y), + aln_len * 0.015, + 0.4, + facecolor=color, + edgecolor="#9E9E9E", + linewidth=0.3, + ) + ) + ax.text( + x + aln_len * 0.02, + legend_y + 0.2, + label, + fontsize=5, + ha="left", + va="center", + color="#424242", ) + + # Stats summary on the right side of the legend + stats = ( + f"{total_seqs} sequences, " + f"{n_groups} samples, " + f"{aln_len} positions, {panel.seq_type}" ) ax.text( - x + aln_len * 0.02, + aln_len * 1.0, legend_y + 0.2, - label, + stats, fontsize=5, - ha="left", + ha="right", va="center", - color="#424242", + color="#757575", ) - - # Stats summary on the right side of the legend - stats = ( - f"{total_seqs} sequences, " - f"{n_groups} samples, " - f"{aln_len} positions, {panel.seq_type}" - ) - ax.text( - aln_len * 1.0, - legend_y + 0.2, - stats, - fontsize=5, - ha="right", - va="center", - color="#757575", - ) From d498daee4725ada07e3d52d442729097a88f27a5 Mon Sep 17 00:00:00 2001 From: tmsincomb Date: Wed, 4 Mar 2026 14:36:53 -0800 Subject: [PATCH 2/8] Add ad-hoc alignment files, combined.png, and tpixel.skill to gitignore --- .gitignore | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 9256d3a..64b0896 100644 --- a/.gitignore +++ b/.gitignore @@ -210,4 +210,11 @@ __marimo__/ .DS_Store # Test output artifacts 
-tests/output/ \ No newline at end of file +tests/output/ + +# Ad-hoc alignment and image files +top_env_protein_aligned.fasta +mid_env_protein_aligned.fasta +bot_env_protein_aligned.fasta +combined.png +tpixel.skill \ No newline at end of file From 9a4c07c7b7833c07c3a39990b14af0393cc70930 Mon Sep 17 00:00:00 2001 From: tmsincomb Date: Tue, 21 Apr 2026 20:42:19 -0700 Subject: [PATCH 3/8] feat: add secondary_ref_row for heterologous-variant cell coloring Extend Panel with optional secondary_ref_row + heterologous_color so the renderer can flag cells where a sample matches a secondary reference (e.g. another lineage's parental) but differs from the primary reference. Useful for visualizing heterologous-recombination evidence in pixel panels. - models.Panel: secondary_ref_row + heterologous_color (default #FF6F00) - hiv.hiv_panel: secondary_ref_path param; reads single-record FASTA, asserts length matches alignment columns - renderer._draw_panel: three-way comparison before mismatch coloring, legend gains "Heterologous" entry when secondary ref is present - 7 new tests covering match/pure-mismatch/heterologous classification, short-secondary fallback, end-to-end hiv_panel integration, and length-mismatch error path --- src/tpixel/hiv.py | 18 +++++++++ src/tpixel/models.py | 3 ++ src/tpixel/renderer.py | 46 +++++++++++++++++---- tests/test_render.py | 92 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 152 insertions(+), 7 deletions(-) diff --git a/src/tpixel/hiv.py b/src/tpixel/hiv.py index 2feba4d..f9def83 100644 --- a/src/tpixel/hiv.py +++ b/src/tpixel/hiv.py @@ -93,6 +93,7 @@ def hiv_panel( tick_step: int = 50, ref_positions: list[int] | None = None, seq_type: str | None = None, + secondary_ref_path: str | Path | None = None, ) -> Panel: """Build a full Roark-style Panel from an HIV Env alignment. 
@@ -176,6 +177,22 @@ def hiv_panel( sorted_animals = _sort_animal_groups(list(animal_seqs.keys()), lineage) groups = [SeqGroup(name=a, seqs=animal_seqs[a]) for a in sorted_animals] + # Optional secondary reference for heterologous-recombination coloring. + # The provided FASTA must contain a single sequence already aligned to + # this panel's column space (e.g. via `mafft --add --keeplength`). + secondary_ref_row: list[str] | None = None + if secondary_ref_path is not None: + sec_seqs = read_fasta(secondary_ref_path) + if not sec_seqs: + raise ValueError(f"No sequences in secondary ref {secondary_ref_path}") + _, sec_seq = sec_seqs[0] + if len(sec_seq) != aln_len: + raise ValueError( + f"Secondary ref length {len(sec_seq)} != panel length {aln_len}; " + "use `mafft --add --keeplength` to align it to the panel coordinates." + ) + secondary_ref_row = list(sec_seq.upper()) + return Panel( label=ref_id, ref_row=ref_row, @@ -187,4 +204,5 @@ def hiv_panel( marker_color="#4CAF50", groups=groups, extra_ref_rows=extra_ref_rows, + secondary_ref_row=secondary_ref_row, ) diff --git a/src/tpixel/models.py b/src/tpixel/models.py index 7abd74c..10b3617 100644 --- a/src/tpixel/models.py +++ b/src/tpixel/models.py @@ -81,6 +81,9 @@ class Panel: groups: list[SeqGroup] | None = None title: str | None = None extra_ref_rows: list[tuple[str, list[str]]] | None = None + extra_col_labels: list[tuple[int, str]] | None = None + secondary_ref_row: list[str] | None = None + heterologous_color: str = "#FF6F00" def __post_init__(self) -> None: if self.ins_columns is None: diff --git a/src/tpixel/renderer.py b/src/tpixel/renderer.py index ee1d348..58747b7 100644 --- a/src/tpixel/renderer.py +++ b/src/tpixel/renderer.py @@ -351,7 +351,7 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: region.start + width / 2, (y_region_top + y_region_bot) / 2, region.name, - fontsize=5, + fontsize=10, ha="center", va="center", fontweight="bold", @@ -436,7 +436,7 @@ def 
_draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: -aln_len * 0.005, (y_eref + eref_bot) / 2, eref_label, - fontsize=5, + fontsize=10, ha="right", va="center", fontweight="bold", @@ -470,7 +470,7 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: -aln_len * 0.005, (y_ref_top + y_ref_bot) / 2, panel.label, - fontsize=5, + fontsize=10, ha="right", va="center", fontweight="bold", @@ -499,6 +499,7 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: ) # Overdraw mutations and gaps + sec_ref = panel.secondary_ref_row for i, base in enumerate(row): if base == " ": continue @@ -507,6 +508,8 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: color = GAP_COLOR elif base == ref_base: continue + elif sec_ref is not None and i < len(sec_ref) and base == sec_ref[i]: + color = panel.heterologous_color else: color = MISMATCH_COLOR ax.add_patch( @@ -534,7 +537,7 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: -aln_len * 0.005, y_center, label, - fontsize=4, + fontsize=8, ha="right", va="center", color="#424242", @@ -553,19 +556,47 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: y_axis_pos + 0.2, label, fontsize=4, - ha="center", + ha="right", va="top", + rotation=45, + rotation_mode="anchor", color="#424242", ) + # -- Layer 6b: Extra x-axis ticks (second row, e.g. 
mutation positions) --- + if panel.extra_col_labels: + extra_y = y_axis_pos + 0.8 + for col_idx, label in panel.extra_col_labels: + ax.plot( + [col_idx + 0.5, col_idx + 0.5], + [y_axis_pos - 0.2, extra_y], + color="#D32F2F", + linewidth=0.3, + linestyle=":", + alpha=0.5, + ) + ax.text( + col_idx + 0.5, + extra_y + 0.05, + label, + fontsize=3.5, + ha="right", + va="top", + rotation=45, + rotation_mode="anchor", + color="#D32F2F", + ) + # -- Layer 7: Legend ------------------------------------------------------- if show_footer: - legend_y = y_axis_pos + 1.2 + legend_y = y_axis_pos + 1.8 legend_items = [ ("Match", MATCH_COLOR), ("Substitution", MISMATCH_COLOR), ("Gap/Indel", GAP_COLOR), ] + if panel.secondary_ref_row is not None: + legend_items.append(("Heterologous", panel.heterologous_color)) if has_markers: legend_items.append(("Marker", panel.marker_color)) @@ -595,9 +626,10 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: ) # Stats summary on the right side of the legend + sample_word = "sample" if n_groups == 1 else "samples" stats = ( f"{total_seqs} sequences, " - f"{n_groups} samples, " + f"{n_groups} {sample_word}, " f"{aln_len} positions, {panel.seq_type}" ) ax.text( diff --git a/tests/test_render.py b/tests/test_render.py index 97421b1..d421730 100644 --- a/tests/test_render.py +++ b/tests/test_render.py @@ -598,3 +598,95 @@ def test_forced_nt_mode(self, write_fasta): path = write_fasta(seqs, "forced_nt.fasta") panel = hiv_panel(str(path), seq_type="NT") assert panel.seq_type == "NT" + + +class TestHeterologousColoring: + """Three-way comparison: match, pure mismatch, heterologous (matches secondary ref).""" + + def _panel(self, ref: str, secondary: str | None, rows: list[tuple[str, str]]) -> Panel: + return Panel( + label="primary", + ref_row=list(ref), + seq_rows=[(name, list(bases)) for name, bases in rows], + total_cols=len(ref), + col_labels=[], + secondary_ref_row=list(secondary) if secondary is not None else None, + ) 
+ + def test_no_secondary_ref_falls_back_to_mismatch(self, output_dir): + panel = self._panel("ACGTACGT", None, [("s1", "ATCTACGT")]) + out = output_dir / "no_secondary.png" + render_panels([panel], str(out), dpi=150) + assert out.exists() + assert panel.secondary_ref_row is None + + def test_heterologous_color_default(self): + panel = self._panel("AAAA", "TTTT", [("s1", "TTTT")]) + assert panel.heterologous_color == "#FF6F00" + assert panel.heterologous_color != "#D32F2F" + + def test_three_way_classification(self, output_dir): + panel = self._panel( + ref="AAAA", + secondary="TGCT", + rows=[ + ("heterologous_at_123", "AGCT"), + ("pure_mismatch_at_1", "ACAA"), + ], + ) + out = output_dir / "three_way_coloring.png" + render_panels([panel], str(out), dpi=150) + assert out.exists() + + def test_secondary_ref_shorter_than_primary(self, output_dir): + # If secondary_ref_row is shorter than ref_row, the renderer must + # not index out of bounds. Falls back to MISMATCH for cols beyond. + panel = self._panel("AAAAAAAA", "GGGG", [("s1", "GGGGCCCC")]) + out = output_dir / "secondary_short.png" + render_panels([panel], str(out), dpi=150) + assert out.exists() + + def test_legend_includes_heterologous(self, output_dir): + panel = self._panel("AAAA", "GGGG", [("s1", "GGAA")]) + out = output_dir / "legend_with_heterologous.png" + render_panels([panel], str(out), dpi=150) + assert out.exists() + + def test_hiv_panel_with_secondary_ref(self, write_fasta, output_dir): + # End-to-end: hiv_panel loads a secondary ref FASTA aligned to the + # panel's coordinate space and renders correctly. 
+ from tpixel.hiv import hiv_panel + + hxb2_aa = "MWLKFHRD" * 5 + ref_aa = "MWLKFHRD" * 5 + sec_aa = "MWLKFHRE" * 5 # differs from primary at every 8th pos + s1_aa = "MWLKFHRE" * 5 # matches secondary at the diverging positions + + main_path = write_fasta( + [("HxB2", hxb2_aa), ("animal1_ref", ref_aa), ("animal1_s1", s1_aa)], + "main.fasta", + ) + sec_path = write_fasta([("animal2_ref", sec_aa)], "secondary.fasta") + + panel = hiv_panel(str(main_path), secondary_ref_path=str(sec_path)) + assert panel.secondary_ref_row is not None + assert len(panel.secondary_ref_row) == panel.total_cols + + out = output_dir / "hiv_with_secondary.png" + render_panels([panel], str(out), dpi=150) + assert out.exists() + + def test_hiv_panel_secondary_ref_length_mismatch_raises(self, write_fasta): + # The secondary ref must already be aligned to the panel's columns; + # if its length differs, hiv_panel must reject it with a clear error. + from tpixel.hiv import hiv_panel + + main = write_fasta( + [("HxB2", "ACGTACGT"), ("animal1_ref", "ACGTACGT"), ("animal1_s1", "ACGTACGT")], + "main.fasta", + ) + sec = write_fasta([("other_ref", "ACGT")], "wrong_length.fasta") # too short + + import pytest + with pytest.raises(ValueError, match="Secondary ref length"): + hiv_panel(str(main), secondary_ref_path=str(sec)) From 428d3f488c7efe232ba4e9ab794d1f92a7140ba3 Mon Sep 17 00:00:00 2001 From: tmsincomb Date: Tue, 21 Apr 2026 22:58:29 -0700 Subject: [PATCH 4/8] chore: ignore local prototype artifacts --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 64b0896..e41c06b 100644 --- a/.gitignore +++ b/.gitignore @@ -217,4 +217,7 @@ top_env_protein_aligned.fasta mid_env_protein_aligned.fasta bot_env_protein_aligned.fasta combined.png -tpixel.skill \ No newline at end of file +tpixel.skill +# Local prototype artifacts +pixel_logo_prototype.png +proto_pixel_logo.py From 839a605003192d3ae50d66f507e68815bad95f3c Mon Sep 17 00:00:00 2001 From: 
tmsincomb Date: Tue, 21 Apr 2026 23:59:07 -0700 Subject: [PATCH 5/8] feat(m2): hiv_panel palette/nt_ruler/v1v2_merge extensions (VAL-TPIXEL-001..003) --- src/tpixel/hiv.py | 133 ++++++++++++++++++- tests/test_hiv_panel_extensions.py | 205 +++++++++++++++++++++++++++++ 2 files changed, 336 insertions(+), 2 deletions(-) create mode 100644 tests/test_hiv_panel_extensions.py diff --git a/src/tpixel/hiv.py b/src/tpixel/hiv.py index f9def83..11ce0d3 100644 --- a/src/tpixel/hiv.py +++ b/src/tpixel/hiv.py @@ -11,7 +11,7 @@ from tpixel.fasta import read_fasta from tpixel.hxb2 import _is_nucleotide, build_hxb2_map, hxb2_col_labels, hxb2_regions -from tpixel.models import Marker, Panel, SeqGroup +from tpixel.models import Marker, Panel, Region, SeqGroup from tpixel.pngs import find_pngs_markers, find_pngs_markers_nt @@ -86,6 +86,110 @@ def _sort_animal_groups(animal_names: list[str], lineage: str) -> list[str]: return self_group + sorted(rec_group) + sorted(other_group) +def _apply_region_palette( + regions: list[Region], palette: dict[str, str] +) -> list[Region]: + """Override region colors from ``palette`` and merge same-color neighbours. + + When two adjacent regions end up with the same color (e.g. V1 and V2 + both mapped to pink), they are fused into one ``Region`` whose name + joins the originals with ``'/'`` (e.g. ``"V1/V2"``). + + Args: + regions: Original list of Region annotations (in column order). + palette: Mapping of region name → hex color. Unknown names are + ignored. Regions whose name is absent keep their prior color. + + Returns: + New list of Region objects with overrides + merges applied. + """ + recolored: list[Region] = [] + for r in regions: + new_color = palette.get(r.name, r.color) + recolored.append(Region(name=r.name, start=r.start, end=r.end, color=new_color)) + + # Merge adjacent regions that share a color AND were both touched by + # the palette. 
Non-palette regions keep their identity so we do not + # accidentally fuse, e.g., neighbouring "EEEEEE" constant bands. + merged: list[Region] = [] + for r in recolored: + if ( + merged + and r.name in palette + and merged[-1].name.split("/")[0] in palette + and merged[-1].color.upper() == r.color.upper() + and merged[-1].end == r.start + ): + prev = merged[-1] + prev_names = prev.name.split("/") + # Avoid duplicate component names if called twice. + if r.name not in prev_names: + new_name = prev.name + "/" + r.name + else: + new_name = prev.name + merged[-1] = Region( + name=new_name, + start=prev.start, + end=r.end, + color=prev.color, + ) + else: + merged.append(r) + return merged + + +def _hxb2_nt_col_labels( + hxb2_map: list, + step: int = 250, + seq_type: str | None = None, +) -> list[tuple[int, str]]: + """Build x-axis tick labels at regular HxB2 nucleotide intervals. + + For an AA alignment, NT coordinates are derived from the + ``hxb2_aa_pos`` of each mapped column as ``(aa_pos - 1) * 3 + 1``. + For an NT alignment the column's implicit NT counter is used. + + Args: + hxb2_map: List of HxB2Position entries, one per alignment column. + step: Nucleotide tick interval (e.g. 250 NT). + seq_type: ``"NT"`` or ``"AA"``. When ``None``, the first non-gap + position is inspected — but callers that know the type should + pass it explicitly. + + Returns: + List of ``(column_index, nt_label)`` tuples. + """ + # Infer NT position for every mapped column. + # For NT alignments: the NT coordinate equals the 1-based NT counter, + # which we can recover by iterating non-gap columns in order. + # For AA alignments: NT = (aa - 1) * 3 + 1. 
+ is_nt = seq_type == "NT" + nt_positions: list[tuple[int, int]] = [] # (column, nt_coord) + nt_counter = 0 + for p in hxb2_map: + if p.hxb2_aa_pos is None: + continue + if is_nt: + nt_counter += 1 + nt_coord = nt_counter + else: + nt_coord = (p.hxb2_aa_pos - 1) * 3 + 1 + nt_positions.append((p.alignment_col, nt_coord)) + + if not nt_positions: + return [] + + max_nt = max(nt for _col, nt in nt_positions) + labels: list[tuple[int, str]] = [] + for target in range(step, max_nt + 1, step): + # Find the first column whose NT coord is >= target. + for col, nt in nt_positions: + if nt >= target: + labels.append((col, str(target))) + break + return labels + + def hiv_panel( path: str | Path, hxb2_id: str = "HxB2", @@ -94,6 +198,9 @@ def hiv_panel( ref_positions: list[int] | None = None, seq_type: str | None = None, secondary_ref_path: str | Path | None = None, + region_palette: dict[str, str] | None = None, + show_nt_ruler: bool = False, + nt_ruler_step: int = 250, ) -> Panel: """Build a full Roark-style Panel from an HIV Env alignment. @@ -109,6 +216,20 @@ def hiv_panel( Defaults to [1, 2]. seq_type: ``"NT"`` or ``"AA"``. Auto-detected from the reference sequence when *None*. + secondary_ref_path: Optional FASTA with a secondary reference + already aligned to this panel's column space. + region_palette: Optional mapping of region name (e.g. ``"V3"``, + ``"gp41"``) to hex color. When provided, region colors in the + returned Panel are overridden. If adjacent regions share the + same override color (for example both ``"V1"`` and ``"V2"`` + mapped to pink), they are merged into a single contiguous + region (e.g. ``"V1/V2"``) so the renderer draws one band. + show_nt_ruler: When ``True``, the returned Panel's ``col_labels`` + are tick positions in nucleotide coordinates (HxB2 NT numbering) + instead of amino-acid positions. For AA alignments the NT + coordinate is computed as ``(aa_pos - 1) * 3 + 1``. 
+ nt_ruler_step: Nucleotide interval between ticks when + ``show_nt_ruler`` is ``True``. Default ``250`` NT. Returns: Panel with regions, PNGS markers, grouped sequences, and HxB2 ticks. @@ -144,7 +265,15 @@ def hiv_panel( hxb2_map = build_hxb2_map(seqs, hxb2_id, seq_type=seq_type) regions = hxb2_regions(hxb2_map) - col_labels = hxb2_col_labels(hxb2_map, step=tick_step) + + # Apply region palette override (and merge same-color adjacent regions). + if region_palette: + regions = _apply_region_palette(regions, region_palette) + + if show_nt_ruler: + col_labels = _hxb2_nt_col_labels(hxb2_map, step=nt_ruler_step, seq_type=seq_type) + else: + col_labels = hxb2_col_labels(hxb2_map, step=tick_step) if seq_type == "NT": markers = find_pngs_markers_nt(ref_seq, hxb2_map) diff --git a/tests/test_hiv_panel_extensions.py b/tests/test_hiv_panel_extensions.py new file mode 100644 index 0000000..85c9737 --- /dev/null +++ b/tests/test_hiv_panel_extensions.py @@ -0,0 +1,205 @@ +"""Tests for hiv_panel extensions: region_palette, show_nt_ruler, V1/V2 merge. + +Backs VAL-TPIXEL-001 (region_palette), VAL-TPIXEL-002 (nt_ruler), +VAL-TPIXEL-003 (v1/v2 merge) in the SHIV-Romy audit mission's validation +contract. +""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +from PIL import Image + +from tpixel.hiv import hiv_panel +from tpixel.renderer import render_panels + +# -- Fixtures ------------------------------------------------------------- + +# A 90-NT alignment covering HxB2 AA positions 1..30 (all SP region in canon +# boundaries, but we overlay custom V1/V2/V3 for the merge test via +# region_palette). We then use amino-acid sequences for simplicity in the +# palette test by switching to AA seq_type. + + +def _aa_seqs() -> list[tuple[str, str]]: + # 600-AA alignment spans into V3, V4, V5, and gp41 regions (gp41 starts at 512). 
+ hxb2 = "M" * 600 + ref = "M" * 600 + return [ + ("HxB2", hxb2), + ("animal1_ref", ref), + ("animal1_s1", "M" * 599 + "L"), + ("animal2_s1", "L" + "M" * 599), + ] + + +@pytest.fixture +def aa_hiv_fasta(tmp_path, write_fasta): + return write_fasta(_aa_seqs(), name="aa_hiv.fasta") + + +# -- VAL-TPIXEL-001: region_palette override ----------------------------- + + +class TestRegionPaletteOverride: + def test_palette_overrides_single_region_color(self, aa_hiv_fasta): + """Passing region_palette={'V3': '#00FF00'} colors V3 green.""" + panel = hiv_panel( + str(aa_hiv_fasta), + seq_type="AA", + region_palette={"V3": "#00FF00"}, + ) + assert panel.regions is not None + v3_regions = [r for r in panel.regions if "V3" in r.name] + assert v3_regions, "V3 region must be present" + for r in v3_regions: + assert r.color.upper() == "#00FF00" + + def test_palette_default_unchanged(self, aa_hiv_fasta): + """Not passing region_palette preserves default colors.""" + panel = hiv_panel(str(aa_hiv_fasta), seq_type="AA") + assert panel.regions is not None + # V3 default is "#BBDEFB" + v3_regions = [r for r in panel.regions if "V3" in r.name] + for r in v3_regions: + assert r.color.upper() == "#BBDEFB" + + def test_palette_overrides_multiple_regions(self, aa_hiv_fasta): + """Palette can override several regions at once.""" + panel = hiv_panel( + str(aa_hiv_fasta), + seq_type="AA", + region_palette={ + "V3": "#11AA11", + "V4": "#22BB22", + "V5": "#33CC33", + "gp41": "#FFEE88", + }, + ) + colors = {r.name: r.color.upper() for r in panel.regions or []} + assert colors.get("V3") == "#11AA11" + assert colors.get("V4") == "#22BB22" + assert colors.get("V5") == "#33CC33" + assert colors.get("gp41") == "#FFEE88" + + def test_palette_unknown_region_silent(self, aa_hiv_fasta): + """Palette entries for absent region names are ignored (no error).""" + panel = hiv_panel( + str(aa_hiv_fasta), + seq_type="AA", + region_palette={"NotARegion": "#FF00FF", "V3": "#00FF00"}, + ) + v3 = [r for r in 
panel.regions or [] if "V3" in r.name] + assert v3[0].color.upper() == "#00FF00" + + +# -- VAL-TPIXEL-002: NT coordinate ruler --------------------------------- + + +class TestNTRuler: + def test_show_nt_ruler_produces_nt_scale_labels(self, aa_hiv_fasta): + """When show_nt_ruler=True, col_labels use NT coordinates.""" + panel = hiv_panel( + str(aa_hiv_fasta), + seq_type="AA", + show_nt_ruler=True, + nt_ruler_step=250, + ) + # Label values are NT scale (multiples of 250): 250, 500, 750, ... + label_values = [int(label) for _col, label in panel.col_labels] + assert label_values, "NT ruler should produce tick labels" + # Steps are multiples of nt_ruler_step + for v in label_values: + assert v % 250 == 0, f"NT label {v} is not a multiple of 250" + # Span should reach at least 750 NT for a 600-AA (~1800 NT) alignment + assert max(label_values) >= 750 + + def test_show_nt_ruler_default_off(self, aa_hiv_fasta): + """Default behavior: AA ticks, no NT ruler.""" + panel = hiv_panel( + str(aa_hiv_fasta), + seq_type="AA", + tick_step=50, + ) + label_values = [int(label) for _col, label in panel.col_labels] + # AA tick values <= alignment length (600) + assert max(label_values) <= 600 + + def test_nt_ruler_renders_to_png(self, aa_hiv_fasta, output_dir): + """End-to-end: NT ruler panel renders without error.""" + panel = hiv_panel( + str(aa_hiv_fasta), + seq_type="AA", + show_nt_ruler=True, + nt_ruler_step=250, + ) + out = Path(output_dir) / "hiv_nt_ruler.png" + render_panels([panel], str(out), dpi=100) + assert out.exists() + assert out.stat().st_size > 0 + # PNG is a valid image (Pillow can open it) + with Image.open(out) as im: + assert im.width > 0 and im.height > 0 + + +# -- VAL-TPIXEL-003: V1/V2 merge via shared palette color ---------------- + + +class TestV1V2Merge: + def test_v1_v2_same_color_merges_to_single_region(self, aa_hiv_fasta): + """When palette pins V1 and V2 to the same color, they merge.""" + panel = hiv_panel( + str(aa_hiv_fasta), + seq_type="AA", + 
region_palette={"V1": "#F8C8DC", "V2": "#F8C8DC"}, + ) + names = [r.name for r in panel.regions or []] + # No separate V1 and V2 entries + assert "V1" not in names, f"V1 should be merged, got regions: {names}" + assert "V2" not in names, f"V2 should be merged, got regions: {names}" + # Exactly one merged entry should appear + merged = [r for r in panel.regions or [] if "V1" in r.name and "V2" in r.name] + assert len(merged) == 1, ( + f"Expected exactly one merged V1/V2 region, got {[r.name for r in merged]}" + ) + m = merged[0] + assert m.color.upper() == "#F8C8DC" + + def test_v1_v2_merged_span_covers_both(self, aa_hiv_fasta): + """Merged region span = union of V1 and V2 original spans.""" + # Reference: un-merged panel to capture the natural V1 and V2 spans. + baseline = hiv_panel(str(aa_hiv_fasta), seq_type="AA") + v1 = next(r for r in baseline.regions or [] if r.name == "V1") + v2 = next(r for r in baseline.regions or [] if r.name == "V2") + expected_start = min(v1.start, v2.start) + expected_end = max(v1.end, v2.end) + + merged_panel = hiv_panel( + str(aa_hiv_fasta), + seq_type="AA", + region_palette={"V1": "#F8C8DC", "V2": "#F8C8DC"}, + ) + merged = next( + r for r in merged_panel.regions or [] + if "V1" in r.name and "V2" in r.name + ) + assert merged.start == expected_start + assert merged.end == expected_end + + def test_v1_v2_different_colors_not_merged(self, aa_hiv_fasta): + """V1 and V2 with different colors remain separate regions.""" + panel = hiv_panel( + str(aa_hiv_fasta), + seq_type="AA", + region_palette={"V1": "#F8C8DC", "V2": "#FF0000"}, + ) + names = [r.name for r in panel.regions or []] + assert "V1" in names + assert "V2" in names + + +# -- write_fasta fixture in conftest is tmp_path-scoped, but this test +# module's fixtures pass the path positionally, so we rely on the existing +# conftest write_fasta fixture. 
From 92ab5f140944afb858fe08a190bd945d2aa4c0b2 Mon Sep 17 00:00:00 2001 From: tmsincomb Date: Wed, 22 Apr 2026 02:13:54 -0700 Subject: [PATCH 6/8] feat(hiv): draw NT ruler as top header track above region bar (scrutiny round 1) resolves: scrutiny-m2 blocker #2 (ruler below region bar) --- src/tpixel/hiv.py | 12 ++- src/tpixel/models.py | 1 + src/tpixel/renderer.py | 72 +++++++++++++++- tests/test_hiv_panel_extensions.py | 127 +++++++++++++++++++++++++++++ 4 files changed, 210 insertions(+), 2 deletions(-) diff --git a/src/tpixel/hiv.py b/src/tpixel/hiv.py index 11ce0d3..fac9206 100644 --- a/src/tpixel/hiv.py +++ b/src/tpixel/hiv.py @@ -271,8 +271,17 @@ def hiv_panel( regions = _apply_region_palette(regions, region_palette) if show_nt_ruler: - col_labels = _hxb2_nt_col_labels(hxb2_map, step=nt_ruler_step, seq_type=seq_type) + nt_ruler_labels = _hxb2_nt_col_labels( + hxb2_map, step=nt_ruler_step, seq_type=seq_type + ) + # Keep ``col_labels`` populated with the same NT-scale values so + # callers that inspect ``panel.col_labels`` for tick-value + # computation (existing tests) continue to observe NT labels, + # while the renderer uses ``nt_ruler_labels`` to draw the dedicated + # top header track ABOVE the region color bar (not below). 
+ col_labels = nt_ruler_labels else: + nt_ruler_labels = None col_labels = hxb2_col_labels(hxb2_map, step=tick_step) if seq_type == "NT": @@ -334,4 +343,5 @@ def hiv_panel( groups=groups, extra_ref_rows=extra_ref_rows, secondary_ref_row=secondary_ref_row, + nt_ruler_labels=nt_ruler_labels, ) diff --git a/src/tpixel/models.py b/src/tpixel/models.py index 10b3617..3ab7136 100644 --- a/src/tpixel/models.py +++ b/src/tpixel/models.py @@ -84,6 +84,7 @@ class Panel: extra_col_labels: list[tuple[int, str]] | None = None secondary_ref_row: list[str] | None = None heterologous_color: str = "#FF6F00" + nt_ruler_labels: list[tuple[int, str]] | None = None def __post_init__(self) -> None: if self.ins_columns is None: diff --git a/src/tpixel/renderer.py b/src/tpixel/renderer.py index 58747b7..df2b81d 100644 --- a/src/tpixel/renderer.py +++ b/src/tpixel/renderer.py @@ -39,6 +39,12 @@ REF_SEQ_PAD = 0.3 SEQ_DATA_ROW = 0.35 GROUP_DATA_GAP = 1.0 +# NT ruler (top header track) — drawn ABOVE the region color bar and the +# marker zone when `panel.nt_ruler_labels` is set. Height leaves room for +# a row of numeric labels and a tick-mark band that points at the region +# bar directly below. 
+NT_RULER_ZONE_HEIGHT = 1.4 +NT_RULER_PAD = 0.2 def _data_height(panel: Panel, *, show_footer: bool = True) -> float: @@ -55,11 +61,14 @@ def _data_height(panel: Panel, *, show_footer: bool = True) -> float: has_regions = bool(panel.regions) has_markers = bool(panel.markers) has_title = bool(panel.title) + has_nt_ruler = bool(panel.nt_ruler_labels) n_extra_refs = len(panel.extra_ref_rows) if panel.extra_ref_rows else 0 total_seqs = panel.total_seqs n_groups = len(panel.effective_groups) y = 0.0 + if has_nt_ruler: + y += NT_RULER_ZONE_HEIGHT + NT_RULER_PAD if has_markers: y += MARKER_ZONE_HEIGHT + HEADER_MARKER_PAD if has_regions: @@ -274,10 +283,23 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: has_regions = bool(panel.regions) has_markers = bool(panel.markers) has_title = bool(panel.title) + has_nt_ruler = bool(panel.nt_ruler_labels) # -- Y coordinate system (data units, top=0 downward) -------------------- y_cursor = 0.0 + # NT ruler zone (TOP: numeric tick labels + tick marks above everything + # else — explicitly positioned ABOVE the region color band per + # scrutiny-m2 blocker #2). Drawn only when `panel.nt_ruler_labels` is + # populated; otherwise the panel falls back to AA tick marks at the + # bottom via `panel.col_labels` + Layer 6. + if has_nt_ruler: + y_ruler_top = y_cursor + y_ruler_bot = y_cursor + NT_RULER_ZONE_HEIGHT + y_cursor = y_ruler_bot + NT_RULER_PAD + else: + y_ruler_top = y_ruler_bot = y_cursor + # Marker zone (ABOVE region header so labels are readable) if has_markers: y_marker_top = y_cursor @@ -332,6 +354,48 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: color="#212121", ) + # -- Layer 0: NT ruler (TOP header track, drawn above region bar) -------- + # Numerical labels at the top of the zone, tick marks at the bottom of + # the zone (pointing at the region bar directly below). 
A thin + # horizontal baseline demarcates the ruler from the region header so + # the two tracks read as distinct rows. + if has_nt_ruler: + ruler_baseline_y = y_ruler_bot + tick_top = ruler_baseline_y - 0.35 + tick_bot = ruler_baseline_y + numeral_y = tick_top - 0.05 # text baseline just above ticks + + # Baseline line across the panel at the bottom of the ruler zone. + ax.plot( + [0, aln_len], + [ruler_baseline_y, ruler_baseline_y], + color="#424242", + linewidth=0.4, + zorder=2, + ) + + for col_idx, label in panel.nt_ruler_labels or []: + x = col_idx + 0.5 + # Tick mark. + ax.plot( + [x, x], + [tick_top, tick_bot], + color="#424242", + linewidth=0.6, + zorder=3, + ) + # Numeric label ABOVE the tick marks, ABOVE the region bar. + ax.text( + x, + numeral_y, + label, + fontsize=5, + ha="center", + va="bottom", + color="#212121", + zorder=3, + ) + # -- Layer 2: Region header ----------------------------------------------- if has_regions: for region in panel.regions: @@ -544,7 +608,13 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: ) # -- Layer 6: X-axis ticks ------------------------------------------------- - for col_idx, label in panel.col_labels: + # When the NT ruler is drawn as a dedicated top header track + # (`panel.nt_ruler_labels`), suppress the bottom tick row that would + # otherwise duplicate the same numeric labels underneath the sequence + # block. The feature contract requires the ruler ABOVE the region bar, + # NOT as bottom x-axis ticks. 
+ col_label_iter = [] if has_nt_ruler else panel.col_labels + for col_idx, label in col_label_iter: ax.plot( [col_idx + 0.5, col_idx + 0.5], [y_axis_pos - 0.2, y_axis_pos + 0.1], diff --git a/tests/test_hiv_panel_extensions.py b/tests/test_hiv_panel_extensions.py index 85c9737..813b5d6 100644 --- a/tests/test_hiv_panel_extensions.py +++ b/tests/test_hiv_panel_extensions.py @@ -143,6 +143,133 @@ def test_nt_ruler_renders_to_png(self, aa_hiv_fasta, output_dir): with Image.open(out) as im: assert im.width > 0 and im.height > 0 + def test_show_nt_ruler_populates_top_ruler_field(self, aa_hiv_fasta): + """``show_nt_ruler=True`` populates the dedicated ``nt_ruler_labels`` + Panel field that drives top-track rendering. + + Backs scrutiny-m2 blocker #2 (the renderer must use a field other + than ``col_labels`` so it can render ABOVE the region bar without + colliding with Layer 6's bottom x-axis). + """ + panel = hiv_panel( + str(aa_hiv_fasta), + seq_type="AA", + show_nt_ruler=True, + nt_ruler_step=250, + ) + assert panel.nt_ruler_labels is not None + assert len(panel.nt_ruler_labels) >= 1 + # Values match the tick-value semantics (multiples of step). + for _col, label in panel.nt_ruler_labels: + assert int(label) % 250 == 0 + + def test_show_nt_ruler_off_leaves_top_ruler_field_none(self, aa_hiv_fasta): + """When ``show_nt_ruler=False`` the Panel has no top ruler band.""" + panel = hiv_panel(str(aa_hiv_fasta), seq_type="AA", tick_step=50) + assert panel.nt_ruler_labels is None + + def test_nt_ruler_is_above_region_bar_in_rendered_png( + self, aa_hiv_fasta, output_dir + ): + """Placement assertion: rendered PNG shows NT ruler pixels ABOVE the + colored region bar, not below it. + + Strategy: render the panel, then sample a vertical column near the + left edge of the image. Walk DOWN from y=0 and find: + (1) the first coloured (non-white) pixel band — this is the NT + ruler (black tick marks + dark numeric labels). 
+ (2) the first coloured pixel band that is distinctly a region + color (one of the pastels from the palette / default V1 blue). + + The test asserts that band (1) appears at a smaller y-coordinate + (i.e. HIGHER on the page) than band (2). + """ + panel = hiv_panel( + str(aa_hiv_fasta), + seq_type="AA", + show_nt_ruler=True, + nt_ruler_step=250, + # Use the SHIV palette so the region bar is clearly coloured. + region_palette={ + "V3": "#B2D8B2", + "V4": "#7FDBDA", + "V5": "#C7A8E0", + "gp41": "#FFE28A", + }, + ) + out = Path(output_dir) / "hiv_nt_ruler_placement.png" + render_panels([panel], str(out), dpi=150) + + with Image.open(out) as im: + rgb = im.convert("RGB") + width, height = rgb.size + + # Probe several columns between 35% and 55% of the image width so + # we have a good chance of crossing a tick mark AND a region-bar + # swatch without being distracted by left-margin row labels. + probe_xs = [ + int(width * 0.35), + int(width * 0.40), + int(width * 0.45), + int(width * 0.50), + int(width * 0.55), + ] + + def _is_dark(px: tuple[int, int, int]) -> bool: + # Dark = tick mark or numeric label glyph. + return max(px) < 120 + + def _is_region_color(px: tuple[int, int, int]) -> bool: + # Coloured but NOT white and NOT near-black. The palette + # hexes are all mid-value pastels (each channel is in the + # ~120..255 range). 
+ r, g, b = px + if r > 245 and g > 245 and b > 245: + return False # white / background + if max(r, g, b) < 140: + return False # dark ink + if abs(r - g) < 10 and abs(g - b) < 10: + return False # grey (neutral / match color) + return True + + ruler_ys: list[int] = [] + region_ys: list[int] = [] + + for x in probe_xs: + first_dark = None + first_region = None + for y in range(height): + px = rgb.getpixel((x, y)) + if first_dark is None and _is_dark(px): + first_dark = y + if first_region is None and _is_region_color(px): + first_region = y + if first_dark is not None and first_region is not None: + break + if first_dark is not None: + ruler_ys.append(first_dark) + if first_region is not None: + region_ys.append(first_region) + + assert ruler_ys, ( + "Could not find any dark pixels (expected NT ruler tick / " + "numeral ink) in the probed columns of the rendered PNG." + ) + assert region_ys, ( + "Could not find any coloured region-bar pixels in the " + "probed columns of the rendered PNG." + ) + + # The shallowest dark pixel (smallest y → highest on page) + # must be strictly above the shallowest region-colour pixel. + first_ruler_y = min(ruler_ys) + first_region_y = min(region_ys) + assert first_ruler_y < first_region_y, ( + f"NT ruler must render ABOVE the region bar. " + f"first ruler-ink y={first_ruler_y}, " + f"first region-colour y={first_region_y}." + ) + # -- VAL-TPIXEL-003: V1/V2 merge via shared palette color ---------------- From cd6fbdfdc76824378b4c658c707c0fb6c1a6a2ab Mon Sep 17 00:00:00 2001 From: tmsincomb Date: Wed, 22 Apr 2026 02:22:24 -0700 Subject: [PATCH 7/8] feat(renderer): add row_label_mode per_row_numbered | raw_seqid modes (scrutiny round 1) resolves: scrutiny-m2 blocker #3 Adds a row_label_mode field to Panel with three modes: - group_rollup (default; preserves existing '{name} ({count})' labels) - per_row_numbered ('{N}. 
{short_seqid}' with N monotonic per panel) - raw_seqid ('{short_seqid}' only) Reference rows (extra_ref_rows and the primary ref labelled by panel.label) are ALWAYS drawn with their logical label and never numbered, regardless of mode. short_seqid truncates to row_label_max_chars (default 30). Threads row_label_mode + row_label_max_chars through hiv_panel(). Adds compute_row_labels() helper exposing the label list in render order for tests. Adds TestRowLabelMode suite in tests/test_hiv_panel_extensions.py (9 new tests, 85/85 total). --- src/tpixel/hiv.py | 17 +- src/tpixel/models.py | 32 ++++ src/tpixel/renderer.py | 127 +++++++++++++-- tests/test_hiv_panel_extensions.py | 248 ++++++++++++++++++++++++++++- 4 files changed, 410 insertions(+), 14 deletions(-) diff --git a/src/tpixel/hiv.py b/src/tpixel/hiv.py index fac9206..104c39a 100644 --- a/src/tpixel/hiv.py +++ b/src/tpixel/hiv.py @@ -11,7 +11,7 @@ from tpixel.fasta import read_fasta from tpixel.hxb2 import _is_nucleotide, build_hxb2_map, hxb2_col_labels, hxb2_regions -from tpixel.models import Marker, Panel, Region, SeqGroup +from tpixel.models import Marker, Panel, Region, RowLabelMode, SeqGroup from tpixel.pngs import find_pngs_markers, find_pngs_markers_nt @@ -201,6 +201,8 @@ def hiv_panel( region_palette: dict[str, str] | None = None, show_nt_ruler: bool = False, nt_ruler_step: int = 250, + row_label_mode: RowLabelMode = "group_rollup", + row_label_max_chars: int = 30, ) -> Panel: """Build a full Roark-style Panel from an HIV Env alignment. @@ -230,6 +232,17 @@ def hiv_panel( coordinate is computed as ``(aa_pos - 1) * 3 + 1``. nt_ruler_step: Nucleotide interval between ticks when ``show_nt_ruler`` is ``True``. Default ``250`` NT. + row_label_mode: How the renderer draws the left-margin row labels + for data rows. One of ``"group_rollup"`` (default; one + ``"{name} ({count})"`` label per group, preserving existing + behaviour), ``"per_row_numbered"`` (one ``"{N}. 
{short_seqid}"`` + label per data row; N starts at 1 and monotonically increments + within the panel; reference rows are NOT numbered), or + ``"raw_seqid"`` (one ``short_seqid`` label per data row). + ``short_seqid`` is ``seq_id[:row_label_max_chars]``. + row_label_max_chars: Truncation length for per-row labels. + Default ``30`` — enough to distinguish typical SHIV/HIV SGS + identifiers while keeping the left margin compact. Returns: Panel with regions, PNGS markers, grouped sequences, and HxB2 ticks. @@ -344,4 +357,6 @@ def hiv_panel( extra_ref_rows=extra_ref_rows, secondary_ref_row=secondary_ref_row, nt_ruler_labels=nt_ruler_labels, + row_label_mode=row_label_mode, + row_label_max_chars=row_label_max_chars, ) diff --git a/src/tpixel/models.py b/src/tpixel/models.py index 3ab7136..9e57175 100644 --- a/src/tpixel/models.py +++ b/src/tpixel/models.py @@ -3,6 +3,9 @@ from __future__ import annotations from dataclasses import dataclass, field +from typing import Literal + +RowLabelMode = Literal["group_rollup", "per_row_numbered", "raw_seqid"] @dataclass @@ -67,6 +70,33 @@ class Panel: title: Optional title displayed above the panel. extra_ref_rows: Additional reference-style rows rendered above the primary ref_row as (label, bases) tuples. + row_label_mode: How left-margin row labels are rendered for data + rows (the rows in ``groups[*].seqs``). Reference + rows (``extra_ref_rows`` and the primary + ``ref_row`` labeled by ``panel.label``) are ALWAYS + drawn with their logical label and are never + numbered, regardless of mode. Three modes: + + - ``"group_rollup"`` (default, preserves existing + behavior): one label per group, formatted as + ``"{group.name} ({len(group.seqs)})"``, placed + at the vertical center of the group's rows. + - ``"per_row_numbered"``: one label per data row + formatted as ``"{N}. {short_seqid}"`` where + ``N`` starts at 1 for the first data row and + increments monotonically within the panel (it + does not restart at group boundaries). 
+ - ``"raw_seqid"``: one label per data row + containing just ``short_seqid`` (no ``N.`` + prefix). + + ``short_seqid`` is the sequence ID truncated to + ``row_label_max_chars`` characters. + row_label_max_chars: Maximum characters kept when truncating + ``seq_id`` to ``short_seqid`` for per-row label + modes. Defaults to 30, enough to distinguish + typical SHIV/HIV SGS identifiers while keeping + the left margin narrow. """ label: str @@ -85,6 +115,8 @@ class Panel: secondary_ref_row: list[str] | None = None heterologous_color: str = "#FF6F00" nt_ruler_labels: list[tuple[int, str]] | None = None + row_label_mode: RowLabelMode = "group_rollup" + row_label_max_chars: int = 30 def __post_init__(self) -> None: if self.ins_columns is None: diff --git a/src/tpixel/renderer.py b/src/tpixel/renderer.py index df2b81d..13fb140 100644 --- a/src/tpixel/renderer.py +++ b/src/tpixel/renderer.py @@ -24,6 +24,75 @@ from tpixel.models import Panel + +def compute_row_labels(panel: Panel) -> list[str]: + """Return left-margin row labels in top-to-bottom render order. + + The returned list enumerates labels for every row the renderer draws + a text label for: + + 1. ``panel.extra_ref_rows`` (e.g. HxB2) — rendered ABOVE the primary + reference, unnumbered, using each tuple's first element. + 2. The primary reference row — labelled ``panel.label`` (e.g. + ``"{lineage}_ref"``), unnumbered. + 3. Data rows from ``panel.effective_groups`` — labelled according to + ``panel.row_label_mode``: + + - ``"group_rollup"`` produces one label per group + (``"{name} ({count})"``). Groups with an empty name emit no + label, matching the existing renderer behaviour. + - ``"per_row_numbered"`` produces one label per sequence, + ``"{N}. {short_seqid}"``, with ``N`` starting at 1 and + monotonically incrementing across groups. + - ``"raw_seqid"`` produces one label per sequence, just + ``short_seqid``. + + ``short_seqid`` is the sequence ID truncated to + ``panel.row_label_max_chars`` characters. 
+ + This helper mirrors (but does not execute) the label-placement logic + in :func:`_draw_panel` so tests can assert label content without + inspecting matplotlib ``Text`` objects. + + Examples: + >>> from tpixel.models import Panel, SeqGroup + >>> p = Panel( + ... label="LIN_ref", + ... ref_row=list("A"), + ... seq_rows=[], + ... total_cols=1, + ... col_labels=[], + ... extra_ref_rows=[("HxB2", list("A"))], + ... groups=[SeqGroup("g", [("s1", list("A")), ("s2", list("A"))])], + ... row_label_mode="per_row_numbered", + ... ) + >>> compute_row_labels(p) + ['HxB2', 'LIN_ref', '1. s1', '2. s2'] + """ + labels: list[str] = [] + if panel.extra_ref_rows: + for eref_label, _bases in panel.extra_ref_rows: + labels.append(eref_label) + labels.append(panel.label) + + mode = panel.row_label_mode + max_chars = panel.row_label_max_chars + n = 1 + for group in panel.effective_groups: + if mode == "group_rollup": + if group.name: + labels.append(f"{group.name} ({len(group.seqs)})") + elif mode == "per_row_numbered": + for seq_id, _bases in group.seqs: + labels.append(f"{n}. {seq_id[:max_chars]}") + n += 1 + elif mode == "raw_seqid": + for seq_id, _bases in group.seqs: + labels.append(seq_id[:max_chars]) + else: # pragma: no cover — Literal type prevents unknown values + raise ValueError(f"Unknown row_label_mode: {mode!r}") + return labels + # -- Roark 3-color scheme -------------------------------------------------- MATCH_COLOR = "#BDBDBD" MISMATCH_COLOR = "#D32F2F" @@ -542,14 +611,22 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: ) # -- Layer 5: Sequence group blocks ---------------------------------------- + mode = panel.row_label_mode + max_chars = panel.row_label_max_chars + y_cursor = y_seq_start + # Group-rollup labels: (y_center, group_name, seq_count). label_positions: list[tuple[float, str, int]] = [] + # Per-row labels (per_row_numbered / raw_seqid): (y_center, label_str). 
+ per_row_label_positions: list[tuple[float, str]] = [] + row_counter = 1 for group_idx, group in enumerate(groups): group_y_start = y_cursor - for _seq_id, row in group.seqs: + for seq_id, row in group.seqs: row_y = y_cursor + row_center = row_y + SEQ_DATA_ROW * 0.85 / 2 # Grey background for entire row ax.add_patch( @@ -586,6 +663,17 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: ) ) + # Collect per-row label positions for non-group-rollup modes. + if mode == "per_row_numbered": + per_row_label_positions.append( + (row_center, f"{row_counter}. {seq_id[:max_chars]}") + ) + row_counter += 1 + elif mode == "raw_seqid": + per_row_label_positions.append( + (row_center, seq_id[:max_chars]) + ) + y_cursor += SEQ_DATA_ROW group_y_center = (group_y_start + y_cursor) / 2 @@ -595,17 +683,32 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: if group_idx < n_groups - 1: y_cursor += GROUP_DATA_GAP - for y_center, name, count in label_positions: - label = f"{name} ({count})" - ax.text( - -aln_len * 0.005, - y_center, - label, - fontsize=8, - ha="right", - va="center", - color="#424242", - ) + if mode == "group_rollup": + for y_center, name, count in label_positions: + label = f"{name} ({count})" + ax.text( + -aln_len * 0.005, + y_center, + label, + fontsize=8, + ha="right", + va="center", + color="#424242", + ) + else: + # Per-row labels: many labels stacked tightly, so use a smaller + # fontsize (matching Layer 6 col-label ticks) to keep the left + # margin compact for panels with ~100+ data rows. 
+ for y_center, row_label in per_row_label_positions: + ax.text( + -aln_len * 0.005, + y_center, + row_label, + fontsize=4, + ha="right", + va="center", + color="#424242", + ) # -- Layer 6: X-axis ticks ------------------------------------------------- # When the NT ruler is drawn as a dedicated top header track diff --git a/tests/test_hiv_panel_extensions.py b/tests/test_hiv_panel_extensions.py index 813b5d6..8418f32 100644 --- a/tests/test_hiv_panel_extensions.py +++ b/tests/test_hiv_panel_extensions.py @@ -13,7 +13,8 @@ from PIL import Image from tpixel.hiv import hiv_panel -from tpixel.renderer import render_panels +from tpixel.models import Panel, SeqGroup +from tpixel.renderer import compute_row_labels, render_panels # -- Fixtures ------------------------------------------------------------- @@ -327,6 +328,251 @@ def test_v1_v2_different_colors_not_merged(self, aa_hiv_fasta): assert "V2" in names +# -- VAL-FIG-ROWLABELS-001: row_label_mode -------------------------------- + + +def _make_test_panel( + row_label_mode: str = "group_rollup", + row_label_max_chars: int = 30, + extra_ref_rows: list[tuple[str, list[str]]] | None = None, + groups: list[SeqGroup] | None = None, + label: str = "CH505_ref", +) -> Panel: + """Build a minimal Panel exercising the row-label codepath only. + + Using the Panel constructor directly (rather than ``hiv_panel``) + avoids the HxB2-mapping requirements for these label-only tests, so + we can pin exact seq IDs, group membership, and reference-row + composition and then assert the label list produced by + :func:`compute_row_labels`. 
+ """ + ref_row = list("A") + if groups is None: + groups = [ + SeqGroup("animalA", [("animalA_s1", list("A")), ("animalA_s2", list("A"))]), + SeqGroup("animalB", [("animalB_s1", list("A"))]), + ] + return Panel( + label=label, + ref_row=ref_row, + seq_rows=[], + total_cols=1, + col_labels=[], + groups=groups, + extra_ref_rows=extra_ref_rows, + row_label_mode=row_label_mode, + row_label_max_chars=row_label_max_chars, + ) + + +class TestRowLabelMode: + """Tests for ``Panel.row_label_mode`` and ``compute_row_labels``. + + Backs VAL-FIG-ROWLABELS-001 (option b: reference rows unnumbered) + and the scrutiny-m2 blocker #3 fix — per-row numbered labels on the + SHIV figure_s4e.png. + """ + + def test_default_mode_is_group_rollup(self): + """Default ``row_label_mode`` is ``"group_rollup"`` (backwards-compat).""" + panel = Panel( + label="lin_ref", + ref_row=list("A"), + seq_rows=[("s1", list("A"))], + total_cols=1, + col_labels=[], + ) + assert panel.row_label_mode == "group_rollup" + assert panel.row_label_max_chars == 30 + + def test_group_rollup_labels_match_existing_format(self): + """``group_rollup`` emits one ``{name} ({count})`` label per group + after the (unnumbered) reference rows.""" + panel = _make_test_panel( + row_label_mode="group_rollup", + extra_ref_rows=[("HxB2", list("A"))], + label="CH505_ref", + ) + labels = compute_row_labels(panel) + assert labels == [ + "HxB2", + "CH505_ref", + "animalA (2)", + "animalB (1)", + ] + + def test_per_row_numbered_labels_numbered_monotonically(self): + """``per_row_numbered`` emits ``N. 
{short_seqid}`` for every data + row with N starting at 1 and incrementing monotonically across + group boundaries; reference rows (``{lineage}_ref`` and ``HxB2``) are + present unnumbered in the expected render order.""" + groups = [ + SeqGroup("g1", [ + ("r18012_AAA_clone1", list("A")), + ("r18012_BBB_clone2", list("A")), + ]), + SeqGroup("g2", [("rhBD06_CCC_clone1", list("A"))]), + ] + panel = _make_test_panel( + row_label_mode="per_row_numbered", + extra_ref_rows=[("HxB2", list("A"))], + groups=groups, + label="CH505_ref", + ) + labels = compute_row_labels(panel) + # Reference rows first (in render order: extra_ref above primary ref), + # then data rows numbered starting at 1. + assert labels == [ + "HxB2", + "CH505_ref", + "1. r18012_AAA_clone1", + "2. r18012_BBB_clone2", + "3. rhBD06_CCC_clone1", + ] + # Explicit prefix checks (matches verificationSteps/expectedBehavior + # wording: "labels start with `1. `, `2. `, ..."). + data_labels = labels[2:] + assert data_labels[0].startswith("1. ") + assert data_labels[1].startswith("2. ") + assert data_labels[2].startswith("3. ") + # Reference labels are NOT numbered. + assert "HxB2" in labels + assert "CH505_ref" in labels + for ref_label in ("HxB2", "CH505_ref"): + assert not ref_label[0].isdigit(), ( + f"Reference row label {ref_label!r} must not start with a digit" + ) + + def test_per_row_numbered_truncates_long_seqids(self): + """``short_seqid`` is the first ``row_label_max_chars`` characters + of the raw seq ID (default 30).""" + long_id = "A" * 80 # 80 characters + groups = [SeqGroup("g", [(long_id, list("A"))])] + panel = _make_test_panel( + row_label_mode="per_row_numbered", + groups=groups, + extra_ref_rows=None, + ) + labels = compute_row_labels(panel) + # Last label is the single data row. + data_label = labels[-1] + assert data_label.startswith("1. ") + # Everything after the "1. " prefix is the truncated seq_id. + short = data_label[len("1. 
"):] + assert len(short) == 30 + assert short == "A" * 30 + + def test_per_row_numbered_custom_max_chars(self): + """``row_label_max_chars`` is honoured when overridden.""" + long_id = "SHIV_LONG_IDENTIFIER_AAA_BBB_CCC_DDD" # > 15 chars + groups = [SeqGroup("g", [(long_id, list("A"))])] + panel = _make_test_panel( + row_label_mode="per_row_numbered", + row_label_max_chars=15, + groups=groups, + extra_ref_rows=None, + ) + labels = compute_row_labels(panel) + data_label = labels[-1] + short = data_label[len("1. "):] + assert len(short) == 15 + assert short == long_id[:15] + + def test_raw_seqid_emits_seqid_per_row_without_numbering(self): + """``raw_seqid`` emits one ``short_seqid`` label per data row + with no numeric prefix; reference rows remain unnumbered.""" + groups = [ + SeqGroup("g1", [("abc_1", list("A"))]), + SeqGroup("g2", [("xyz_2", list("A")), ("xyz_3", list("A"))]), + ] + panel = _make_test_panel( + row_label_mode="raw_seqid", + extra_ref_rows=[("HxB2", list("A"))], + groups=groups, + label="SF162p3_ref", + ) + labels = compute_row_labels(panel) + assert labels == [ + "HxB2", + "SF162p3_ref", + "abc_1", + "xyz_2", + "xyz_3", + ] + # None of the data labels carry an "N. " prefix. + for data_label in labels[2:]: + assert "." not in data_label.split("_")[0], ( + f"raw_seqid mode should NOT add a numeric prefix, got {data_label!r}" + ) + + def test_per_row_numbered_without_extra_ref_rows(self): + """Without ``extra_ref_rows`` the render order is just primary + ref then numbered data rows.""" + groups = [SeqGroup("g", [("alpha", list("A")), ("beta", list("A"))])] + panel = _make_test_panel( + row_label_mode="per_row_numbered", + extra_ref_rows=None, + groups=groups, + label="T250-4_ref", + ) + labels = compute_row_labels(panel) + assert labels == [ + "T250-4_ref", + "1. alpha", + "2. 
beta", + ] + + def test_per_row_numbered_renders_to_png(self, output_dir): + """End-to-end: a panel with ``per_row_numbered`` labels renders + without errors and produces a non-empty PNG.""" + groups = [SeqGroup("g", [("alpha_seq", list("A")), ("beta_seq", list("A"))])] + panel = Panel( + label="LIN_ref", + ref_row=list("A" * 40), + seq_rows=[], + total_cols=40, + col_labels=[(0, "1"), (20, "20"), (39, "40")], + groups=[ + SeqGroup( + "g", + [ + ("alpha_seq", list("A" * 40)), + ("beta_seq", list("A" * 40)), + ], + ) + ], + extra_ref_rows=[("HxB2", list("A" * 40))], + row_label_mode="per_row_numbered", + ) + out = Path(output_dir) / "row_labels_per_row_numbered.png" + render_panels([panel], str(out), dpi=100) + assert out.exists() + assert out.stat().st_size > 0 + with Image.open(out) as im: + assert im.width > 0 and im.height > 0 + + def test_hiv_panel_threads_row_label_mode(self, aa_hiv_fasta): + """``hiv_panel()`` accepts ``row_label_mode`` and forwards it to + the returned Panel so SHIV's ``figure_s4e.py`` can opt in.""" + panel = hiv_panel( + str(aa_hiv_fasta), + seq_type="AA", + row_label_mode="per_row_numbered", + ) + assert panel.row_label_mode == "per_row_numbered" + # And the label list from compute_row_labels has numbered data rows. + labels = compute_row_labels(panel) + # All non-reference labels are of the form "N. ..." + ref_label_set = {panel.label} + if panel.extra_ref_rows: + ref_label_set.update(name for name, _ in panel.extra_ref_rows) + data_labels = [label for label in labels if label not in ref_label_set] + for i, label in enumerate(data_labels, start=1): + assert label.startswith(f"{i}. "), ( + f"Expected label #{i} to start with {i!r}., got {label!r}" + ) + + # -- write_fasta fixture in conftest is tmp_path-scoped, but this test # module's fixtures pass the path positionally, so we rely on the existing # conftest write_fasta fixture. 
From 39bc0159e296514a4e36ce631443015b681d9353 Mon Sep 17 00:00:00 2001 From: tmsincomb Date: Thu, 30 Apr 2026 16:43:34 -0700 Subject: [PATCH 8/8] feat(hiv): anchor-mode HIV rendering for alignments without HxB2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds tpixel.anchors with bundled lineage→HxB2 maps (CH505, SF162p3, T250-4) so hiv_panel can place the region bar even when the alignment lacks an HxB2 row. New CLI/API surface: * hiv_panel: anchor_id, anchor_lineage, show_variant_labels, show_markers * CLI: --anchor-id, --anchor-lineage, --variant-labels, --markers/--no-markers * hxb2_variant_labels(): K169E-style mutation labels vs HxB2 * hxb2_col_labels(): always include first mapped column tick * hxb2_regions(): absorb gap columns instead of fragmenting bar * renderer: uuid-suffixed brick labels (fixes patchworklib reuse error across multiple render_panels calls in one process); footer height bump for legend clearance; font-size tweaks Package data: anchor reference fastas shipped via setuptools package-data declaration. Tests fixtures trimmed to 10 records each (full alignment width preserved) — repo no longer carries large sequence sets. .gitignore: add refs/, restore tests/output/, drop dead prototype entries. 
--- .gitignore | 18 +- pyproject.toml | 3 + src/tpixel/__init__.py | 8 + src/tpixel/_version.py | 28 +-- src/tpixel/anchors.py | 223 ++++++++++++++++++++++ src/tpixel/cli.py | 82 +++++++- src/tpixel/data/anchor_refs/CH505.fasta | 4 + src/tpixel/data/anchor_refs/SF162p3.fasta | 4 + src/tpixel/data/anchor_refs/T250-4.fasta | 4 + src/tpixel/hiv.py | 90 ++++++++- src/tpixel/hxb2.py | 114 +++++++++-- src/tpixel/renderer.py | 80 ++++---- tests/data/CH505.aln.fasta | 20 ++ tests/data/SF162p3.aln.fasta | 20 ++ tests/data/T205-4.fasta | 20 ++ tests/test_anchor_panel.py | 118 ++++++++++++ tests/test_anchors.py | 126 ++++++++++++ tests/test_t205_panel.py | 84 ++++++++ tests/test_variant_labels.py | 113 +++++++++++ 19 files changed, 1080 insertions(+), 79 deletions(-) create mode 100644 src/tpixel/anchors.py create mode 100644 src/tpixel/data/anchor_refs/CH505.fasta create mode 100644 src/tpixel/data/anchor_refs/SF162p3.fasta create mode 100644 src/tpixel/data/anchor_refs/T250-4.fasta create mode 100644 tests/data/CH505.aln.fasta create mode 100644 tests/data/SF162p3.aln.fasta create mode 100644 tests/data/T205-4.fasta create mode 100644 tests/test_anchor_panel.py create mode 100644 tests/test_anchors.py create mode 100644 tests/test_t205_panel.py create mode 100644 tests/test_variant_labels.py diff --git a/.gitignore b/.gitignore index e41c06b..f61169d 100644 --- a/.gitignore +++ b/.gitignore @@ -209,15 +209,19 @@ __marimo__/ # macOS .DS_Store +# IDE +.vscode/ +.cursor/ +.claude/ +.cursorignore +.cursorindexingignore + # Test output artifacts tests/output/ +# Local scratch reference fastas (not shipped) +refs/ + # Ad-hoc alignment and image files -top_env_protein_aligned.fasta -mid_env_protein_aligned.fasta -bot_env_protein_aligned.fasta -combined.png tpixel.skill -# Local prototype artifacts -pixel_logo_prototype.png -proto_pixel_logo.py + diff --git a/pyproject.toml b/pyproject.toml index 564417b..5028ee9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,9 @@ 
tpixel = "tpixel.cli:main" [tool.setuptools.packages.find] where = ["src"] +[tool.setuptools.package-data] +tpixel = ["data/anchor_refs/*.fasta"] + [tool.setuptools_scm] version_file = "src/tpixel/_version.py" local_scheme = "no-local-version" diff --git a/src/tpixel/__init__.py b/src/tpixel/__init__.py index bbbf836..25151b6 100644 --- a/src/tpixel/__init__.py +++ b/src/tpixel/__init__.py @@ -2,6 +2,11 @@ from importlib.metadata import PackageNotFoundError, version +from tpixel.anchors import ( + KNOWN_ANCHOR_LINEAGES, + build_anchor_hxb2_map, + detect_anchor_lineage, +) from tpixel.fasta import fasta_panel, read_fasta from tpixel.hiv import hiv_panel from tpixel.models import Marker, Panel, Region, SeqGroup @@ -13,10 +18,13 @@ __version__ = "0.0.0" __all__ = [ + "KNOWN_ANCHOR_LINEAGES", "Marker", "Panel", "Region", "SeqGroup", + "build_anchor_hxb2_map", + "detect_anchor_lineage", "fasta_panel", "hiv_panel", "panel_figsize", diff --git a/src/tpixel/_version.py b/src/tpixel/_version.py index 18bf27d..8f4f093 100644 --- a/src/tpixel/_version.py +++ b/src/tpixel/_version.py @@ -1,5 +1,6 @@ -# file generated by setuptools-scm +# file generated by vcs-versioning # don't change, don't track in version control +from __future__ import annotations __all__ = [ "__version__", @@ -10,25 +11,14 @@ "commit_id", ] -TYPE_CHECKING = False -if TYPE_CHECKING: - from typing import Tuple - from typing import Union - - VERSION_TUPLE = Tuple[Union[int, str], ...] - COMMIT_ID = Union[str, None] -else: - VERSION_TUPLE = object - COMMIT_ID = object - version: str __version__: str -__version_tuple__: VERSION_TUPLE -version_tuple: VERSION_TUPLE -commit_id: COMMIT_ID -__commit_id__: COMMIT_ID +__version_tuple__: tuple[int | str, ...] +version_tuple: tuple[int | str, ...] 
+commit_id: str | None +__commit_id__: str | None -__version__ = version = '0.1.dev1' -__version_tuple__ = version_tuple = (0, 1, 'dev1') +__version__ = version = '0.1.5.dev7' +__version_tuple__ = version_tuple = (0, 1, 5, 'dev7') -__commit_id__ = commit_id = 'g130e224c1' +__commit_id__ = commit_id = 'gcd6fbdfdc' diff --git a/src/tpixel/anchors.py b/src/tpixel/anchors.py new file mode 100644 index 0000000..4deb02d --- /dev/null +++ b/src/tpixel/anchors.py @@ -0,0 +1,223 @@ +"""Non-HxB2 anchor coordinate mapping for HIV Env alignments. + +When the user's alignment lacks the literal ``HxB2`` row (e.g. they want to +plot a single-reference panel against ``SF162p3_ref`` only), region bands and +the NT ruler still need HxB2 amino-acid coordinates. This module ships +pre-aligned ``(HxB2, lineage)`` AA pairs for known lineages and produces a +:class:`tpixel.hxb2.HxB2Position` list that the renderer consumes identically +to one built from a real HxB2 row. + +Bundled anchor lineages (``data/anchor_refs/{lineage}.fasta``): + +* ``CH505`` — CH505 T/F clone (from ``top_env_protein_aligned.fasta``). +* ``SF162p3`` — SF162p3 reference (from ``mid_env_protein_aligned.fasta``). +* ``T250-4`` — T250-4 reference (from ``bot_env_protein_aligned.fasta``). + +Each bundled FASTA has two records of equal column count: HxB2 first, lineage +second, both gapped against each other (``-``). Provenance is the project's +own MAFFT-derived panel alignments. +""" + +from __future__ import annotations + +import functools +from importlib import resources + +from tpixel.hxb2 import HxB2Position, _is_nucleotide, get_env_region + +KNOWN_ANCHOR_LINEAGES: tuple[str, ...] = ("CH505", "SF162p3", "T250-4") + + +def detect_anchor_lineage(seq_id: str) -> str | None: + """Return canonical lineage name if ``seq_id`` matches a known anchor. + + Args: + seq_id: Sequence ID, typically ``"_ref"``. + + Returns: + One of :data:`KNOWN_ANCHOR_LINEAGES` or ``None``. 
+ + Examples: + >>> detect_anchor_lineage("SF162p3_ref") + 'SF162p3' + >>> detect_anchor_lineage("CH505_ref") + 'CH505' + >>> detect_anchor_lineage("T250-4_ref") + 'T250-4' + >>> detect_anchor_lineage("foo_ref") is None + True + >>> detect_anchor_lineage("HxB2") is None + True + """ + base = seq_id.removesuffix("_ref") + return base if base in KNOWN_ANCHOR_LINEAGES else None + + +@functools.cache +def _load_anchor_pair(lineage: str) -> tuple[str, str]: + """Read the bundled ``(HxB2_aligned, lineage_aligned)`` AA pair. + + Args: + lineage: One of :data:`KNOWN_ANCHOR_LINEAGES`. + + Returns: + Tuple of two equal-length AA strings (HxB2, lineage), gapped with ``-``. + + Raises: + ValueError: If lineage unknown or bundle malformed. + """ + if lineage not in KNOWN_ANCHOR_LINEAGES: + raise ValueError( + f"Unknown anchor lineage '{lineage}'. Known: {KNOWN_ANCHOR_LINEAGES}" + ) + fasta_path = resources.files("tpixel") / "data" / "anchor_refs" / f"{lineage}.fasta" + text = fasta_path.read_text() + + records: list[tuple[str, list[str]]] = [] + current: list[str] | None = None + for line in text.splitlines(): + line = line.strip() + if not line: + continue + if line.startswith(">"): + current = [] + records.append((line[1:].split()[0], current)) + else: + assert current is not None + current.append(line.upper()) + + if len(records) != 2: + raise ValueError( + f"Anchor bundle {lineage} must have exactly 2 records, got {len(records)}" + ) + hxb2_id, hxb2_chunks = records[0] + lin_id, lin_chunks = records[1] + if hxb2_id != "HxB2": + raise ValueError(f"Anchor bundle {lineage} first record must be HxB2, got {hxb2_id}") + if lin_id != lineage: + raise ValueError( + f"Anchor bundle {lineage} second record must be {lineage}, got {lin_id}" + ) + hxb2_aligned = "".join(hxb2_chunks) + lin_aligned = "".join(lin_chunks) + if len(hxb2_aligned) != len(lin_aligned): + raise ValueError( + f"Anchor bundle {lineage} record lengths differ: " + f"HxB2={len(hxb2_aligned)} 
{lineage}={len(lin_aligned)}" + ) + return hxb2_aligned, lin_aligned + + +@functools.cache +def _build_lineage_to_hxb2_lookup( + lineage: str, +) -> tuple[list[int | None], list[str], str]: + """Build per-lineage-residue lookup of HxB2 AA position and residue. + + Args: + lineage: One of :data:`KNOWN_ANCHOR_LINEAGES`. + + Returns: + Tuple of: + * ``lineage_to_hxb2_aa`` — list indexed by 0-based lineage AA + position. Entry is the 1-based HxB2 AA position, or ``None`` + when the lineage residue is an insertion vs HxB2. + * ``lineage_to_hxb2_residue`` — list of HxB2 residue characters + (or ``"-"`` for insertions vs HxB2) at each lineage position. + * ``lineage_canonical_aa`` — ungapped lineage AA sequence, + for sanity-checking against the user's anchor row. + """ + hxb2_aligned, lin_aligned = _load_anchor_pair(lineage) + lineage_to_hxb2_aa: list[int | None] = [] + lineage_to_hxb2_residue: list[str] = [] + canonical_chars: list[str] = [] + hxb2_pos = 0 + for h_res, l_res in zip(hxb2_aligned, lin_aligned): + h_gap = h_res in ("-", ".") + l_gap = l_res in ("-", ".") + if not h_gap: + hxb2_pos += 1 + if not l_gap: + canonical_chars.append(l_res) + if h_gap: + lineage_to_hxb2_aa.append(None) + lineage_to_hxb2_residue.append("-") + else: + lineage_to_hxb2_aa.append(hxb2_pos) + lineage_to_hxb2_residue.append(h_res) + return lineage_to_hxb2_aa, lineage_to_hxb2_residue, "".join(canonical_chars) + + +def build_anchor_hxb2_map( + aligned_seqs: list[tuple[str, str]], + anchor_id: str, + lineage: str, + seq_type: str | None = None, +) -> list[HxB2Position]: + """Walk an anchor row to build an HxB2-coordinate map without an HxB2 row. + + The bundled lineage->HxB2 lookup supplies the HxB2 AA position for every + non-gap residue of the canonical lineage AA sequence. This function pairs + that lookup with the user's anchor row in their alignment, producing one + :class:`HxB2Position` per alignment column. 
+ + Args: + aligned_seqs: List of ``(name, sequence)`` from + :func:`tpixel.fasta.read_fasta`. + anchor_id: Sequence ID of the anchor row in ``aligned_seqs`` + (typically ``"_ref"``). + lineage: Anchor lineage. Must be in :data:`KNOWN_ANCHOR_LINEAGES`. + seq_type: ``"NT"`` or ``"AA"``. Auto-detected from the anchor row + when ``None``. + + Returns: + One :class:`HxB2Position` per alignment column. + + Raises: + ValueError: If ``anchor_id`` not found, lineage unknown, NT mode + requested (not yet supported), or the user's anchor row's + ungapped sequence does not match the bundled canonical. + """ + if lineage not in KNOWN_ANCHOR_LINEAGES: + raise ValueError( + f"Unknown anchor lineage '{lineage}'. Known: {KNOWN_ANCHOR_LINEAGES}" + ) + + anchor_seq: str | None = None + for name, seq in aligned_seqs: + if name == anchor_id or name.split()[0] == anchor_id: + anchor_seq = seq + break + if anchor_seq is None: + raise ValueError(f"Anchor sequence '{anchor_id}' not found in alignment") + + if seq_type is None: + seq_type = "NT" if _is_nucleotide(anchor_seq) else "AA" + if seq_type == "NT": + raise ValueError( + "Anchor mode currently supports AA alignments only; " + "include the HxB2 row for NT alignments, or run with --aa." + ) + + lin_to_hxb2_aa, lin_to_hxb2_res, canonical = _build_lineage_to_hxb2_lookup(lineage) + ungapped = "".join(c for c in anchor_seq.upper() if c not in ("-", ".")) + if ungapped != canonical: + raise ValueError( + f"Anchor row '{anchor_id}' (ungapped len={len(ungapped)}) does not match " + f"bundled canonical {lineage} (len={len(canonical)}). The bundled mapping " + f"is built from the project's panel alignments; sequence variants are not " + f"supported yet." 
+ ) + + positions: list[HxB2Position] = [] + lineage_pos = 0 + for col_idx, residue in enumerate(anchor_seq.upper()): + if residue in ("-", "."): + positions.append(HxB2Position(col_idx, None, None, residue)) + else: + hxb2_aa = lin_to_hxb2_aa[lineage_pos] + hxb2_res = lin_to_hxb2_res[lineage_pos] + lineage_pos += 1 + region = get_env_region(hxb2_aa) if hxb2_aa is not None else None + positions.append(HxB2Position(col_idx, hxb2_aa, region, hxb2_res)) + return positions diff --git a/src/tpixel/cli.py b/src/tpixel/cli.py index f1f0bd8..0096d01 100644 --- a/src/tpixel/cli.py +++ b/src/tpixel/cli.py @@ -4,6 +4,7 @@ import click +from tpixel.anchors import KNOWN_ANCHOR_LINEAGES, detect_anchor_lineage from tpixel.fasta import fasta_panel, read_fasta from tpixel.renderer import render_panels @@ -29,19 +30,30 @@ def _expand_stdin(paths: list[str]) -> list[str]: def _auto_detect_hiv(fasta_path: str) -> bool: - """Check if alignment contains HxB2 and a ``*_ref`` sequence. + """Check if alignment qualifies for HIV mode. + + HIV mode is triggered when either: + + * the alignment contains an ``HxB2`` row plus a ``*_ref`` row + (classic dual-reference layout), or + * the alignment lacks ``HxB2`` but contains a ``*_ref`` row whose + lineage prefix matches a known anchor (e.g. ``SF162p3_ref``, + ``CH505_ref``, ``T250-4_ref``) so the renderer can still place the + region bar via the bundled lineage→HxB2 mapping. Args: fasta_path: Path to the aligned FASTA file. Returns: - ``True`` if both HxB2 and a ``*_ref`` sequence are present. + ``True`` if HIV mode applies. 
""" seqs = read_fasta(fasta_path) names = {n.split()[0] for n, _ in seqs} has_hxb2 = "HxB2" in names - has_ref = any(n.endswith("_ref") for n in names) - return has_hxb2 and has_ref + refs = [n for n in names if n.endswith("_ref")] + if has_hxb2 and refs: + return True + return any(detect_anchor_lineage(n) is not None for n in refs) @click.command( @@ -94,7 +106,53 @@ def _auto_detect_hiv(fasta_path: str) -> bool: default=None, help="Title displayed above the plot.", ) -def main(fasta_args, fasta, columns, output, dpi, cell, hiv, nt, ref_pos, title): +@click.option( + "--variant-labels/--no-variant-labels", + "variant_labels", + default=False, + show_default=True, + help="Draw 'wildtype+pos+mutation' labels (e.g. K169E) under the x-axis " + "for every column where the lineage _ref differs from HxB2. HIV mode only. " + "Works in anchor mode too — labels are computed against the bundled HxB2 " + "residues carried by the lineage→HxB2 mapping.", +) +@click.option( + "--anchor-id", + default=None, + help="Sequence ID to use as the header coordinate anchor when HxB2 is " + "absent from the alignment. Defaults to the primary _ref row.", +) +@click.option( + "--anchor-lineage", + default=None, + type=click.Choice(list(KNOWN_ANCHOR_LINEAGES)), + help="Anchor lineage. Auto-detected from the anchor-id prefix if omitted " + "(e.g. SF162p3_ref → SF162p3). Ignored when HxB2 is in the alignment.", +) +@click.option( + "--markers/--no-markers", + "markers", + default=True, + show_default=True, + help="Show annotation markers above the reference row (currently PNGS " + "green dots in HIV mode). Use --no-markers to suppress them.", +) +def main( + fasta_args, + fasta, + columns, + output, + dpi, + cell, + hiv, + nt, + ref_pos, + title, + variant_labels, + markers, + anchor_id, + anchor_lineage, +): """Pixel-block alignment viewer for hundreds of sequences. Renders Roark-style PIXEL plots: grey=match, red=substitution, black=gap. 
@@ -128,8 +186,20 @@ def main(fasta_args, fasta, columns, output, dpi, cell, hiv, nt, ref_pos, title) seq_type = "NT" elif nt is False: seq_type = "AA" - panel = hiv_panel(fasta_path, ref_positions=ref_positions, seq_type=seq_type) + panel = hiv_panel( + fasta_path, + ref_positions=ref_positions, + seq_type=seq_type, + show_variant_labels=variant_labels, + show_markers=markers, + anchor_id=anchor_id, + anchor_lineage=anchor_lineage, + ) else: + if variant_labels: + raise click.UsageError( + "--variant-labels requires HIV mode" + ) panel = fasta_panel(fasta_path, col_start, col_end, ref_positions=ref_positions) if title: diff --git a/src/tpixel/data/anchor_refs/CH505.fasta b/src/tpixel/data/anchor_refs/CH505.fasta new file mode 100644 index 0000000..55a7fef --- /dev/null +++ b/src/tpixel/data/anchor_refs/CH505.fasta @@ -0,0 +1,4 @@ +>HxB2 +---MRVKEKYQHLWRWGWRWGTMLLG--MLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGK-IGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVA--------------EGTDRVIEVVQGAC-----------RAIRHIPRRIRQGLERILL +>CH505 
+MRVMGIQRNYPQWWIWS------MLGFWMLMICNG---MWVTVYYGVPVWKEAKTTLFCASDAKAYEKEVHNVWATHACVPTDPNPQEMVLKNVTENFNMWKNDMVDQMHEDVISLWDQSLKPCVKLTPLCVTLNCT---NATASNSS----IIEG--MKNCSFNITTELRDKREKKNALFYKLDIVQLDGNSSQYRLINCNTSVITQACPKVSFDPIPIHYCAPAGYAILKCNNKTFTGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEGEIIIRSENITNNVKTIIVHLNESVKIECTRPNNKTRTSIRI--GPGQAFYATGQVIGDIREAYCNINESKWNETLQRVSKKLKEYF-PHKNITFQPSSGGDLEITTHSFNCGGEFFYCNTSSLFNRTYMANSTDMANSTETNSTRTITIHCRIKQIINMWQEVGRAMYAPPIAGNITCISNITGLLLTRDGGKNNTET--FRPGGGNMKDNWRSELYKYKVVEVKPLGVAPTNARRRVVEREKRAVGMGAVFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQSNLLKAIEAQQHMLKLTVWGIKQLQARVLALERYLKDQQLLGMWGCSGKLICTTNVYWNSSWSNKTYGDIWDNMTWMQWEREISNYTEIIYELLEESQNQQEKNEQDLLALDRWNSLWNWFNITNWLWYIKIFIMIVGGLIGLRIIFAVLSLVNRVRQGYSPLSLQTLLPTPRGPARPEGIEEEGGEQGRDRSIRLLTGLSELIWDDLRNLCLFSYHHLRDLILIAARIVQLLGRRGWEALKYLWNILQYWIQELKNSAISLFDTIAIAVAYLQYGWSYFHEAVQAGWRSATETLAGAWGDLWETLRRGGRWILAIPRRIRQGLELTLL diff --git a/src/tpixel/data/anchor_refs/SF162p3.fasta b/src/tpixel/data/anchor_refs/SF162p3.fasta new file mode 100644 index 0000000..c3b18ee --- /dev/null +++ b/src/tpixel/data/anchor_refs/SF162p3.fasta @@ -0,0 +1,4 @@ +>HxB2 +MRVK---EKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGK-IGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTWSTEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNS-NNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGIEEEGGERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWEALKYWWNLLQYWSQELKNSAVSLLNATAIAVAEGTDRVIEVVQGACRAIRHIPRRIRQGLERILL +>SF162p3 
+MRVKGIRKNYQHL----WRGGTLLLGMLMICSAVEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEIVLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTNLENATNTTGSNWKE-MNRGEIKNCSFNVTTSIGNKMQKEYALFYKLDVVPIDNDNTXYNLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGSGPCINVSTVQCTHGIRPVVSTQLLLNGSLAEEGVVIRSENFTDNVKTIIVQLKESVEINCTRPNNNTRKSIPI--GPGKAFYATGDIIGDIRQAHCNISGEKWNNTLKQIVTKLQAQF-ENKTIVFKQSSGGDPEIVMHSFNCGGEFFYCNSTQLFNSTWNN----TIGPNNTNG--TITLPCRIKQIINRWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGREVSNTTEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVTLGAVFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLDQIWNNMTWMEWEREIGNYTNLIYTLIEESQNQQEKNEQELLELDKWASLWNWFDISKWLWYIKIFIMIVGGLVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGLDRPEGIEEEGGERDRDRSRPLVHGLLALIWDDLRSLCLFSYHRLRDLILIAARIVELLGRRGWEALKYWGNLLQYWIQELKNSAVSLFGAIAIAVAEGTDRIIEVAQRIGRAFLHIPRRIRQGLERTLL diff --git a/src/tpixel/data/anchor_refs/T250-4.fasta b/src/tpixel/data/anchor_refs/T250-4.fasta new file mode 100644 index 0000000..f98ccec --- /dev/null +++ b/src/tpixel/data/anchor_refs/T250-4.fasta @@ -0,0 +1,4 @@ +>HxB2 
+---MRVKEKYQHLWRWGWRWGTMLLGMLMICSATEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEVVLVNVTENFNMWKNDMVEQMHEDIISLWDQSLKPCVKLTPLCVSLKCTDLKNDTNTNSSSGRMIMEKGEIKNCSFNISTSIRGKVQKEYAFFYKLDIIPIDNDTTSYKLTSCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNNKTFNGTGPCTNVSTVQCTHGIRPVVSTQLLLNGSLAEEEVVIRSVNFTDNAKTIIVQLNTSVEINCTRPNNNTRKRIRIQRGPGRAFVTIGK-IGNMRQAHCNISRAKWNNTLKQIASKLREQFGNNKTIIFKQSSGGDPEIVTHSFNCGGEFFYCNSTQLFNSTWFNSTW--STEGSNNTEGSDTITLPCRIKQIINMWQKVGKAMYAPPISGQIRCSSNITGLLLTRDGGNSNNESEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVGIGALFLGFLGAAGSTMGAASMTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARILAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLEQIWNHTTWMEWDREINNYTSLIHSLIEESQNQQEKNEQELLELDKWASLWNWFNITNWLWYIKLFIMIVGGLVGLRIVFAVLSIVNRVRQGYSPLSFQTHLPTPRGPDRPEGIEEEG--------------------GERDRDRSIRLVNGSLALIWDDLRSLCLFSYHRLRDLLLIVTRIVELLGRRGWE-----ALKYWWNLLQYWSQELKNSAVSLLNATAIAVA--------------EGTDRVIEVVQGAC-----------RAIRHIPRRIRQGLERILL +>T250-4 +MRVMGIQRNYPPL----WRWGTMIFWMMMLCSA-EKLWVTVYYGVPVWREADTTLFCASDAKGYDTEAHNVWATHACVPTDPRPQEMYLENVTENFNMWKNSMVEQMHTDIISLWDESLKPCVKLTPLCVTLDCQAFNSSSHTNSS-----IAMQEMKNCSFNVTTELRDKKKKEYSFFYKTDIEQINKNGRQYRLINCNTSAITQACPKVSFEPIPIHFCAPAGFAILKCNEKHFNGKGPCKNVSTVQCTHGIKPVVSTQLLLNGSLAEEEVVIRVENTIDNAKTIIVQLAKPVKINCTRPNNNTRKSIRI--GPGQTFYATGDIIGNIRKAYCNVSKREWNNTLQQVAAQLSKSF-NNTKIVFEKHSGGDLEVITHMFVCGGEFFYCNTSGLFNSTWTNSTWTNSTTGSNGTESNDTITLQCEIKQFINMWQRVGRAMYAPPIPGVIRCESDITGLLLTRDGPNS-TQNETFRPGGGDMRDNWRSELYKYKVVQIEPLGVAPTHAKRRVVEREKRAVGLGAVFFGFLGAAGSTMGAASITLTVQARQLLSGIVQQQSNLLKAIEAQQQLLRLTVWGIKQLQARVLALERYLKDQQLLGIWGCSGKLICTTTVPWNSSWSNKNYTDIWDNMTWLQWDREISNYTDEIYRLIEQSQNQQEKNEQDLLALDKWASLWNWFDITNWLWYIKIFIMIVGGLIGLRIIFTVLNVINRVRQGYSPLSFQTLLPTPRGPARPEGIEEEG--------------------GEQGRDRSIRLLTGLSELIWDDLRNLCLFSYHHLRDLILIAARIVQLLGRRGWE-----ALKYLWNILQYWIQELKNSAISLFDTIAIAVAYLQYGWSYFHEAVQAGWRSATETLAGAWGDLWETLRRGGRWILAIPRRIRQGLELTLL diff --git a/src/tpixel/hiv.py b/src/tpixel/hiv.py index 104c39a..c4b7347 100644 --- a/src/tpixel/hiv.py +++ b/src/tpixel/hiv.py @@ -9,8 +9,19 @@ from collections 
import defaultdict from pathlib import Path +from tpixel.anchors import ( + KNOWN_ANCHOR_LINEAGES, + build_anchor_hxb2_map, + detect_anchor_lineage, +) from tpixel.fasta import read_fasta -from tpixel.hxb2 import _is_nucleotide, build_hxb2_map, hxb2_col_labels, hxb2_regions +from tpixel.hxb2 import ( + _is_nucleotide, + build_hxb2_map, + hxb2_col_labels, + hxb2_regions, + hxb2_variant_labels, +) from tpixel.models import Marker, Panel, Region, RowLabelMode, SeqGroup from tpixel.pngs import find_pngs_markers, find_pngs_markers_nt @@ -203,6 +214,10 @@ def hiv_panel( nt_ruler_step: int = 250, row_label_mode: RowLabelMode = "group_rollup", row_label_max_chars: int = 30, + show_variant_labels: bool = False, + show_markers: bool = True, + anchor_id: str | None = None, + anchor_lineage: str | None = None, ) -> Panel: """Build a full Roark-style Panel from an HIV Env alignment. @@ -243,6 +258,25 @@ def hiv_panel( row_label_max_chars: Truncation length for per-row labels. Default ``30`` — enough to distinguish typical SHIV/HIV SGS identifiers while keeping the left margin compact. + show_variant_labels: When ``True``, populate + ``panel.extra_col_labels`` with ``"wildtype+pos+mutation"`` + strings (e.g. ``"K169E"``) for every column where the primary + lineage ``_ref`` differs from HxB2. Renderer draws them as a + red tick row below the main x-axis. Position numbering + follows ``seq_type``: AA positions for amino-acid alignments, + NT positions for nucleotide alignments. + show_markers: When ``True`` (default), draw PNGS (N-linked + glycosylation site) markers as green dots above the reference + row. Set to ``False`` to suppress all marker annotations. + Named generically to allow future non-PNGS marker types. + anchor_id: Sequence ID to use as the header coordinate anchor when + HxB2 is missing from the alignment. Defaults to the primary + ``_ref`` row resolved via ``ref_id``/``ref_positions``. Ignored + when HxB2 is present. 
+ anchor_lineage: Anchor lineage (one of + :data:`tpixel.anchors.KNOWN_ANCHOR_LINEAGES`). Auto-detected + from ``anchor_id`` prefix when ``None``. Ignored when HxB2 is + present. Returns: Panel with regions, PNGS markers, grouped sequences, and HxB2 ticks. @@ -276,7 +310,40 @@ def hiv_panel( if seq_type is None: seq_type = "NT" if _is_nucleotide(ref_seq) else "AA" - hxb2_map = build_hxb2_map(seqs, hxb2_id, seq_type=seq_type) + hxb2_present = any( + n == hxb2_id or n.split()[0] == hxb2_id for n, _ in seqs + ) + if hxb2_present: + hxb2_map = build_hxb2_map(seqs, hxb2_id, seq_type=seq_type) + else: + resolved_anchor_id = anchor_id or ref_id + resolved_lineage = anchor_lineage or ( + detect_anchor_lineage(resolved_anchor_id) if resolved_anchor_id else None + ) + if resolved_anchor_id is None or resolved_lineage is None: + ref_rows = [n for n in names if n.endswith("_ref")] + anchor_hint = next( + ( + f"--ref-pos {names.index(n) + 1}" + for n in ref_rows + if detect_anchor_lineage(n) is not None + ), + None, + ) + hint = ( + f" Try `{anchor_hint} --anchor-lineage {detect_anchor_lineage(ref_rows[0])}`" + if anchor_hint and ref_rows and detect_anchor_lineage(ref_rows[0]) + else "" + ) + raise ValueError( + f"HxB2 sequence '{hxb2_id}' not in alignment and could not " + f"resolve an anchor lineage from anchor_id={resolved_anchor_id!r}. " + f"Pass anchor_lineage explicitly or point ref-pos at a known " + f"lineage _ref row. Known: {KNOWN_ANCHOR_LINEAGES}.{hint}" + ) + hxb2_map = build_anchor_hxb2_map( + seqs, resolved_anchor_id, resolved_lineage, seq_type=seq_type + ) regions = hxb2_regions(hxb2_map) # Apply region palette override (and merge same-color adjacent regions). 
@@ -297,7 +364,9 @@ def hiv_panel( nt_ruler_labels = None col_labels = hxb2_col_labels(hxb2_map, step=tick_step) - if seq_type == "NT": + if not show_markers: + markers = [] + elif seq_type == "NT": markers = find_pngs_markers_nt(ref_seq, hxb2_map) else: markers = find_pngs_markers(ref_seq, hxb2_map) @@ -344,6 +413,20 @@ def hiv_panel( ) secondary_ref_row = list(sec_seq.upper()) + extra_col_labels: list[tuple[int, str]] | None = None + if show_variant_labels: + hxb2_seq = seq_dict.get(hxb2_id) + if hxb2_seq is not None: + hxb2_row = list(hxb2_seq.upper()[:aln_len]) + hxb2_row += ["-"] * (aln_len - len(hxb2_row)) + else: + # No HxB2 row in alignment — synthesize from the anchor map's + # per-column HxB2 residues so K169E-style labels still work. + hxb2_row = [p.hxb2_residue for p in hxb2_map] + extra_col_labels = hxb2_variant_labels( + hxb2_row, ref_row, hxb2_map, seq_type=seq_type + ) + return Panel( label=ref_id, ref_row=ref_row, @@ -357,6 +440,7 @@ def hiv_panel( extra_ref_rows=extra_ref_rows, secondary_ref_row=secondary_ref_row, nt_ruler_labels=nt_ruler_labels, + extra_col_labels=extra_col_labels, row_label_mode=row_label_mode, row_label_max_chars=row_label_max_chars, ) diff --git a/src/tpixel/hxb2.py b/src/tpixel/hxb2.py index 4f0f1fa..52f0980 100644 --- a/src/tpixel/hxb2.py +++ b/src/tpixel/hxb2.py @@ -152,30 +152,122 @@ def build_hxb2_map( def hxb2_col_labels(hxb2_map: list[HxB2Position], step: int = 50) -> list[tuple[int, str]]: - """Build x-axis tick labels at regular HxB2 AA intervals.""" - max_pos = max((p.hxb2_aa_pos for p in hxb2_map if p.hxb2_aa_pos is not None), default=0) - labels: list[tuple[int, str]] = [] + """Build x-axis tick labels at regular HxB2 AA intervals. + + Always includes the first HxB2-mapped column as a tick so that + panels starting at a non-standard AA position (e.g. a windowed + alignment that begins mid-sequence) expose their starting + coordinate. 
Duplicates with the step-interval ticks are filtered + so a panel whose first AA lands exactly on a multiple of ``step`` + doesn't render two overlapping tick labels. + """ + mapped = [p for p in hxb2_map if p.hxb2_aa_pos is not None] + if not mapped: + return [] + first = mapped[0] + max_pos = mapped[-1].hxb2_aa_pos + labels: list[tuple[int, str]] = [(first.alignment_col, str(first.hxb2_aa_pos))] for target in range(step, max_pos + 1, step): - for p in hxb2_map: + for p in mapped: if p.hxb2_aa_pos == target: - labels.append((p.alignment_col, str(target))) + if p.alignment_col != labels[0][0]: + labels.append((p.alignment_col, str(target))) break return labels +def hxb2_variant_labels( + hxb2_row: list[str], + query_row: list[str], + hxb2_map: list[HxB2Position], + *, + seq_type: str = "AA", +) -> list[tuple[int, str]]: + """Return (column_index, ``"wildtype+pos+mutation"``) labels for every + column where *query_row* differs from *hxb2_row*. + + Label format follows the standard mutation shorthand: + + * Substitution → ``"K169E"`` (HxB2 residue + HxB2 position + query residue). + * Deletion (query gap) → ``"K169-"``. + * Insertion relative to HxB2 (HxB2 gap) → skipped (those columns have + no stable HxB2 position to print). + * Matching positions → skipped. + + For AA alignments the position is the 1-based HxB2 amino-acid position + (e.g. ``169``). For NT alignments it is the 1-based HxB2 nucleotide + position (e.g. ``507``). + + Args: + hxb2_row: Aligned HxB2 row (single-character strings). + query_row: Aligned query row to compare against HxB2 (same length). + hxb2_map: Output of :func:`build_hxb2_map` (one entry per column). + seq_type: ``"AA"`` or ``"NT"``. Controls whether positions are + reported in amino-acid or nucleotide coordinates. + + Returns: + Ordered list of ``(col_idx, label)`` pairs, one per variant column. 
+ + Examples: + >>> from tpixel.hxb2 import build_hxb2_map, hxb2_variant_labels + >>> seqs = [("HxB2", "MKRVK"), ("lin_ref", "MKEVK")] + >>> m = build_hxb2_map(seqs, hxb2_id="HxB2", seq_type="AA") + >>> hxb2_variant_labels(list("MKRVK"), list("MKEVK"), m, seq_type="AA") + [(2, 'R3E')] + """ + GAPS = {"-", ".", " "} + is_nt = seq_type == "NT" + + out: list[tuple[int, str]] = [] + nt_counter = 0 + for i, p in enumerate(hxb2_map): + hxb2_res = hxb2_row[i] if i < len(hxb2_row) else "-" + query_res = query_row[i] if i < len(query_row) else "-" + + if hxb2_res in GAPS: + continue + + if is_nt: + nt_counter += 1 + pos_label = str(nt_counter) + else: + if p.hxb2_aa_pos is None: + continue + pos_label = str(p.hxb2_aa_pos) + + if query_res == hxb2_res: + continue + + mut = "-" if query_res in GAPS else query_res.upper() + out.append((i, f"{hxb2_res.upper()}{pos_label}{mut}")) + + return out + + def hxb2_regions(hxb2_map: list[HxB2Position]) -> list[Region]: - """Build Region annotations from HxB2 position map.""" + """Build Region annotations from HxB2 position map. + + Columns whose ``region`` is ``None`` (HxB2 row gap, or lineage + insertion vs HxB2 in anchor mode) extend whichever region they fall + within rather than fragmenting the bar — small indels do not break + visual continuity. A region ends only when the position map either + transitions to a different named region or runs out of columns. + """ region_spans: list[tuple[str, int, int]] = [] current: str | None = None span_start = 0 for p in hxb2_map: r = p.region - if r != current: - if current is not None: - region_spans.append((current, span_start, p.alignment_col)) - current = r - span_start = p.alignment_col + if r is None: + # Stay inside the current span — gap column gets absorbed. 
+ continue + if r == current: + continue + if current is not None: + region_spans.append((current, span_start, p.alignment_col)) + current = r + span_start = p.alignment_col if current is not None: region_spans.append((current, span_start, len(hxb2_map))) diff --git a/src/tpixel/renderer.py b/src/tpixel/renderer.py index 13fb140..a3a973c 100644 --- a/src/tpixel/renderer.py +++ b/src/tpixel/renderer.py @@ -148,8 +148,9 @@ def _data_height(panel: Panel, *, show_footer: bool = True) -> float: y += REF_ROW_HEIGHT + REF_SEQ_PAD # primary ref seq_data_total = total_seqs * SEQ_DATA_ROW + max(0, n_groups - 1) * GROUP_DATA_GAP - # 0.5 gap before axis; 2.0 for legend or 0.8 for axis labels only - y += seq_data_total + 0.5 + (2.0 if show_footer else 0.8) + # 0.5 gap before axis; 3.0 for legend (clearance from rotated x-tick + # labels) or 0.8 for axis labels only + y += seq_data_total + 0.5 + (3.0 if show_footer else 0.8) # Account for title headroom in ylim if has_title: @@ -286,10 +287,19 @@ def render_panels( # Derive a shared inches-per-data-unit scale from the tallest panel # so chrome (regions, markers, ref rows) renders at identical physical # size across all panels regardless of sequence count. + import uuid + import patchworklib as pw pw.param["margin"] = 0 + # patchworklib enforces globally-unique Brick labels via a module-level + # _labelset, so reusing "panel_0"/"panel_1" across multiple + # render_panels calls (e.g. across pytest cases in one process) + # raises ValueError. Suffix every label with a fresh uuid to keep the + # labels stable within one call but unique across calls. 
+ call_id = uuid.uuid4().hex[:8] + last = len(panels) - 1 data_heights = [ _data_height(p, show_footer=(i == last)) @@ -303,7 +313,7 @@ def render_panels( bricks = [ to_patchwork( panel, - label=f"panel_{i}", + label=f"panel_{i}_{call_id}", figsize=(ref_w, data_heights[i] * shared_scale), show_footer=(i == last), ) @@ -404,7 +414,7 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: ) y_axis_pos = y_seq_start + seq_data_total + 0.5 - y_max = y_axis_pos + (2.0 if show_footer else 0.8) + y_max = y_axis_pos + (3.0 if show_footer else 0.8) ax.set_xlim(-aln_len * 0.08, aln_len * 1.02) ax.set_ylim(y_max, -0.5 if has_title else -0.1) @@ -416,7 +426,7 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: aln_len / 2, -0.3, panel.title, - fontsize=8, + fontsize=6, ha="center", va="bottom", fontweight="bold", @@ -458,7 +468,7 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: x, numeral_y, label, - fontsize=5, + fontsize=4, ha="center", va="bottom", color="#212121", @@ -484,7 +494,7 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: region.start + width / 2, (y_region_top + y_region_bot) / 2, region.name, - fontsize=10, + fontsize=6, ha="center", va="center", fontweight="bold", @@ -530,7 +540,7 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: col + 0.5, y_text, marker.label, - fontsize=2.5, + fontsize=4, ha="center", va=va, rotation=0, @@ -569,7 +579,7 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: -aln_len * 0.005, (y_eref + eref_bot) / 2, eref_label, - fontsize=10, + fontsize=6, ha="right", va="center", fontweight="bold", @@ -603,7 +613,7 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: -aln_len * 0.005, (y_ref_top + y_ref_bot) / 2, panel.label, - fontsize=10, + fontsize=6, ha="right", va="center", fontweight="bold", @@ -690,21 +700,20 @@ def _draw_panel(panel: 
Panel, ax: Axes, *, show_footer: bool = True) -> None: -aln_len * 0.005, y_center, label, - fontsize=8, + fontsize=6, ha="right", va="center", color="#424242", ) else: - # Per-row labels: many labels stacked tightly, so use a smaller - # fontsize (matching Layer 6 col-label ticks) to keep the left - # margin compact for panels with ~100+ data rows. + # Per-row labels: many labels stacked tightly. Match the unified + # 6pt body font; users wanting denser packing can tune row_label_max_chars. for y_center, row_label in per_row_label_positions: ax.text( -aln_len * 0.005, y_center, row_label, - fontsize=4, + fontsize=6, ha="right", va="center", color="#424242", @@ -752,7 +761,7 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: col_idx + 0.5, extra_y + 0.05, label, - fontsize=3.5, + fontsize=4, ha="right", va="top", rotation=45, @@ -761,8 +770,13 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: ) # -- Layer 7: Legend ------------------------------------------------------- + # Stats summary is drawn on EVERY panel so multi-panel compositions + # aren't missing per-panel metadata on the upper panels. Color + # swatches remain gated by ``show_footer`` so they only appear once + # in a stacked figure (typically the bottom panel). + legend_y = y_axis_pos + 2.4 + if show_footer: - legend_y = y_axis_pos + 1.8 legend_items = [ ("Match", MATCH_COLOR), ("Substitution", MISMATCH_COLOR), @@ -798,19 +812,19 @@ def _draw_panel(panel: Panel, ax: Axes, *, show_footer: bool = True) -> None: color="#424242", ) - # Stats summary on the right side of the legend - sample_word = "sample" if n_groups == 1 else "samples" - stats = ( - f"{total_seqs} sequences, " - f"{n_groups} {sample_word}, " - f"{aln_len} positions, {panel.seq_type}" - ) - ax.text( - aln_len * 1.0, - legend_y + 0.2, - stats, - fontsize=5, - ha="right", - va="center", - color="#757575", - ) + # Stats summary — always shown, even when color swatches are suppressed. 
+ sample_word = "sample" if n_groups == 1 else "samples" + stats = ( + f"{total_seqs} sequences, " + f"{n_groups} {sample_word}, " + f"{aln_len} positions, {panel.seq_type}" + ) + ax.text( + aln_len * 1.0, + legend_y + 0.2, + stats, + fontsize=5, + ha="right", + va="center", + color="#757575", + ) diff --git a/tests/data/CH505.aln.fasta b/tests/data/CH505.aln.fasta new file mode 100644 index 0000000..bcf2fa7 --- /dev/null +++ b/tests/data/CH505.aln.fasta @@ -0,0 +1,20 @@ +>CH505_ref +MRVMGIQRNYPQWWIWSMLGFWMLMICNGMWVTVYYGVPVWKEAKTTLFCASDAKAYEKEVHNVWATHACVPTDPNPQEMVLKNVTENFNMWKNDMVDQMHEDVISLWDQSLKPCVKLTPLCVTLNCTNATASNSSIIEGMKNCSFNITTELRDKREKKNALFYKLDIVQLDGNSSQYRLINCNTSVITQACPKVSFDPIPIHYCAPAGYAILKCNNKTFTGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEGEIIIRSENITNNVKTIIVHLNESVKIECTRPNNKTRTSIRIGPGQAFYATGQVIGDIREAYCNINESKWNETLQRVSKKLKEYFPHKNITFQPSSGGDLEITTHSFNCGGEFFYCNTSSLFNRTYMANSTDMANSTETNSTRTITIHCRIKQIINMWQEVGRAMYAPPIAGNITCISNITGLLLTRDGGKNNTETFRPGGGNMKDNWRSELYKYKVVEVKPLGVAPTNARRRVVEREKRAVGMGAVFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQSNLLKAIEAQQHMLKLTVWGIKQLQARVLALERYLKDQQLLGMWGCSGKLICTTNVYWNSSWSNKTYGDIWDNMTWMQWEREISNYTEIIYELLEESQNQQEKNEQDLLALDRWNSLWNWFNITNWLWYIKIFIMIVGGLIGLRIIFAVLSLVNRVRQGYSPLSLQTLLPTPRGPARPEGIEEEGGEQGRDRSIRLLTGLSELIWDDLRNLCLFSYHHLRDLILIAARIVQLLGRRGWEALKYLWNILQYWIQELKNSAISLFDTIAIAVAYLQYGWSYFHEAVQAGWRSATETLAGAWGDLWETLRRGGRWILAIPRRIRQGLELTLL +>CH505_TCTTTTAC 
+---MGIQRNYPQWWIWSMLGFWMLMICNGMWVTVYYGVPVWKEAKTTLFCASDAKAYEKEVHNVWATHACVPTDPNPQEMVLKNVTENFNMWKNDMVDQMHEDVISLWDQSLKPCVKLTPLCVTLNCTNATASNSSIIEGMKNCSFNITTELRDKREKKNALFYKLDIVQLDGNSSQYRLINCNTSVITQACPKVSFDPIPIHYCAPAGYAILKCNNKTFTGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEGEIIIRSENITNNVKTIIVHLNESVKIECTRPNNKTRTSIRIGPGQAFYATGQVIGDIREAYCNINESKWNETLQRVSKKLKEYFPHKNITFQPSSGGDLEITTHHFNCGGEFFYCNTSSLFNRTYMANSTDMANSTETNSTRTITIHCRIKQIINMWQEVGRAMYAPPIAGNITCISNITGLLLTRDGGKNNTETFRPGGGNMKDNWRSELYKYKVVEVKPLGVAPTNARRRVVEREKRAVGMGAVFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQSNLLKAIEAQQHMLKLTVWGIKQLQARVLALERYLKDQQLLGMWGCSGKLICTTNVYWNSSWSNKTYGDIWDNMTWMQWEREISNYTEIIYELLEESQNQQEKNEQDLLALDRWNSLWNWFNITNWLWYIKIFIMIVGGLIGLRIIFAVLSLVNRVRQGYSPLSLQTLLPTPRGPARPEGIEEEGGEQGRDRSIRLLTGLSELIWDDLRNLCLFSYHHLRDLILIAARIVQLLGRRGWEALKYLWNILQYWIQELKNSAISLFDTIAIAVAYLQYGWSYFHEAVQAGWRSATETLAGAWGDLWETLRRGGRWILAIPRRIRQGLELTLL +>CH505_TCTCTTTG +---MGIQRNYPQWWIWSMLGFWMLMICNGMWVTVYYGVPVWKEAKTTLFCASDAKAYEKEVHNVWATHACVPTDPNPQEMVLKNVTENFNMWKNDMVDQMHEDVISLWDQSLKPCVKMTPLCVTLNCTNATASNSSIIEGMKNCSFNITTELRDKREKKNALFYKLDIVQLDGNSSQYRLINCNTSVITQACPKVSFDPIPIHYCAPAGYAILKCNNKTFTGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEGEIIIRSENITNNVKTIIVHLNESVKIECTRPNNKTRTSIRIGPGQAFYATGQVIGDIREAYCNINESKWNETLQRVSKKLKEYFPHKNITFQPSSGGDLEITTHHFNCGGEFFYCNTSSLFNRTYMANSTDMANSTETNSTRTITIHCRIKQIINMWQEVGRAMYAPPIAGNITCISNITGLLLTRDGGKNNTETFRPGGGNMKDNWRSELYKYKVVEVKPLGVAPTNARRRVVEREKRAVGMGAVFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQSNLLKAIEAQQHMLKLTVWGIKQLQARVLALERYLKDQQLLGMWGCSGKLICTTNVYWNSSWSNKTYGDIWDNMTWMQWEREISNYTEIIYELLEESQNQQEKNEQDLLALDRWNSLWNWFNITNWLWYIKIFIMIVGGLIGLRIIFAVLSLVNRVRQGYSPLSLQTLLPTPRGPARPEGIEEEGGEQGRDRSIRLLTGLSELIWDDLRNLCLFSYHHLRDLILIAARIVQLLGRRGWEALKYLWNILQYWIQELKNSAISLFDTIAIAVAYLQYGWSYFHEAVQAGWRSATETLAGAWGDLWETLRRGGRWILAIPRRIRQGLELTLL +>CH505_TGACTTTA 
+---MGIQRNYPQWWIWSMLGFWMLMICNGMWVTVYYGVPVWKEAKTTLFCASDAKAYEKEVHNVWATHACVPTDPNPQEMVLKNVTENFNMWKNDMVDQMHEDVISLWDQSLKPCVKLTPLCVTLNCTNATASNSSIIEGMKNCSFNITTELRDKREKKNALFYKLDIVQLDGNSSQYRLINCNTSVITQACPKVSFDPIPIHYCAPAGYAILKCNNKTFTGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEGEIIIRSENITNNVKTIIVHLNESVKIECTRPNNKTRTSIRIGPGQAFYATGQVIGDIREAYCNINESKWNETLQRVSKKLKEYFPHKNITFQPSSGGDLEITTHHFNCGGEFFYCNTSSLFNRTYMANSTDMANSTETNSTRTITIHCRIKQIINMWQEVGRAMYAPPIAGNITCISNITGLLLTRDGGKNNTETFRPGGGNMKDNWRSELYKYKVVEVKPLGVAPTNARRRVVEREKRAVGMGAVFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQSNLLKAIEAQQHMLKLTVWGIKQLQARVLALERYLKDQQLLGMWGCSGKLICTTNVYWNSSWSNKTYGDIWDNMTWMQWEREISNYTEIIYELLEESQNQQEKNEQDLLALDRWNSLWNWFNITNWLWYIKIFIMIVGGLIGLRIIFAVLSLVNRVRQGYSPLSLQTLLPTPRGPARPEGIEEEGGEQGRDRSIRLLTGLSELIWDDLRNLCLFSYHHLRDLILIAARIVQLLGRRGWEALKYLWNILQYWIQELKNSAISLFDTIAIAVAYLQYGWSYFHEAVQAGWRSATETLAGAWGDLWETLRRGGRWILAIPRRIRQGLELTLL +>CH505_GTTGTCGT +---MGIQRNFPQWWIWSMLGFWMLMICNGMWVTVYYGVPVWKEAKTTLFCASDAKAYEKEVHNVWATHACVPTDPNPQEMVLKNVTENFNMWKNDMVDQMHEDVISLWDQSLKPCVKLTPLCVTLNCTNATASNSSIIEGMKNCSFNITTELRDKREKKNALFYKLDIVQLDGNSSQYRLINCNTSVITQACPKVSFDPIPIHYCAPAGYAILKCNNKTFTGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEGEIIIRSENITNNVKTIIVHLNESVKIECTRPNNKTRTSIRIGPGQALYATGQVIGDIREAYCNINESKWNETLQRVSKKLKEYFPHKNITFQPSSGGDLEITTHHFNCGGEFFYCNTSSLFNRTYMANSTDMANSTETNSTRTITIHCRIKQIINMWQEVGRAMYAPPIAGNITCISNITGLLLTRDGGKNNTETFRPGGGNMKDNWRSELYKYKVVEVKPLGVAPTNARRRVVEREKRAVGMGAVFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQSNLLKAIEAQQHMLKLTVWGIKQLQARVLALERYLKDQQLLGMWGCSGKLICTTNVYWNSSWSNKTYGDIWDNMTWMQWEREISNYTEIIYELLEESQNQQEKNEQDLLALDRWNSLWNWFNITNWLWYIKIFIMIVGGLIGLRIIFAVLSLVNRVRQGYSPLSLQTLLPTPRGPARPEGIEEEGGEQGRDRSIRLLTGLSELIWDDLRNLCLFSYHHLRDLILIAARIVQLLGRRGWEALKYLWNILQYWIQELKNSAISLFDTIAIAVAYLQYGWSYFHEAVQAGWRSATETLAGAWGDLWETLRRGGRWILAIPRRIRQGLELTLL +>CH505_GTGTATGC 
+---MGIQRNYPQWWIWSMLGFWMLMICNGMWVTVYYGVPVWKEAKTTLFCASDAKAYEKEVHNVWATHACVPTDPNPQEMVLKNVTENFNMWKNDMVDQMHEDVISLWDQSLKPCVKLTPLCVTLNCTNATASNSSIIEGMKNCSFNITTELRDKRGKKNALFYKLDIVQLDGNSSQYRLINCNTSVITQACPKVSFDPIPIHYCAPAGYAILKCNNKTFTGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEGEIIIRSENITNNVKTIIVHLNESVKIECTRPNNKTRTSIRIGPGQAFYATGQVIGDIREAYCNINESKWNETLQRVSKKLKEYFPHKNITFQPSSGGDLEITTHHFNCGGEFFYCNTSSLFNRTYMANSTDMANSTETNSTRTITIHCRIKQIINMWQEVGRAMYAPPIAGNITCISNITGLLLTRDGGKNNTETFRPGGGNMKDNWRSELYKYKVVEVKPLGVAPTNARRRVVEREKRAVGMGAVFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQSNLLKAIEAQQHMLKLTVWGIKQLQARVLALERYLKDQQLLGMWGCSGKLICTTNVYWNSSWSNKTYGDIWDNMTWMQWEREISNYTEIIYELLEESQNQQEKNEQDLLALDRWNSLWNWFNITNWLWYIKIFIMIVGGLIGLRIIFAVLSLVNRVRQGYSPLSLQTLLPTPRGPARPEGIEEEGGEQGRDRSIRLLTGLSELIWDDLRNLCLFSYHHLRDLILIAARIVQLLGRRGWEALKYLWNILQYWIQELKNSAISLFDTIAIAVAYLQYGWSYFHEAVQAGWRSATETLAGAWGDLWETLRRGGRWILAIPRRIRQGLELTLL +>CH505_TCTCGTGA +---MGIQRNYPQWWIWSMLGFWMLMICNGMWVTVYYGVPVWKEAKTTLFCASDAKAYEKEVHNVWATHACVPTDPNPQEMVLKNVTENFNMWKNDMVDQMHEDVISLWDQSLKPCVKLTPLCVTLNCTNATASNSSIIEGMKNCSFNITTELRDKREKKNALFYKLDIVQLDGNSSQYRLINCNTSVITQACPKVSFDPIPIHYCAPAGYAILKCNNKTFTGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEGEIIIRSENITNNVKTIIVHLNESVKIECTRPNNKTRTSIRIGPGQALYATGQVIGDIREAYCNINESKWNKTLQRVSKKLKEYFPHKNITFQPSSGGDLEITTHHFNCGGEFFYCNTSSLFNRTYMANSTDMANSTETNSTRTITIHCRIKQIINMWQEVGRAMYAPPIAGNITCISNITGLLLTRDGGKNNTETFRPGGGNMKDNWRSELYKYKVVEVKPLGVAPTNARRRVVEREKRAVGMGAVFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQSNLLKAIEAQQHMLKLTVWGIKQLQARVLALERYLKDQQLLGMWGCSGKLICTTNVYWNSSWSNKTYGDIWDNMTWMQWEREISNYTEIIYELLEESQNQQEKNEQDLLALDRWNSLWNWFNITNWLWYIKIFIMIVGGLIGLRIIFAVLSLVNRVRQGYSPLSLQTLLPTPRGPARPEGIEEEGGEQGRDRSIRLLTGLSELIWDDLRNLCLFSYHHLRDLILIAARIVQLLGRRGWEALKYLWNILQYWIQELKNSAISLFDTIAIAVAYLQYGWSYFHEAVQAGWRSATETLAGAWGDLWETLRRGGRWILAIPRRIRQGLELTLL +>CH505_TCTAACTT 
+---MGIQRNYPQWWIWSMLGFWMLMICNGMWVTVYYGVPVWKEAKTTLFCASDAKAYEKEVHNVWATHACVPTDPNPQEMVLKNVTENFNMWKNDMVDQMHEDVISLWDQSLKPCVKLTPLCVTLNCTNATASNSSIIEGMKNCSFNITTELRDKREKKNALFYKLDIVQLDGNSSQYRVINCNTSVITQACPKVSFDPIPIHYCAPAGYAILKCNNKTFTGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEGEIIIRSENITNNVKTIIVHLNESVKIECTRPNNKTRTSIRIGPGQAFYATGQVIGDIREAYCNINESKWNETLQRVSKKLKEYFPHKNITFQPSSGGDLEITTHHFNCGGEFFYCNTSSLFNRTYMANSTDMANSTETNSTRTITIHCRIKQIINMWQEVGRAMYAPPIAGNITCISNITGLLLTRDGGKNNTETFRPGGGNMKDNWRSELYKYKVVEVKPLGVAPTNARRRVVEREKRAVGMGAVFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQSNLLKAIEAQQHMLKLTVWGIKQLQARVLALERYLKDQQLLGMWGCSGKLICTTNVYWNSSWSNKTYGDIWDNMTWMQWEREISNYTEIIYELLEESQNQQEKNEQDLLALDRWNSLWNWFNITNWLWYIKIFIMIVGGLIGLRIIFAVLSLVNRVRQGYSPLSLQTLLPTPRGPARPEGIEEEGGEQGRDRSIRLLTGLSELIWDDLRNLCLFSYHHLRDLILIAARIVQLLGRRGWEALKYLWNILQYWIQELKNSAISLFDTIAIAVAYLQYGWSYFHEAVQAGWRSATETLAGAWGDLWETLRRGGRWILAIPRRIRQGLELTLL +>CH505_CTCTTAGA +---MGIQRNYPQWWIWSMLGFWMLMICNGMWVTVYYGVPVWKEAKTTLFCASDAKAYEKEVHNVWATHACVPTDPNPQEMVLKNVTENFNMWKNDMVDQMHEDVISLWDQSLKPCVKLTPLCVTLNCTNATASNSSIIEGMKNCSFNITTELRDKREKKNALFYKLDIVQLDGNSSQYRLINCNTSVITQACPKVSFDPIPIHYCAPAGYAILKCNNKTFTGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEGEIIIRSENITNNVKTIIVHLNESVKIECTRPNNKTRTSIRIGPGQAFYATGQVIGDIREAYCNINESKWNETLQRVSKKLKEYFPHKNITFQPSSGGDLEITTHHFNCGGEFFYCNTSSLFNRTYMANSTDMANSTETNSTRTITIHCRIKQIINMWQEVGRAMYAPPIAGNITCISNITGLLLTRDGGKNNTETFRPGGGNMKDNWRSELYKYKVVEVKPLGVAPTNARRRVVEREKRAVGMGAVFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQSNLLKAIEAQQHMLKLTVWGIKQLQARVLALERYLKDQQLLGMWGCSGKLICTTNVYWNSSWSNKTYGDIWDNMTWMQWEREISNYTEIIYELLEESQNQQEKNEQDLLALDRWNSLWNWFNITNWLWYIKIFIMIVGGLIGLRIIFAVLSLVNRVRQGYSPLSLQTLLPTPRGPARPEGIEEEGGEQGRDRSIRLLTGLSELIWDDLRNLCLFSYHHLRDLILIAARIVQLLGRRGWEALKYLWNILQYWIQELKNSAISLFDTIAIAVAYLQYGWSYFHEAVQAGWRSATETLAGAWGDLWETLRRGGRWILAIPRRIRQGLELTLL +>CH505_CTTCACGT 
+---MGIQRNYPQWWIWSMLGFWMLMICNGMWVTVYYGVPVWKEAKTTLFCASDAKAYEKEVHNVWATHACVPTDPNPQEMVLKNVTENFNMWKNDMVDQMHEDVISLWDQSLKPCVKLTPLCVTLNCTNATASNSSIIEGMKNCSFNITTELRDKREKKNALFYKLDIVQLDGNSSQYRLINCNTSVITQACPKVSFDPIPIHYCAPAGYAILKCNNKTFTGTGPCNNVSTVQCTHGIKPVVSTQLLLNGSLAEGEIIIRSENITNNVKTIIVHLNESVKIECTRPNNKTRTSIRIGPGQALYATGQVIGDIREAYCNINESKWNETLQRVSKKLKEYFPHKNITFQPSSGGDLEITTHHFNCGGEFFYCNTSSLFNRTYMANSTDMANSTETNSTRTITIHCRIKQIINMWQEVGRAMYAPPIAGNITCISNITGLLLTRDGGKNNTETFRPGGGNMKDNWRSELYKYKVVEVKPLGVAPTNARRRVVEREKRAVGMGAVFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQSNLLKAIEAQQHMLKLTVWGIKQLQARVLALERYLKDQQLLGMWGCSGKLICTTNVYWNSSWSNKTYGDIWDNMTWMQWEREISNYTEIIYELLEESQNQQEKNEQDLLALDRWNSLWNWFNITNWLWYIKIFIMIVGGLIGLRIIFAVLSLVNRVRQGYSPLSLQTLLPTPRGPARPEGIEEEGGEQGRDRSIRLLTGLSELIWDDLRNLCLFSYHHLRDLILIAARIVQLLGRRGWEALKYLWNILQYWIQELKNSAISLFDTIAIAVAYLQYGWSYFHEAVQAGWRSATETLAGAWGDLWETLRRGGRWILAIPRRIRQGLELTLL diff --git a/tests/data/SF162p3.aln.fasta b/tests/data/SF162p3.aln.fasta new file mode 100644 index 0000000..ed9c1a7 --- /dev/null +++ b/tests/data/SF162p3.aln.fasta @@ -0,0 +1,20 @@ +>SF162p3_ref +MRVKGIRKNYQHLWRGGTLLLGMLMICSAVEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEIVLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTNLENATNTTGSNWKEMNRGEIKNCSFNVTTSIGNKMQKEYALFYKLDVVPIDNDNTXYNLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGSGPCINVSTVQCTHGIRPVVSTQLLLNGSLAEEGVVIRSENFTDNVKTIIVQLKESVEINCTRPNNNTRKSIPIGPGKAFYATGDIIGDIRQAHCNISGEKWNNTLKQIVTKLQAQFENKTIVFKQSSGGDPEIVMHSFNCGGEFFYCNSTQLFNSTWNNTIGPNNTNGTITLPCRIKQIINRWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGREVSNTTEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVTLGAVFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNASWSNKSLDQIWNNMTWMEWEREIGNYTNLIYTLIEESQNQQEKNEQELLELDKWASLWNWFDISKWLWYIKIFIMIVGGLVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGLDRPEGIEEEGGERDRDRSRPLVHGLLALIWDDLRSLCLFSYHRLRDLILIAARIVELLGRRGWEALKYWGNLLQYWIQELKNSAVSLFGAIAIAVAEG----TDRI------IEVAQRIGRAFLHIPRRIRQGLERTLL-- +>CH505_GAACTAGT 
+----------------------MLMICSAVEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEIVLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTNLENATNTTSSNWKEMNRGEIKNCSFNVTTSIGNKMQKEYALFYKLDVVPIDNDNTSYNLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGSGPCINVSTVQCTHGIRPVVSTQLLLNGSLAEEGVVIRSENFTDNVKTIIVQLKESVEINCTRPNNNTRKSIPIGPGKAFYATGDIIGDIRQAHCNISGEKWNNTLKQIVTKLQAQFENKTIVFKQSSGGDPEIVMHSFNCGGEFFYCNSTQLFNSTWNNTIGPNNTNGTITLPCRIKQIINRWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGREVSNTTEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVTLGAVFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLDQIWNNMTWMEWEREIGNYTNLIYTLIEESQNQQEKNEQELLELDKWASLWNWFDISKWLWYIKIFIMIVGGLVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGLDRPEGIEEEGGERDRDRSRPLVHGLLALIWDDLRSLCLFSYHRLRDLILIAARIVELLGRRGWEALKYWGNLLQYWIQELKNSAVSLFGAIAIAVAEG----TDRI------IEVAQRIGRAFLHIPRRIRQGLERTLL-- +>CH505_CGGGACCG +----------------------MLMICSAVEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEIVLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTNLENATNTTSSNWKEMNRGEIKNCSFNVTTSIGNKMQKEYALFYKLDVVPIDNDNTSYNLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGSGPCINVSTVQCTHGIRPVVSTQLLLNGSLAEEGVVIRSENFTDNVKTIIVQLKESVEINCTRPNNNTRKSIPIGPGKAFYATGDIIGDIRQAHCNISGEKWNNTLKQIVTKLQAQFENKTIVFKQSSGGDPEIVMHSFNCGGEFFYCNSTQLFNSTWNNTIGPNNTNGTITLPCRIKQIINRWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGREVSNTTEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVTLGAVFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLDQIWNNMTWMEWEREIGNYTNLIYTLIEESQNQQEKNEQELLELDKWASLWNWFDISKWLWYIKIFIMIVGGLVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGLDRPEGIEEEGGERDRDRSRPLVHGLLALIWDDLRSLCLFSYHRLRDLILIAARIVELLGRRGWEALKYWGNLLQYWIQELKNSAVSLFGAIAIAVAEG----TDRI------IEVAQRIGRAFLHIPRRIRQGLERTLL-- +>CH505_TGGGTTAA 
+----------------------MLMICSAVEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEIVLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTNLENATNTTSSNWKEMNRGEIKNCSFNVTTSIGNKMQKEYALFYKLDVVPIDNDNTSYNLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGSGPCINVSTVQCTHGIRPVVSTQLLLNGSLAEEGVVIRSENFTDNVKTIIVQLKESVEINCTRPNNNTRKSIPIGPGKAFYATGDIIGDIRQAHCNISGEKWNNTLKQIVTKLQAQFENKTIVFKQSSGGDPEIVMHSFNCGGEFFYCNSTQLFNSTWNNTIGPNNTNGTITLPCRIKQIINRWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGREVSNTTEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVTLGAVFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLDQIWNNMTWMEWEREIGNYTNLIYTLIEESQNQQEKNEQELLELDKWASLWNWFDISKWLWYIKIFIMIVGGLVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGLDRPEGIEEEGGERDRDRSRPLVHGLLALIWDDLRSLCLFSYHRLRDLILIAARIVELLGRRGWEALKYWGNLLQYWIQELKNSAVSLFGAIAIAVAEG----TDRI------IEVAQRIGRAFLHIPRRIRQGLERTLL-- +>CH505_CTATTTTA +----------------------MLMICSAVEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEIVLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTNLENATNTTSSNWKEMNRGEIKNCSFNVTTSIGNKMQKEYALFYKLDVVPIDNDNTSYNLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGSGPCINVSTVQCTHGIRPVVSTQLLLNGSLAEEGVVIRSENFTDNVKTIIVQLKESVEINCTRPNNNTRKSIPIGPGKAFYATGDIIGDIRQAHCNISGEKWNNTLKQIVTKLQAQFENKTIVFKQSSGGDPEIVMHSFNCGGEFFYCNSTQLFNSTWNNTIGPNNTNGTITLPCRIKQIINRWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGREVSNTTEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVTLGAVFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLDQIWNNMTWMEWEREIGNYTNLIYTLIEESQNQQEKNEQELLELDKWASLWNWFDISKWLWYIKIFIMIVGGLVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGLDRPEGIEEEGGERDRDRSRPLVHGLLALIWDDLRSLCLFSYHRLRDLILIAARIVELLGRRGWEALKYWGNLLQYWIQELKNSAVSLFGAIAIAVAEG----TDRI------IEVAQRIGRAFLHIPRRIRQGLERTLL-- +>CH505_GAGTGTGC 
+----------------------MLMICSAVEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEIVLENVTENFNMWKNNMVEQMHEDIISLWDQGLKPCVKLTPLCVTLHCTNLENATNTTSSNWKEMNRGEIKNCSFNVTTSIGNKMQKEYALFYKLDVVPIDNDNTSYNLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGSGPCINVSTVQCTHGIRPVVSTQLLLNGSLAEEGVVIRSENFTDNVKTIIVQLKESVEINCTRPNNNTRKSIPIGPGKAFYATGDIIGDIRQAHCNISGEKWNNTLKQIVTKLQAQFENKTIVFKQSSGGDPEIVMHSFNCGGEFFYCNSTQLFNSTWNNTIGPNNTNGTITLPCRIKQIINRWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGREVSNTTEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVTLGAVFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLDQIWNNMTWMEWEREIGNYTNLIYTLIEESQNQQEKNEQELLELDKWASLWNWFDISKWLWYIKIFIMIVGGLVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGLDRPEGIEEEGGERDRDRSRPLVHGLLALIWDDLRSLCLFSYHRLRDLILIAARIVELLGRRGWEALKYWGNLLQYWIQELKNSAVSLFGAIAIAVAEG----TDRI------IEVAQRIGRAFLHIPRRIRQGLERTLL-- +>CH505_GTGCCTTC +----------------------MLMICSAVEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEIVLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTNLENATNTTSSNWKEMNRGEIKNCSFNVTTSTGNKMQKEYALFYKLDVVPIDNDNTSYNLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGSGPCINVSTVQCTHGIRPVVSTQLLLNGSLAEEGVVIRSENFTDNVKTIIVQLKESVEINCTRPNNNTRKSIPIGPGKAFYATGDIIGDIRQAHCNISGEKWNNTLKQIVTKLQAQFENKTIVFKQSSGGDPEIVMHSFNCGGEFFYCNSTQLFNSTWNNTIGPNNTNGTITLPCRIKQIINRWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGREVSNTTEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVTLGAVFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLDQIWNNMTWMEWEREIGNYTNLIYTLIEESQNQQEKNEQELLELDKWASLWNWFDISKWLWYIKIFIMIVGGLVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGLDRPEGIEEEGGERDRDRSRPLVHGLLALIWDDLRSLCLFSYHRLRDLILIAARIVELLGRRGWEALKYWGNLLQYWIQELKNSAVSLFGAIAIAVAEG----TDRI------IEVAQRIGRAFLHIPRRIRQGLERTLL-- +>CH505_GTTCTATA 
+----------------------MLMICSAVEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEIVLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTNLENATNTTSSNWKEMNRGEIKNCSFNVTTSIGNKMQKEYALFYKLDVVPIDNDNTSYNLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGSGPCINVSTVQCTHGIRPVVSTQLLLNGSLAEEGVVIRSENFTDNVKTIIVQLKESVEINCTRPNNNTRKSIPIGPGKAFYATGDIIGDIRQAHCNISGEKWNNTLKQIVTKLQAQFENKTIVFKQSSGGDPEIVMHSFNCGGEFFYCNSTQLFNSTWNNTIGPNNTNGTITLPCRIKQIINRWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGREVSNTTEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVTLGAVFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLDQIWNNMTWMEWEREIGNYTNLIYTLIEESQNQQEKNEQELLELDKWASLWNWFDISKWLWYIKIFIMIVGGLVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGLDRPEGIEEEGGERDRDRSRPLVHGLLALIWDDLRSLCLFSYHRLRDLILIAARIVELLGRRGWEALKYWGNLLQYWIQELKNSAVSLFGAIAIAVAEGDLWKTLRRGGR-WILAIPRRIRQGL------------ELTLL- +>CH505_ACGGCGTG +----------------------MLMICSAVEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEIVLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTNLENATNTTSSNWKEMNRGEIKNCSFNVTTSIGNKMQKEYALFYKLDVVPIDNDNTSYNLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGSGPCINVSTVQCTHGIRPVVSTQLLLNGSLAEEGVVIRSENFTDNVKTIIVQLKESVEINCTRPNNNTRKSIPIGPGKAFYATGDIIGDIRQAHCNISGEKWNNTLKQIVTKLQAQFENKTIVFKQSSGGDPEIVMHSFNCGGEFFYCNSTQLFNSTWNNTIGPNNTNGTITLPCRIKQIINRWQEVGKAMYAPPIRGQIRCSSNITGLLLTRDGGREVSNTTEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVTLGAVFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLDQIWNNMTWMEWEREIGNYTNLIYTLIEESQNQQEKNEQELLELDKWASLWNWFDISKWLWYIKIFIMIVGGLVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGLDRPEGIEEEGGERDRDRSRPLVHGLLALIWDDLRSLCLFSYHRLRDLILIAARIVELLGRRGWEALKYWGNLLQYWIQELKNSAVSLFGAIAIAVAEG----TDRI------IEVAQRIGRAFLHIPRRIRQGLERTLL-- +>CH505_AGGTTATC 
+----------------------MLMICSAVEKLWVTVYYGVPVWKEATTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEIVLENVTENFNMWKNNMVEQMHEDIISLWDQSLKPCVKLTPLCVTLHCTNLENATNTTSSNWKEMNRGEIKNCSFNVTTSIGNKMQKEYALFYKLDVVPIDNDNTSYNLINCNTSVITQACPKVSFEPIPIHYCAPAGFAILKCNDKKFNGSGPCINVSTVQCTHGIRPVVSTQLLLNGSLAEEGVVIRSENFTDNVKTIIVQLKESVEINCTRPNNNTRKSIPIGPGKAFYATGDIIGDIRQAHCNISGEKWNNTLKQIVTKLQAQFENKTIVFKQSSGGDPEIVMHSFNCGGEFFYCNSTQLFNSTWNNTIGPNNTNGTITLPCRIKQIINRWQEVGKAVYAPPIRGQIRCSSNITGLLLTRDGGREVSNTTEIFRPGGGDMRDNWRSELYKYKVVKIEPLGVAPTKAKRRVVQREKRAVTLGAVFLGFLGAAGSTMGAASLTLTVQARQLLSGIVQQQNNLLRAIEAQQHLLQLTVWGIKQLQARVLAVERYLKDQQLLGIWGCSGKLICTTAVPWNTSWSNKSLDQIWNNMTWMEWEREIGNYTNLIYTLIEESQNQQEKNEQELLELDKWASLWNWFDISKWLWYIKIFIMIVGGLVGLRIVFTVLSIVNRVRQGYSPLSFQTRFPAPRGLDRPEGIEEEGGERDRDRSRPLVHGLLALIWDDLRSLCLFSYHRLRDLILIAARIVELLGRRGWEALKYWGNLLQYWIQELKNSAVSLFGAIAIAVAEG----TDRI------IEVAQRIGRAFLHIPRRIRQGLERTLL-- diff --git a/tests/data/T205-4.fasta b/tests/data/T205-4.fasta new file mode 100644 index 0000000..744a53a --- /dev/null +++ b/tests/data/T205-4.fasta @@ -0,0 +1,20 @@ +>T250-4_ref 
+ATGAGAGTGATGGGGATACAGAGGAATTATCCACCCTTATGGAGATGGGGAACTATGATCTTTTGGATGATGATGCTTTGTAGTGCTGAAAAGTTATGGGTCACAGTCTACTATGGGGTACCTGTGTGGAGAGAAGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGATATGATACAGAAGCACATAATGTCTGGGCTACACATGCCTGTGTACCCACAGACCCCCGCCCACAAGAAATGTATTTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATAGCATGGTGGAACAAATGCACACAGATATAATTAGTCTATGGGACGAAAGCCTAAAGCCATGTGTGAAGTTAACCCCTCTCTGCGTTACTTTAGATTGTCAGGCCTTTAACAGCAGCAGCCATACCAACAGCAGCATAGCTATGCAAGAAATGAAAAACTGCTCTTTCAATGTAACCACAGAACTAAGAGATAAGAAAAAGAAAGAGTATTCATTTTTTTATAAAACTGATATAGAACAAATTAATAAAAATGGTAGGCAATACAGACTAATAAATTGTAATACTTCAGCCATTACACAGGCTTGTCCAAAGGTGTCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCAGCTGGTTTTGCGATTCTGAAGTGTAATGAGAAGCATTTCAATGGAAAAGGGCCATGCAAGAATGTCAGCACAGTACAATGCACACATGGAATCAAACCAGTGGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGAGTTGAAAATACCATAGACAATGCCAAAACCATAATAGTACAACTGGCTAAGCCTGTAAAAATTAATTGTACCAGACCTAACAACAATACAAGAAAAAGTATACGCATAGGACCAGGACAAACATTCTATGCAACAGGTGACATAATAGGGAATATAAGAAAAGCATATTGTAATGTCAGTAAAAGAGAATGGAATAACACTTTACAACAGGTAGCTGCACAATTAAGTAAGTCCTTTAACAACACAAAAATAGTCTTTGAGAAGCACTCAGGAGGGGATTTAGAAGTTATAACACATATGTTTGTTTGTGGAGGAGAATTTTTCTATTGCAATACATCAGGACTATTTAATAGCACTTGGACCAATAGCACTTGGACCAATAGCACCACTGGCTCAAATGGCACAGAGTCAAATGACACTATAACTCTCCAATGCGAAATAAAGCAATTTATAAATATGTGGCAGAGAGTAGGACGAGCAATGTATGCCCCTCCCATCCCAGGAGTGATAAGGTGTGAATCAGACATTACAGGACTACTATTAACAAGAGATGGACCGAATAGTACACAAAATGAGACATTCAGGCCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAGTATAAAGTAGTACAAATTGAACCACTAGGTGTGGCACCCACCCATGCAAAAAGAAGAGTGGTGGAGAGAGAAAAAAGAGCAGTTGGACTGGGAGCTGTCTTCTTTGGGTTCTTGGGAGCGGCAGGAAGCACTATGGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAGCAATTTGCTGAAAGCTATAGAGGCTCAACAACAACTGTTGAGACTCACGGTCTGGGGCATTAAACAGCTCCAGGCCAGAGTCCTGGCCCTGGAAAGATACCTAAAGGATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCCTGGAACTCTAGTTGGAGTAATAAAAATTATACTGACATATGGGATAACATGACCTGGCTGCAATGGGATAGAGAAATTAGCAATTACACAGATGAAATATATAGGCTCATTGAACAATCACAGAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATTGGACAAGTGGGCAAGTCTGTGGAATTGGTTTGACA
TAACAAACTGGCTATGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAATTTTTACTGTGCTTAATGTAATAAATAGAGTTAGGCAGGGATACTCACCTTTGTCATTCCAGACCCTCCTCCCAACCCCACGGGGACCCGCCAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGCAAGGCAGAGACAGATCCATTCGATTGCTGACCGGATTGTCAGAACTTATCTGGGACGACCTGAGGAACCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTAATTGCAGCGAGGATTGTGCAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATCTTTGGAACATCCTCCAGTATTGGATCCAGGAACTGAAGAATAGTGCTATCAGCTTGTTTGATACCATAGCAATAGCAGTAGCCTACCTACAATATGGGTGGAGCTATTTCCATGAGGCGGTCCAAGCCGGCTGGAGATCTGCGACAGAGACTCTTGCGGGCGCGTGGGGAGACTTATGGGAGACTCTTAGGAGAGGTGGAAGATGGATCCTCGCAATCCCTAGGAGGATTAGACAAGGGCTTGAGCTCACTCTCTTG +>T250-4_TTTTTTTT +---------ATGGGGATACAGAGGAATTATCCACCCTTATGGAGATGGGGAACTATGATCTTTTGGATGATGATGCTTTGTAGTGCTGAAAAGTTATGGGTCACAGTCTACTATGGGGTACCTGTGTGGAGAGAAGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGATATGATACAGAAGCACATAATGTCTGGGCTACACATGCCTGTGTACCCACAGACCCCCGCCCACAAGAAATGTATTTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATAGCATGGTGGAACAAATGCACACAGATATAATTAGTCTATGGGACGAAAGCCTAAAGCCATGTGTGAAGTTAACCCCTCTCTGCGTTACTTTAGATTGTCAGGCCTTTAACAGCAGCAGCCATACCAACAGCAGCATAGCTATGCAAGAAATGAAAAACTGCTCTTTCAATGTAACCACAGAACTAAGAGATAAGAAAAAGAAAGAGTATTCATTTTTTTATAAAACTGATATAGAACAAATTAATAAAAATGGTAGGCAATACAGACTAATAAATTGTAATACTTCAGCCATTACACAGGCTTGTCCAAAGGTGTCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCAGCTGGTTTTGCGATTCTGAAGTGTAATGAGAAGCATTTCAATGGAAAAGGGCCATGCAAGAATGTCAGCACAGTACAATGCACACATGGAATCAAACCAGTGGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGAGTTGAAAATACCATAGACAATGCCAAAACCATAATAGTACAACTGGCTAAGCCTGTAAAAATTAATTGTACCAGACCTAACAACAATACAAGAAAAAGTATACGCATAGGACCAGGACAAACATTCTATGCAACAGGTGACATAATAGGGAATATAAGAAAAGCATATTGTAATGTCAGTAAAAGAGAATGGAATAACACTTTACAACAGGTAGCTGCACAATTAAGTAAGTCCTTTAACAACACAAAAATAGTCTTTGAGAAGCACTCAGGAGGGGATTTAGAAGTTATAACACATTGGTTTGTTTGTGGAGGAGAATTTTTCTATTGCAATACATCAGGACTATTTAATAGCACTTGGACCAATAGCACTTGGACCAATAGCACCACTGGCTCAAATGGCACAGAGTCAAATGACACTATAACTCTCCAATGCGAAATAAAGCAATTTATAAATATGTGGCAGAGAGTAGGACGAGCAATGTATGCCCCTCCCATCCCAGGAGTGATAAGGTGTGAATCAGACATTACAGGACTACTATTAACAAGAGATGGACCGAATA
GTACACAAAATGAGACATTCAGGCCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAGTATAAAGTAGTACAAATTGAACCACTAGGTGTGGCACCCACCCATGCAAAAAGAAGAGTGGTGGAGAGAGAAAAAAGAGCAGTTGGACTGGGAGCTGTCTTCTTTGGGTTCTTGGGAGCGGCAGGAAGCACTATGGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAGCAATTTGCTGAAAGCTATAGAGGCTCAACAACAACTGTTGAGACTCACGGTCTGGGGCATTAAACAGCTCCAGGCCAGAGTCCTGGCCCTGGAAAGATACCTAAAGGATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCCTGGAACTCTAGTTGGAGTAATAAAAATTATACTGACATATGGGATAACATGACCTGGCTGCAATGGGATAGAGAAATTAGCAATTACACAGATGAAATATATAGGCTCATTGAACAATCACAGAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATTGGACAAGTGGGCAAGTCTGTGGAATTGGTTTGACATAACAAACTGGCTATGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAATTTTTACTGTGCTTAATGTAATAAATAGAGTTAGGCAGGGATACTCACCTTTGTCATTCCAGACCCTCCTCCCAACCCCACGGGGACCCGCCAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGCAAGGCAGAGACAGATCCATTCGATTGCTGACCGGATTGTCAGAACTTATCTGGGACGACCTGAGGAACCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTAATTGCAGCGAGGATTGTGCAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATCTTTGGAACATCCTCCAGTATTGGATCCAGGAACTGAAGAATAGTGCTATCAGCTTGTTTGATACCATAGCAATAGCAGTAGCCTACCTACAATATGGGTGGAGCTATTTCCATGAGGCGGTCCAAGCCGGCTGGAGATCTGCGACAGAGACTCTTGCGGGCGCGTGGGGAGACTTATGGGAGACTCTTAGGAGAGGTGGAAGATGGATCCTCGCAATCCCTAGGAGGATTAGACAAGGGCTTGAGCTCACTCTCTTG +>T250-4_TTTTATTT 
+---------ATGGGGATACAGAGGAATTATCCACCCTTATGGAGATGGGGAACTATGATCTTTTGGATGATGATGCTTTGTAGTGCTGAAAAGTTATGGGTCACAGTCTACTATGGGGTACCTGTGTGGAGAGAAGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGATATGATACAGAAGCACATAATGTCTGGGCTACACATGCCTGTGTACCCACAGACCCCCGCCCACAAGAAATGTATTTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATAGCATGGTGGAACAAATGCACACAGATATAATTAGTCTATGGGACGAAAGCCTAAAGCCATGTGTGAAGTTAACCCCTCTCTGCGTTACTTTAGATTGTCAGGCCTTTAACAGCAGCAGCCATACCAACAGCAGCATAGCTATGCAAGAAATGAAAAACTGCTCTTTCAATGTAACCACAGAACTAAGAGATAAGAAAAAGAAAGAGTATTCATTTTTTTATAAAACTGATATAGAACAAATTAATAAAAATGGTAGGCAATACAGACTAATAAATTGTAATACTTCAGCCATTACACAGGCTTGTCCAAAGGTGTCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCAGCTGGTTTTGCGATTCTGAAGTGTAATGAGAAGCATTTCAATGGAAAAGGGCCATGCAAGAATGTCAGCACAGTACAATGCACACATGGAATCAAACCAGTGGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGAGTTGAAAATACCATAGACAATGCCAAAACCATAATAGTACAACTGGCTAAGCCTGTAAAAATTAATTGTACCAGACCTAACAACAATACAAGAAAAAGTATACGCATAGGACCAGGACAAACATTCTATGCAACAGGTGACATAATAGGGAATATAAGAAAAGCATATTGTAATGTCAGTAAAAGAGAATGGAATAACACTTTACAACAGGTAGCTGCACAATTAAGTAAGTCCTTTAACAACACAAAAATAGTCTTTGAGAAGCACTCAGGAGGGGATTTAGAAGTTATAACACATTATTTTGTTTGTGGAGGAGAATTTTTCTATTGCAATACATCAGGACTATTTAATAGCACTTGGACCAATAGCACTTGGACCAATAGCACCACTGGCTCAAATGGCACAGAGTCAAATGACACTATAACTCTCCAATGCGAAATAAAGCAATTTATAAATATGTGGCAGAGAGTAGGACGAGCAATGTATGCCCCTCCCATCCCAGGAGTGATAAGGTGTGAATCAGACATTACAGGACTACTATTAACAAGAGATGGACCGAATAGTACACAAAATGAGACATTCAGGCCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAGTATAAAGTAGTACAAATTGAACCACTAGGTGTGGCACCCACCCATGCAAAAAGAAGAGTGGTGGAGAGAGAAAAAAGAGCAGTTGGACTGGGAGCTGTCTTCTTTGGGTTCTTGGGAGCGGCAGGAAGCACTATGGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAGCAATTTGCTGAAAGCTATAGAGGCTCAACAACAACTGTTGAGACTCACGGTCTGGGGCATTAAACAGCTCCAGGCCAGAGTCCTGGCCCTGGAAAGATACCTAAAGGATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCCTGGAACTCTAGTTGGAGTAATAAAAATTATACTGACATATGGGATAACATGACCTGGCTGCAATGGGATAGAGAAATTAGCAATTACACAGATGAAATATATAGGCTCATTGAACAATCACAGAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATTGGACAAGTGGGCAAGTCTGTGGAATTGGTTTGACA
TAACAAACTGGCTATGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAATTTTTACTGTGCTTAATGTAATAAATAGAGTTAGGCAGGGATACTCACCTTTGTCATTCCAGACCCTCCTCCCAACCCCACGGGGACCCGCCAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGCAAGGCAGAGACAGATCCATTCGATTGCTGACCGGATTGTCAGAACTTATCTGGGACGACCTGAGGAACCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTAATTGCAGCGAGGATTGTGCAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATCTTTGGAACATCCTCCAGTATTGGATCCAGGAACTGAAGAATAGTGCTATCAGCTTGTTTGATACCATAGCAATAGCAGTAGCCTACCTACAATATGGGTGGAGCTATTTCCATGAGGCGGTCCAAGCCGGCTGGAGATCTGCGACAGAGACTCTTGCGGGCGCGTGGGGAGACTTATGGGAGACTCTTAGGAGAGGTGGAAGATGGATCCTCGCAATCCCTAGGAGGATTAGACAAGGGCTTGAGCTCACTCTCTTG +>T250-4_TTAATTTT +---------ATGGGGATACAGAGGAATTATCCACCCTTATGGAGATGGGGAACTATGATCTTTTGGATGATGATGCTTTGTAGTGCTGAAAAGTTATGGGTCACAGTCTACTATGGGGTACCTGTGTGGAGAGAAGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGATATGATACAGAAGCACATAATGTCTGGGCTACACATGCCTGTGTACCCACAGACCCCCGCCCACAAGAAATGTATTTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATAGCATGGTGGAACAAATGCACACAGATATAATTAGTCTATGGGACGAAAGCCTAAAGCCATGTGTGAAGTTAACCCCTCTCTGCGTTACTTTAGATTGTCAGGCCTTTAACAGCAGCAGCCATACCAACAGCAGCATAGCTATGCAAGAAATGAAAAACTGCTCTTTCAATGTAACCACAGAACTAAGAGATAAGAAAAAGAAAGAGTATTCATTTTTTTATAAAACTGATATAGAACAAATTAATAAAAATGGTAGGCAATACAGACTAATAAATTGTAATACTTCAGCCATTACACAGGCTTGTCCAAAGGTGTCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCAGCTGGTTTTGCGATTCTGAAGTGTAATGAGAAGCATTTCAATGGAAAAGGGCCATGCAAGAATGTCAGCACAGTACAATGCACACATGGAATCAAACCAGTGGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGAGTTGAAAATACCATAGACAATGCCAAAACCATAATAGTACAACTGGCTAAGCCTGTAAAAATTAATTGTACCAGACCTAACAACAATACAAGAAAAAGTATACGCATAGGACCAGGACAAACATTCTATGCAACAGGTGACATAATAGGGAATATAAGAAAAGCATATTGTAATGTCAGTAAAAGAGAATGGAATAACACTTTACAACAGGTAGCTGCACAATTAAGTAAGTCCTTTAACAACACAAAAATAGTCTTTGAGAAGCACTCAGGAGGGGATTTAGAAGTTATAACACATTGGTTTGTTTGTGGAGGAGAATTTTTCTATTGCAATACATCAGGACTATTTAATAGCACTTGGACCAATAGCACTTGGACCAATAGCACCACTGGCTCAAATGGCACAGAGTCAAATGACACTATAACTCTCCAATGCGAAATAAAGCAATTTATAAATATGTGGCAGAGAGTAGGACGAGCAATGTATGCCCCTCCCATCCCAGGAGTGATAAGGTGTGAATCAGACATTACAGGACTACTATTAACAAGAGATGGACCGAATA
GTACACAAAATGAGACATTCAGGCCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAGTATAAAGTAGTACAAATTGAACCACTAGGTGTGGCACCCACCCATGCAAAAAGAAGAGTGGTGGAGAGAGAAAAAAGAGCAGTTGGACTGGGAGCTGTCTTCTTTGGGTTCTTGGGAGCGGCAGGAAGCACTATGGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAGCAATTTGCTGAAAGCTATAGAGGCTCAACAACAACTGTTGAGACTCACGGTCTGGGGCATTAAACAGCTCCAGGCCAGAGTCCTGGCCCTGGAAAGATACCTAAAGGATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCCTGGAACTCTAGTTGGAGTAATAAAAATTATACTGACATATGGGATAACATGACCTGGCTGCAATGGGATAGAGAAATTAGCAATTACACAGATGAAATATATAGGCTCATTGAACAATCACAGAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATTGGACAAGTGGGCAAGTCTGTGGAATTGGTTTGACATAACAAACTGGCTATGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAATTTTTACTGTGCTTAATGTAATAAATAGAGTTAGGCAGGGATACTCACCTTTGTCATTCCAGACCCTCCTCCCAACCCCACGGGGACCCGCCAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGCAAGGCAGAGACAGATCCATTCGATTGCTGACCGGATTGTCAGAACTTATCTGGGACGACCTGAGGAACCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTAATTGCAGCGAGGATTGTGCAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATCTTTGGAACATCCTCCAGTATTGGATCCAGGAACTGAAGAATAGTGCTATCAGCTTGTTTGATACCATAGCAATAGCAGTAGCCTACCTACAATATGGGTGGAGCTATTTCCATGAGGCGGTCCAAGCCGGCTGGAGATCTGCGACAGAGACTCTTGCGGGCGCGTGGGGAGACTTATGGGAGACTCTTAGGAGAGGTGGAAGATGGATCCTCGCAATCCCTAGGAGGATTAGACAAGGGCTTGAGCTCACTCTCTTG +>T250-4_TTTATTTT 
+---------ATGGGGATACAGAGGAATTATCCACCCTTATGGAGATGGGGAACTATGATCTTTTGGATGATGATGCTTTGTAGTGCTGAAAAGTTATGGGTCACAGTCTACTATGGGGTACCTGTGTGGAGAGAAGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGATATGATACAGAAGCACATAATGTCTGGGCTACACATGCCTGTGTACCCACAGACCCCCGCCCACAAGAAATGTATTTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATAGCATGGTGGAACAAATGCACACAGATATAATTAGTCTATGGGACGAAAGCCTAAAGCCATGTGTGAAGTTAACCCCTCTCTGCGTTACTTTAGATTGTCAGGCCTTTAACAGCAGCAGCCATACCAACAGCAGCATAGCTATGCAAGAAATGAAAAACTGCTCTTTCAATGTAACCACAGAACTAAGAGATAAGAAAAAGAAAGAGTATTCATTTTTTTATAAAACTGATATAGAACAAATTAATAAAAATGGTAGGCAATACAGACTAATAAATTGTAATACTTCAGCCATTACACAGGCTTGTCCAAAGGTGTCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCAGCTGGTTTTGCGATTCTGAAGTGTAATGAGAAGCATTTCAATGGAAAAGGGCCATGCAAGAATGTCAGCACAGTACAATGCACACATGGAATCAAACCAGTGGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGAGTTGAAAATACCATAGACAATGCCAAAACCATAATAGTACAACTGGCTAAGCCTGTAAAAATTAATTGTACCAGACCTAACAACAATACAAGAAAAAGTATACGCATAGGACCAGGACAAACATTCTATGCAACAGGTGACATAATAGGGAATATAAGAAAAGCATATTGTAATGTCAGTAAAAGAGAATGGAATAACACTTTACAACAGGTAGCTGCACAATTAAGTAAGTCCTTTAACAACACAAAAATAGTCTTTGAGAAGCACTCAGGAGGGGATTTAGAAGTTATAACACATTGGTTTGTTTGTGGAGGAGAATTTTTCTATTGCAATACATCAGGACTATTTAATAGCACTTGGACCAATAGCACTTGGACCAATAGCACCACTGGCTCAAATGGCACAGAGTCAAATGACACTATAACTCTCCAATGCGAAATAAAGCAATTTATAAATATGTGGCAGAGAGTAGGACGAGCAATGTATGCCCCTCCCATCCCAGGAGTGATAAGGTGTGAATCAGACATTACAGGACTACTATTAACAAGAGATGGACCGAATAGTACACAAAATGAGACATTCAGGCCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAGTATAAAGTAGTACAAATTGAACCACTAGGTGTGGCACCCACCCATGCAAAAAGAAGAGTGGTGGAGAGAGAAAAAAGAGCAGTTGGACTGGGAGCTGTCTTCTTTGGGTTCTTGGGAGCGGCAGGAAGCACTATGGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAGCAATTTGCTGAAAGCTATAGAGGCTCAACAACAACTGTTGAGACTCACGGTCTGGGGCATTAAACAGCTCCAGGCCAGAGTCCTGGCCCTGGAAAGATACCTAAAGGATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCCTGGAACTCTAGTTGGAGTAATAAAAATTATACTGACATATGGGATAACATGACCTGGCTGCAATGGGATAGAGAAATTAGCAATTACACAGATGAAATATATAGGCTCATTGAACAATCACAGAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATTGGACAAGTGGGCAAGTCTGTGGAATTGGTTTGACA
TAACAAACTGGCTATGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAATTTTTACTGTGCTTAATGTAATAAATAGAGTTAGGCAGGGATACTCACCTTTGTCATTCCAGACCCTCCTCCCAACCCCACGGGGACCCGCCAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGCAAGGCAGAGACAGATCCATTCGATTGCTGACCGGATTGTCAGAACTTATCTGGGACGACCTGAGGAACCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTAATTGCAGCGAGGATTGTGCAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATCTTTGGAACATCCTCCAGTATTGGATCCAGGAACTGAAGAATAGTGCTATCAGCTTGTTTGATACCATAGCAATAGCAGTAGCCTACCTACAATATGGGTGGAGCTATTTCCATGAGGCGGTCCAAGCCGGCTGGAGATCTGCGACAGAGACTCTTGCGGGCGCGTGGGGAGACTTATGGGAGACTCTTAGGAGAGGTGGAAGATGGATCCTCGCAATCCCTAGGAGGATTAGACAAGGGCTTGAGCTCACTCTCTTG +>T250-4_TATTTTTT +---------ATGGGGATACAGAGGAATTATCCACCCTTATGGAGATGGGGAACTATGATCTTTTGGATGATGATGCTTTGTAGTGCTGAAAAGTTATGGGTCACAGTCTACTATGGGGTACCTGTGTGGAGAGAAGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGATATGATACAGAAGCACATAATGTCTGGGCTACACATGCCTGTGTACCCACAGACCCCCGCCCACAAGAAATGTATTTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATAGCATGGTGGAACAAATGCACACAGATATAATTAGTCTATGGGACGAAAGCCTAAAGCCATGTGTGAAGTTAACCCCTCTCTGCGTTACTTTAGATTGTCAGGCCTTTAACAGCAGCAGCCATACCAACAGCAGCATAGCTATGCAAGAAATGAAAAACTGCTCTTTCAATGTAACCACAGAACTAAGAGATAAGAAAAAGAAAGAGTATTCATTTTTTTATAAAACTGATATAGAACAAATTAATAAAAATGGTAGGCAATACAGACTAATAAATTGTAATACTTCAGCCATTACACAGGCTTGTCCAAAGGTGTCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCAGCTGGTTTTGCGATTCTGAAGTGTAATGAGAAGCATTTCAATGGAAAAGGGCCATGCAAGAATGTCAGCACAGTACAATGCACACATGGAATCAAACCAGTGGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGAGTTGAAAATACCATAGACAATGCCAAAACCATAATAGTACAACTGGCTAAGCCTGTAAAAATTAATTGTACCAGACCTAACAACAATACAAGAAAAAGTATACGCATAGGACCAGGACAAACATTCTATGCAACAGGTGACATAATAGGGAATATAAGAAAAGCATATTGTAATGTCAGTAAAAGAGAATGGAATAACACTTTACAACAGGTAGCTGCACAATTAAGTAAGTCCTTTAACAACACAAAAATAGTCTTTGAGAAGCACTCAGGAGGGGATTTAGAAGTTATAACACATTGGTTTGTTTGTGGAGGAGAATTTTTCTATTGCAATACATCAGGACTATTTAATAGCACTTGGACCAATAGCACTTGGACCAATAGCACCACTGGCTCAAATGGCACAGAGTCAAATGACACTATAACTCTCCAATGCGAAATAAAGCAATTTATAAATATGTGGCAGAGAGTAGGACGAGCAATGTATGCCCCTCCCATCCCAGGAGTGATAAGGTGTGAATCAGACATTACAGGACTACTATTAACAAGAGATGGACCGAATA
GTACACAAAATGAGACATTCAGGCCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAGTATAAAGTAGTACAAATTGAACCACTAGGTGTGGCACCCACCCATGCAAAAAGAAGAGTGGTGGAGAGAGAAAAAAGAGCAGTTGGACTGGGAGCTGTCTTCTTTGGGTTCTTGGGAGCGGCAGGAAGCACTATGGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAGCAATTTGCTGAAAGCTATAGAGGCTCAACAACAACTGTTGAGACTCACGGTCTGGGGCATTAAACAGCTCCAGGCCAGAGTCCTGGCCCTGGAAAGATACCTAAAGGATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCCTGGAACTCTAGTTGGAGTAATAAAAATTATACTGACATATGGGATAACATGACCTGGCTGCAATGGGATAGAGAAATTAGCAATTACACAGATGAAATATATAGGCTCATTGAACAATCACAGAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATTGGACAAGTGGGCAAGTCTGTGGAATTGGTTTGACATAACAAACTGGCTATGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAATTTTTACTGTGCTTAATGTAATAAATAGAGTTAGGCAGGGATACTCACCTTTGTCATTCCAGACCCTCCTCCCAACCCCACGGGGACCCGCCAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGCAAGGCAGAGACAGATCCATTCGATTGCTGACCGGATTGTCAGAACTTATCTGGGACGACCTGAGGAACCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTAATTGCAGCGAGGATTGTGCAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATCTTTGGAACATCCTCCAGTATTGGATCCAGGAACTGAAGAATAGTGCTATCAGCTTGTTTGATACCATAGCAATAGCAGTAGCCTACCTACAATATGGGTGGAGCTATTTCCATGAGGCGGTCCAAGCCGGCTGGAGATCTGCGACAGAGACTCTTGCGGGCGCGTGGGGAGACTTATGGGAGACTCTTAGGAGAGGTGGAAGATGGATCCTCGCAATCCCTAGGAGGATTAGACAAGGGCTTGAGCTCACTCTCTTG +>T250-4_TTAGTTTT 
+---------ATGGGGATACAGAGGAATTATCCACCCTTATGGAGATGGGGAACTATGATCTTTTGGATGATGATGCTTTGTAGTGCTGAAAAGTTATGGGTCACAGTCTACTATGGGGTACCTGTGTGGAGAGAAGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGATATGATACAGAAGCACATAATGTCTGGGCTACACATGCCTGTGTACCCACAGACCCCCGCCCACAAGAAATGTATTTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATAGCATGGTGGAACAAATGCACACAGATATAATTAGTCTATGGGACGAAAGCCTAAAGCCATGTGTGAAGTTAACCCCTCTCTGCGTTACTTTAGATTGTCAGGCCTTTAACAGCAGCAGCCATACCAACAGCAGCATAGCTATGCAAGAAATGAAAAACTGCTCTTTCAATGTAACCACAGAACTAAGAGATAAGAAAAAGAAAGAGTATTCATTTTTTTATAAAACTGATATAGAACAAATTAATAAAAATGGTAGGCAATACAGACTAATAAATTGTAATACTTCAGCCATTACACAGGCTTGTCCAAAGGTGTCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCAGCTGGTTTTGCGATTCTGAAGTGTAATGAGAAGCATTTCAATGGAAAAGGGCCATGCAAGAATGTCAGCACAGTACAATGCACACATGGAATCAAACCAGTGGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGAGTTGAAAATACCATAGACAATGCCAAAACCATAATAGTACAACTGGCTAAGCCTGTAAAAATTAATTGTACCAGACCTAACAACAATACAAGAAAAAGTATACGCATAGGACCAGGACAAACATTCTATGCAACAGGTGACATAATAGGGAATATAAGAAAAGCATATTGTAATGTCAGTAAAAGAGAATGGAATAACACTTTACAACAGGTAGCTGCACAATTAAGTAAGTCCTTTAACAACACAAAAATAGTCTTTGAGAAGCACTCAGGAGGGGATTTAGAAGTTATAACACATTGGTTTGTTTGTGGAGGAGAATTTTTCTATTGCAATACATCAGGACTATTTAATAGCACTTGGACCAATAGCACTTGGACCAATAGCACCACTGGCTCAAATGGCACAGAGTCAAATGACACTATAACTCTCCAATGCGAAATAAAGCAATTTATAAATATGTGGCAGAGAGTAGGACGAGCAATGTATGCCCCTCCCATCCCAGGAGTGATAAGGTGTGAATCAGACATTACAGGACTACTATTAACAAGAGATGGACCGAATAGTACACAAAATGAGACATTCAGGCCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAGTATAAAGTAGTACAAATTGAACCACTAGGTGTGGCACCCACCCATGCAAAAAGAAGAGTGGTGGAGAGAGAAAAAAGAGCAGTTGGACTGGGAGCTGTCTTCTTTGGGTTCTTGGGAGCGGCAGGAAGCACTATGGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAGCAATTTGCTGAAAGCTATAGAGGCTCAACAACAACTGTTGAGACTCACGGTCTGGGGCATTAAACAGCTCCAGGCCAGAGTCCTGGCCCTGGAAAGATACCTAAAGGATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCCTGGAACTCTAGTTGGAGTAATAAAAATTATACTGACATATGGGATAACATGACCTGGCTGCAATGGGATAGAGAAATTAGCAATTACACAGATGAAATATATAGGCTCATTGAACAATCACAGAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATTGGACAAGTGGGCAAGTCTGTGGAATTGGTTTGACA
TAACAAACTGGCTATGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAATTTTTACTGTGCTTAATGTAATAAATAGAGTTAGGCAGGGATACTCACCTTTGTCATTCCAGACCCTCCTCCCAACCCCACGGGGACCCGCCAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGCAAGGCAGAGACAGATCCATTCGATTGCTGACCGGATTGTCAGAACTTATCTGGGACGACCTGAGGAACCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTAATTGCAGCGAGGATTGTGCAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATCTTTGGAACATCCTCCAGTATTGGATCCAGGAACTGAAGAATAGTGCTATCAGCTTGTTTGATACCATAGCAATAGCAGTAGCCTACCTACAATATGGGTGGAGCTATTTCCATGAGGCGGTCCAAGCCGGCTGGAGATCTGCGACAGAGACTCTTGCGGGCGCGTGGGGAGACTTATGGGAGACTCTTAGGAGAGGTGGAAGATGGATCCTCGCAATCCCTAGGAGGATTAGACAAGGGCTTGAGCTCACTCTCTTG +>T250-4_TTTAATTT +---------ATGGGGATACAGAGGAATTATCCACCCTTATGGAGATGGGGAACTATGATCTTTTGGATGATGATGCTTTGTAGTGCTGAAAAGTTATGGGTCACAGTCTACTATGGGGTACCTGTGTGGAGAGAAGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGATATGATACAGAAGCACATAATGTCTGGGCTACACATGCCTGTGTACCCACAGACCCCCGCCCACAAGAAATGTATTTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATAGCATGGTGGAACAAATGCACACAGATATAATTAGTCTATGGGACGAAAGCCTAAAGCCATGTGTGAAGTTAACCCCTCTCTGCGTTACTTTAGATTGTCAGGCCTTTAACAGCAGCAGCCATACCAACAGCAGCATAGCTATGCAAGAAATGAAAAACTGCTCTTTCAATGTAACCACAGAACTAAGAGATAAGAAAAAGAAAGAGTATTCATTTTTTTATAAAACTGATATAGAACAAATTAATAAAAATGGTAGGCAATACAGACTAATAAATTGTAATACTTCAGCCATTACACAGGCTTGTCCAAAGGTGTCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCAGCTGGTTTTGCGATTCTGAAGTGTAATGAGAAGCATTTCAATGGAAAAGGGCCATGCAAGAATGTCAGCACAGTACAATGCACACATGGAATCAAACCAGTGGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGAGTTGAAAATACCATAGACAATGCCAAAACCATAATAGTACAACTGGCTAAGCCTGTAAAAATTAATTGTACCAGACCTAACAACAATACAAGAAAAAGTATACGCATAGGACCAGGACAAACATTCTATGCAACAGGTGACATAATAGGGAATATAAGAAAAGCATATTGTAATGTCAGTAAAAGAGAATGGAATAACACTTTACAACAGGTAGCTGCACAATTAAGTAAGTCCTTTAACAACACAAAAATAGTCTTTGAGAAGCACTCAGGAGGGGATTTAGAAGTTATAACACATTGGTTTGTTTGTGGAGGAGAATTTTTCTATTGCAATACATCAGGACTATTTAATAGCACTTGGACCAATAGCACTTGGACCAATAGCACCACTGGCTCAAATGGCACAGAGTCAAATGACACTATAACTCTCCAATGCGAAATAAAGCAATTTATAAATATGTGGCAGAGAGTAGGACGAGCAATGTATGCCCCTCCCATCCCAGGAGTGATAAGGTGTGAATCAGACATTACAGGACTACTATTAACAAGAGATGGACCGAATA
GTACACAAAATGAGACATTCAGGCCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAGTATAAAGTAGTACAAATTGAACCACTAGGTGTGGCACCCACCCATGCAAAAAGAAGAGTGGTGGAGAGAGAAAAAAGAGCAGTTGGACTGGGAGCTGTCTTCTTTGGGTTCTTGGGAGCGGCAGGAAGCACTATGGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAGCAATTTGCTGAAAGCTATAGAGGCTCAACAACAACTGTTGAGACTCACGGTCTGGGGCATTAAACAGCTCCAGGCCAGAGTCCTGGCCCTGGAAAGATACCTAAAGGATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCCTGGAACTCTAGTTGGAGTAATAAAAATTATACTGACATATGGGATAACATGACCTGGCTGCAATGGGATAGAGAAATTAGCAATTACACAGATGAAATATATAGGCTCATTGAACAATCACAGAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATTGGACAAGTGGGCAAGTCTGTGGAATTGGTTTGACATAACAAACTGGCTATGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAATTTTTACTGTGCTTAATGTAATAAATAGAGTTAGGCAGGGATACTCACCTTTGTCATTCCAGACCCTCCTCCCAACCCCACGGGGACCCGCCAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGCAAGGCAGAGACAGATCCATTCGATTGCTGACCGGATTGTCAGAACTTATCTGGGACGACCTGAGGAACCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTAATTGCAGCGAGGATTGTGCAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATCTTTGGAACATCCTCCAGTATTGGATCCAGGAACTGAAGAATAGTGCTATCAGCTTGTTTGATACCATAGCAATAGCAGTAGCCTACCTACAATATGGGTGGAGCTATTTCCATGAGGCGGTCCAAGCCGGCTGGAGATCTGCGACAGAGACTCTTGCGGGCGCGTGGGGAGACTTATGGGAGACTCTTAGGAGAGGTGGAAGATGGATCCTCGCAATCCCTAGGAGGATTAGACAAGGGCTTGAGCTCACTCTCTTG +>T250-4_TTGTTTTC 
+---------ATGGGGATACAGAGGAATTATCCACCCTTATGGAGATGGGGAACTATGATCTTTTGGATGATGATGCTTTGTAGTGCTGAAAAGTTATGGGTCACAGTCTACTATGGGGTACCTGTGTGGAGAGAAGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGATATGATACAGAAGCACATAATGTCTGGGCTACACATGCCTGTGTACCCACAGACCCCCGCCCACAAGAAATGTATTTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATAGCATGGTGGAACAAATGCACACAGATATAATTAGTCTATGGGACGAAAGCCTAAAGCCATGTGTGAAGTTAACCCCTCTCTGCGTTACTTTAGATTGTCAGGCCTTTAACAGCAGCAGCCATACCAACAGCAGCATAGCTATGCAAGAAATGAAAAACTGCTCTTTCAATGTAACCACAGAACTAAGAGATAAGAAAAAGAAAGAGTATTCATTTTTTTATAAAACTGATATAGAACAAATTAATAAAAATGGTAGGCAATACAGACTAATAAATTGTAATACTTCAGCCATTACACAGGCTTGTCCAAAGGTGTCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCAGCTGGTTTTGCGATTCTGAAGTGTAATGAGAAGCATTTCAATGGAAAAGGGCCATGCAAGAATGTCAGCACAGTACAATGCACACATGGAATCAAACCAGTGGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGAGTTGAAAATACCATAGACAATGCCAAAACCATAATAGTACAACTGGCTAAGCCTGTAAAAATTAATTGTACCAGACCTAACAACAATACAAGAAAAAGTATACGCATAGGACCAGGACAAACATTCTATGCAACAGGTGACATAATAGGGAATATAAGAAAAGCATATTGTAATGTCAGTAAAAGAGAATGGAATAACACTTTACAACAGGTAGCTGCACAATTAAGTAAGTCCTTTAACAACACAAAAATAGTCTTTGAGAAGCACTCAGGAGGGGATTTAGAAGTTATAACACATTGGTTTGTTTGTGGAGGAGAATTTTTCTATTGCAATACATCAGGACTATTTAATAGCACTTGGACCAATAGCACTTGGACCAATAGCACCACTGGCTCAAATGGCACAGAGTCAAATGACACTATAACTCTCCAATGCGAAATAAAGCAATTTATAAATATGTGGCAGAGAGTAGGACGAGCAATGTATGCCCCTCCCATCCCAGGAGTGATAAGGTGTGAATCAGACATTACAGGACTACTATTAACAAGAGATGGACCGAATAGTACACAAAATGAGACATTCAGGCCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAGTATAAAGTAGTACAAATTGAACCACTAGGTGTGGCACCCACCCATGCAAAAAGAAGAGTGGTGGAGAGAGAAAAAAGAGCAGTTGGACTGGGAGCTGTCTTCTTTGGGTTCTTGGGAGCGGCAGGAAGCACTATGGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAGCAATTTGCTGAAAGCTATAGAGGCTCAACAACAACTGTTGAGACTCACGGTCTGGGGCATTAAACAGCTCCAGGCCAGAGTCCTGGCCCTGGAAAGATACCTAAAGGATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCCTGGAACTCTAGTTGGAGTAATAAAAATTATACTGACATATGGGATAACATGACCTGGCTGCAATGGGATAGAGAAATTAGCAATTACACAGATGAAATATATAGGCTCATTGAACAATCACAGAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATTGGACAAGTGGGCAAGTCTGTGGAATTGGTTTGACA
TAACAAACTGGCTATGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAATTTTTACTGTGCTTAATGTAATAAATAGAGTTAGGCAGGGATACTCACCTTTGTCATTCCAGACCCTCCTCCCAACCCCACGGGGACCCGCCAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGCAAGGCAGAGACAGATCCATTCGATTGCTGACCGGATTGTCAGAACTTATCTGGGACGACCTGAGGAACCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTAATTGCAGCGAGGATTGTGCAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATCTTTGGAACATCCTCCAGTATTGGATCCAGGAACTGAAGAATAGTGCTATCAGCTTGTTTGATACCATAGCAATAGCAGTAGCCTACCTACAATATGGGTGGAGCTATTTCCATGAGGCGGTCCAAGCCGGCTGGAGATCTGCGACAGAGACTCTTGCGGGCGCGTGGGGAGACTTATGGGAGACTCTTAGGAGAGGTGGAAGATGGATCCTCGCAATCCCTAGGAGGATTAGACAAGGGCTTGAGCTCACTCTCTTG +>T250-4_TTGTGATT +---------ATGGGGATACAGAGGAATTATCCACCCTTATGGAGATGGGGAACTATGATCTTTTGGATGATGATGCTTTGTAGTGCTGAAAAGTTATGGGTCACAGTCTACTATGGGGTACCTGTGTGGAGAGAAGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGATATGATACAGAAGCACATAATGTCTGGGCTACACATGCCTGTGTACCCACAGACCCCCGCCCACAAGAAATGTATTTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATAGCATGGTGGAACAAATGCACACAGATATAATTAGTCTATGGGACGAAAGCCTAAAGCCATGTGTGAAGTTAACCCCTCTCTGCGTTACTTTAGATTGTCAGGCCTTTAACAGCAGCAGCCATACCAACAGCAGCATAGCTATGCAAGAAATGAAAAACTGCTCTTTCAATGTAACCACAGAACTAAGAGATAAGAAAAAGAAAGAGTATTCATTTTTTTATAAAACTGATATAGAACAAATTAATAAAAATGGTAGGCAATACAGACTAATAAATTGTAATACTTCAGCCATTACACAGGCTTGTCCAAAGGTGTCCTTTGAGCCAATTCCCATACATTTTTGTGCCCCAGCTGGTTTTGCGATTCTGAAGTGTAATGAGAAGCATTTCAATGGAAAAGGGCCATGCAAGAATGTCAGCACAGTACAATGCACACATGGAATCAAACCAGTGGTATCAACTCAACTGCTGTTAAATGGCAGTCTAGCAGAAGAAGAGGTAGTAATTAGAGTTGAAAATACCATAGACAATGCCAAAACCATAATAGTACAACTGGCTAAGCCTGTAAAAATTAATTGTACCAGACCTAACAACAATACAAGAAAAAGTATACGCATAGGACCAGGACAAACATTCTATGCAACAGGTGACATAATAGGGAATATAAGAAAAGCATATTGTAATGTCAGTAAAAGAGAATGGAATAACACTTTACAACAGGTAGCTGCACAATTAAGTAAGTCCTTTAACAACACAAAAATAGTCTTTGAGAAGCACTCAGGAGGGGATTTAGAAGTTATAACACATTGGTTTGTTTGTGGAGGAGAATTTTTCTATTGCAATACATCAGGACTATTTAATAGCACTTGGACCAATAGCACTTGGACCAATAGCACCACTGGCTCAAATGGCACAGAGTCAAATGACACTATAACTCTCCAATGCGAAATAAAGCAATTTATAAATATGTGGCAGAGAGTAGGACGAGCAATGTATGCCCCTCCCATCCCAGGAGTGATAAGGTGTGAATCAGACATTACAGGACTACTATTAACAAGAGATGGACCGAATA
GTACACAAAATGAGACATTCAGGCCTGGAGGAGGAGATATGAGAGACAATTGGAGAAGTGAATTATATAAGTATAAAGTAGTACAAATTGAACCACTAGGTGTGGCACCCACCCATGCAAAAAGAAGAGTGGTGGAGAGAGAAAAAAGAGCAGTTGGACTGGGAGCTGTCTTCTTTGGGTTCTTGGGAGCGGCAGGAAGCACTATGGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAGAGCAATTTGCTGAAAGCTATAGAGGCTCAACAACAACTGTTGAGACTCACGGTCTGGGGCATTAAACAGCTCCAGGCCAGAGTCCTGGCCCTGGAAAGATACCTAAAGGATCAACAGCTCCTAGGAATTTGGGGCTGCTCTGGAAAACTCATCTGCACCACTACTGTGCCCTGGAACTCTAGTTGGAGTAATAAAAATTATACTGACATATGGGATAACATGACCTGGCTGCAATGGGATAGAGAAATTAGCAATTACACAGATGAAATATATAGGCTCATTGAACAATCACAGAACCAGCAGGAAAAGAATGAACAAGACTTATTGGCATTGGACAAGTGGGCAAGTCTGTGGAATTGGTTTGACATAACAAACTGGCTATGGTACATAAAAATATTTATAATGATAGTAGGAGGCTTGATAGGTTTAAGAATAATTTTTACTGTGCTTAATGTAATAAATAGAGTTAGGCAGGGATACTCACCTTTGTCATTCCAGACCCTCCTCCCAACCCCACGGGGACCCGCCAGGCCCGAAGGAATAGAAGAAGAAGGTGGAGAGCAAGGCAGAGACAGATCCATTCGATTGCTGACCGGATTGTCAGAACTTATCTGGGACGACCTGAGGAACCTGTGCCTCTTCAGCTACCACCACTTGAGAGACTTAATCTTAATTGCAGCGAGGATTGTGCAACTTCTGGGACGCAGGGGGTGGGAAGCCCTCAAATATCTTTGGAACATCCTCCAGTATTGGATCCAGGAACTGAAGAATAGTGCTATCAGCTTGTTTGATACCATAGCAATAGCAGTAGCCTACCTACAATATGGGTGGAGCTATTTCCATGAGGCGGTCCAAGCCGGCTGGAGATCTGCGACAGAGACTCTTGCGGGCGCGTGGGGAGACTTATGGGAGACTCTTAGGAGAGGTGGAAGATGGATCCTCGCAATCCCTAGGAGGATTAGACAAGGGCTTGAGCTCACTCTCTTG diff --git a/tests/test_anchor_panel.py b/tests/test_anchor_panel.py new file mode 100644 index 0000000..b441557 --- /dev/null +++ b/tests/test_anchor_panel.py @@ -0,0 +1,118 @@ +"""Integration tests: hiv_panel + render_panels with no-HxB2 alignments. + +Exercises the full pipeline a user hits when they drop HxB2 from their +alignment and expect the region bar / NT ruler to keep rendering. + +Fixtures live in ``tests/data/`` and are pre-stripped of HxB2 (only the +lineage ``_ref`` row plus samples). 
Dual-render visual comparisons +(`with HxB2` vs `without HxB2`) cannot be synthesised from these +fixtures alone — the bundled HxB2 has residues at columns where the +lineage has a gap, and those residues are lost when projecting back +into a no-HxB2 alignment, shifting AA numbering. For that reason the +tests below compare anchor mode against itself (CH505 vs SF162p3 +fixtures) rather than reconstructing a synthetic HxB2 row. +""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from tpixel.hiv import hiv_panel +from tpixel.renderer import render_panels + +DATA_DIR = Path(__file__).resolve().parent / "data" +ANCHOR_FIXTURES = { + "CH505": DATA_DIR / "CH505.aln.fasta", + "SF162p3": DATA_DIR / "SF162p3.aln.fasta", +} + + +@pytest.mark.parametrize("lineage", ["CH505", "SF162p3"]) +def test_hiv_panel_anchor_mode_produces_regions_and_ruler(lineage): + """hiv_panel must produce non-empty regions + NT ruler without HxB2.""" + src = ANCHOR_FIXTURES[lineage] + if not src.exists(): + pytest.skip(f"missing fixture {src}") + + panel = hiv_panel(str(src), ref_positions=[1], show_nt_ruler=True) + + assert panel.regions, "anchor mode must yield a non-empty region list" + assert panel.nt_ruler_labels, "anchor mode must yield NT ruler labels" + region_names = {r.name for r in panel.regions} + assert {"SP", "C1", "V1", "V2", "C2", "V3", "C3", "V4", "C4", "V5", "C5", "gp41"} <= region_names + + +@pytest.mark.parametrize("lineage", ["CH505", "SF162p3"]) +def test_anchor_mode_renders_to_png(output_dir, lineage): + """End-to-end render: no-HxB2 fixture -> PNG file on disk.""" + src = ANCHOR_FIXTURES[lineage] + if not src.exists(): + pytest.skip(f"missing fixture {src}") + + panel = hiv_panel(str(src), ref_positions=[1]) + out = output_dir / f"{lineage.lower()}_anchor_no_hxb2.png" + if out.exists(): + out.unlink() + render_panels([panel], str(out)) + assert out.exists() and out.stat().st_size > 0 + + +def test_anchor_mode_variant_labels_emit(): + 
"""--variant-labels in anchor mode must yield non-empty substitution labels.""" + src = ANCHOR_FIXTURES["CH505"] + if not src.exists(): + pytest.skip(f"missing fixture {src}") + + panel = hiv_panel(str(src), ref_positions=[1], show_variant_labels=True) + assert panel.extra_col_labels, "variant labels must be emitted in anchor mode" + subs = [lbl for _, lbl in panel.extra_col_labels if not lbl.endswith("-")] + assert subs, "anchor mode must emit substitution labels" + + +def test_anchor_mode_pngs_markers_present(): + """PNGS markers must persist in anchor mode.""" + src = ANCHOR_FIXTURES["CH505"] + if not src.exists(): + pytest.skip(f"missing fixture {src}") + + panel = hiv_panel(str(src), ref_positions=[1]) + assert panel.markers, "PNGS markers must persist in anchor mode" + + +def test_dual_anchor_renders_for_visual_comparison(output_dir): + """Render both tests/data fixtures side by side via the anchor pathway. + + Lands two PNGs in ``tests/output/`` so the user can confirm region + bars and PNGS markers survive on both lineages without HxB2. 
+ """ + rendered = [] + for lineage, src in ANCHOR_FIXTURES.items(): + if not src.exists(): + continue + panel = hiv_panel(str(src), ref_positions=[1]) + out = output_dir / f"dual_{lineage.lower()}_no_hxb2.png" + if out.exists(): + out.unlink() + render_panels([panel], str(out)) + assert out.exists() and out.stat().st_size > 0 + assert panel.regions, f"{lineage}: region header must persist" + assert panel.markers, f"{lineage}: PNGS markers must persist" + rendered.append(out) + assert len(rendered) >= 1 + + +def test_hiv_panel_explicit_anchor_kwargs(): + """anchor_id + anchor_lineage override the auto-detected default.""" + src = ANCHOR_FIXTURES["SF162p3"] + if not src.exists(): + pytest.skip(f"missing fixture {src}") + + panel = hiv_panel( + str(src), + ref_positions=[1], + anchor_id="SF162p3_ref", + anchor_lineage="SF162p3", + ) + assert panel.regions diff --git a/tests/test_anchors.py b/tests/test_anchors.py new file mode 100644 index 0000000..820e145 --- /dev/null +++ b/tests/test_anchors.py @@ -0,0 +1,126 @@ +"""Tests for tpixel.anchors — non-HxB2 anchor coordinate mapping.""" + +from __future__ import annotations + +import pytest + +from tpixel.anchors import ( + KNOWN_ANCHOR_LINEAGES, + _build_lineage_to_hxb2_lookup, + _load_anchor_pair, + build_anchor_hxb2_map, + detect_anchor_lineage, +) +from tpixel.hxb2 import HxB2Position + + +def test_known_lineages_includes_three_canonical_anchors(): + assert set(KNOWN_ANCHOR_LINEAGES) == {"CH505", "SF162p3", "T250-4"} + + +@pytest.mark.parametrize( + "seq_id,expected", + [ + ("SF162p3_ref", "SF162p3"), + ("CH505_ref", "CH505"), + ("T250-4_ref", "T250-4"), + ("HxB2", None), + ("foo_ref", None), + ("SF162p3", "SF162p3"), + ], +) +def test_detect_anchor_lineage(seq_id, expected): + assert detect_anchor_lineage(seq_id) == expected + + +@pytest.mark.parametrize("lineage", KNOWN_ANCHOR_LINEAGES) +def test_load_anchor_pair_yields_two_aligned_strings(lineage): + hxb2_aligned, lin_aligned = _load_anchor_pair(lineage) + assert 
len(hxb2_aligned) == len(lin_aligned) + assert len(hxb2_aligned) > 800 # gp160 ~856 AA, plus a handful of indels + # HxB2 ungapped length is fixed (canonical 856 AA). + assert len(hxb2_aligned.replace("-", "")) == 856 + + +@pytest.mark.parametrize("lineage", KNOWN_ANCHOR_LINEAGES) +def test_lineage_to_hxb2_lookup_consistent_with_pair(lineage): + aa_lookup, res_lookup, canonical = _build_lineage_to_hxb2_lookup(lineage) + assert len(aa_lookup) == len(res_lookup) == len(canonical) + # HxB2 positions in the lookup are 1-based and bounded by canonical HxB2 length. + mapped = [p for p in aa_lookup if p is not None] + assert mapped == sorted(mapped) + assert min(mapped) >= 1 + assert max(mapped) <= 856 + + +def test_build_anchor_hxb2_map_synthetic_alignment(): + """Walk a 5-column anchor row that matches the bundled canonical prefix.""" + _, _, canonical = _build_lineage_to_hxb2_lookup("SF162p3") + # Take the first 5 residues of the canonical lineage; embed in a fake + # alignment with no gaps in the anchor row. + anchor_row = canonical[:5] + assert len(anchor_row) == 5 + seqs = [("SF162p3_ref", anchor_row), ("sample_1", anchor_row)] + # Force the canonical sequence sanity-check to pass by extending the anchor + # row to the full canonical length (otherwise ungapped(anchor) != canonical). + anchor_row_full = canonical + seqs = [("SF162p3_ref", anchor_row_full), ("sample_1", anchor_row_full)] + + positions = build_anchor_hxb2_map(seqs, "SF162p3_ref", "SF162p3") + assert len(positions) == len(canonical) + # First position should map to HxB2 AA 1 (canonical Met start). + assert positions[0].alignment_col == 0 + assert positions[0].hxb2_aa_pos in (1, None) + # All positions are HxB2Position instances. + assert all(isinstance(p, HxB2Position) for p in positions) + # All non-gap columns yield a numeric or None hxb2_aa_pos and a non-empty residue. 
+ for p in positions: + assert isinstance(p.alignment_col, int) + assert p.hxb2_residue + + +def test_build_anchor_hxb2_map_preserves_alignment_columns(write_fasta): + """Anchor map length must equal alignment column count.""" + _, _, canonical = _build_lineage_to_hxb2_lookup("SF162p3") + # Insert a 3-residue gap in the anchor row at position 100. + gapped_anchor = canonical[:100] + "---" + canonical[100:] + path = write_fasta( + [("SF162p3_ref", gapped_anchor), ("sample_1", gapped_anchor)], + name="anchor_gapped.fasta", + ) + from tpixel.fasta import read_fasta + + seqs = read_fasta(path) + positions = build_anchor_hxb2_map(seqs, "SF162p3_ref", "SF162p3") + assert len(positions) == len(gapped_anchor) + # The 3 inserted gap columns have hxb2_aa_pos = None. + gap_cols = [p for p in positions[100:103]] + assert all(p.hxb2_aa_pos is None for p in gap_cols) + assert all(p.hxb2_residue == "-" for p in gap_cols) + + +def test_build_anchor_hxb2_map_rejects_unknown_lineage(): + seqs = [("SF162p3_ref", "MRVK")] + with pytest.raises(ValueError, match="Unknown anchor lineage"): + build_anchor_hxb2_map(seqs, "SF162p3_ref", "BG505") + + +def test_build_anchor_hxb2_map_rejects_missing_anchor(): + seqs = [("HxB2", "MRVK")] + with pytest.raises(ValueError, match="Anchor sequence 'SF162p3_ref' not found"): + build_anchor_hxb2_map(seqs, "SF162p3_ref", "SF162p3") + + +def test_build_anchor_hxb2_map_rejects_mismatched_anchor(): + """Anchor row whose ungapped form differs from bundled canonical errors.""" + fake = "MRVKEKYQHL" + "X" * 800 + seqs = [("SF162p3_ref", fake)] + with pytest.raises(ValueError, match="does not match bundled canonical"): + build_anchor_hxb2_map(seqs, "SF162p3_ref", "SF162p3") + + +def test_build_anchor_hxb2_map_rejects_nt_alignment(): + nt = "ATGCGT" * 200 + seqs = [("SF162p3_ref", nt)] + with pytest.raises(ValueError, match="AA alignments only"): + build_anchor_hxb2_map(seqs, "SF162p3_ref", "SF162p3", seq_type="NT") diff --git a/tests/test_t205_panel.py 
b/tests/test_t205_panel.py new file mode 100644 index 0000000..35fb4d5 --- /dev/null +++ b/tests/test_t205_panel.py @@ -0,0 +1,84 @@ +"""Tests for the T205-4 NT fixture and a stacked CH505-over-T205 render. + +T205-4.fasta is a 10-record nucleotide alignment whose ``_ref`` row is +``T250-4_ref`` (a known anchor lineage) but has no HxB2 row. Because anchor +mode currently rejects NT alignments, the plain ``fasta_panel`` is the only +supported path — that's the contract these tests pin down. +""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from tpixel import fasta_panel, hiv_panel, render_panels + +DATA_DIR = Path(__file__).resolve().parent / "data" +T205_FASTA = DATA_DIR / "T205-4.fasta" +CH505_FASTA = DATA_DIR / "CH505.aln.fasta" + + +def _require(path: Path) -> None: + if not path.exists(): + pytest.skip(f"missing fixture {path}") + + +def test_fasta_panel_t205_builds(): + """Plain fasta_panel must load the NT T205-4 fixture cleanly.""" + _require(T205_FASTA) + + panel = fasta_panel(str(T205_FASTA)) + + assert panel.total_cols == 2619 + assert panel.total_seqs == 9 # 10 records minus the primary ref + assert panel.label == "T205-4" + assert panel.ref_row[0] == "A" # NT alphabet sanity check + + +def test_hiv_panel_t205_nt_raises(): + """Anchor mode + NT is documented as unsupported — keep it that way.""" + _require(T205_FASTA) + + with pytest.raises(ValueError) as exc: + hiv_panel(str(T205_FASTA), ref_positions=[1]) + + msg = str(exc.value) + assert "Anchor mode" in msg + assert "AA" in msg + + +def test_t205_renders_to_png(output_dir): + """End-to-end: T205-4 fasta_panel renders to a non-empty PNG.""" + _require(T205_FASTA) + + panel = fasta_panel(str(T205_FASTA)) + out = output_dir / "t205_panel.png" + if out.exists(): + out.unlink() + + render_panels([panel], str(out)) + + assert out.exists() + assert out.stat().st_size > 0 + + +def test_stack_ch505_over_t205(output_dir): + """Stack CH505 (AA hiv_panel) above T205-4 (NT 
fasta_panel) in one render.""" + _require(CH505_FASTA) + _require(T205_FASTA) + + p_ch505 = hiv_panel(str(CH505_FASTA), ref_positions=[1]) + p_t205 = fasta_panel(str(T205_FASTA)) + + panels = [p_ch505, p_t205] + assert len(panels) == 2 + + out = output_dir / "stacked_ch505_over_t205.png" + if out.exists(): + out.unlink() + + render_panels(panels, str(out)) + + assert out.exists() + assert out.stat().st_size > 0 diff --git a/tests/test_variant_labels.py b/tests/test_variant_labels.py new file mode 100644 index 0000000..cee2afc --- /dev/null +++ b/tests/test_variant_labels.py @@ -0,0 +1,113 @@ +"""Tests for wildtype+pos+mutation variant labels vs HxB2.""" + +from __future__ import annotations + +import pytest + +from tpixel.hiv import hiv_panel +from tpixel.hxb2 import build_hxb2_map, hxb2_variant_labels + + +class TestHxb2VariantLabelsHelper: + def test_single_aa_substitution(self): + """Query differs by one AA → one label in K+pos+M form.""" + hxb2 = "MKRVK" + query = "MKEVK" + m = build_hxb2_map( + [("HxB2", hxb2), ("lin_ref", query)], hxb2_id="HxB2", seq_type="AA" + ) + out = hxb2_variant_labels(list(hxb2), list(query), m, seq_type="AA") + assert out == [(2, "R3E")] + + def test_identical_rows_produce_no_labels(self): + hxb2 = "MKRVK" + m = build_hxb2_map( + [("HxB2", hxb2), ("lin_ref", hxb2)], hxb2_id="HxB2", seq_type="AA" + ) + assert hxb2_variant_labels(list(hxb2), list(hxb2), m, seq_type="AA") == [] + + def test_deletion_emits_dash_mutation(self): + """Query gap at a position where HxB2 has a residue → 'K3-'.""" + hxb2 = "MKRVK" + query = "MK-VK" + m = build_hxb2_map( + [("HxB2", hxb2), ("lin_ref", query)], hxb2_id="HxB2", seq_type="AA" + ) + out = hxb2_variant_labels(list(hxb2), list(query), m, seq_type="AA") + assert out == [(2, "R3-")] + + def test_insertion_relative_to_hxb2_is_skipped(self): + """HxB2 gap column has no stable position → no label emitted.""" + # HxB2 gap at col 2; query has residue there (an insertion). 
+ hxb2 = "MK-VK" + query = "MKRVK" + m = build_hxb2_map( + [("HxB2", hxb2), ("lin_ref", query)], hxb2_id="HxB2", seq_type="AA" + ) + out = hxb2_variant_labels(list(hxb2), list(query), m, seq_type="AA") + assert out == [] + + def test_nt_mode_uses_nucleotide_positions(self): + """NT mode numbers positions by 1-based NT counter, not codon.""" + # 6 NT = 2 codons. Change col 4 (pos 5) from A→T. + hxb2 = "ATGAAA" + query = "ATGATA" + m = build_hxb2_map( + [("HxB2", hxb2), ("lin_ref", query)], hxb2_id="HxB2", seq_type="NT" + ) + out = hxb2_variant_labels(list(hxb2), list(query), m, seq_type="NT") + assert out == [(4, "A5T")] + + def test_multiple_variants_preserve_column_order(self): + hxb2 = "MKRVKE" + query = "MEEVKD" + m = build_hxb2_map( + [("HxB2", hxb2), ("lin_ref", query)], hxb2_id="HxB2", seq_type="AA" + ) + out = hxb2_variant_labels(list(hxb2), list(query), m, seq_type="AA") + assert out == [(1, "K2E"), (2, "R3E"), (5, "E6D")] + + +class TestHivPanelVariantLabelsWiring: + def _fasta(self, tmp_path, write_fasta): + return write_fasta( + [ + ("HxB2", "M" * 600), + ("animal1_ref", "M" * 168 + "E" + "M" * 431), # K169E equivalent: M vs E at pos 169 + ("animal1_s1", "M" * 600), + ], + name="variant.fasta", + ) + + def test_flag_off_leaves_extra_col_labels_unset(self, tmp_path, write_fasta): + panel = hiv_panel( + str(self._fasta(tmp_path, write_fasta)), + seq_type="AA", + show_variant_labels=False, + ) + assert panel.extra_col_labels is None + + def test_flag_on_populates_variant_labels(self, tmp_path, write_fasta): + panel = hiv_panel( + str(self._fasta(tmp_path, write_fasta)), + seq_type="AA", + show_variant_labels=True, + ) + assert panel.extra_col_labels is not None + # One variant: HxB2 M vs lineage E at column 168 (AA pos 169). 
+ assert panel.extra_col_labels == [(168, "M169E")] + + def test_renderer_consumes_variant_labels_without_error( + self, tmp_path, write_fasta, output_dir + ): + """Full render path including Layer 6b must succeed.""" + from tpixel.renderer import render_panels + + panel = hiv_panel( + str(self._fasta(tmp_path, write_fasta)), + seq_type="AA", + show_variant_labels=True, + ) + out = output_dir / "variant_labels.png" + render_panels([panel], out, dpi=72) + assert out.exists() and out.stat().st_size > 0