Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ Thumbs.db
data/bactopia.json
data/cli.json
TODO.md
__pycache__/
30 changes: 30 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"ASMNGS",
"aspera",
"authorships",
"Bacto",
"Bactopia",
"bakta",
"basepair",
Expand All @@ -34,12 +35,15 @@
"contigs",
"cpus",
"Csvtk",
"cyvcf",
"datahub",
"dockerbuild",
"Dragonflye",
"easyops",
"EDLB",
"emmtyper",
"endfor",
"Enterobase",
"EOSS",
"fasp",
"FASTA",
Expand All @@ -54,36 +58,55 @@
"ftype",
"gaeip",
"genbank",
"Geno",
"genotyphi",
"groovydoc",
"GTDB",
"Gubbins",
"Haemophilus",
"Haha",
"Hier",
"htslib",
"ICEID",
"influenzae",
"Kleborate",
"Kmer",
"kmers",
"Legsta",
"llms",
"Mashpit",
"metagenomic",
"MKDOCS",
"mlst",
"MRSA",
"mykrobe",
"Nanoplot",
"Nanopore",
"NATA",
"NCBI",
"Neisseria",
"nextflow",
"nfconfig",
"ngmaster",
"NIAID",
"noopener",
"nullarbor",
"OPENALEX",
"panaroo",
"pbptyper",
"pggb",
"phix",
"PHRED",
"Pigz",
"PMGA",
"pneumoniae",
"porechop",
"Português",
"PRJNA",
"Prokka",
"Pseudomonas",
"pymummer",
"pyrodigal",
"pysam",
"pytest",
"pyyaml",
Expand All @@ -95,19 +118,26 @@
"samplesheet",
"sccmec",
"Seqera",
"Sero",
"Seroba",
"serogrouping",
"shigatyper",
"Shovill",
"silico",
"sistr",
"slurm",
"smoove",
"spatyper",
"Ssuis",
"Staphopia",
"subsampling",
"subworkflow",
"subworkflows",
"tabix",
"taxid",
"trimq",
"typer",
"Typhi",
"Unicycler",
"wyphd"
]
Expand Down
26 changes: 25 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ BACTOPIA_REPO ?=

BACTOPIA_DEV_PYTHON ?= /home/rpetit3/.conda/envs/bactopia-dev/bin/python

.PHONY: generate parse copy-changelog generate-workflows generate-subworkflows generate-modules generate-citations generate-acknowledgements generate-enhancements parse-cli generate-cli generate-tools-index update-citations generate-llms-catalog llms-catalog clean-generated snapshot-list snapshot-add snapshot-deactivate snapshot-activate
# Every target below is a command, not a file; translate-full is listed with its siblings so a stray file of that name cannot mask it.
.PHONY: generate parse copy-changelog generate-workflows generate-subworkflows generate-modules generate-citations generate-acknowledgements generate-enhancements parse-cli generate-cli generate-tools-index update-citations generate-llms-catalog llms-catalog clean-generated snapshot-list snapshot-add snapshot-deactivate snapshot-activate translate translate-sync translate-full translate-verify

generate: parse copy-changelog generate-workflows generate-subworkflows generate-modules generate-citations generate-acknowledgements generate-enhancements parse-cli generate-cli generate-tools-index

Expand Down Expand Up @@ -49,6 +49,30 @@ generate-llms-catalog:

llms-catalog: generate-llms-catalog

# --- Translation ---

LOCALES ?= pt

translate: translate-sync

# Sync translations for each locale in LOCALES.
# The explicit `|| exit 1` aborts on the first failing locale; without it the
# loop's exit status is that of the last iteration only, so earlier failures
# would go unreported by make.
translate-sync:
	@for locale in $(LOCALES); do \
		echo "Syncing translations for $$locale..."; \
		$(BACTOPIA_DEV_PYTHON) -m bin.translate sync --locale $$locale || exit 1; \
	done

# Run `sync --full` for each locale in LOCALES.
# The explicit `|| exit 1` aborts on the first failing locale; without it the
# loop's exit status is that of the last iteration only, so earlier failures
# would go unreported by make.
translate-full:
	@for locale in $(LOCALES); do \
		echo "Full retranslation for $$locale..."; \
		$(BACTOPIA_DEV_PYTHON) -m bin.translate sync --locale $$locale --full || exit 1; \
	done

# Verify translations for each locale in LOCALES.
# The explicit `|| exit 1` makes the target fail as soon as any locale fails
# verification; without it only the last locale's result would decide the
# exit status seen by make.
translate-verify:
	@for locale in $(LOCALES); do \
		echo "Verifying translations for $$locale..."; \
		$(BACTOPIA_DEV_PYTHON) -m bin.translate verify --locale $$locale || exit 1; \
	done

clean-generated:
rm -rf data/bactopia.json data/cli.json bactopia-tools/*.mdx bactopia-pipelines/*.mdx developers/subworkflows/*.mdx developers/modules/*.mdx developers/cli/*.mdx impact/citations.md impact/acknowledgements.md impact/enhancements.md static/llms.txt static/catalog.json docs/changelog.md

Expand Down
Binary file removed bin/__pycache__/generator_utils.cpython-312.pyc
Binary file not shown.
Binary file removed bin/__pycache__/generator_utils.cpython-313.pyc
Binary file not shown.
Binary file removed bin/__pycache__/generator_utils.cpython-314.pyc
Binary file not shown.
5 changes: 5 additions & 0 deletions bin/translate/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Translation tooling for Bactopia documentation.
#
# Adapted from the Nextflow Training project's translation system:
# https://github.com/nextflow-io/training/tree/master/_scripts/translate
# Original prompts and architecture by the Nextflow team.
4 changes: 4 additions & 0 deletions bin/translate/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""Allow running as: python -m bin.translate"""
# Relative import: the CLI entry point lives in the sibling `cli` module of
# this package.
from .cli import main

# Called unconditionally (no `__name__` guard): this module's only job is to
# start the CLI when the package is executed with `python -m bin.translate`.
main()
123 changes: 123 additions & 0 deletions bin/translate/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""Claude API integration for translation.

Adapted from the Nextflow Training project's translation system:
https://github.com/nextflow-io/training/tree/master/_scripts/translate
"""

import asyncio
import random
from dataclasses import dataclass

import anthropic

from .config import (
BASE_DELAY,
MAX_CONTINUATIONS,
MAX_RETRIES,
MAX_TOKENS,
MODEL,
REQUEST_TIMEOUT,
TranslationError,
validate_api_key,
)


@dataclass
class TranslationResult:
    """Outcome of one translation request, including any continuation calls."""

    # Concatenated text of every response that made up this result.
    text: str
    # Model name as reported by the final API response.
    model: str
    # Input tokens summed over all calls made for this request.
    input_tokens: int
    # Output tokens summed over all calls made for this request.
    output_tokens: int
    # Stop reason of the final response ("max_tokens" means still truncated).
    stop_reason: str
    # Number of follow-up "continue" calls that were issued.
    continuations: int


def _extract_text(response: anthropic.types.Message) -> str:
    """Return the concatenated text of a response's content blocks.

    Blocks without a ``text`` attribute are skipped; the remaining pieces
    are joined in order with no separator.
    """
    pieces = []
    for part in response.content:
        if hasattr(part, "text"):
            pieces.append(part.text)
    return "".join(pieces)


async def _call_once(
    client: anthropic.AsyncAnthropic,
    system: str,
    messages: list[dict],
    label: str = "",
    model: str = MODEL,
) -> anthropic.types.Message:
    """Issue one Messages API request, retrying transient failures.

    Connection errors, timeouts, rate limits and internal server errors are
    retried up to ``MAX_RETRIES`` attempts, sleeping
    ``BASE_DELAY * 2**attempt`` seconds plus up to one second of random
    jitter between attempts. Any other exception propagates immediately.

    Args:
        client: Async Anthropic client used to send the request.
        system: System prompt, sent as a single text block carrying an
            ``ephemeral`` ``cache_control`` marker.
        messages: Conversation so far, passed through unchanged.
        label: Tag included in error messages to identify the request.
        model: Model identifier to request.

    Returns:
        The response of the first attempt that succeeds.

    Raises:
        TranslationError: If the last attempt fails with a transient error
            (the underlying error is attached as ``__cause__``), or if
            ``MAX_RETRIES`` allows no attempt at all.
    """
    for attempt in range(MAX_RETRIES):
        try:
            return await client.messages.create(
                model=model,
                max_tokens=MAX_TOKENS,
                system=[{
                    "type": "text",
                    "text": system,
                    "cache_control": {"type": "ephemeral"},
                }],
                messages=messages,
                timeout=REQUEST_TIMEOUT,
            )
        except (
            anthropic.APIConnectionError,
            anthropic.APITimeoutError,
            anthropic.RateLimitError,
            anthropic.InternalServerError,
        ) as e:
            if attempt == MAX_RETRIES - 1:
                # Chain explicitly so the original API error is reported as
                # the direct cause rather than as incidental context.
                raise TranslationError(
                    f"[{label}] API failed after {MAX_RETRIES} retries: {e}"
                ) from e
            delay = BASE_DELAY * (2**attempt) + random.uniform(0, 1)
            await asyncio.sleep(delay)

    # Only reachable when the loop body never ran (MAX_RETRIES <= 0).
    raise TranslationError(f"[{label}] Exhausted retries")


async def call_claude_async(
    prompt: str,
    system: str,
    client: anthropic.AsyncAnthropic | None = None,
    label: str = "",
    model: str = MODEL,
) -> TranslationResult:
    """Translate ``prompt`` with Claude, continuing truncated responses.

    When a response stops with ``"max_tokens"``, the assistant output is fed
    back and the model is asked to continue, up to ``MAX_CONTINUATIONS``
    times. Text and token counts from every call are accumulated into the
    returned result; ``model`` and ``stop_reason`` come from the last call.

    If no ``client`` is supplied, one is created from ``validate_api_key()``
    and closed before returning; a caller-supplied client is left open.
    """
    created_here = client is None
    if created_here:
        client = anthropic.AsyncAnthropic(api_key=validate_api_key())

    try:
        conversation = [{"role": "user", "content": prompt}]
        chunks = []
        tokens_in = 0
        tokens_out = 0
        rounds = 0

        while True:
            response = await _call_once(client, system, conversation, label, model)
            chunks.append(_extract_text(response))
            tokens_in += response.usage.input_tokens
            tokens_out += response.usage.output_tokens

            finished = response.stop_reason != "max_tokens"
            if finished or rounds >= MAX_CONTINUATIONS:
                break

            # Truncated: replay the partial answer and ask for the remainder.
            rounds += 1
            conversation.append({"role": "assistant", "content": response.content})
            conversation.append(
                {"role": "user", "content": "Continue exactly where you left off."}
            )

        return TranslationResult(
            text="".join(chunks),
            model=response.model,
            input_tokens=tokens_in,
            output_tokens=tokens_out,
            stop_reason=response.stop_reason,
            continuations=rounds,
        )
    finally:
        # Only close clients this function created.
        if created_here:
            await client.close()
Loading