Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions osism/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ def run_ansible_in_environment(
publish=True,
locking=False,
auto_release_time=3600,
ssh_retries=3,
):
result = ""
extracted_hosts = set() # Local set for host deduplication
Expand Down Expand Up @@ -185,6 +186,19 @@ def run_ansible_in_environment(
logger.debug(f"Using per-task SSH ControlPath directory: {ssh_control_dir}")
env["ANSIBLE_SSH_CONTROL_PATH_DIR"] = ssh_control_dir

# Because the per-task ControlPath directory above is unique per run and
# removed afterwards, ControlMaster sockets are no longer reused across
# runs. Every run therefore cold-establishes the SSH connection to every
# host. With Ansible's default of ANSIBLE_SSH_RETRIES=0, a single transient
# failure during this cold connection setup (e.g. a ControlMaster spawn
# race during the first task's connection burst) immediately fails the
# host as UNREACHABLE with "Permission denied (publickey)", even though an
# immediate re-run succeeds. Retry the connection a few times so these
# transient first-contact glitches no longer abort the whole run. A value
# already present in env under ANSIBLE_SSH_RETRIES is left untouched.
if "ANSIBLE_SSH_RETRIES" not in env:
env["ANSIBLE_SSH_RETRIES"] = str(ssh_retries)

try:
# handle sub environments
if "." in environment:
Expand Down