From 9a3b84c6858b16b2a3aa12111cd37ba30c220a1c Mon Sep 17 00:00:00 2001 From: Saurabh Singh <1623701+saurabh500@users.noreply.github.com> Date: Wed, 27 May 2026 19:14:06 +0000 Subject: [PATCH 1/2] FIX: Release GIL around teardown SQLFreeHandle/SQLFreeStmt (#565) The 1.7.x fix released the GIL around SQLDriverConnect, SQLSetConnectAttr, SQLEndTran and SQLDisconnect, which fixed the plain-connect deadlock through in-process Python TCP forwarders (paramiko + sshtunnel). However, when a parametrized SELECT leaves an open server-side cursor, the connection teardown cascade still blocks: SqlHandle::free() calls SQLFreeHandle on the STMT handle, which sends a SQL_CLOSE-equivalent network packet to the server. With the GIL held, the forwarder thread cannot run sock.sendall and the call deadlocks forever. This change releases the GIL around the three remaining teardown calls that may perform network I/O: * SqlHandle::free() -> SQLFreeHandle(STMT/DBC) * SqlHandle::close_cursor() -> SQLFreeStmt(SQL_CLOSE) * SQLFreeHandle_wrap() -> SQLFreeHandle Each release is gated on PyGILState_Check() AND !is_python_finalizing(), because these paths can also be reached from interpreter-shutdown cleanup where gil_scoped_release would crash with Fatal Python error: PyEval_SaveThread: ... the GIL is released Adds a second regression test (test_param_query_close_through_python_tcp_forwarder_does_not_deadlock) that reproduces the latest @jschuba scenario via an in-process TCP forwarder + parametrized SELECTs + conn.close() under a hard watchdog. Verified the new test hangs (watchdog fires at 30s) without the C++ change and passes (completes in <0.5s) with it. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- mssql_python/pybind/ddbc_bindings.cpp | 40 ++++++- tests/test_023_ssh_tunnel_gil_release.py | 128 +++++++++++++++++++---- 2 files changed, 141 insertions(+), 27 deletions(-) diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp index 5ed7820f..e5a255d2 100644 --- a/mssql_python/pybind/ddbc_bindings.cpp +++ b/mssql_python/pybind/ddbc_bindings.cpp @@ -1384,8 +1384,21 @@ void SqlHandle::free() { return; } - // Handle is valid and not implicitly freed, proceed with normal freeing - SQLFreeHandle_ptr(_type, _handle); + // Handle is valid and not implicitly freed, proceed with normal freeing. + // Release the GIL during the blocking ODBC call (SQLFreeHandle on a STMT + // with an open server-side cursor, or on a DBC, performs network I/O). + // This is critical when the connection is reached through an in-process + // Python TCP forwarder (e.g. paramiko + sshtunnel) - the forwarder + // thread needs the GIL to push bytes, so holding it here deadlocks + // (issue #565). Only release the GIL if it is actually held AND the + // interpreter is not finalizing - gil_scoped_release is unsafe during + // shutdown even if PyGILState_Check() reports the GIL as held. + if (!pythonShuttingDown && PyGILState_Check()) { + py::gil_scoped_release release; + SQLFreeHandle_ptr(_type, _handle); + } else { + SQLFreeHandle_ptr(_type, _handle); + } _handle = nullptr; } } @@ -1400,7 +1413,17 @@ void SqlHandle::close_cursor() { if (!SQLFreeStmt_ptr) { ThrowStdException("SQLFreeStmt function not loaded"); } - SQLRETURN ret = SQLFreeStmt_ptr(_handle, SQL_CLOSE); + // Release the GIL during the blocking SQLFreeStmt(SQL_CLOSE) network + // round-trip; see issue #565 (in-process forwarder deadlock). + // Skip GIL release when the GIL isn't held or the interpreter is + // finalizing - gil_scoped_release is unsafe in shutdown. + SQLRETURN ret; + if (!is_python_finalizing() && PyGILState_Check()) { + py::gil_scoped_release release; + ret = SQLFreeStmt_ptr(_handle, SQL_CLOSE); + } else { + ret = SQLFreeStmt_ptr(_handle, SQL_CLOSE); + } if (ret != SQL_SUCCESS && ret != SQL_SUCCESS_WITH_INFO) { ThrowStdException("SQLFreeStmt(SQL_CLOSE) failed"); } @@ -5695,7 +5718,16 @@ SQLRETURN SQLFreeHandle_wrap(SQLSMALLINT HandleType, SqlHandlePtr Handle) { DriverLoader::getInstance().loadDriver(); // Load the driver } - SQLRETURN ret = SQLFreeHandle_ptr(HandleType, Handle->get()); + // Release the GIL during the blocking SQLFreeHandle network round-trip + // (see issue #565 - in-process Python TCP forwarder deadlock). + // Skip GIL release in shutdown paths where it would crash. + SQLRETURN ret; + if (!is_python_finalizing() && PyGILState_Check()) { + py::gil_scoped_release release; + ret = SQLFreeHandle_ptr(HandleType, Handle->get()); + } else { + ret = SQLFreeHandle_ptr(HandleType, Handle->get()); + } if (!SQL_SUCCEEDED(ret)) { LOG("SQLFreeHandle_wrap: SQLFreeHandle failed with error code - %d", ret); return ret; diff --git a/tests/test_023_ssh_tunnel_gil_release.py b/tests/test_023_ssh_tunnel_gil_release.py index 686570af..a274ed4e 100644 --- a/tests/test_023_ssh_tunnel_gil_release.py +++ b/tests/test_023_ssh_tunnel_gil_release.py @@ -187,22 +187,60 @@ def _run_forwarded_connect_subprocess() -> int: return 0 +def _run_forwarded_param_query_subprocess() -> int: + """ + Subprocess body for the parametrized-query teardown regression from + issue #565 (latest comment by @jschuba). + + Executes parametrized SELECTs through the in-process forwarder, then + closes the connection. Before the GIL-release fix on the teardown + handle-freeing path, ``conn.close()`` blocks indefinitely inside + ``SQLFreeHandle`` (the server-side cursor opened by the parametrized + SELECT triggers a network round-trip during STMT-handle teardown, + while the GIL is held - starving the in-process forwarder thread). + """ + import mssql_python + from mssql_python import connect + + base = os.environ["DB_CONNECTION_STRING"] + target = _parse_server(base) + if target is None: + print("ERR: could not parse Server=... clause", file=sys.stderr) + return 2 + + fwd_host, fwd_port = _start_forwarder(target) + mssql_python.pooling(enabled=False) + tunneled = _replace_server(base, fwd_host, fwd_port) + + conn = connect(tunneled) + + # Use Connection.execute (same flow as the issue report) so cursors are + # not explicitly closed by the user - the deadlock occurs when conn.close() + # cascades through the still-open server-side cursors. + res = conn.execute("SELECT 1 as result WHERE 1=?", (1,)) + row_qmark = res.fetchall() + + res = conn.execute("SELECT 1 as result WHERE 1=%(parameter)s", {"parameter": 1}) + row_named = res.fetchall() + + # This is the call that hangs in the unfixed binary. + conn.close() + + print(f"OK qmark={row_qmark} named={row_named}", flush=True) + return 0 + + # --------------------------------------------------------------------------- # The actual pytest test. # --------------------------------------------------------------------------- -def test_connect_through_python_tcp_forwarder_does_not_deadlock(): - """ - Regression test for issue #565. - - Routes ``mssql_python.connect()`` through a pure-Python TCP forwarder - in a subprocess and applies a hard watchdog. Before the - connection-attribute GIL-release fix, the subprocess hangs forever - inside ``SQLSetConnectAttr(SQL_ATTR_AUTOCOMMIT, OFF)`` (called by - ``Connection.setautocommit`` immediately after ``SQLDriverConnect`` - returns); with the fix it completes in well under a second. - """ +def _run_subprocess_scenario( + scenario_env_value: str, + expected_marker: bytes, + failure_message: str, +) -> None: + """Shared helper: spawn this file as a subprocess and apply the watchdog.""" base_conn_str = os.getenv("DB_CONNECTION_STRING") if not base_conn_str: pytest.skip("DB_CONNECTION_STRING environment variable not set") @@ -213,10 +251,8 @@ def test_connect_through_python_tcp_forwarder_does_not_deadlock(): env = os.environ.copy() env["DB_CONNECTION_STRING"] = base_conn_str env["PYTHONUNBUFFERED"] = "1" - # Propagate sys.path so the subprocess can find mssql_python even when - # the package is only available via pytest's path manipulation or an - # editable install rooted in the workspace. env["PYTHONPATH"] = os.pathsep.join(sys.path) + env["MSSQL_PYTHON_TEST_565_SCENARIO"] = scenario_env_value proc = subprocess.Popen( [sys.executable, os.path.abspath(__file__)], @@ -229,26 +265,72 @@ def test_connect_through_python_tcp_forwarder_does_not_deadlock(): except subprocess.TimeoutExpired: proc.kill() proc.communicate() - pytest.fail( - f"connect()+SELECT through in-process Python TCP forwarder did " - f"not complete within {WATCHDOG_SECONDS}s — this is the issue " - f"#565 deadlock (GIL held across SQLSetConnectAttr_AUTOCOMMIT)." - ) + pytest.fail(failure_message) assert proc.returncode == 0, ( f"Subprocess exited with {proc.returncode}.\n" f"stdout:\n{out.decode(errors='replace')}\n" f"stderr:\n{err.decode(errors='replace')}" ) - assert b"OK (1,)" in out, ( + assert expected_marker in out, ( f"Unexpected subprocess output.\n" f"stdout:\n{out.decode(errors='replace')}\n" f"stderr:\n{err.decode(errors='replace')}" ) -# Subprocess entry point: when this file is run as a script (by the test -# above), execute the forwarded-connect body. Pytest collection ignores -# this block. +def test_connect_through_python_tcp_forwarder_does_not_deadlock(): + """ + Regression test for issue #565. + + Routes ``mssql_python.connect()`` through a pure-Python TCP forwarder + in a subprocess and applies a hard watchdog. Before the + connection-attribute GIL-release fix, the subprocess hangs forever + inside ``SQLSetConnectAttr(SQL_ATTR_AUTOCOMMIT, OFF)`` (called by + ``Connection.setautocommit`` immediately after ``SQLDriverConnect`` + returns); with the fix it completes in well under a second. + """ + _run_subprocess_scenario( + scenario_env_value="connect", + expected_marker=b"OK (1,)", + failure_message=( + f"connect()+SELECT through in-process Python TCP forwarder did " + f"not complete within {WATCHDOG_SECONDS}s — this is the issue " + f"#565 deadlock (GIL held across SQLSetConnectAttr_AUTOCOMMIT)." + ), + ) + + +def test_param_query_close_through_python_tcp_forwarder_does_not_deadlock(): + """ + Regression test for the parametrized-query teardown variant of issue + #565 (latest comment by @jschuba on the reopened issue). + + Executes parametrized SELECTs through the in-process forwarder and + then calls ``conn.close()``. Before the fix that releases the GIL + around ``SQLFreeHandle`` / ``SQLFreeStmt`` in the handle-teardown + path, ``conn.close()`` deadlocks inside ``SQLFreeHandle(SQL_HANDLE_STMT)`` + because the server-side cursor opened by the parametrized SELECT + triggers a network round-trip during handle teardown while the GIL + is held — starving the in-process forwarder thread. + """ + _run_subprocess_scenario( + scenario_env_value="param_close", + expected_marker=b"OK qmark=[(1,)] named=[(1,)]", + failure_message=( + f"Parametrized-query teardown through in-process Python TCP " + f"forwarder did not complete within {WATCHDOG_SECONDS}s — this " + f"is the issue #565 deadlock variant (GIL held across " + f"SQLFreeHandle/SQLFreeStmt during cursor/connection close)." + ), + ) + + +# Subprocess entry point: when this file is run as a script (by the tests +# above), execute the requested scenario. Pytest collection ignores this +# block. if __name__ == "__main__": + scenario = os.environ.get("MSSQL_PYTHON_TEST_565_SCENARIO", "connect") + if scenario == "param_close": + sys.exit(_run_forwarded_param_query_subprocess()) sys.exit(_run_forwarded_connect_subprocess()) From 898bd29a0edeb2add2b76d75eb7a9ff9d0383f67 Mon Sep 17 00:00:00 2001 From: Saurabh Singh <1623701+saurabh500@users.noreply.github.com> Date: Wed, 27 May 2026 19:53:55 +0000 Subject: [PATCH 2/2] FIX: Release GIL around SQLDescribeParam network round-trip (#565) Audit follow-up to the previous teardown fix. Identified one additional ODBC call site that performs a server round-trip while holding the GIL, which deadlocks when the connection is routed through an in-process Python TCP forwarder (the same #565-family pattern). Fixed call site: * BindParameters() -> SQLDescribeParam Issued by the driver as sp_describe_undeclared_parameters when a parameter has SQL_UNKNOWN_TYPE - hit on every parametrized query that includes a None value. Earlier drafts of this commit also wrapped Connection::isAlive(), Connection::getInfo() and Connection::getAutocommit(), but those were removed after empirical verification: SQL_ATTR_CONNECTION_DEAD and SQL_ATTR_AUTOCOMMIT GETs are driver-cached client-side state per the ODBC spec (MSDN explicitly states SQL_ATTR_CONNECTION_DEAD 'does not query the data source for activity'), and SQLGetInfo for the standard info types (SQL_DBMS_NAME, SQL_DATABASE_NAME, ...) returns data the driver cached at login time. None of these are network calls. Reached only from a Python-callable wrapper with the GIL held, so a plain py::gil_scoped_release is sufficient (no is_python_finalizing guard needed - that's only required for the destructor cascade fixed in the prior commit). Adds test_param_describe_through_python_tcp_forwarder_does_not_deadlock which runs a parametrized SELECT with a None argument through the in-process Python TCP forwarder under a 30s watchdog. Verified: the test hangs (watchdog fires at 30s) without the SQLDescribeParam GIL release and passes (<1s) with it. Full pytest suite: 1767 passed, 0 failed. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- mssql_python/pybind/ddbc_bindings.cpp | 14 +++- tests/test_023_ssh_tunnel_gil_release.py | 88 ++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 3 deletions(-) diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp index e5a255d2..12bab21c 100644 --- a/mssql_python/pybind/ddbc_bindings.cpp +++ b/mssql_python/pybind/ddbc_bindings.cpp @@ -635,9 +635,17 @@ SQLRETURN BindParameters(SQLHANDLE hStmt, const py::list& params, SQLULEN describedSize; SQLSMALLINT describedDigits; SQLSMALLINT nullable; - RETCODE rc = SQLDescribeParam_ptr( - hStmt, static_cast(paramIndex + 1), &describedType, - &describedSize, &describedDigits, &nullable); + // SQLDescribeParam may issue a server round-trip + // (sp_describe_undeclared_parameters). Release the GIL + // around it so in-process Python TCP forwarders can run + // (issue #565 family). + RETCODE rc; + { + py::gil_scoped_release release; + rc = SQLDescribeParam_ptr( + hStmt, static_cast(paramIndex + 1), &describedType, + &describedSize, &describedDigits, &nullable); + } if (!SQL_SUCCEEDED(rc)) { // SQLDescribeParam can fail for generic SELECT statements where // no table column is referenced. Fall back to SQL_VARCHAR as a safe diff --git a/tests/test_023_ssh_tunnel_gil_release.py b/tests/test_023_ssh_tunnel_gil_release.py index a274ed4e..8031465e 100644 --- a/tests/test_023_ssh_tunnel_gil_release.py +++ b/tests/test_023_ssh_tunnel_gil_release.py @@ -230,6 +230,65 @@ def _run_forwarded_param_query_subprocess() -> int: return 0 +def _run_forwarded_introspection_subprocess() -> int: + """ + Subprocess body covering the parametrized-query introspection path + (issue #565 family). + + The only actual network round-trip here is ``SQLDescribeParam`` + (``sp_describe_undeclared_parameters``), triggered by executing a + parametrized statement with a ``None`` argument. Before the fix this + call ran with the GIL held and would deadlock when routed through an + in-process Python TCP forwarder. + + ``Connection.getinfo`` and ``conn.autocommit`` are included as local + sanity probes -- per the ODBC spec and MS ODBC driver behavior these + return data cached at login time / driver-side state and do *not* + issue server round-trips, so they should complete instantly with or + without the fix. + """ + import mssql_python + from mssql_python import connect + + base = os.environ["DB_CONNECTION_STRING"] + target = _parse_server(base) + if target is None: + print("ERR: could not parse Server=... clause", file=sys.stderr) + return 2 + + fwd_host, fwd_port = _start_forwarder(target) + mssql_python.pooling(enabled=False) + tunneled = _replace_server(base, fwd_host, fwd_port) + + conn = connect(tunneled) + + # (1) Local sanity probe: SQLGetInfo. Per ODBC spec / MS driver, info + # types like SQL_DBMS_NAME are cached at login time and don't issue a + # round-trip, so this should complete instantly even without any fix. + if hasattr(conn, "getinfo"): + try: + _ = conn.getinfo(17) # SQL_DBMS_NAME + print("[child] getinfo(SQL_DBMS_NAME) OK", flush=True) + except Exception as e: + print(f"[child] getinfo skipped: {type(e).__name__}: {e}", flush=True) + + # (2) The real deadlock path: parametrized execute with a None argument + # forces the driver to issue sp_describe_undeclared_parameters via + # SQLDescribeParam, which is a server round-trip. Without the GIL + # release on SQLDescribeParam this hangs through the in-process forwarder. + res = conn.execute("SELECT ISNULL(?, 42) as result", (None,)) + row_none = res.fetchall() + + # (3) Local sanity probe: SQL_ATTR_AUTOCOMMIT is client-cached and the + # getter does not round-trip; included to lock in current behavior. + _ = conn.autocommit + + conn.close() + + print(f"OK introspection none_param={row_none}", flush=True) + return 0 + + # --------------------------------------------------------------------------- # The actual pytest test. # --------------------------------------------------------------------------- @@ -326,6 +385,33 @@ def test_param_query_close_through_python_tcp_forwarder_does_not_deadlock(): ) +def test_param_describe_through_python_tcp_forwarder_does_not_deadlock(): + """ + Regression test for the parametrized-query introspection path + (issue #565 family). + + Executing a parametrized statement with a ``None`` argument forces the + MS ODBC driver to issue ``sp_describe_undeclared_parameters`` via + ``SQLDescribeParam``. Before the fix this call was made with the GIL + held and would deadlock when routed through an in-process Python TCP + forwarder. + + The subprocess also exercises ``Connection.getinfo`` and + ``conn.autocommit`` as local sanity probes (both are driver-cached + and should never round-trip). + """ + _run_subprocess_scenario( + scenario_env_value="introspection", + expected_marker=b"OK introspection", + failure_message=( + f"Parametrized-with-None path through in-process Python TCP " + f"forwarder did not complete within {WATCHDOG_SECONDS}s — " + f"this is an issue #565-family deadlock (GIL held across " + f"SQLDescribeParam / sp_describe_undeclared_parameters)." + ), + ) + + # Subprocess entry point: when this file is run as a script (by the tests # above), execute the requested scenario. Pytest collection ignores this # block. @@ -333,4 +419,6 @@ def test_param_query_close_through_python_tcp_forwarder_does_not_deadlock(): scenario = os.environ.get("MSSQL_PYTHON_TEST_565_SCENARIO", "connect") if scenario == "param_close": sys.exit(_run_forwarded_param_query_subprocess()) + if scenario == "introspection": + sys.exit(_run_forwarded_introspection_subprocess()) sys.exit(_run_forwarded_connect_subprocess())