No need to change order of imports (bad conflict resolution)

Remove duplicate import
Onyl clear logcontext if spans should finish
2025-10-02 11:33:34 -05:00 · 2025-10-02 11:32:41 -05:00 · 2025-10-02 11:31:29 -05:00 · 2025-10-02 11:30:47 -05:00
2 changed files with 168 additions and 3 deletions
--- a/synapse/logging/scopecontextmanager.py
+++ b/synapse/logging/scopecontextmanager.py
@@ -155,7 +155,9 @@ class _LogContextScope(Scope):
        if self._finish_on_close:
            self.span.finish()

-        self.logcontext.scope = None
+            # The tracing scope/span should stick to the logcontext if it's active
+            # so we only clear if we're finishing the span.
+            self.logcontext.scope = None

        if self._enter_logcontext:
            self.logcontext.__exit__(None, None, None)
--- a/tests/logging/test_opentracing.py
+++ b/tests/logging/test_opentracing.py
@@ -19,13 +19,15 @@
 #
 #

-from typing import Awaitable, Optional, cast
+from typing import Awaitable, Dict, Optional, cast

 from twisted.internet import defer
 from twisted.internet.testing import MemoryReactorClock

 from synapse.logging.context import (
    LoggingContext,
+    PreserveLoggingContext,
+    current_context,
    make_deferred_yieldable,
    run_in_background,
 )
@@ -45,7 +47,6 @@ try:
 except ImportError:
    jaeger_client = None  # type: ignore

-
 try:
    import opentracing

@@ -54,6 +55,7 @@ except ImportError:
    opentracing = None  # type: ignore
    LogContextScopeManager = None  # type: ignore

+
 import logging

 from tests.unittest import TestCase
@@ -224,6 +226,167 @@ class LogContextScopeManagerTestCase(TestCase):
            [scopes[1].span, scopes[2].span, scopes[0].span],
        )

+    def test_run_in_background_active_scope_still_available(self) -> None:
+        """
+        Test that tasks running via `run_in_background` still have access to the
+        active tracing scope.
+
+        This is a regression test for a previous Synapse issue where the tracing scope
+        would `__exit__` and close before the `run_in_background` task completed and our
+        own previous custom `_LogContextScope.close(...)` would clear
+        `LoggingContext.scope` preventing further tracing spans from having the correct
+        parent.
+        """
+        reactor, clock = get_clock()
+
+        scope_map: Dict[str, opentracing.Scope] = {}
+
+        callback_finished = False
+
+        async def async_task() -> None:
+            nonlocal callback_finished
+            try:
+                logger.info("asdf async_task1 - %s", current_context().scope)
+                root_scope = scope_map["root"]
+                root_context = cast(jaeger_client.SpanContext, root_scope.span.context)
+
+                self.assertEqual(
+                    self._tracer.active_span,
+                    root_scope.span,
+                    "expected to inherit the root tracing scope from where this was run",
+                )
+
+                # Return control back to the reactor thread and wait an arbitrary amount
+                await clock.sleep(4)
+
+                logger.info("asdf async_task2 - %s", current_context().scope)
+
+                # This is a key part of what we're testing! In a previous version of
+                # Synapse, we would lose the active span at this point.
+                self.assertEqual(
+                    self._tracer.active_span,
+                    root_scope.span,
+                    "expected to still have a root tracing scope/span active",
+                )
+
+                # For complete-ness sake, let's also trace more sub-tasks here and assert
+                # they have the correct span parents as well (root)
+
+                # Start tracing some other sub-task.
+                #
+                # This is a key part of what we're testing! In a previous version of
+                # Synapse, it would have the incorrect span parents.
+                scope = start_active_span(
+                    "task1",
+                    tracer=self._tracer,
+                )
+                scope_map["task1"] = scope
+
+                # Ensure the span parent is pointing to the root scope
+                context = cast(jaeger_client.SpanContext, scope.span.context)
+                self.assertEqual(
+                    context.parent_id,
+                    root_context.span_id,
+                    "expected task1 parent to be the root span",
+                )
+
+                # Ensure that the active span is our new sub-task now
+                self.assertEqual(self._tracer.active_span, scope.span)
+                # Return control back to the reactor thread and wait an arbitrary amount
+                await clock.sleep(4)
+                # We should still see the active span as the scope wasn't closed yet
+                self.assertEqual(self._tracer.active_span, scope.span)
+                # Clock the scope using the context manager lifecycle (`scope.close()`)
+                with scope:
+                    pass
+            finally:
+                # When exceptions happen, we still want to mark the callback as finished
+                # so that the test can complete and we see the underlying error.
+                callback_finished = True
+
+        async def root() -> None:
+            with start_active_span(
+                "root span",
+                tracer=self._tracer,
+                # We will close this off later. We're basically just mimicking the same
+                # pattern for how we handle requests. We pass the span off to the
+                # request for it to finish.
+                finish_on_close=False,
+            ) as root_scope:
+                scope_map["root"] = root_scope
+                self.assertEqual(self._tracer.active_span, root_scope.span)
+
+                logger.info("asdf root1 - %s", current_context().scope)
+
+                # Fire-and-forget a task
+                #
+                # XXX: The root scope context manager will `__exit__` before this task
+                # completes.
+                run_in_background(async_task)
+
+                logger.info("asdf root2 - %s", current_context().scope)
+
+                # Because we used `run_in_background`, the active span should still be
+                # the root.
+                self.assertEqual(self._tracer.active_span, root_scope.span)
+
+            logger.info("asdf root3 - %s", current_context().scope)
+
+            # We shouldn't see any active spans outside of the scope
+            self.assertIsNone(self._tracer.active_span)
+
+        with LoggingContext(name="root logcontext", server_name="test_server"):
+            # Start the test off
+            d_root = defer.ensureDeferred(root())
+
+            # Pump the reactor until the tasks complete
+            #
+            # We're manually pumping the reactor (and causing any pending callbacks to
+            # be called) so we need to be in the sentinel logcontext to avoid leaking
+            # our current logcontext into the reactor (which would then get picked up
+            # and associated with the next thing the reactor does). `with
+            # PreserveLoggingContext()` will reset the logcontext to the sentinel while
+            # we're pumping the reactor in the block and return us back to our current
+            # logcontext after the block.
+            with PreserveLoggingContext():
+                reactor.pump((2,) * 8)
+                self.successResultOf(d_root)
+
+            self.assertTrue(
+                callback_finished,
+                "Callback never finished which means the test probably didn't wait long enough",
+            )
+
+            # After we see all of the tasks are done (like a request when it
+            # `_finished_processing`), let's finish our root span
+            scope_map["root"].span.finish()
+
+            # Sanity check again: We shouldn't see any active spans leftover in this
+            # this context.
+            self.assertIsNone(self._tracer.active_span)
+
+        # The spans should be reported in order of their finishing: task 1, task 2,
+        # root.
+        #
+        # We use `assertIncludes` just as an easier way to see if items are missing or
+        # added. We assert the order just below
+        self.assertIncludes(
+            set(self._reporter.get_spans()),
+            {
+                scope_map["task1"].span,
+                scope_map["root"].span,
+            },
+            exact=True,
+        )
+        # This is where we actually assert the correct order
+        self.assertEqual(
+            self._reporter.get_spans(),
+            [
+                scope_map["task1"].span,
+                scope_map["root"].span,
+            ],
+        )
+
    def test_trace_decorator_sync(self) -> None:
        """
        Test whether we can use `@trace_with_opname` (`@trace`) and `@tag_args`
Author	SHA1	Message	Date
Eric Eastwood	52d88ac369	No need to change order of imports (bad conflict resolution)	2025-10-02 11:33:34 -05:00
Eric Eastwood	c92f905267	Remove duplicate import	2025-10-02 11:32:41 -05:00
Eric Eastwood	4e06a504c4	Onyl clear logcontext if spans should finish	2025-10-02 11:31:29 -05:00
Eric Eastwood	70d0fd310a	Fix logcontext problems in `test_run_in_background_active_scope_still_available` This test is still expected to fail with our current `LogContextScopeManager` implementation but there were some flaws in how we were handling the logcontext before (like pumping the reactor in a non-sentinel logcontext) This test is originally from https://github.com/element-hq/synapse/pull/18849 spawning from https://github.com/element-hq/synapse/pull/18804#discussion_r2268254424	2025-10-02 11:30:47 -05:00