Compare commits

...

4 Commits

Author SHA1 Message Date
Eric Eastwood
52d88ac369 No need to change order of imports (bad conflict resolution) 2025-10-02 11:33:34 -05:00
Eric Eastwood
c92f905267 Remove duplicate import 2025-10-02 11:32:41 -05:00
Eric Eastwood
4e06a504c4 Onyl clear logcontext if spans should finish 2025-10-02 11:31:29 -05:00
Eric Eastwood
70d0fd310a Fix logcontext problems in test_run_in_background_active_scope_still_available
This test is still expected to fail with our current `LogContextScopeManager`
implementation but there were some flaws in how we were handling
the logcontext before (like pumping the reactor in a non-sentinel logcontext)

This test is originally from
https://github.com/element-hq/synapse/pull/18849
spawning from https://github.com/element-hq/synapse/pull/18804#discussion_r2268254424
2025-10-02 11:30:47 -05:00
2 changed files with 168 additions and 3 deletions

View File

@@ -155,7 +155,9 @@ class _LogContextScope(Scope):
if self._finish_on_close:
self.span.finish()
self.logcontext.scope = None
# The tracing scope/span should stick to the logcontext if it's active
# so we only clear if we're finishing the span.
self.logcontext.scope = None
if self._enter_logcontext:
self.logcontext.__exit__(None, None, None)

View File

@@ -19,13 +19,15 @@
#
#
from typing import Awaitable, Optional, cast
from typing import Awaitable, Dict, Optional, cast
from twisted.internet import defer
from twisted.internet.testing import MemoryReactorClock
from synapse.logging.context import (
LoggingContext,
PreserveLoggingContext,
current_context,
make_deferred_yieldable,
run_in_background,
)
@@ -45,7 +47,6 @@ try:
except ImportError:
jaeger_client = None # type: ignore
try:
import opentracing
@@ -54,6 +55,7 @@ except ImportError:
opentracing = None # type: ignore
LogContextScopeManager = None # type: ignore
import logging
from tests.unittest import TestCase
@@ -224,6 +226,167 @@ class LogContextScopeManagerTestCase(TestCase):
[scopes[1].span, scopes[2].span, scopes[0].span],
)
def test_run_in_background_active_scope_still_available(self) -> None:
"""
Test that tasks running via `run_in_background` still have access to the
active tracing scope.
This is a regression test for a previous Synapse issue where the tracing scope
would `__exit__` and close before the `run_in_background` task completed and our
own previous custom `_LogContextScope.close(...)` would clear
`LoggingContext.scope` preventing further tracing spans from having the correct
parent.
"""
reactor, clock = get_clock()
scope_map: Dict[str, opentracing.Scope] = {}
callback_finished = False
async def async_task() -> None:
nonlocal callback_finished
try:
logger.info("asdf async_task1 - %s", current_context().scope)
root_scope = scope_map["root"]
root_context = cast(jaeger_client.SpanContext, root_scope.span.context)
self.assertEqual(
self._tracer.active_span,
root_scope.span,
"expected to inherit the root tracing scope from where this was run",
)
# Return control back to the reactor thread and wait an arbitrary amount
await clock.sleep(4)
logger.info("asdf async_task2 - %s", current_context().scope)
# This is a key part of what we're testing! In a previous version of
# Synapse, we would lose the active span at this point.
self.assertEqual(
self._tracer.active_span,
root_scope.span,
"expected to still have a root tracing scope/span active",
)
# For complete-ness sake, let's also trace more sub-tasks here and assert
# they have the correct span parents as well (root)
# Start tracing some other sub-task.
#
# This is a key part of what we're testing! In a previous version of
# Synapse, it would have the incorrect span parents.
scope = start_active_span(
"task1",
tracer=self._tracer,
)
scope_map["task1"] = scope
# Ensure the span parent is pointing to the root scope
context = cast(jaeger_client.SpanContext, scope.span.context)
self.assertEqual(
context.parent_id,
root_context.span_id,
"expected task1 parent to be the root span",
)
# Ensure that the active span is our new sub-task now
self.assertEqual(self._tracer.active_span, scope.span)
# Return control back to the reactor thread and wait an arbitrary amount
await clock.sleep(4)
# We should still see the active span as the scope wasn't closed yet
self.assertEqual(self._tracer.active_span, scope.span)
# Clock the scope using the context manager lifecycle (`scope.close()`)
with scope:
pass
finally:
# When exceptions happen, we still want to mark the callback as finished
# so that the test can complete and we see the underlying error.
callback_finished = True
async def root() -> None:
with start_active_span(
"root span",
tracer=self._tracer,
# We will close this off later. We're basically just mimicking the same
# pattern for how we handle requests. We pass the span off to the
# request for it to finish.
finish_on_close=False,
) as root_scope:
scope_map["root"] = root_scope
self.assertEqual(self._tracer.active_span, root_scope.span)
logger.info("asdf root1 - %s", current_context().scope)
# Fire-and-forget a task
#
# XXX: The root scope context manager will `__exit__` before this task
# completes.
run_in_background(async_task)
logger.info("asdf root2 - %s", current_context().scope)
# Because we used `run_in_background`, the active span should still be
# the root.
self.assertEqual(self._tracer.active_span, root_scope.span)
logger.info("asdf root3 - %s", current_context().scope)
# We shouldn't see any active spans outside of the scope
self.assertIsNone(self._tracer.active_span)
with LoggingContext(name="root logcontext", server_name="test_server"):
# Start the test off
d_root = defer.ensureDeferred(root())
# Pump the reactor until the tasks complete
#
# We're manually pumping the reactor (and causing any pending callbacks to
# be called) so we need to be in the sentinel logcontext to avoid leaking
# our current logcontext into the reactor (which would then get picked up
# and associated with the next thing the reactor does). `with
# PreserveLoggingContext()` will reset the logcontext to the sentinel while
# we're pumping the reactor in the block and return us back to our current
# logcontext after the block.
with PreserveLoggingContext():
reactor.pump((2,) * 8)
self.successResultOf(d_root)
self.assertTrue(
callback_finished,
"Callback never finished which means the test probably didn't wait long enough",
)
# After we see all of the tasks are done (like a request when it
# `_finished_processing`), let's finish our root span
scope_map["root"].span.finish()
# Sanity check again: We shouldn't see any active spans leftover in this
# this context.
self.assertIsNone(self._tracer.active_span)
# The spans should be reported in order of their finishing: task 1, task 2,
# root.
#
# We use `assertIncludes` just as an easier way to see if items are missing or
# added. We assert the order just below
self.assertIncludes(
set(self._reporter.get_spans()),
{
scope_map["task1"].span,
scope_map["root"].span,
},
exact=True,
)
# This is where we actually assert the correct order
self.assertEqual(
self._reporter.get_spans(),
[
scope_map["task1"].span,
scope_map["root"].span,
],
)
def test_trace_decorator_sync(self) -> None:
"""
Test whether we can use `@trace_with_opname` (`@trace`) and `@tag_args`