Fix logcontext problems in test_run_in_background_active_scope_still_available
This test is still expected to fail with our current `LogContextScopeManager` implementation but there were some flaws in how we were handling the logcontext before (like pumping the reactor in a non-sentinel logcontext) This test is originally from https://github.com/element-hq/synapse/pull/18849 spawning from https://github.com/element-hq/synapse/pull/18804#discussion_r2268254424
This commit is contained in:
@@ -19,13 +19,15 @@
|
||||
#
|
||||
#
|
||||
|
||||
from typing import Awaitable, Optional, cast
|
||||
from typing import Awaitable, Dict, Optional, cast
|
||||
|
||||
from twisted.internet import defer
|
||||
from twisted.internet.testing import MemoryReactorClock
|
||||
|
||||
from synapse.logging.context import (
|
||||
LoggingContext,
|
||||
PreserveLoggingContext,
|
||||
current_context,
|
||||
make_deferred_yieldable,
|
||||
run_in_background,
|
||||
)
|
||||
@@ -40,6 +42,14 @@ from synapse.util.clock import Clock
|
||||
|
||||
from tests.server import get_clock
|
||||
|
||||
try:
|
||||
import opentracing
|
||||
|
||||
from synapse.logging.scopecontextmanager import LogContextScopeManager
|
||||
except ImportError:
|
||||
opentracing = None # type: ignore
|
||||
LogContextScopeManager = None # type: ignore
|
||||
|
||||
try:
|
||||
import jaeger_client
|
||||
except ImportError:
|
||||
@@ -224,6 +234,167 @@ class LogContextScopeManagerTestCase(TestCase):
|
||||
[scopes[1].span, scopes[2].span, scopes[0].span],
|
||||
)
|
||||
|
||||
def test_run_in_background_active_scope_still_available(self) -> None:
|
||||
"""
|
||||
Test that tasks running via `run_in_background` still have access to the
|
||||
active tracing scope.
|
||||
|
||||
This is a regression test for a previous Synapse issue where the tracing scope
|
||||
would `__exit__` and close before the `run_in_background` task completed and our
|
||||
own previous custom `_LogContextScope.close(...)` would clear
|
||||
`LoggingContext.scope` preventing further tracing spans from having the correct
|
||||
parent.
|
||||
"""
|
||||
reactor, clock = get_clock()
|
||||
|
||||
scope_map: Dict[str, opentracing.Scope] = {}
|
||||
|
||||
callback_finished = False
|
||||
|
||||
async def async_task() -> None:
|
||||
nonlocal callback_finished
|
||||
try:
|
||||
logger.info("asdf async_task1 - %s", current_context().scope)
|
||||
root_scope = scope_map["root"]
|
||||
root_context = cast(jaeger_client.SpanContext, root_scope.span.context)
|
||||
|
||||
self.assertEqual(
|
||||
self._tracer.active_span,
|
||||
root_scope.span,
|
||||
"expected to inherit the root tracing scope from where this was run",
|
||||
)
|
||||
|
||||
# Return control back to the reactor thread and wait an arbitrary amount
|
||||
await clock.sleep(4)
|
||||
|
||||
logger.info("asdf async_task2 - %s", current_context().scope)
|
||||
|
||||
# This is a key part of what we're testing! In a previous version of
|
||||
# Synapse, we would lose the active span at this point.
|
||||
self.assertEqual(
|
||||
self._tracer.active_span,
|
||||
root_scope.span,
|
||||
"expected to still have a root tracing scope/span active",
|
||||
)
|
||||
|
||||
# For complete-ness sake, let's also trace more sub-tasks here and assert
|
||||
# they have the correct span parents as well (root)
|
||||
|
||||
# Start tracing some other sub-task.
|
||||
#
|
||||
# This is a key part of what we're testing! In a previous version of
|
||||
# Synapse, it would have the incorrect span parents.
|
||||
scope = start_active_span(
|
||||
"task1",
|
||||
tracer=self._tracer,
|
||||
)
|
||||
scope_map["task1"] = scope
|
||||
|
||||
# Ensure the span parent is pointing to the root scope
|
||||
context = cast(jaeger_client.SpanContext, scope.span.context)
|
||||
self.assertEqual(
|
||||
context.parent_id,
|
||||
root_context.span_id,
|
||||
"expected task1 parent to be the root span",
|
||||
)
|
||||
|
||||
# Ensure that the active span is our new sub-task now
|
||||
self.assertEqual(self._tracer.active_span, scope.span)
|
||||
# Return control back to the reactor thread and wait an arbitrary amount
|
||||
await clock.sleep(4)
|
||||
# We should still see the active span as the scope wasn't closed yet
|
||||
self.assertEqual(self._tracer.active_span, scope.span)
|
||||
# Clock the scope using the context manager lifecycle (`scope.close()`)
|
||||
with scope:
|
||||
pass
|
||||
finally:
|
||||
# When exceptions happen, we still want to mark the callback as finished
|
||||
# so that the test can complete and we see the underlying error.
|
||||
callback_finished = True
|
||||
|
||||
async def root() -> None:
|
||||
with start_active_span(
|
||||
"root span",
|
||||
tracer=self._tracer,
|
||||
# We will close this off later. We're basically just mimicking the same
|
||||
# pattern for how we handle requests. We pass the span off to the
|
||||
# request for it to finish.
|
||||
finish_on_close=False,
|
||||
) as root_scope:
|
||||
scope_map["root"] = root_scope
|
||||
self.assertEqual(self._tracer.active_span, root_scope.span)
|
||||
|
||||
logger.info("asdf root1 - %s", current_context().scope)
|
||||
|
||||
# Fire-and-forget a task
|
||||
#
|
||||
# XXX: The root scope context manager will `__exit__` before this task
|
||||
# completes.
|
||||
run_in_background(async_task)
|
||||
|
||||
logger.info("asdf root2 - %s", current_context().scope)
|
||||
|
||||
# Because we used `run_in_background`, the active span should still be
|
||||
# the root.
|
||||
self.assertEqual(self._tracer.active_span, root_scope.span)
|
||||
|
||||
logger.info("asdf root3 - %s", current_context().scope)
|
||||
|
||||
# We shouldn't see any active spans outside of the scope
|
||||
self.assertIsNone(self._tracer.active_span)
|
||||
|
||||
with LoggingContext(name="root logcontext", server_name="test_server"):
|
||||
# Start the test off
|
||||
d_root = defer.ensureDeferred(root())
|
||||
|
||||
# Pump the reactor until the tasks complete
|
||||
#
|
||||
# We're manually pumping the reactor (and causing any pending callbacks to
|
||||
# be called) so we need to be in the sentinel logcontext to avoid leaking
|
||||
# our current logcontext into the reactor (which would then get picked up
|
||||
# and associated with the next thing the reactor does). `with
|
||||
# PreserveLoggingContext()` will reset the logcontext to the sentinel while
|
||||
# we're pumping the reactor in the block and return us back to our current
|
||||
# logcontext after the block.
|
||||
with PreserveLoggingContext():
|
||||
reactor.pump((2,) * 8)
|
||||
self.successResultOf(d_root)
|
||||
|
||||
self.assertTrue(
|
||||
callback_finished,
|
||||
"Callback never finished which means the test probably didn't wait long enough",
|
||||
)
|
||||
|
||||
# After we see all of the tasks are done (like a request when it
|
||||
# `_finished_processing`), let's finish our root span
|
||||
scope_map["root"].span.finish()
|
||||
|
||||
# Sanity check again: We shouldn't see any active spans leftover in this
|
||||
# this context.
|
||||
self.assertIsNone(self._tracer.active_span)
|
||||
|
||||
# The spans should be reported in order of their finishing: task 1, task 2,
|
||||
# root.
|
||||
#
|
||||
# We use `assertIncludes` just as an easier way to see if items are missing or
|
||||
# added. We assert the order just below
|
||||
self.assertIncludes(
|
||||
set(self._reporter.get_spans()),
|
||||
{
|
||||
scope_map["task1"].span,
|
||||
scope_map["root"].span,
|
||||
},
|
||||
exact=True,
|
||||
)
|
||||
# This is where we actually assert the correct order
|
||||
self.assertEqual(
|
||||
self._reporter.get_spans(),
|
||||
[
|
||||
scope_map["task1"].span,
|
||||
scope_map["root"].span,
|
||||
],
|
||||
)
|
||||
|
||||
def test_trace_decorator_sync(self) -> None:
|
||||
"""
|
||||
Test whether we can use `@trace_with_opname` (`@trace`) and `@tag_args`
|
||||
|
||||
Reference in New Issue
Block a user