1
0

Compare commits

...

11 Commits

Author SHA1 Message Date
Erik Johnston 22245fb8a7 Actuall set cache factors in workers 2018-02-21 21:10:48 +00:00
Erik Johnston 78c5eca141 Add hacky cache factor override system 2018-02-21 21:00:24 +00:00
Richard van der Hoff 55bef59cc7 (Really) fix tablescan of event_push_actions on purge
commit 278d21b5 added new code to avoid the tablescan, but didn't remove the
old :/
2018-02-20 10:37:44 +00:00
Erik Johnston ad683cd203 Merge branch 'develop' of github.com:matrix-org/synapse into matrix-org-hotfixes 2018-02-15 14:10:11 +00:00
Erik Johnston 6ed9ff69c2 Merge pull request #2873 from matrix-org/erikj/event_creator_no_state
Don't serialize current state over replication
2018-02-15 14:09:42 +00:00
Erik Johnston 106906a65e Don't serialize current state over replication 2018-02-15 13:53:18 +00:00
Erik Johnston d66afef01e Merge branch 'develop' of github.com:matrix-org/synapse into matrix-org-hotfixes 2018-02-15 13:03:05 +00:00
Erik Johnston 5fb347fc41 Merge pull request #2872 from matrix-org/erikj/event_worker_dont_log
Don't log errors propogated from send_event
2018-02-15 12:31:49 +00:00
Erik Johnston cd94728e93 Merge pull request #2871 from matrix-org/erikj/event_creator_state
Fix state group storage bug in workers
2018-02-15 11:34:01 +00:00
Erik Johnston fd1601c596 Fix state group storage bug in workers
We needed to move `_count_state_group_hops_txn` to the
StateGroupWorkerStore.
2018-02-15 11:04:32 +00:00
Erik Johnston ef344b10e5 Don't log errors propogated from send_event 2018-02-15 11:03:49 +00:00
7 changed files with 135 additions and 60 deletions
+19 -5
View File
@@ -108,7 +108,7 @@ def stop(pidfile, app):
Worker = collections.namedtuple("Worker", [
"app", "configfile", "pidfile", "cache_factor"
"app", "configfile", "pidfile", "cache_factor", "cache_factors",
])
@@ -171,6 +171,10 @@ def main():
if cache_factor:
os.environ["SYNAPSE_CACHE_FACTOR"] = str(cache_factor)
cache_factors = config.get("synctl_cache_factors", {})
for cache_name, factor in cache_factors.iteritems():
os.environ["SYNAPSE_CACHE_FACTOR_" + cache_name.upper()] = str(factor)
worker_configfiles = []
if options.worker:
start_stop_synapse = False
@@ -211,6 +215,10 @@ def main():
or pidfile
)
worker_cache_factor = worker_config.get("synctl_cache_factor") or cache_factor
worker_cache_factors = (
worker_config.get("synctl_cache_factors")
or cache_factors
)
daemonize = worker_config.get("daemonize") or config.get("daemonize")
assert daemonize, "Main process must have daemonize set to true"
@@ -226,8 +234,10 @@ def main():
assert worker_daemonize, "In config %r: expected '%s' to be True" % (
worker_configfile, "worker_daemonize")
worker_cache_factor = worker_config.get("synctl_cache_factor")
worker_cache_factors = worker_config.get("synctl_cache_factors", {})
workers.append(Worker(
worker_app, worker_configfile, worker_pidfile, worker_cache_factor,
worker_cache_factors,
))
action = options.action
@@ -261,15 +271,19 @@ def main():
start(configfile)
for worker in workers:
env = os.environ.copy()
if worker.cache_factor:
os.environ["SYNAPSE_CACHE_FACTOR"] = str(worker.cache_factor)
for cache_name, factor in worker.cache_factors.iteritems():
os.environ["SYNAPSE_CACHE_FACTOR_" + cache_name.upper()] = str(factor)
start_worker(worker.app, configfile, worker.configfile)
if cache_factor:
os.environ["SYNAPSE_CACHE_FACTOR"] = str(cache_factor)
else:
os.environ.pop("SYNAPSE_CACHE_FACTOR", None)
# Reset env back to the original
os.environ.clear()
os.environ.update(env)
if __name__ == "__main__":
+35 -6
View File
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from twisted.internet import defer
from frozendict import frozendict
@@ -77,16 +78,30 @@ class EventContext(object):
self.app_service = None
def serialize(self):
def serialize(self, event):
"""Converts self to a type that can be serialized as JSON, and then
deserialized by `deserialize`
Args:
event (FrozenEvent): The event that this context relates to
Returns:
dict
"""
# We don't serialize the full state dicts, instead they get pulled out
# of the DB on the other side. However, the other side can't figure out
# the prev_state_ids, so if we're a state event we include the event
# id that we replaced in the state.
if event.is_state():
prev_state_id = self.prev_state_ids.get((event.type, event.state_key))
else:
prev_state_id = None
return {
"current_state_ids": _encode_state_dict(self.current_state_ids),
"prev_state_ids": _encode_state_dict(self.prev_state_ids),
"prev_state_id": prev_state_id,
"event_type": event.type,
"event_state_key": event.state_key if event.is_state() else None,
"state_group": self.state_group,
"rejected": self.rejected,
"push_actions": self.push_actions,
@@ -97,6 +112,7 @@ class EventContext(object):
}
@staticmethod
@defer.inlineCallbacks
def deserialize(store, input):
"""Converts a dict that was produced by `serialize` back into a
EventContext.
@@ -109,8 +125,6 @@ class EventContext(object):
EventContext
"""
context = EventContext()
context.current_state_ids = _decode_state_dict(input["current_state_ids"])
context.prev_state_ids = _decode_state_dict(input["prev_state_ids"])
context.state_group = input["state_group"]
context.rejected = input["rejected"]
context.push_actions = input["push_actions"]
@@ -118,11 +132,26 @@ class EventContext(object):
context.delta_ids = _decode_state_dict(input["delta_ids"])
context.prev_state_events = input["prev_state_events"]
# We use the state_group and prev_state_id stuff to pull the
# current_state_ids out of the DB and construct prev_state_ids.
prev_state_id = input["prev_state_id"]
event_type = input["event_type"]
event_state_key = input["event_state_key"]
context.current_state_ids = yield store.get_state_ids_for_group(
context.state_group,
)
if prev_state_id and event_state_key:
context.prev_state_ids = dict(context.current_state_ids)
context.prev_state_ids[(event_type, event_state_key)] = prev_state_id
else:
context.prev_state_ids = context.current_state_ids
app_service_id = input["app_service_id"]
if app_service_id:
context.app_service = store.get_app_service_by_id(app_service_id)
return context
defer.returnValue(context)
def _encode_state_dict(state_dict):
+12 -3
View File
@@ -15,6 +15,7 @@
from twisted.internet import defer
from synapse.api.errors import SynapseError, MatrixCodeMessageException
from synapse.events import FrozenEvent
from synapse.events.snapshot import EventContext
from synapse.http.servlet import RestServlet, parse_json_object_from_request
@@ -27,6 +28,7 @@ import re
logger = logging.getLogger(__name__)
@defer.inlineCallbacks
def send_event_to_master(client, host, port, requester, event, context):
"""Send event to be handled on the master
@@ -44,11 +46,18 @@ def send_event_to_master(client, host, port, requester, event, context):
"event": event.get_pdu_json(),
"internal_metadata": event.internal_metadata.get_dict(),
"rejected_reason": event.rejected_reason,
"context": context.serialize(),
"context": context.serialize(event),
"requester": requester.serialize(),
}
return client.post_json_get_json(uri, payload)
try:
result = yield client.post_json_get_json(uri, payload)
except MatrixCodeMessageException as e:
# We convert to SynapseError as we know that it was a SynapseError
# on the master process that we should send to the client. (And
# importantly, not stack traces everywhere)
raise SynapseError(e.code, e.msg, e.errcode)
defer.returnValue(result)
class ReplicationSendEventRestServlet(RestServlet):
@@ -87,7 +96,7 @@ class ReplicationSendEventRestServlet(RestServlet):
event = FrozenEvent(event_dict, internal_metadata, rejected_reason)
requester = Requester.deserialize(self.store, content["requester"])
context = EventContext.deserialize(self.store, content["context"])
context = yield EventContext.deserialize(self.store, content["context"])
if requester.user:
request.authenticated_entity = requester.user.to_string()
-1
View File
@@ -2317,7 +2317,6 @@ class EventsStore(SQLBaseStore):
"event_edge_hashes",
"event_edges",
"event_forward_extremities",
"event_push_actions",
"event_reference_hashes",
"event_search",
"event_signatures",
+57 -43
View File
@@ -20,7 +20,7 @@ from twisted.internet import defer
from synapse.storage.background_updates import BackgroundUpdateStore
from synapse.storage.engines import PostgresEngine
from synapse.util.caches import intern_string, CACHE_SIZE_FACTOR
from synapse.util.caches import intern_string, get_cache_factor_for
from synapse.util.caches.descriptors import cached, cachedList
from synapse.util.caches.dictionary_cache import DictionaryCache
from synapse.util.stringutils import to_ascii
@@ -54,7 +54,7 @@ class StateGroupWorkerStore(SQLBaseStore):
super(StateGroupWorkerStore, self).__init__(db_conn, hs)
self._state_group_cache = DictionaryCache(
"*stateGroupCache*", 500000 * CACHE_SIZE_FACTOR
"*stateGroupCache*", 500000 * get_cache_factor_for("stateGroupCache")
)
@cached(max_entries=100000, iterable=True)
@@ -139,6 +139,20 @@ class StateGroupWorkerStore(SQLBaseStore):
defer.returnValue(group_to_state)
@defer.inlineCallbacks
def get_state_ids_for_group(self, state_group):
"""Get the state IDs for the given state group
Args:
state_group (int)
Returns:
Deferred[dict]: Resolves to a map of (type, state_key) -> event_id
"""
group_to_state = yield self._get_state_for_groups((state_group,))
defer.returnValue(group_to_state[state_group])
@defer.inlineCallbacks
def get_state_groups(self, room_id, event_ids):
""" Get the state groups for the given list of event_ids
@@ -654,6 +668,47 @@ class StateGroupWorkerStore(SQLBaseStore):
return self.runInteraction("store_state_group", _store_state_group_txn)
def _count_state_group_hops_txn(self, txn, state_group):
"""Given a state group, count how many hops there are in the tree.
This is used to ensure the delta chains don't get too long.
"""
if isinstance(self.database_engine, PostgresEngine):
sql = ("""
WITH RECURSIVE state(state_group) AS (
VALUES(?::bigint)
UNION ALL
SELECT prev_state_group FROM state_group_edges e, state s
WHERE s.state_group = e.state_group
)
SELECT count(*) FROM state;
""")
txn.execute(sql, (state_group,))
row = txn.fetchone()
if row and row[0]:
return row[0]
else:
return 0
else:
# We don't use WITH RECURSIVE on sqlite3 as there are distributions
# that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
next_group = state_group
count = 0
while next_group:
next_group = self._simple_select_one_onecol_txn(
txn,
table="state_group_edges",
keyvalues={"state_group": next_group},
retcol="prev_state_group",
allow_none=True,
)
if next_group:
count += 1
return count
class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
""" Keeps track of the state at a given event.
@@ -728,47 +783,6 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
(event_id,), state_group_id
)
def _count_state_group_hops_txn(self, txn, state_group):
"""Given a state group, count how many hops there are in the tree.
This is used to ensure the delta chains don't get too long.
"""
if isinstance(self.database_engine, PostgresEngine):
sql = ("""
WITH RECURSIVE state(state_group) AS (
VALUES(?::bigint)
UNION ALL
SELECT prev_state_group FROM state_group_edges e, state s
WHERE s.state_group = e.state_group
)
SELECT count(*) FROM state;
""")
txn.execute(sql, (state_group,))
row = txn.fetchone()
if row and row[0]:
return row[0]
else:
return 0
else:
# We don't use WITH RECURSIVE on sqlite3 as there are distributions
# that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
next_group = state_group
count = 0
while next_group:
next_group = self._simple_select_one_onecol_txn(
txn,
table="state_group_edges",
keyvalues={"state_group": next_group},
retcol="prev_state_group",
allow_none=True,
)
if next_group:
count += 1
return count
@defer.inlineCallbacks
def _background_deduplicate_state(self, progress, batch_size):
"""This background update will slowly deduplicate state by reencoding
+10
View File
@@ -18,6 +18,16 @@ import os
CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.5))
def get_cache_factor_for(cache_name):
env_var = "SYNAPSE_CACHE_FACTOR_" + cache_name.upper()
factor = os.environ.get(env_var)
if factor:
return float(factor)
return CACHE_SIZE_FACTOR
metrics = synapse.metrics.get_metrics_for("synapse.util.caches")
caches_by_name = {}
+2 -2
View File
@@ -16,7 +16,7 @@ import logging
from synapse.util.async import ObservableDeferred
from synapse.util import unwrapFirstError, logcontext
from synapse.util.caches import CACHE_SIZE_FACTOR
from synapse.util.caches import get_cache_factor_for
from synapse.util.caches.lrucache import LruCache
from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
from synapse.util.stringutils import to_ascii
@@ -298,7 +298,7 @@ class CacheDescriptor(_CacheDescriptorBase):
orig, num_args=num_args, inlineCallbacks=inlineCallbacks,
cache_context=cache_context)
max_entries = int(max_entries * CACHE_SIZE_FACTOR)
max_entries = int(max_entries * get_cache_factor_for(orig.__name__))
self.max_entries = max_entries
self.tree = tree