Try and apply review comments about sanitizing names.

1. Update a few comments for clarity. 2. Apply a replace() to remove whitespace in a requested worker name and use an underscore instead. 3. Stop-error if requesting a worker name, but forgetting to actually put one in. 4. If a worker name isn't requested, just use the set of worker types, sorted and concatenated with a hyphen. This avoids having to sanitize the worker name in this case. 5. Add regex to avoid nasty name surprises.
2023-03-08 20:49:49 -06:00
parent 0cf5e80870
commit e2fecab988
1 changed files with 49 additions and 34 deletions
@@ -47,6 +47,7 @@

 import os
 import platform
+import re
 import subprocess
 import sys
 from collections import defaultdict
@@ -630,14 +631,26 @@ def parse_worker_types(
                    "To many worker names requested for a single worker, or to many "
                    f"'='. Please fix: {worker_type_string}"
                )
-            # if there was no name given, this will still be an empty string
-            requested_worker_name = worker_type_split[0]
+            # Assign the name
+            # Hopefully, no one will actually use whitespace in a requested worker name,
+            # but if they do, replace it with an underscore to maintain readability.
+            requested_worker_name = worker_type_split[0].replace(" ", "_")
+
+            # If there was no name given, this will still be an empty string. If we got
+            # here, it's because there was a name requested with a '='. They must have
+            # forgotten to actually type in a name. e.g. '=type'
+            if not requested_worker_name:
+                error(
+                    "Worker name requested but not actually found at: "
+                    f"'{worker_type_string}'. Please fix."
+                )

            # Check the last character of a requested name is not a number. This can
            # cause an error that comes across during startup as an exception in
            # 'startListening' and ends with 'Address already in use' for the port.
            # This only seems to occur when requesting more than 10 of a given
-            # worker_type, otherwise it would be ok.
+            # worker_type, otherwise it would be ok. Check this separately from the
+            # regex below, as otherwise digits are ok in a name
            if requested_worker_name[-1].isdigit():
                error(
                    "Found a number at the end of the requested worker name: "
@@ -646,19 +659,30 @@ def parse_worker_types(
                    "underscore after the number if this what you really want to do."
                )

+            # Uncommon mistake that will cause problems. Name string containing any of a
+            # range of nasties will do Bad Things to filenames and probably nginx too.
+            if not re.match(r"^[a-zA-Z0-9_+-]*[a-zA-Z0-9_+-]$", requested_worker_name):
+                error(
+                    "Requesting a worker name containing an invalid character is not "
+                    "allowed, as it would raise a FileNotFoundError. Please only use "
+                    "a-z, A-Z, 0-9, or '_', '+', '-' when forming your name: "
+                    f"{requested_worker_name}"
+                )
+
            # Reassign the worker_type string with no name on it.
            new_worker_type_string = worker_type_split[1]

        # At this point, we have:
-        #   requested_worker_name which might be an empty string
-        #   new_worker_type_string which might still be what it was when it came in
+        #   requested_worker_name which might be an empty string or a requested name
+        #   new_worker_type_string which needs to be split into it's worker_type parts

        # Split the worker_type_string on "+", remove whitespace from ends then make
        # the list a set so it's deduplicated.
        worker_types_list = split_and_strip_string(new_worker_type_string, "+")
        worker_types_set: Set[str] = set(worker_types_list)

-        worker_base_name = new_worker_type_string
+        # Settle on a base name for this worker
+        worker_base_name: str
        if requested_worker_name:
            worker_base_name = requested_worker_name
            # It'll be useful to have this in the log in case it's a complex of many
@@ -670,37 +694,26 @@ def parse_worker_types(
            )

        else:
-            # The worker name will be the worker_type, however if spaces exist
-            # between concatenated worker_types and the "+" because of readability,
-            # it will error on startup. Recombine worker_types without spaces and log.
-            # Allows for human readability while declaring a complex worker type, e.g.
+            # The worker name will be the worker_type(s), however the potential for
+            # whitespace could still exist if we use the new_worker_type_string from
+            # above. Use the set of worker_type(s), sort it alphabetically(for
+            # determinism) and concatenate with a "-" instead of a "+" as it came in
+            # with.
            # 'event_persister + federation_reader + federation_sender + pusher'
-            if (len(worker_types_set) > 1) and (" " in worker_base_name):
-                worker_base_name = "+".join(sorted(worker_types_set))
-                log(
-                    "Default worker name would have contained spaces, which is not "
-                    f"allowed: '{worker_type_string}'. Reformed name to not contain "
-                    f"spaces: '{worker_base_name}'"
-                )
+            # would become
+            # 'event_persister-federation_reader-federation_sender-pusher'
+            # This is later checked for sanity when pulling from WORKERS_CONFIG.
+            worker_base_name = "-".join(sorted(worker_types_set))
+            log(
+                "Default worker name might have contained spaces, which is not "
+                f"allowed: '{new_worker_type_string}'. Reformed name to not contain "
+                f"spaces and be sorted alphabetically by type: '{worker_base_name}'"
+            )

        # At this point, we have:
-        #   worker_base_name which might be identical to
-        #   new_worker_type_string which might still be what it was when it came in
+        #   worker_base_name and
        #   worker_types_set which is a Set of what worker_types are requested

-        # Uncommon mistake that will cause problems. Name string containing quotes
-        # or spaces will do Bad Things to filenames and probably nginx too.
-        if (
-            (" " in worker_base_name)
-            or ('"' in worker_base_name)
-            or ("'" in worker_base_name)
-        ):
-            error(
-                "Requesting a worker name containing a quote mark or a space is "
-                "not allowed, as it would raise a FileNotFoundError. Please fix: "
-                f"{worker_base_name}"
-            )
-
        # This counter is used for naming workers with an incrementing number. Use the
        # worker_base_name for the index
        worker_type_counter[worker_base_name] += 1
@@ -714,11 +727,13 @@ def parse_worker_types(
        # deterministic.
        deterministic_worker_type_string = "+".join(sorted(worker_types_set))
        check_worker_type = worker_name_checklist.get(worker_base_name)
-        # Either this doesn't exist yet, or it matches with a twin
+        # Either this doesn't exist yet, which means it's being seen for the first time,
+        # or it matches with a previously seen worker_type(s) combination.
        if (check_worker_type is None) or (
            check_worker_type == deterministic_worker_type_string
        ):
-            # This is a no-op if it exists, which is expected to avoid the else block
+            # This is either setting the base name for the first time, or a no-op if it
+            # exists(e.g. 'event_persister:2').
            worker_name_checklist.setdefault(
                worker_base_name, deterministic_worker_type_string
            )