From 562c6daea3bcc453b3abab5a8617d04c4205ab4c Mon Sep 17 00:00:00 2001 From: Aleksandr Mezin Date: Sun, 4 Sep 2022 18:13:18 +0600 Subject: [PATCH] Restore old initial batch distribution logic in LoadScheduling pytest orders tests for optimal sequential execution - i. e. avoiding unnecessary setup and teardown of fixtures. So executing tests in consecutive chunks is important for optimal performance. Commit 09d79ace356ebf14d221cecd3fd8002d075a2034 optimized test distribution for the corner case, when the number of tests is less than 2 * number of nodes. At the same time, it made initial test distribution worse for all other cases. If some tests use some fixture, and these tests fit into the initial batch, the fixture will be created min(n_tests, n_workers) times, no matter how many other tests there are. With the old algorithm (before 09d79ace356ebf14d221cecd3fd8002d075a2034), if there are enough tests not using the fixture, the fixture was created only once. So restore the old behavior for typical cases where the number of tests is much greater than the number of workers (or, strictly speaking, when there are at least 2 tests for every node). In my test suite, where fixtures create Docker containers, this change reduces total run time by 10-15%. This is a partial revert of commit 09d79ace356ebf14d221cecd3fd8002d075a2034 --- changelog/812.feature | 1 + src/xdist/scheduler/load.py | 20 +++++++++++++------- testing/test_dsession.py | 12 ++++++------ 3 files changed, 20 insertions(+), 13 deletions(-) create mode 100644 changelog/812.feature diff --git a/changelog/812.feature b/changelog/812.feature new file mode 100644 index 00000000..4fda9541 --- /dev/null +++ b/changelog/812.feature @@ -0,0 +1 @@ +Partially restore old initial batch distribution algorithm in ``LoadScheduling``. diff --git a/src/xdist/scheduler/load.py b/src/xdist/scheduler/load.py index f32caa55..195282dc 100644 --- a/src/xdist/scheduler/load.py +++ b/src/xdist/scheduler/load.py @@ -248,13 +248,19 @@ def schedule(self): # Send a batch of tests to run. If we don't have at least two # tests per node, we have to send them all so that we can send # shutdown signals and get all nodes working. - initial_batch = max(len(self.pending) // 4, 2 * len(self.nodes)) - - # distribute tests round-robin up to the batch size - # (or until we run out) - nodes = cycle(self.nodes) - for i in range(initial_batch): - self._send_tests(next(nodes), 1) + if len(self.pending) < 2 * len(self.nodes): + # distribute tests round-robin + nodes = cycle(self.nodes) + for i in range(len(self.pending)): + self._send_tests(next(nodes), 1) + else: + # how many items per node do we have about? + items_per_node = len(self.collection) // len(self.node2pending) + # take a fraction of tests for initial distribution + node_chunksize = max(items_per_node // 4, 2) + # and initialize each node with a chunk of tests + for node in self.nodes: + self._send_tests(node, node_chunksize) if not self.pending: # initial distribution sent all tests, start node shutdown diff --git a/testing/test_dsession.py b/testing/test_dsession.py index d3a57152..86273b8c 100644 --- a/testing/test_dsession.py +++ b/testing/test_dsession.py @@ -115,18 +115,18 @@ def test_schedule_batch_size(self, pytester: pytest.Pytester) -> None: # assert not sched.tests_finished sent1 = node1.sent sent2 = node2.sent - assert sent1 == [0, 2] - assert sent2 == [1, 3] + assert sent1 == [0, 1] + assert sent2 == [2, 3] assert sched.pending == [4, 5] assert sched.node2pending[node1] == sent1 assert sched.node2pending[node2] == sent2 assert len(sched.pending) == 2 sched.mark_test_complete(node1, 0) - assert node1.sent == [0, 2, 4] + assert node1.sent == [0, 1, 4] assert sched.pending == [5] - assert node2.sent == [1, 3] - sched.mark_test_complete(node1, 2) - assert node1.sent == [0, 2, 4, 5] + assert node2.sent == [2, 3] + sched.mark_test_complete(node1, 1) + assert node1.sent == [0, 1, 4, 5] assert not sched.pending def test_schedule_fewer_tests_than_nodes(self, pytester: pytest.Pytester) -> None: