From 8619f8934a2762fa35d80b792ad349cd1128c1e9 Mon Sep 17 00:00:00 2001
From: Egor Tensin <Egor.Tensin@gmail.com>
Date: Tue, 11 Jul 2023 21:17:10 +0200
Subject: test: add test for segfaulting CI script

The C code leaked out of src/, so I moved .clang-format and some compile
options to the root directory.

Also, I'm starting to hit test execution limits; I'm going to limit the
repositories used for stress testing.
---
 test/CMakeLists.txt         |   3 ++
 test/py/conftest.py         |  53 +++++++++++++------
 test/py/lib/test_repo.py    | 125 ++++++++++++++++++++++++++++++++++----------
 test/py/test_repo.py        |  11 +++-
 test/sigsegv/CMakeLists.txt |  11 ++++
 test/sigsegv/lib.c          |   5 ++
 test/sigsegv/lib.h          |  10 ++++
 test/sigsegv/main.c         |  16 ++++++
 8 files changed, 189 insertions(+), 45 deletions(-)
 create mode 100644 test/sigsegv/CMakeLists.txt
 create mode 100644 test/sigsegv/lib.c
 create mode 100644 test/sigsegv/lib.h
 create mode 100644 test/sigsegv/main.c

(limited to 'test')

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 3aca9fa..7f4ff11 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,3 +1,5 @@
+add_subdirectory(sigsegv)
+
 find_package(Python3 REQUIRED COMPONENTS Interpreter)
 
 set(python_test_args
@@ -7,6 +9,7 @@ set(python_test_args
     --server-binary "$<TARGET_FILE:server>"
     --worker-binary "$<TARGET_FILE:worker>"
     --client-binary "$<TARGET_FILE:client>"
+    --sigsegv-binary "$<TARGET_FILE:sigsegv>"
     --project-version "${PROJECT_VERSION}")
 
 function(add_python_tests name)
diff --git a/test/py/conftest.py b/test/py/conftest.py
index 0131c37..54ef444 100644
--- a/test/py/conftest.py
+++ b/test/py/conftest.py
@@ -8,7 +8,7 @@ import os
 
 from pytest import fixture
 
-from lib import test_repo
+from lib import test_repo as repo
 from lib.db import Database
 from lib.net import random_unused_port
 from lib.process import CmdLine
@@ -45,7 +45,7 @@ class ParamBinary(Param):
 
 
 BINARY_PARAMS = [
-    ParamBinary(name) for name in ('server', 'worker', 'client')
+    ParamBinary(name) for name in ('server', 'worker', 'client', 'sigsegv')
 ]
 
 PARAM_VALGRIND = ParamBinary('valgrind', required=False)
@@ -162,11 +162,16 @@ def worker_cmd(base_cmd_line, paths, server_port):
 
 
 @fixture
-def client_cmd(base_cmd_line, paths, server_port):
+def client(base_cmd_line, paths, server_port):
     args = ['--host', '127.0.0.1', '--port', server_port]
     return CmdLine.wrap(base_cmd_line, CmdLine(paths.client_binary, *args))
 
 
+@fixture
+def sigsegv(paths):
+    return CmdLine(paths.sigsegv_binary)
+
+
 @fixture
 def server(server_cmd):
     with server_cmd.run_async() as server:
@@ -184,19 +189,35 @@ def workers(worker_cmd):
 
 
 @fixture
-def client(client_cmd):
-    return client_cmd
-
-
-@fixture(params=[
-             test_repo.TestRepoOutputSimple,
-             test_repo.TestRepoOutputEmpty,
-             test_repo.TestRepoOutputLong,
-             test_repo.TestRepoOutputNull,
-         ],
-         ids=['output_simple', 'output_empty', 'output_long', 'output_null'])
-def test_repo(tmp_path, request):
-    return request.param(os.path.join(tmp_path, 'repo'))
+def repo_path(tmp_path):
+    return os.path.join(tmp_path, 'repo')
+
+
+ALL_REPOS = [
+    repo.TestRepoOutputSimple,
+    repo.TestRepoOutputEmpty,
+    repo.TestRepoOutputLong,
+    repo.TestRepoOutputNull,
+    repo.TestRepoSegfault,
+]
+
+
+def _make_repo(repo_path, paths, cls):
+    args = [repo_path]
+    if cls is repo.TestRepoSegfault:
+        args += [paths.sigsegv_binary]
+    return cls(*args)
+
+
+@fixture(params=ALL_REPOS, ids=[repo.codename() for repo in ALL_REPOS])
+def test_repo(repo_path, paths, request):
+    return _make_repo(repo_path, paths, request.param)
+
+
+@fixture(params=[repo for repo in ALL_REPOS if repo.enabled_for_stress_testing()],
+         ids=[repo.codename() for repo in ALL_REPOS if repo.enabled_for_stress_testing()])
+def stress_test_repo(repo_path, paths, request):
+    return _make_repo(repo_path, paths, request.param)
 
 
 class Env:
diff --git a/test/py/lib/test_repo.py b/test/py/lib/test_repo.py
index 0d722be..aca6dfa 100644
--- a/test/py/lib/test_repo.py
+++ b/test/py/lib/test_repo.py
@@ -40,10 +40,6 @@ class TestRepo(Repo):
     # Prevent Pytest from discovering test cases in this class:
     __test__ = False
 
-    def _format_ci_script(self):
-        runs_dir = shlex.quote(self.runs_dir)
-        return CI_SCRIPT.format(runs_dir=runs_dir)
-
     def __init__(self, path, ci_script='ci.sh'):
         super().__init__(path)
 
@@ -51,13 +47,37 @@ class TestRepo(Repo):
         os.makedirs(self.runs_dir, exist_ok=True)
 
         self.ci_script_path = os.path.join(self.path, ci_script)
-        with open(self.ci_script_path, mode='x') as f:
-            f.write(self._format_ci_script())
-        os.chmod(self.ci_script_path, 0o755)
 
+        self.write_ci_script()
         self.run('git', 'add', '--', ci_script)
         self.run('git', 'commit', '-q', '-m', 'add CI script')
 
+    @staticmethod
+    @abc.abstractmethod
+    def codename():
+        pass
+
+    @staticmethod
+    def enabled_for_stress_testing():
+        return False
+
+    @abc.abstractmethod
+    def run_exit_code_matches(self, ec):
+        pass
+
+    @abc.abstractmethod
+    def run_output_matches(self, output):
+        pass
+
+    def write_ci_script(self):
+        with open(self.ci_script_path, mode='x') as f:
+            f.write(self.format_ci_script())
+        os.chmod(self.ci_script_path, 0o755)
+
+    def format_ci_script(self):
+        runs_dir = shlex.quote(self.runs_dir)
+        return CI_SCRIPT.format(runs_dir=runs_dir)
+
     def count_run_files(self):
         return len([name for name in os.listdir(self.runs_dir) if os.path.isfile(os.path.join(self.runs_dir, name))])
 
@@ -68,32 +88,31 @@ class TestRepoOutput(TestRepo, abc.ABC):
     OUTPUT_SCRIPT_NAME = 'generate-output'
 
     def __init__(self, path):
+        self.output_script_path = os.path.join(path, TestRepoOutput.OUTPUT_SCRIPT_NAME)
         super().__init__(path)
 
-        self.output_script_path = os.path.join(self.path, TestRepoOutput.OUTPUT_SCRIPT_NAME)
-        with open(self.output_script_path, mode='x') as f:
-            f.write(self._format_output_script())
-        os.chmod(self.output_script_path, 0o755)
-
-        with open(self.ci_script_path, mode='a') as f:
-            f.write(self._format_ci_script_addition())
-
+        self.write_output_script()
         self.run('git', 'add', '--', TestRepoOutput.OUTPUT_SCRIPT_NAME)
-        self.run('git', 'add', '-u')
         self.run('git', 'commit', '-q', '-m', 'add output script')
 
-    @abc.abstractmethod
-    def _format_output_script(self):
-        pass
-
-    def _format_ci_script_addition(self):
-        return r'{output_script} | tee -a "$run_output_path"'.format(
+    def format_ci_script(self):
+        script = super().format_ci_script()
+        added = r'{output_script} | tee -a "$run_output_path"'.format(
             output_script=shlex.quote(self.output_script_path))
+        return f'{script}\n{added}\n'
+
+    def write_output_script(self):
+        with open(self.output_script_path, mode='x') as f:
+            f.write(self.format_output_script())
+        os.chmod(self.output_script_path, 0o755)
 
     @abc.abstractmethod
-    def run_output_matches(self, output):
+    def format_output_script(self):
         pass
 
+    def run_exit_code_matches(self, ec):
+        return ec == 0
+
 
 OUTPUT_SCRIPT_SIMPLE = r'''#!/bin/sh -e
 timestamp="$( date --iso-8601=ns )"
@@ -104,7 +123,15 @@ echo "A CI run happened at $timestamp"
 class TestRepoOutputSimple(TestRepoOutput):
     __test__ = False
 
-    def _format_output_script(self):
+    @staticmethod
+    def codename():
+        return 'output_simple'
+
+    @staticmethod
+    def enabled_for_stress_testing():
+        return True
+
+    def format_output_script(self):
         return OUTPUT_SCRIPT_SIMPLE
 
     def run_output_matches(self, output):
@@ -118,7 +145,11 @@ OUTPUT_SCRIPT_EMPTY = r'''#!/bin/sh
 class TestRepoOutputEmpty(TestRepoOutput):
     __test__ = False
 
-    def _format_output_script(self):
+    @staticmethod
+    def codename():
+        return 'output_empty'
+
+    def format_output_script(self):
         return OUTPUT_SCRIPT_EMPTY
 
     def run_output_matches(self, output):
@@ -144,7 +175,15 @@ class TestRepoOutputLong(TestRepoOutput):
         self.output = base64.encodebytes(random.randbytes(nb)).decode()
         super().__init__(*args, **kwargs)
 
-    def _format_output_script(self):
+    @staticmethod
+    def codename():
+        return 'output_long'
+
+    @staticmethod
+    def enabled_for_stress_testing():
+        return True
+
+    def format_output_script(self):
         return OUTPUT_SCRIPT_LONG.format(output=repr(self.output))
 
     def run_output_matches(self, output):
@@ -168,8 +207,40 @@ class TestRepoOutputNull(TestRepoOutput):
         self.output = TestRepoOutputNull.OUTPUT
         super().__init__(*args, **kwargs)
 
-    def _format_output_script(self):
+    @staticmethod
+    def codename():
+        return 'output_null'
+
+    def format_output_script(self):
         return OUTPUT_SCRIPT_NULL.format(output=repr(self.output))
 
     def run_output_matches(self, output):
         return output == self.output
+
+
+class TestRepoSegfault(TestRepo):
+    __test__ = False
+
+    def __init__(self, repo_path, prog_path):
+        self.prog_path = prog_path
+        self.prog_name = os.path.basename(prog_path)
+        super().__init__(repo_path)
+
+        shutil.copy(prog_path, self.path)
+        self.run('git', 'add', '--', self.prog_name)
+        self.run('git', 'commit', '-q', '-m', 'add test program')
+
+    @staticmethod
+    def codename():
+        return 'segfault'
+
+    def format_ci_script(self):
+        script = super().format_ci_script()
+        added = r'exec {prog}'.format(prog=shlex.quote(f'./{self.prog_name}'))
+        return f'{script}\n{added}\n'
+
+    def run_exit_code_matches(self, ec):
+        return ec < 0
+
+    def run_output_matches(self, output):
+        return "Started the test program.\n" == output.decode()
diff --git a/test/py/test_repo.py b/test/py/test_repo.py
index e3a498d..aa89261 100644
--- a/test/py/test_repo.py
+++ b/test/py/test_repo.py
@@ -62,6 +62,7 @@ def _test_repo_internal(env, repo, numof_processes, runs_per_process):
 
     for id, status, ec, output, url, rev in runs:
         assert status == 'finished', f'Invalid status for run {id}: {status}'
+        assert repo.run_exit_code_matches(ec), f"Exit code doesn't match: {ec}"
         assert repo.run_output_matches(output), f"Output doesn't match: {output}"
 
 
@@ -80,6 +81,12 @@ def my_parametrize(names, values, ids=None, **kwargs):
     return pytest.mark.parametrize(names, values, ids=ids, **kwargs)
 
 
+def test_sigsegv(sigsegv):
+    ec, output = sigsegv.try_run()
+    assert ec == -11
+    assert output == 'Started the test program.\n'
+
+
 @my_parametrize('runs_per_client', [1, 5])
 @my_parametrize('numof_clients', [1, 5])
 def test_repo(env, test_repo, numof_clients, runs_per_client):
@@ -89,5 +96,5 @@ def test_repo(env, test_repo, numof_clients, runs_per_client):
 @pytest.mark.stress
 @my_parametrize(('numof_clients', 'runs_per_client'),
                 [(10, 50), (1, 2000), (4, 500)])
-def test_repo_stress(env, test_repo, numof_clients, runs_per_client):
-    _test_repo_internal(env, test_repo, numof_clients, runs_per_client)
+def test_repo_stress(env, stress_test_repo, numof_clients, runs_per_client):
+    _test_repo_internal(env, stress_test_repo, numof_clients, runs_per_client)
diff --git a/test/sigsegv/CMakeLists.txt b/test/sigsegv/CMakeLists.txt
new file mode 100644
index 0000000..b92147d
--- /dev/null
+++ b/test/sigsegv/CMakeLists.txt
@@ -0,0 +1,11 @@
+# Hopefully putting the NULL reference to a separate library should force the
+# sigsegv binary to crash. If I put it in main.c, surprisingly, Clang (with
+# -O2) produces a binary that doesn't crash: https://godbolt.org/z/joeqEEs3Y.
+# To force it to crash, the -fno-delete-null-pointer-checks flag is required:
+# https://godbolt.org/z/o3xEoeG3z. This is a bit hacky though; putting the
+# reference in a separate library should also do the trick.
+
+add_library(null lib.c)
+
+add_executable(sigsegv main.c)
+target_link_libraries(sigsegv PRIVATE null)
diff --git a/test/sigsegv/lib.c b/test/sigsegv/lib.c
new file mode 100644
index 0000000..1e136cc
--- /dev/null
+++ b/test/sigsegv/lib.c
@@ -0,0 +1,5 @@
+#include "lib.h"
+
+#include <stddef.h>
+
+struct data *data = NULL;
diff --git a/test/sigsegv/lib.h b/test/sigsegv/lib.h
new file mode 100644
index 0000000..1f162d5
--- /dev/null
+++ b/test/sigsegv/lib.h
@@ -0,0 +1,10 @@
+#ifndef __LIB_H__
+#define __LIB_H__
+
+struct data {
+	int x;
+};
+
+extern struct data *data;
+
+#endif
diff --git a/test/sigsegv/main.c b/test/sigsegv/main.c
new file mode 100644
index 0000000..428d168
--- /dev/null
+++ b/test/sigsegv/main.c
@@ -0,0 +1,16 @@
+#include "lib.h"
+
+#include <stdio.h>
+
+int main(void)
+{
+	puts("Started the test program.");
+	fflush(stdout);
+
+	printf("This will crash: %d.\n", data->x);
+
+	puts("You shouldn't see this.");
+	fflush(stdout);
+
+	return 0;
+}
-- 
cgit v1.2.3