forked from wesnoth/wesnoth
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_wml_tests
executable file
·362 lines (322 loc) · 16 KB
/
run_wml_tests
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
#!/usr/bin/env python3
# encoding: utf-8
"""
This script runs a sequence of wml unit test scenarios.
"""
import argparse, enum, os, re, subprocess, sys
class Verbosity(enum.IntEnum):
    """Output levels, selected by how many -v arguments were given on the command line.

    This deliberately doesn't use enum.unique: more than one of these names
    could share the same int level.
    """

    # Show progress when running interactively, with a new line each time a new batch starts
    NAMES_OF_TESTS_RUN = 1

    # The command lines needed to run Wesnoth directly for a given test is printed
    SCRIPT_DEBUGGING = 2

    # Print the output even when tests give the expected result
    OUTPUT_OF_PASSING_TESTS = 3
class UnexpectedTestStatusException(Exception):
    """Signals that a unit test did not return the result that was expected of it."""
class UnitTestResult(enum.Enum):
    """Python mirror of the unit_test_result enum in game_launcher.hpp.

    The values are the process exit codes that this script compares against.
    Note that the value 5 is not assigned to any member here.
    """

    PASS = 0
    FAIL = 1
    TIMEOUT = 2
    FAIL_LOADING_REPLAY = 3
    FAIL_PLAYING_REPLAY = 4
    FAIL_WML_EXCEPTION = 6
    FAIL_BY_DEFEAT = 7
    PASS_BY_VICTORY = 8
    BROKE_STRICT_TEST_PASS = 9
    BROKE_STRICT_TEST_FAIL = 10
    BROKE_STRICT_TEST_FAIL_BY_DEFEAT = 11
    BROKE_STRICT_TEST_PASS_BY_VICTORY = 12

    def __str__(self):
        """Render as '<numeric value> <member name>', for example '0 PASS'."""
        return " ".join((str(self.value), self.name))
class TestCase:
    """One line of the wml_test_schedule: an expected status and a scenario name."""

    def __init__(self, status, name):
        """Store the expected status and the name of the unit test scenario."""
        self.name = name
        self.status = status

    def __str__(self):
        """Render as 'TestCase<status, name>' for debugging output."""
        return "TestCase<{0}, {1}>".format(self.status, self.name)
class TestResultAccumulator:
    """Collects which tests passed, were skipped, failed or crashed during a run.

    Each of the four lists holds the test objects (anything with a .name
    attribute) from the batches reported through the matching *_test method.
    """

    def __init__(self, total):
        """Create an empty accumulator for a run that contains `total` tests."""
        self.total = total
        # These must be created per instance. As class attributes they would be
        # shared (and mutated in place by +=) between every accumulator.
        self.passed = []
        self.skipped = []
        self.failed = []
        self.crashed = []

    def pass_test(self, batch):
        """These tests had the expected result - passed if UnitTestResult.PASS was expected, etc."""
        self.passed.extend(batch)

    def skip_test(self, batch):
        """These tests were not run at all. For example, if they expect UnitTestResult.TIMEOUT but the run requested no timeouts."""
        self.skipped.extend(batch)

    def fail_test(self, batch):
        """These tests did not crash, but did not have the expected result - they passed if UnitTestResult.FAIL was expected, etc."""
        self.failed.extend(batch)

    def crash_test(self, batch):
        """Segfaults and similar unusual exits from the program under test."""
        self.crashed.extend(batch)

    def __bool__(self):
        """Returns true if everything that was expected to run (based on the command line options) ran and passed."""
        # The logic here is redundant, from the length of passed and skipped we should know that failed and crashed are both empty;
        # however a false everything-passed result is much worse than a couple of extra checks here.
        return (
            len(self.passed) + len(self.skipped) == self.total
            and len(self.failed) == 0
            and len(self.crashed) == 0
        )

    @staticmethod
    def _names(tests):
        """Comma-separated names of the given tests, for the summary text."""
        return ", ".join([test.name for test in tests])

    def __str__(self):
        """Multi-line summary: the pass count, then one line per non-empty problem category."""
        result = "Passed {count} of {total} tests\n".format(count=len(self.passed), total=self.total)
        if self.skipped:
            result += "Skipped batches containing {count} tests: ({names})\n".format(
                count=len(self.skipped), names=self._names(self.skipped))
        if self.failed:
            result += "Failed batches containing {count} tests: ({names})\n".format(
                count=len(self.failed), names=self._names(self.failed))
        if self.crashed:
            # No trailing newline on this last line, it's the end of the summary
            result += "Crashed during batches containing {count} tests: ({names})".format(
                count=len(self.crashed), names=self._names(self.crashed))
        return result
class TestListParser:
    """Each line in the list of tests should be formatted:
        <expected return code><space><name of unit test scenario>

    For example:
        0 test_functionality

    Lines beginning # are treated as comments.
    """

    def __init__(self, options):
        """Read the verbosity, the schedule filename and the optional filter regex from `options`."""
        self.verbose = options.verbose
        self.filename = options.list
        self.filter = re.compile(options.filter) if options.filter is not None else None

    def get(self, batcher):
        """Parse the schedule file.

        Returns a tuple of (batcher(list of TestCase), TestResultAccumulator)
        where the accumulator's total is the number of tests selected.

        Raises ValueError if a line that would be run can't be parsed, or if
        its status number isn't a known UnitTestResult.
        """
        status_name_re = re.compile(r"^(\d+) ([\w-]+)$")
        test_list = []
        # The context manager makes sure the schedule file is closed again
        with open(self.filename, mode="rt") as schedule:
            for line in schedule:
                line = line.strip()
                if line == "" or line.startswith("#"):
                    continue

                x = status_name_re.match(line)
                if x is None:
                    print("Could not parse test list file: ", line)

                # The filter is applied to the whole line, status number included
                if self.filter and not self.filter.search(line):
                    continue

                if x is None:
                    # Silently dropping the test could hide a failure, so stop
                    # with a clear message instead of an AttributeError below.
                    raise ValueError("Could not parse test list file {filename}: {line!r}".format(
                        filename=self.filename, line=line))

                status, name = x.groups()
                t = TestCase(UnitTestResult(int(status)), name)
                if self.verbose >= Verbosity.SCRIPT_DEBUGGING:
                    print(t)
                test_list.append(t)
        return batcher(test_list), TestResultAccumulator(len(test_list))
def run_with_rerun_for_sdl_video(args, timeout, max_attempts=10):
    """A wrapper for subprocess.run with a workaround for the issue of travis+18.04
    intermittently failing to initialise SDL.

    args: the command line to run, as a list.
    timeout: passed to subprocess.run, None means no timeout.
    max_attempts: sanity limit on how many times the command is run. It's a rare
        failure, a single retry would probably be enough. Values below 1 are
        treated as 1.

    Returns the subprocess.CompletedProcess of the first attempt that didn't hit
    the SDL error, with stderr merged into stdout. If every attempt hit the
    error then the result of the last attempt is returned, so that the caller
    always gets a result to inspect.

    Raises subprocess.TimeoutExpired if an attempt takes longer than timeout.
    """
    res = None
    for attempt in range(1, max(1, max_attempts) + 1):
        res = subprocess.run(args, timeout=timeout, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if b"Could not initialize SDL_video" not in res.stdout:
            return res
        print("Could not initialise SDL_video error, attempt", attempt)
    # Out of attempts, hand back the last failure rather than None
    return res
class WesnothRunner:
    """Works out how to start Wesnoth, and runs batches of unit tests with it.

    The constructor converts the parsed command line options into the list of
    arguments shared by every Wesnoth instance. run_tests() starts one instance
    for a batch of tests and records the outcome in a TestResultAccumulator.
    """

    def __init__(self, options):
        """Build the common argument list from the parsed command line options.

        Raises FileNotFoundError or RuntimeError if the path option asks for an
        Xcode build and there isn't exactly one matching build directory.
        """
        self.verbose = options.verbose
        # Remembered here so that run_tests doesn't need a module-level "options"
        self.strict_mode = options.strict_mode
        self.backtrace = getattr(options, "backtrace", False)
        if options.path in ["XCode", "xcode", "Xcode"]:
            import glob
            build = "Debug" if options.debug_bin else "Release"
            pattern = os.path.join("~/Library/Developer/XCode/DerivedData/The_Battle_for_Wesnoth*/Build/Products",
                build, "The Battle for Wesnoth.app/Contents/MacOS/The Battle for Wesnoth")
            path_list = glob.glob(os.path.expanduser(pattern))
            if len(path_list) == 0:
                raise FileNotFoundError("Couldn't find your xcode build dir")
            if len(path_list) > 1:
                # seems better than choosing one at random
                raise RuntimeError("Found more than one xcode build dir")
            path = path_list[0]
        else:
            if options.path is None:
                # Default to the directory that contains this script
                path = os.path.split(os.path.realpath(sys.argv[0]))[0]
            else:
                path = options.path
            if os.path.isdir(path):
                path += "/wesnoth"
            if options.debug_bin:
                path += "-debug"
        self.common_args = [path, "--nobanner"]
        if options.strict_mode:
            self.common_args.append("--log-strict=warning")
        if options.clean:
            self.common_args.append("--noaddons")
        if options.additional_arg is not None:
            self.common_args.extend(options.additional_arg)
        self.timeout = options.timeout
        self.batch_timeout = options.batch_timeout
        if self.verbose >= Verbosity.SCRIPT_DEBUGGING:
            print("Options that will be used for all Wesnoth instances:", repr(self.common_args))

    def run_tests(self, test_list, test_summary):
        """Run all of the tests in a single instance of Wesnoth

        test_list: a non-empty list of tests. A list of more than one test may
            only contain tests that expect UnitTestResult.PASS.
        test_summary: the accumulator which is told whether the batch passed,
            was skipped, failed or crashed.

        Raises ValueError for an empty list, NotImplementedError for a batch
        with a non-PASS test, and UnexpectedTestStatusException if the batch
        crashed or returned something other than the expected result.
        """
        if len(test_list) == 0:
            raise ValueError("Running an empty test list")
        if len(test_list) > 1:
            for test in test_list:
                if test.status != UnitTestResult.PASS:
                    raise NotImplementedError("run_tests doesn't yet support batching tests with non-zero statuses")
        expected_result = test_list[0].status

        if expected_result == UnitTestResult.TIMEOUT and self.timeout == 0:
            test_summary.skip_test(test_list)
            if self.verbose >= Verbosity.NAMES_OF_TESTS_RUN:
                print('Skipping test', test_list[0].name, 'because timeout is disabled')
            return

        # These results are only expected when running in strict mode
        strict_only_results = (
            UnitTestResult.BROKE_STRICT_TEST_PASS,
            UnitTestResult.BROKE_STRICT_TEST_FAIL,
            UnitTestResult.BROKE_STRICT_TEST_FAIL_BY_DEFEAT,
            UnitTestResult.BROKE_STRICT_TEST_PASS_BY_VICTORY,
        )
        if expected_result in strict_only_results and not self.strict_mode:
            test_summary.skip_test(test_list)
            if self.verbose >= Verbosity.NAMES_OF_TESTS_RUN:
                print('Skipping test', test_list[0].name, 'because strict mode is disabled')
            return

        args = self.common_args.copy()
        for test in test_list:
            args.append("-u")
            args.append(test.name)

        if self.timeout == 0:
            timeout = None
        elif len(test_list) == 1:
            timeout = self.timeout
        else:
            timeout = self.batch_timeout

        if self.verbose >= Verbosity.NAMES_OF_TESTS_RUN:
            if len(test_list) == 1:
                print("Running test", test_list[0].name)
            else:
                print("Running {count} tests ({names})".format(count=len(test_list),
                    names=", ".join([test.name for test in test_list])))
        if self.verbose >= Verbosity.SCRIPT_DEBUGGING:
            print(repr(args))

        try:
            res = run_with_rerun_for_sdl_video(args, timeout)
        except subprocess.TimeoutExpired as t:
            if expected_result != UnitTestResult.TIMEOUT:
                print("Timed out (killed by Python timeout implementation), while running batch ({names})".format(
                    names=", ".join([test.name for test in test_list])))
            # Treat Python's timeout the same as Wesnoth returning the TIMEOUT status itself
            res = subprocess.CompletedProcess(args, UnitTestResult.TIMEOUT.value, t.output or b'')

        if self.verbose >= Verbosity.OUTPUT_OF_PASSING_TESTS:
            # errors='replace' so that output which isn't valid UTF-8 can't abort the run
            print(res.stdout.decode('utf-8', errors='replace'))
            print("Result:", res.returncode)

        if res.returncode < 0:
            print("Wesnoth exited because of signal", -res.returncode)
            if self.backtrace:
                print("Launching GDB for a backtrace...")
                gdb_args = ["gdb", "-q", "-batch", "-ex", "set style enabled on", "-ex", "start", "-ex", "continue", "-ex", "bt", "-ex", "quit", "--args"]
                gdb_args.extend(args)
                try:
                    subprocess.run(gdb_args, timeout=240)
                except (OSError, subprocess.TimeoutExpired) as gdb_error:
                    # The crash must still be recorded if gdb is missing or hangs
                    print("Could not get a backtrace from GDB:", gdb_error)
            test_summary.crash_test(test_list)
            raise UnexpectedTestStatusException()

        try:
            returned_result = UnitTestResult(res.returncode)
        except ValueError:
            # The exit code isn't one of the known UnitTestResult values
            print("Wesnoth returned an unexpected value: ", res.returncode)
            test_summary.crash_test(test_list)
            raise UnexpectedTestStatusException()

        if returned_result == expected_result:
            test_summary.pass_test(test_list)
            return

        if self.verbose < Verbosity.OUTPUT_OF_PASSING_TESTS:
            # Batch mode only supports tests that should UnitTestResult.PASS, so the output
            # here is likely to have many 'PASS TEST' lines with the failing test last.
            # If support for batching other UnitTestResults is added then this needs to filter
            # the output from the other tests.
            print(res.stdout.decode('utf-8', errors='replace'))
        print("Failure, Wesnoth returned", returned_result, "but we expected", expected_result)
        test_summary.fail_test(test_list)
        raise UnexpectedTestStatusException()
def test_batcher(test_list):
"""A generator function that collects tests into batches which a single
instance of Wesnoth can run.
"""
expected_to_pass = []
for test in test_list:
if test.status == UnitTestResult.PASS:
expected_to_pass.append(test)
else:
yield [test]
if len(expected_to_pass) == 0:
return
if options.batch_max == 0:
yield expected_to_pass
return
while len(expected_to_pass) > 0:
yield expected_to_pass[0:options.batch_max]
expected_to_pass = expected_to_pass[options.batch_max:]
def test_nobatcher(test_list):
    """Generator with the same API as test_batcher, except that every test is
    emitted in a single-element batch of its own."""
    yield from ([single] for single in test_list)
if __name__ == '__main__':
    # Script entry point: parse the command line, read the test schedule, run
    # every batch and exit with status 1 unless everything passed.
    # "options" intentionally stays a module-level name, other parts of this
    # file read it as a global.
    ap = argparse.ArgumentParser()
    # The options that are mandatory to support (because they're used in the Travis script)
    # are the one-letter forms of verbose, clean, timeout and backtrace.
    ap.add_argument("-v", "--verbose", action="count", default=0,
        help="Verbose mode. Additional -v arguments increase the verbosity.")
    ap.add_argument("-c", "--clean", action="store_true",
        help="Clean mode. (Don't load any add-ons. Used for mainline tests.)")
    ap.add_argument("-a", "--additional_arg", action="append",
        help="Additional arguments to go to wesnoth. For options that start with a hyphen, '--additional_arg --data-dir' will give an error, use '--additional_arg=--data-dir' instead.")
    ap.add_argument("-t", "--timeout", type=int, default=10,
        help="New timer value to use, instead of 10s as default. The value 0 means no timer, and also skips tests that expect timeout.")
    ap.add_argument("-bt", "--batch-timeout", type=int, default=300,
        help="New timer value to use for batched tests, instead of 300s as default.")
    ap.add_argument("-bm", "--batch-max", type=int, default=0,
        help="Maximum number of tests to do in a batch. Default no limit.")
    ap.add_argument("-bd", "--batch-disable", action="store_const", const=1, dest='batch_max',
        help="Disable test batching, may be useful if debugging a small subset of tests. Equivalent to --batch-max=1")
    ap.add_argument("-s", "--no-strict", dest="strict_mode", action="store_false",
        help="Disable strict mode. By default, we run wesnoth with the option --log-strict=warning to ensure errors result in a failed test.")
    ap.add_argument("-d", "--debug_bin", action="store_true",
        help="Run wesnoth-debug binary instead of wesnoth.")
    ap.add_argument("-f", "--filter",
        help="Run the subset of tests whose name matches the given string. Regex patterns are supported.")
    ap.add_argument("-g", "--backtrace", action="store_true",
        help="If we encounter a crash, generate a backtrace using gdb. Must have gdb installed for this option.")
    ap.add_argument("-p", "--path", metavar="dir",
        help="Path to wesnoth binary. By default assume it is with this script."
        + " If running on a Mac, passing 'xcode' as the path will attempt to locate the binary in Xcode derived data directories.")
    ap.add_argument("-l", "--list", metavar="filename",
        help="Loads list of tests from the given file.",
        default="wml_test_schedule")

    # Workaround for argparse not accepting option values that start with a hyphen,
    # for example "-a --user-data-dir". https://bugs.python.org/issue9334
    # New callers can use "-a=--user-data-dir", but compatibility with the old version
    # of run_wml_tests needs support for "-a --user-data-dir".
    while "-a" in sys.argv:
        i = sys.argv.index("-a")
        if i + 1 >= len(sys.argv):
            # A trailing "-a" without a value, leave it for argparse to report
            break
        sys.argv[i] = "=".join(["-a", sys.argv.pop(i + 1)])

    options = ap.parse_args()

    if options.verbose >= Verbosity.SCRIPT_DEBUGGING:
        print(repr(options))

    batcher = test_nobatcher if options.batch_max == 1 else test_batcher
    test_list, test_summary = TestListParser(options).get(batcher)
    runner = WesnothRunner(options)

    for batch in test_list:
        try:
            runner.run_tests(batch, test_summary)
        except UnexpectedTestStatusException:
            # The failure has already been recorded in test_summary, carry on
            # with the next batch. This is only a cross-check of the bookkeeping.
            if test_summary:
                raise RuntimeError("Logic error in run_wml_tests - a test has failed, but test_summary says everything passed")

    if options.verbose >= Verbosity.NAMES_OF_TESTS_RUN or not test_summary:
        print(test_summary)
    else:
        print("WML Unit Tests Result:", len(test_summary.passed), "of", test_summary.total, "tests passed")

    if not test_summary:
        sys.exit(1)