presubmit.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. #!/usr/bin/env python3
  2. # Copyright 2022 The ChromiumOS Authors
  3. # Use of this source code is governed by a BSD-style license that can be
  4. # found in the LICENSE file.
  5. import os
  6. import subprocess
  7. import sys
  8. import traceback
  9. from concurrent.futures import ThreadPoolExecutor
  10. from dataclasses import dataclass
  11. from datetime import datetime, timedelta
  12. from fnmatch import fnmatch
  13. from pathlib import Path
  14. from time import sleep
  15. from typing import Callable, List, Sequence, NamedTuple, Optional, Union
  16. from impl.common import (
  17. Command,
  18. ParallelCommands,
  19. all_tracked_files,
  20. cmd,
  21. console,
  22. rich,
  23. strip_ansi_escape_sequences,
  24. verbose,
  25. )
# Module-level helper for invoking git subcommands via the project's Command wrapper.
git = cmd("git")
@dataclass
class CheckContext(object):
    "Information passed to each check when it's called."

    # Whether or not --fix was set and checks should attempt to fix problems they encounter.
    fix: bool

    # All files that this check should cover (e.g. all python files on a python check).
    all_files: List[Path]

    # Those files of all_files that were modified locally.
    modified_files: List[Path]

    # Files that do not exist upstream and have been added locally.
    new_files: List[Path]
class Check(NamedTuple):
    "Metadata for each check, defining on which files it should run."

    # Function to call for this check. Returns the Command(s) to execute, or None
    # if the check cannot run in the current environment.
    check_function: Callable[[CheckContext], Union[Command, None, List[Command]]]

    # Overrides the name derived from `check_function` when set.
    custom_name: Optional[str] = None

    # List of globs that this check should be triggered on
    # NOTE: the [] defaults below are single class-level lists shared by all
    # instances; safe only because NamedTuple fields are never mutated.
    files: List[str] = []

    # Also match extension-less scripts under tools/ that start with a python3
    # shebang (see `should_run_check_on_file`).
    python_tools: bool = False

    # List of globs to exclude from this check
    exclude: List[str] = []

    # Whether or not this check can fix issues.
    can_fix: bool = False

    # Which groups this check belongs to.
    groups: List[str] = []

    # Priority tasks usually take longer and are started first, and will show preliminary output.
    priority: bool = False

    @property
    def name(self):
        # An explicitly provided name wins.
        if self.custom_name:
            return self.custom_name
        # Otherwise derive the name from the function, dropping the "check_" prefix.
        name = self.check_function.__name__
        if name.startswith("check_"):
            return name[len("check_") :]
        return name

    @property
    def doc(self):
        # Docstring of the check function, if any, stripped of surrounding whitespace.
        if self.check_function.__doc__:
            return self.check_function.__doc__.strip()
        else:
            return None
class Group(NamedTuple):
    "Metadata for a group of checks"

    # Name used to refer to this group.
    name: str
    # Human-readable description of the group.
    doc: str
    # Names of the checks belonging to this group.
    checks: List[str]
  73. def list_file_diff():
  74. """
  75. Lists files there were modified compared to the upstream branch.
  76. Falls back to all files tracked by git if there is no upstream branch.
  77. """
  78. upstream = git("rev-parse @{u}").stdout(check=False)
  79. if upstream:
  80. for line in git("diff --name-status", upstream).lines():
  81. parts = line.split("\t", 1)
  82. file = Path(parts[1].strip())
  83. if file.is_file():
  84. yield (parts[0].strip(), file)
  85. else:
  86. print("WARNING: Not tracking a branch. Checking all files.")
  87. for file in all_tracked_files():
  88. yield ("M", file)
  89. def should_run_check_on_file(check: Check, file: Path):
  90. "Returns true if `file` should be run on `check`."
  91. # Skip third_party except vmm_vhost.
  92. if str(file).startswith("third_party") and not str(file).startswith("third_party/vmm_vhost"):
  93. return False
  94. # Skip excluded files
  95. for glob in check.exclude:
  96. if fnmatch(str(file), glob):
  97. return False
  98. # Match python tools (no file-extension, but with a python shebang line)
  99. if check.python_tools:
  100. if fnmatch(str(file), "tools/*") and file.suffix == "" and file.is_file():
  101. if file.open(errors="ignore").read(32).startswith("#!/usr/bin/env python3"):
  102. return True
  103. # If no constraint is specified, match all files.
  104. if not check.files and not check.python_tools:
  105. return True
  106. # Otherwise, match only those specified by `files`.
  107. for glob in check.files:
  108. if fnmatch(str(file), glob):
  109. return True
  110. return False
  111. class Task(object):
  112. """
  113. Represents a task that needs to be executed to perform a `Check`.
  114. The task can be executed via `Task.execute`, which will update the state variables with
  115. status and progress information.
  116. This information can then be rendered from a separate thread via `Task.status_widget()`
  117. """
  118. def __init__(self, title: str, commands: Sequence[Command], priority: bool):
  119. "Display title."
  120. self.title = title
  121. "Commands to execute."
  122. self.commands = commands
  123. "Task is a priority check."
  124. self.priority = priority
  125. "List of log lines (stdout+stderr) produced by the task."
  126. self.log_lines: List[str] = []
  127. "Task was compleded, but may or not have been successful."
  128. self.done = False
  129. "True if the task completed successfully."
  130. self.success = False
  131. "Time the task was started."
  132. self.start_time = datetime.min
  133. "Duration the task took to execute. Only filled after completion."
  134. self.duration = timedelta.max
  135. "Spinner object for status_widget UI."
  136. self.spinner = rich.spinner.Spinner("point", title)
  137. def status_widget(self):
  138. "Returns a rich console object showing the currrent status of the task."
  139. duration = self.duration if self.done else datetime.now() - self.start_time
  140. title = f"[{duration.total_seconds():6.2f}s] [bold]{self.title}[/bold]"
  141. if self.done:
  142. status: str = "[green]OK [/green]" if self.success else "[red]ERR[/red]"
  143. title_widget = rich.text.Text.from_markup(f"{status} {title}")
  144. else:
  145. self.spinner.text = rich.text.Text.from_markup(title)
  146. title_widget = self.spinner
  147. if not self.priority:
  148. return title_widget
  149. last_lines = [
  150. self.log_lines[-3] if len(self.log_lines) >= 3 else "",
  151. self.log_lines[-2] if len(self.log_lines) >= 2 else "",
  152. self.log_lines[-1] if len(self.log_lines) >= 1 else "",
  153. ]
  154. return rich.console.Group(
  155. *(
  156. # Print last log lines without it's original colors
  157. rich.text.Text(
  158. "│ " + strip_ansi_escape_sequences(log_line),
  159. style="light_slate_grey",
  160. overflow="ellipsis",
  161. no_wrap=True,
  162. )
  163. for log_line in last_lines
  164. ),
  165. rich.text.Text("└ ", end="", style="light_slate_grey"),
  166. title_widget,
  167. rich.text.Text(),
  168. )
  169. def execute(self):
  170. "Execute the task while updating the status variables."
  171. try:
  172. self.start_time = datetime.now()
  173. success = True
  174. if verbose():
  175. for command in self.commands:
  176. self.log_lines.append(f"$ {command}")
  177. # Spawn all commands as separate processes
  178. processes = [
  179. command.popen(stdout=subprocess.PIPE, stderr=subprocess.STDOUT, errors="replace")
  180. for command in self.commands
  181. ]
  182. # The stdout is collected before we wait for the processes to exit so that the UI is
  183. # at least real-time for the first process. Note that in this way, the output for
  184. # other processes other than the first process are not real-time. In addition, we
  185. # can't proactively kill other processes in the same task if any process fails.
  186. for process in processes:
  187. assert process.stdout
  188. for line in iter(process.stdout.readline, ""):
  189. self.log_lines.append(line.strip())
  190. # Wait for all processes to finish and check return code
  191. for process in processes:
  192. if process.wait() != 0:
  193. success = False
  194. self.duration = datetime.now() - self.start_time
  195. self.success = success
  196. self.done = True
  197. except Exception:
  198. self.log_lines.append(traceback.format_exc())
  199. def print_logs(tasks: List[Task]):
  200. "Prints logs of all failed or unfinished tasks."
  201. for task in tasks:
  202. if not task.done:
  203. print()
  204. console.rule(f"{task.title} did not finish", style="yellow")
  205. for line in task.log_lines:
  206. print(line)
  207. if not task.log_lines:
  208. print(f"{task.title} did not output any logs")
  209. for task in tasks:
  210. if task.done and not task.success:
  211. console.rule(f"{task.title} failed", style="red")
  212. for line in task.log_lines:
  213. print(line)
  214. if not task.log_lines:
  215. print(f"{task.title} did not output any logs")
  216. def print_summary(tasks: List[Task]):
  217. "Prints a summary of all task results."
  218. console.rule("Summary")
  219. tasks.sort(key=lambda t: t.duration)
  220. for task in tasks:
  221. title = f"[{task.duration.total_seconds():6.2f}s] [bold]{task.title}[/bold]"
  222. status: str = "[green]OK [/green]" if task.success else "[red]ERR[/red]"
  223. console.print(f"{status} {title}")
  224. def execute_tasks_parallel(tasks: List[Task]):
  225. "Executes the list of tasks in parallel, while rendering live status updates."
  226. with ThreadPoolExecutor() as executor:
  227. try:
  228. # Since tasks are executed in subprocesses, we can use a thread pool to parallelize
  229. # despite the GIL.
  230. task_futures = [executor.submit(lambda: t.execute()) for t in tasks]
  231. # Render task updates while they are executing in the background.
  232. with rich.live.Live(refresh_per_second=30) as live:
  233. while True:
  234. live.update(
  235. rich.console.Group(
  236. *(t.status_widget() for t in tasks),
  237. rich.text.Text(),
  238. rich.text.Text.from_markup(
  239. "[green]Tip:[/green] Press CTRL-C to abort execution and see all logs."
  240. ),
  241. )
  242. )
  243. if all(future.done() for future in task_futures):
  244. break
  245. sleep(0.1)
  246. except KeyboardInterrupt:
  247. print_logs(tasks)
  248. # Force exit to skip waiting for the executor to shutdown. This will kill all
  249. # running subprocesses.
  250. os._exit(1) # type: ignore
  251. # Render error logs and summary after execution
  252. print_logs(tasks)
  253. print_summary(tasks)
  254. if any(not t.success for t in tasks):
  255. raise Exception("Some checks failed")
  256. def execute_tasks_serial(tasks: List[Task]):
  257. "Executes the list of tasks one-by-one"
  258. for task in tasks:
  259. console.rule(task.title)
  260. for command in task.commands:
  261. command.fg()
  262. console.print()
  263. def generate_plan(
  264. checks_list: List[Check],
  265. fix: bool,
  266. run_on_all_files: bool,
  267. ):
  268. "Generates a list of `Task`s to execute the checks provided in `checks_list`"
  269. all_files = [*all_tracked_files()]
  270. file_diff = [*list_file_diff()]
  271. new_files = [f for (s, f) in file_diff if s == "A"]
  272. if run_on_all_files:
  273. modified_files = all_files
  274. else:
  275. modified_files = [f for (s, f) in file_diff if s in ("M", "A")]
  276. tasks: List[Task] = []
  277. unsupported_checks: List[str] = []
  278. for check in checks_list:
  279. if fix and not check.can_fix:
  280. continue
  281. context = CheckContext(
  282. fix=fix,
  283. all_files=[f for f in all_files if should_run_check_on_file(check, f)],
  284. modified_files=[f for f in modified_files if should_run_check_on_file(check, f)],
  285. new_files=[f for f in new_files if should_run_check_on_file(check, f)],
  286. )
  287. if context.modified_files:
  288. maybe_commands = check.check_function(context)
  289. if maybe_commands is None:
  290. unsupported_checks.append(check.name)
  291. continue
  292. commands_list = maybe_commands if isinstance(maybe_commands, list) else [maybe_commands]
  293. title = f"fixing {check.name}" if fix else check.name
  294. tasks.append(Task(title, commands_list, check.priority))
  295. if unsupported_checks:
  296. console.print("[yellow]Warning:[/yellow] The following checks cannot be run:")
  297. for unsupported_check in unsupported_checks:
  298. console.print(f" - {unsupported_check}")
  299. console.print()
  300. console.print("[green]Tip:[/green] Use the dev container to run presubmits:")
  301. console.print()
  302. console.print(
  303. f" [blue] $ tools/dev_container tools/presubmit {' '.join(sys.argv[1:])}[/blue]"
  304. )
  305. console.print()
  306. if not os.access("/dev/kvm", os.W_OK):
  307. console.print("[yellow]Warning:[/yellow] Cannot access KVM. Integration tests are not run.")
  308. # Sort so that priority tasks are launched (and rendered) first
  309. tasks.sort(key=lambda t: (t.priority, t.title), reverse=True)
  310. return tasks
  311. def run_checks(
  312. checks_list: List[Check],
  313. fix: bool,
  314. run_on_all_files: bool,
  315. parallel: bool,
  316. ):
  317. """
  318. Runs all checks in checks_list.
  319. Arguments:
  320. fix: Run fixes instead of checks on `Check`s that support it.
  321. run_on_all_files: Do not use git delta, but run on all files.
  322. nightly_fmt: Use nightly version of rust tooling.
  323. parallel: Run tasks in parallel.
  324. """
  325. tasks = generate_plan(checks_list, fix, run_on_all_files)
  326. if len(tasks) == 1:
  327. parallel = False
  328. if parallel:
  329. execute_tasks_parallel(list(tasks))
  330. else:
  331. execute_tasks_serial(list(tasks))