testvm.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. # Copyright 2021 The ChromiumOS Authors
  2. # Use of this source code is governed by a BSD-style license that can be
  3. # found in the LICENSE file.
  4. from enum import Enum
  5. import json
  6. import os
  7. import socket
  8. import subprocess
  9. import sys
  10. import time
  11. import typing
  12. from contextlib import closing
  13. from pathlib import Path
  14. from random import randrange
  15. from typing import Dict, List, Literal, Optional, Tuple
  16. from .common import CACHE_DIR, download_file, cmd, rich, console
  17. KVM_SUPPORT = os.access("/dev/kvm", os.W_OK)
  18. Arch = Literal["x86_64", "aarch64"]
  19. ARCH_OPTIONS = typing.cast(Tuple[Arch], typing.get_args(Arch))
  20. SCRIPT_DIR = Path(__file__).parent.resolve()
  21. SRC_DIR = SCRIPT_DIR.joinpath("testvm")
  22. ID_RSA = SRC_DIR.joinpath("id_rsa")
  23. BASE_IMG_VERSION = open(SRC_DIR.joinpath("version"), "r").read().strip()
  24. IMAGE_DIR_URL = "https://storage.googleapis.com/crosvm/testvm"
  25. def cargo_target_dir():
  26. # Do not call cargo if we have the environment variable specified. This
  27. # allows the script to be used when cargo is not available but the target
  28. # dir is known.
  29. env_target = os.environ.get("CARGO_TARGET_DIR")
  30. if env_target:
  31. return Path(env_target)
  32. text = subprocess.run(
  33. ["cargo", "metadata", "--no-deps", "--format-version=1"],
  34. check=True,
  35. capture_output=True,
  36. text=True,
  37. ).stdout
  38. metadata = json.loads(text)
  39. return Path(metadata["target_directory"])
  40. def data_dir(arch: Arch):
  41. return CACHE_DIR.joinpath("crosvm_tools").joinpath(arch)
  42. def pid_path(arch: Arch):
  43. return data_dir(arch).joinpath("pid")
  44. def ssh_port_path(arch: Arch):
  45. return data_dir(arch).joinpath("ssh_port")
  46. def log_path(arch: Arch):
  47. return data_dir(arch).joinpath("vm_log")
  48. def base_img_name(arch: Arch):
  49. return f"base-{arch}-{BASE_IMG_VERSION}.qcow2"
  50. def base_img_url(arch: Arch):
  51. return f"{IMAGE_DIR_URL}/{base_img_name(arch)}"
  52. def base_img_path(arch: Arch):
  53. return data_dir(arch).joinpath(base_img_name(arch))
  54. def rootfs_img_path(arch: Arch):
  55. return data_dir(arch).joinpath(f"rootfs-{arch}-{BASE_IMG_VERSION}.qcow2")
  56. def ssh_port(arch: Arch) -> int:
  57. # Default to fixed ports used by VMs started by previous versions of this script.
  58. # TODO(b/275717656): Remove after a while
  59. if not ssh_port_path(arch).exists():
  60. return SSH_PORTS[arch]
  61. return int(ssh_port_path(arch).read_text())
  62. ssh = cmd("ssh")
  63. qemu_img = cmd("qemu-img")
  64. # List of ports to use for SSH for each architecture
  65. # TODO(b/275717656): Remove after a while
  66. SSH_PORTS: Dict[Arch, int] = {
  67. "x86_64": 9000,
  68. "aarch64": 9001,
  69. }
  70. # QEMU arguments shared by all architectures
  71. SHARED_ARGS: List[str] = [
  72. "-display none",
  73. "-device virtio-net-pci,netdev=net0",
  74. "-smp 8",
  75. "-m 4G",
  76. ]
  77. # QEMU command for each architecture
  78. ARCH_TO_QEMU: Dict[Arch, cmd] = {
  79. "x86_64": cmd(
  80. "qemu-system-x86_64",
  81. "-cpu host",
  82. "-enable-kvm" if KVM_SUPPORT else None,
  83. *SHARED_ARGS,
  84. ),
  85. "aarch64": cmd(
  86. "qemu-system-aarch64",
  87. "-M virt",
  88. "-machine virt,virtualization=true,gic-version=3",
  89. "-cpu cortex-a57",
  90. "-bios /usr/share/qemu-efi-aarch64/QEMU_EFI.fd",
  91. *SHARED_ARGS,
  92. ),
  93. }
  94. def ssh_opts(arch: Arch) -> Dict[str, str]:
  95. return {
  96. "Port": str(ssh_port(arch)),
  97. "User": "crosvm",
  98. "StrictHostKeyChecking": "no",
  99. "UserKnownHostsFile": "/dev/null",
  100. "LogLevel": "ERROR",
  101. "IdentityFile": str(ID_RSA),
  102. }
  103. def ssh_cmd_args(arch: Arch):
  104. return [f"-o{k}={v}" for k, v in ssh_opts(arch).items()]
  105. def ssh_exec(arch: Arch, cmd: Optional[str] = None):
  106. os.chmod(ID_RSA, 0o600)
  107. ssh.with_args(
  108. "localhost",
  109. *ssh_cmd_args(arch),
  110. *(["-T", cmd] if cmd else []),
  111. ).fg(check=False)
  112. def ping_vm(arch: Arch):
  113. os.chmod(ID_RSA, 0o600)
  114. return ssh(
  115. "localhost",
  116. *ssh_cmd_args(arch),
  117. "-oConnectTimeout=1",
  118. "-T exit",
  119. ).success()
  120. def write_pid_file(arch: Arch, pid: int):
  121. with open(pid_path(arch), "w") as pid_file:
  122. pid_file.write(str(pid))
  123. def read_pid_file(arch: Arch):
  124. if not pid_path(arch).exists():
  125. return None
  126. with open(pid_path(arch), "r") as pid_file:
  127. return int(pid_file.read())
  128. def is_port_available(port: int):
  129. with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
  130. return sock.connect_ex(("127.0.0.1", port)) != 0
  131. def pick_ssh_port():
  132. for _ in range(5):
  133. port = randrange(1024, 32768)
  134. if is_port_available(port):
  135. return port
  136. raise Exception("Could not find a free port")
  137. def run_qemu(
  138. arch: Arch,
  139. hda: Path,
  140. background: bool = False,
  141. ):
  142. port = pick_ssh_port()
  143. qemu = ARCH_TO_QEMU[arch]
  144. if background:
  145. serial = f"file:{data_dir(arch).joinpath('vm_log')}"
  146. else:
  147. serial = "stdio"
  148. console.print(f"Booting {arch} VM with disk", hda)
  149. command = qemu.with_args(
  150. f"-hda {hda}",
  151. f"-serial {serial}",
  152. f"-netdev user,id=net0,hostfwd=tcp::{port}-:22",
  153. )
  154. if background:
  155. # Start qemu in a new session so it can outlive this process.
  156. process = command.popen(
  157. start_new_session=background, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
  158. )
  159. # Wait for 1s to see if the qemu is staying alive.
  160. assert process.stdout
  161. for _ in range(10):
  162. if process.poll() is not None:
  163. sys.stdout.write(process.stdout.read())
  164. print(f"'{command}' exited with code {process.returncode}")
  165. sys.exit(process.returncode)
  166. time.sleep(0.1)
  167. # Print any warnings qemu might produce.
  168. sys.stdout.write(process.stdout.read(0))
  169. sys.stdout.flush()
  170. process.stdout.close()
  171. # Save port and pid so we can manage the process later.
  172. ssh_port_path(arch).write_text(str(port))
  173. write_pid_file(arch, process.pid)
  174. else:
  175. command.fg()
  176. def run_vm(arch: Arch, background: bool = False):
  177. run_qemu(
  178. arch,
  179. rootfs_img_path(arch),
  180. background=background,
  181. )
  182. def is_running(arch: Arch):
  183. pid = read_pid_file(arch)
  184. if pid is None:
  185. return False
  186. # Send signal 0 to check if the process is alive
  187. try:
  188. os.kill(pid, 0)
  189. except OSError:
  190. return False
  191. return True
  192. def kill_vm(arch: Arch):
  193. pid = read_pid_file(arch)
  194. if pid:
  195. try:
  196. os.kill(pid, 9)
  197. # Ping with signal 0 until we get an OSError indicating the process has shutdown.
  198. while True:
  199. os.kill(pid, 0)
  200. except OSError:
  201. return
  202. def build_if_needed(arch: Arch, reset: bool = False):
  203. if reset and is_running(arch):
  204. print(f"Killing existing {arch} VM to perform reset...")
  205. kill_vm(arch)
  206. time.sleep(1)
  207. data_dir(arch).mkdir(parents=True, exist_ok=True)
  208. base_img = base_img_path(arch)
  209. if not base_img.exists():
  210. print(f"Downloading {arch} base image ({base_img_url(arch)})...")
  211. download_file(base_img_url(arch), base_img_path(arch))
  212. rootfs_img = rootfs_img_path(arch)
  213. if not rootfs_img.exists() or reset:
  214. # The rootfs is backed by the base image generated above. So we can
  215. # easily reset to a clean VM by rebuilding an empty rootfs image.
  216. print(f"Creating {arch} rootfs overlay...")
  217. qemu_img.with_args(
  218. "create",
  219. "-f qcow2",
  220. "-F qcow2",
  221. f"-b {base_img}",
  222. rootfs_img,
  223. "8G",
  224. ).fg(quiet=True)
  225. def up(arch: Arch, reset: bool = False, wait: bool = False, timeout: int = 120):
  226. "Starts the test vm if it's not already running. Optionally wait for it to be reachable."
  227. # Try waiting for the running VM, if it does not become reachable, kill it.
  228. if is_running(arch):
  229. if not wait:
  230. console.print(f"{arch} VM is running on port {ssh_port(arch)}")
  231. return
  232. if not wait_until_reachable(arch, timeout):
  233. if is_running(arch):
  234. print(f"{arch} VM is not reachable. Restarting it.")
  235. kill_vm(arch)
  236. else:
  237. print(f"{arch} VM stopped. Starting it again.")
  238. else:
  239. console.print(f"{arch} VM is running on port {ssh_port(arch)}")
  240. return
  241. build_if_needed(arch, reset)
  242. run_qemu(
  243. arch,
  244. rootfs_img_path(arch),
  245. background=True,
  246. )
  247. if wait:
  248. if wait_until_reachable(arch, timeout):
  249. console.print(f"{arch} VM is running on port {ssh_port(arch)}")
  250. else:
  251. raise Exception(f"Waiting for {arch} VM timed out.")
  252. def wait_until_reachable(arch: Arch, timeout: int = 120):
  253. "Blocks until the VM is ready to use."
  254. if not is_running(arch):
  255. return False
  256. if ping_vm(arch):
  257. return True
  258. with rich.live.Live(
  259. rich.spinner.Spinner("point", f"Waiting for {arch} VM to become reachable...")
  260. ):
  261. start_time = time.time()
  262. while (time.time() - start_time) < timeout:
  263. if not is_running(arch):
  264. return False
  265. if ping_vm(arch):
  266. return True
  267. return False
  268. class VmState(Enum):
  269. REACHABLE = "Reachable"
  270. RUNNING_NOT_REACHABLE = "Running, but not reachable"
  271. STOPPED = "Stopped"
  272. def state(arch: Arch):
  273. if is_running(arch):
  274. if ping_vm(arch):
  275. return VmState.REACHABLE
  276. else:
  277. return VmState.RUNNING_NOT_REACHABLE
  278. else:
  279. return VmState.STOPPED