Skip to content

Commit 4c7f36c

Browse files
committed
rework worker cache handling
bazelbuild#1: Include details such as the target architecture in the cache folder hash. Previously only the path to rustc and the compilation mode was used to calculate the hash, and this was likely the cause of ICEs. We now include all of the env vars passed into the request in the hash, so that separate folders are created for different crate names, crate versions, and target architectures. bazelbuild#2: A new worker was previously being created for every rustc invocation, because the env parameter contained crate-specific details that caused WorkerKey to change each time. Instead of passing the environment in directly, persist it to a file, and pass it to process-wrapper with --env-file instead. We also use the contents of this file in order to generate the hash mentioned above. This will mean you'll be limited to 4 workers by default; you may want to also pass in --worker_max_instances=Rustc=HOST_CPUS*0.5 or similar, as JIT startup is not a concern for the Rust workers, and parts of rustc's compilation process are unable to take advantage of more than 1 CPU core. bazelbuild#3: Instead of storing incremental files in $TEMP, the worker is now enabled by providing it with a folder that it should store its incremental files in, such as: bazel build --@rules_rust//worker:cache_root=/path/to/cache/files This mimics the behaviour of --disk_cache and --repository_cache, and makes it clear where the files are being persisted to.
1 parent b32648e commit 4c7f36c

File tree

5 files changed

+134
-58
lines changed

5 files changed

+134
-58
lines changed

rust/private/rustc.bzl

+46-6
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ def construct_arguments(
340340
maker_path = None,
341341
aspect = False,
342342
emit = ["dep-info", "link"],
343-
use_worker = False):
343+
worker_env_file = None):
344344
"""Builds an Args object containing common rustc flags
345345
346346
Args:
@@ -361,7 +361,7 @@ def construct_arguments(
361361
maker_path (File): An optional clippy marker file
362362
aspect (bool): True if called in an aspect context.
363363
emit (list): Values for the --emit flag to rustc.
364-
use_worker (bool): If True, sets up the arguments in a worker-compatible fashion
364+
worker_env_file (File): If provided, arguments will be set up for use with a worker.
365365
366366
Returns:
367367
tuple: A tuple of the following items
@@ -376,11 +376,16 @@ def construct_arguments(
376376

377377
# Wrapper args first
378378
args = ctx.actions.args()
379-
if use_worker:
379+
if worker_env_file:
380380
# Write the args to a param file that will be used by Bazel to send messages to the worker.
381381
args.set_param_file_format("multiline")
382382
args.use_param_file("@%s", use_always = True)
383383

384+
# The worker needs its environment passed in as a file, which we'll write later. The worker
385+
# expects these arguments to come first.
386+
args.add("--env-file")
387+
args.add(worker_env_file)
388+
384389
for build_env_file in build_env_files:
385390
args.add("--env-file", build_env_file)
386391

@@ -536,7 +541,15 @@ def rustc_compile_action(
536541
- (DepInfo): The transitive dependencies of this crate.
537542
- (DefaultInfo): The output file for this crate, and its runfiles.
538543
"""
544+
539545
worker_binary = ctx.toolchains["@rules_rust//worker:toolchain_type"].worker_binary
546+
if worker_binary:
547+
worker_env_file = ctx.actions.declare_file(ctx.label.name + "_worker_env")
548+
worker_cache_root = ctx.toolchains["@rules_rust//worker:toolchain_type"].cache_root
549+
else:
550+
worker_env_file = None
551+
worker_cache_root = ""
552+
540553
cc_toolchain, feature_configuration = find_cc_toolchain(ctx)
541554

542555
dep_info, build_info = collect_deps(
@@ -573,7 +586,7 @@ def rustc_compile_action(
573586
out_dir,
574587
build_env_files,
575588
build_flags_files,
576-
use_worker = worker_binary != None,
589+
worker_env_file = worker_env_file,
577590
)
578591

579592
if hasattr(ctx.attr, "version") and ctx.attr.version != "0.0.0":
@@ -583,8 +596,35 @@ def rustc_compile_action(
583596

584597
if worker_binary != None:
585598
executable = worker_binary
586-
tools = [ctx.executable._process_wrapper]
587-
arguments = [ctx.executable._process_wrapper.path, toolchain.rustc.path, ctx.var["COMPILATION_MODE"], args]
599+
tools = [ctx.executable._process_wrapper, worker_env_file]
600+
601+
# Add compilation mode to the environment, so the worker includes it in the hash.
602+
env["WORKER_COMPILATION_MODE"] = ctx.var["COMPILATION_MODE"]
603+
604+
# We need to pass the environment in to the worker via a file, as it changes each time.
605+
ctx.actions.write(
606+
output = worker_env_file,
607+
content = "\n".join([k + "=" + v for (k, v) in env.items()]) + "\n",
608+
)
609+
610+
# All the CARGO_* env vars will be passed in via the env file, and we don't want to provide them
611+
# directly to the worker, as that will result in a new WorkerKey being generated each time,
612+
# preventing a worker from being reused. But we can't just clear out env, because on macOS, Bazel
613+
# looks for the presence of specific env vars to decide whether to inject extra required
614+
# variables like DEVELOPER_DIR.
615+
env = dict([
616+
(k, v)
617+
for (k, v) in env.items()
618+
if k.startswith("APPLE_") or k.startswith("XCODE_")
619+
])
620+
621+
arguments = [
622+
ctx.executable._process_wrapper.path,
623+
toolchain.rustc.path,
624+
worker_cache_root,
625+
# the final arg will be passed as a flagfile over the worker protocol
626+
args,
627+
]
588628
execution_requirements = {"supports-workers": "1"}
589629
else:
590630
# Not all execution platforms support a worker.

worker/BUILD

+7-8
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
load("@bazel_skylib//:bzl_library.bzl", "bzl_library")
2-
load("//worker:toolchain.bzl", "worker_toolchain")
2+
load("//worker:toolchain.bzl", "string_flag", "worker_toolchain")
33
load(":bootstrap.bzl", "rust_cargo_binary")
4-
load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
54

65
package(default_visibility = ["//visibility:public"])
76

@@ -14,13 +13,13 @@ bzl_library(
1413
srcs = glob(["**/*.bzl"]),
1514
)
1615

17-
bool_flag(
18-
name = "use_worker",
19-
build_setting_default = False,
20-
)
21-
2216
toolchain_type(name = "toolchain_type")
2317

18+
string_flag(
19+
name = "cache_root",
20+
build_setting_default = "",
21+
)
22+
2423
rust_cargo_binary(
2524
name = "rustc-worker",
2625
srcs = [
@@ -33,7 +32,7 @@ rust_cargo_binary(
3332

3433
worker_toolchain(
3534
name = "worker_toolchain",
36-
enabled = ":use_worker",
35+
cache_root = ":cache_root",
3736
worker_binary = ":rustc-worker",
3837
)
3938

worker/lib/mod.rs

+51-29
Original file line numberDiff line numberDiff line change
@@ -2,53 +2,67 @@ use protobuf::CodedInputStream;
22
use protobuf::CodedOutputStream;
33
use protobuf::Message;
44
use protobuf::ProtobufResult;
5-
use std::collections::hash_map::DefaultHasher;
65
use std::hash::Hash;
76
use std::hash::Hasher;
87
use std::io;
98
use std::io::BufRead;
109
use std::path::PathBuf;
10+
use std::{collections::hash_map::DefaultHasher, fs, path::Path};
1111

1212
mod worker_protocol;
1313
use worker_protocol::WorkRequest;
1414
use worker_protocol::WorkResponse;
1515

1616
pub struct Worker {
17-
program_path: PathBuf,
18-
incremental_dir: std::path::PathBuf,
17+
process_wrapper: PathBuf,
18+
rustc: PathBuf,
19+
cache_root: PathBuf,
1920
}
2021

2122
impl Worker {
22-
pub fn new<C: Into<String>>(
23-
program_path: PathBuf,
24-
rustc: PathBuf,
25-
compilation_mode: C,
26-
) -> io::Result<Self> {
27-
// The incremental cache directory includes the rustc wrapper's hash to discriminate
28-
// between multiple workspaces having the same name (usually __main__).
29-
let mut cache_path = std::env::temp_dir();
23+
pub fn new(process_wrapper: PathBuf, rustc: PathBuf, cache_root: PathBuf) -> Self {
24+
fs::create_dir_all(&cache_root).expect("unable to create cache dir");
25+
26+
Worker {
27+
process_wrapper,
28+
rustc,
29+
cache_root,
30+
}
31+
}
32+
33+
/// Get a worker dir unique to the provided rustc and environment.
34+
fn get_worker_dir(&self, env_file_path: &str) -> PathBuf {
35+
let env_file_text = fs::read_to_string(env_file_path).expect("unable to read env file");
36+
let env_file_base = Path::new(env_file_path)
37+
.file_name()
38+
.unwrap()
39+
.to_string_lossy();
40+
3041
let mut hasher = DefaultHasher::new();
31-
rustc.hash(&mut hasher);
32-
33-
cache_path.push(format!(
34-
"rustc-worker-{}-{}",
35-
hasher.finish(),
36-
compilation_mode.into()
37-
));
38-
std::fs::create_dir_all(&cache_path)?;
39-
Ok(Worker {
40-
program_path,
41-
incremental_dir: cache_path,
42-
})
42+
self.rustc.hash(&mut hasher);
43+
env_file_text.hash(&mut hasher);
44+
45+
let cache_path = self
46+
.cache_root
47+
.join(format!("{}-{}", env_file_base, hasher.finish()));
48+
fs::create_dir_all(&cache_path).expect("unable to create cache dir");
49+
50+
cache_path
4351
}
4452

4553
fn handle_request(&self, request: WorkRequest) -> ProtobufResult<WorkResponse> {
46-
let mut incremental_arg = std::ffi::OsString::from("incremental=");
47-
incremental_arg.push(&self.incremental_dir);
48-
let mut cmd = std::process::Command::new(&self.program_path);
49-
cmd.args(request.get_arguments());
54+
let args = request.get_arguments();
55+
let env_file_path = get_env_file_path(args);
56+
let worker_dir = self.get_worker_dir(&env_file_path);
57+
58+
let mut cmd = std::process::Command::new(&self.process_wrapper);
59+
cmd.args(args);
60+
5061
cmd.arg("--codegen");
62+
let mut incremental_arg = std::ffi::OsString::from("incremental=");
63+
incremental_arg.push(&worker_dir);
5164
cmd.arg(incremental_arg);
65+
5266
let output = cmd.output()?;
5367
Ok(WorkResponse {
5468
request_id: request.request_id,
@@ -84,16 +98,24 @@ impl Worker {
8498
&self,
8599
response_file_path: P,
86100
) -> io::Result<std::process::ExitStatus> {
87-
let file = std::io::BufReader::new(std::fs::File::open(response_file_path)?);
101+
let file = std::io::BufReader::new(fs::File::open(response_file_path)?);
88102

89-
let mut cmd = std::process::Command::new(&self.program_path);
103+
let mut cmd = std::process::Command::new(&self.process_wrapper);
90104
for line in file.lines() {
91105
cmd.arg(line?);
92106
}
93107
cmd.status()
94108
}
95109
}
96110

111+
fn get_env_file_path(args: &[String]) -> &String {
112+
let env_file = args.get(1).expect("missing env file");
113+
if !env_file.ends_with("_worker_env") {
114+
panic!("worker_env file not in expected place")
115+
}
116+
env_file
117+
}
118+
97119
#[cfg(test)]
98120
mod test {
99121
#[test]

worker/main.rs

+13-13
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::{fs, path::PathBuf};
2+
13
use protobuf::ProtobufResult;
24
mod lib;
35

@@ -6,15 +8,11 @@ fn main() -> ProtobufResult<()> {
68
// Always discard the executable name.
79
args.next().unwrap();
810

9-
let program = std::fs::canonicalize(args.next().expect("program name"))?;
10-
let rustc_path = std::fs::canonicalize(args.next().expect("rustc path"))?;
11-
let compilation_mode = args
12-
.next()
13-
.expect("compilation mode")
14-
.into_string()
15-
.expect("compilation mode must be valid utf-8");
16-
// TODO: program and rustc_path will combine when this is merged into rules_rust.
17-
let worker = lib::Worker::new(program, rustc_path, compilation_mode)?;
11+
let process_wrapper = fs::canonicalize(args.next().expect("process_wrapper name"))?;
12+
let rustc_path = fs::canonicalize(args.next().expect("rustc path"))?;
13+
let cache_root = PathBuf::from(args.next().expect("cache root"));
14+
15+
let worker = lib::Worker::new(process_wrapper, rustc_path, cache_root);
1816

1917
// If started as a persistent worker.
2018
if let Some(arg) = args.peek() {
@@ -27,17 +25,19 @@ fn main() -> ProtobufResult<()> {
2725
}
2826
}
2927

30-
// Spawn process as normal.
31-
// The process wrapper does not support response files.
28+
// Otherwise, spawn process as normal.
29+
30+
// The process wrapper does not support response files
3231
let response_file_arg = args
3332
.next()
3433
.unwrap()
3534
.into_string()
3635
.expect("response file path is valid utf-8");
36+
assert!(response_file_arg.starts_with('@'));
37+
let response_file_path = &response_file_arg[1..];
38+
3739
// The response file has to be the last (and only) argument left.
3840
assert!(args.peek().is_none(), "iterator should be consumed!");
39-
assert!(response_file_arg.starts_with("@"));
40-
let response_file_path = &response_file_arg[1..];
4141
let status = worker.once_with_response_file(response_file_path)?;
4242
std::process::exit(status.code().unwrap());
4343
}

worker/toolchain.bzl

+17-2
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,36 @@ Define a worker toolchain.
44

55
load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo")
66

7+
def _string_flag_impl(ctx):
8+
value = ctx.build_setting_value
9+
return BuildSettingInfo(value = value)
10+
11+
string_flag = rule(
12+
build_setting = config.string(flag = True),
13+
implementation = _string_flag_impl,
14+
)
15+
716
def _worker_toolchain_impl(ctx):
8-
if ctx.attr.enabled[BuildSettingInfo].value:
17+
root = ctx.attr.cache_root[BuildSettingInfo].value
18+
19+
if root:
920
binary = ctx.executable.worker_binary
1021
else:
1122
binary = None
1223

1324
toolchain_info = platform_common.ToolchainInfo(
1425
worker_binary = binary,
26+
cache_root = root,
1527
)
1628
return [toolchain_info]
1729

1830
worker_toolchain = rule(
1931
implementation = _worker_toolchain_impl,
2032
attrs = {
2133
"worker_binary": attr.label(allow_single_file = True, executable = True, cfg = "exec"),
22-
"enabled": attr.label(),
34+
"cache_root": attr.label(
35+
doc = """Root folder that the workers should store cache files into.
36+
If empty, worker is disabled.""",
37+
),
2338
},
2439
)

0 commit comments

Comments
 (0)