revmc/compiler/
mod.rs

1//! EVM bytecode compiler implementation.
2
3use crate::{Backend, Builder, Bytecode, EvmCompilerFn, EvmContext, EvmStack, Result};
4use revm_interpreter::{Gas, InputsImpl};
5use revm_primitives::{hardfork::SpecId, Bytes};
6use revmc_backend::{
7    eyre::ensure, Attribute, FunctionAttributeLocation, Linkage, OptimizationLevel,
8};
9use revmc_builtins::Builtins;
10use revmc_context::RawEvmCompilerFn;
11use std::{
12    fs,
13    io::{self, Write},
14    mem,
15    path::{Path, PathBuf},
16};
17
18// TODO: Somehow have a config to tell the backend to assume that stack stores are unobservable,
19// making it eliminate redundant stores for values outside the stack length when optimized away.
20// E.g. `PUSH0 POP` gets fully optimized away, but the `store i256 0, ptr %stack` will still get
21// emitted.
22// Use this when `stack` is passed in arguments.
23
24// TODO: Get rid of `cfg!(target_endian)` calls.
25
26// TODO: Test on big-endian hardware.
27// It probably doesn't work when loading Rust U256 into native endianness.
28
29mod translate;
30use translate::{FcxConfig, FunctionCx};
31
/// EVM bytecode compiler.
///
/// This currently represents one single-threaded IR context and module, which can be used to
/// compile multiple functions as JIT or AOT.
///
/// Functions can be incrementally added with [`translate`], and then either written to an object
/// file with [`write_object`] when in AOT mode, or JIT-compiled with [`jit_function`].
///
/// Performing either of these operations finalizes the module, and no more functions can be added
/// afterwards until [`clear`] is called, which will reset the module to its initial state.
///
/// [`translate`]: EvmCompiler::translate
/// [`write_object`]: EvmCompiler::write_object
/// [`jit_function`]: EvmCompiler::jit_function
/// [`clear`]: EvmCompiler::clear
#[allow(missing_debug_implementations)]
pub struct EvmCompiler<B: Backend> {
    /// Module name, if one was set; also used as the name of the dump subdirectory.
    name: Option<String>,
    /// Code generation backend that all compilation requests are forwarded to.
    backend: B,
    /// Base directory for debug dumps; `None` disables dumping.
    out_dir: Option<PathBuf>,
    /// Per-function translation settings handed to the function translator.
    config: FcxConfig,
    /// Builtins state shared between translated functions; reset by `clear`.
    builtins: Builtins<B>,

    /// Whether assembly is dumped alongside the IR when dumping is enabled.
    dump_assembly: bool,
    /// Whether the unoptimized assembly is dumped as well (requires `dump_assembly`).
    dump_unopt_assembly: bool,

    /// Set once the module has been finalized; `translate` refuses to run while this is `true`.
    finalized: bool,
}
60
61impl<B: Backend> EvmCompiler<B> {
62    /// Creates a new instance of the compiler with the given backend.
63    pub fn new(backend: B) -> Self {
64        Self {
65            name: None,
66            backend,
67            out_dir: None,
68            config: FcxConfig::default(),
69            builtins: Builtins::new(),
70            dump_assembly: true,
71            dump_unopt_assembly: false,
72            finalized: false,
73        }
74    }
75
76    /// Sets the name of the module.
77    pub fn set_module_name(&mut self, name: impl Into<String>) {
78        let name = name.into();
79        self.backend.set_module_name(&name);
80        self.name = Some(name);
81    }
82
83    fn is_aot(&self) -> bool {
84        self.backend.is_aot()
85    }
86
87    fn is_jit(&self) -> bool {
88        !self.is_aot()
89    }
90
91    /// Returns the output directory.
92    pub fn out_dir(&self) -> Option<&Path> {
93        self.out_dir.as_deref()
94    }
95
96    /// Dumps intermediate outputs and other debug info to the given directory after compilation.
97    ///
98    /// Disables dumping if `output_dir` is `None`.
99    pub fn set_dump_to(&mut self, output_dir: Option<PathBuf>) {
100        self.backend.set_is_dumping(output_dir.is_some());
101        self.config.comments = output_dir.is_some();
102        self.out_dir = output_dir;
103    }
104
105    /// Dumps assembly to the output directory.
106    ///
107    /// This can be quite slow.
108    ///
109    /// Defaults to `true`.
110    pub fn dump_assembly(&mut self, yes: bool) {
111        self.dump_assembly = yes;
112    }
113
114    /// Dumps the unoptimized assembly to the output directory.
115    ///
116    /// This can be quite slow.
117    ///
118    /// Defaults to `false`.
119    pub fn dump_unopt_assembly(&mut self, yes: bool) {
120        self.dump_unopt_assembly = yes;
121    }
122
123    /// Returns the optimization level.
124    pub fn opt_level(&self) -> OptimizationLevel {
125        self.backend.opt_level()
126    }
127
128    /// Sets the optimization level.
129    ///
130    /// Note that some backends may not support setting the optimization level after initialization.
131    ///
132    /// Defaults to the backend's initial optimization level.
133    pub fn set_opt_level(&mut self, level: OptimizationLevel) {
134        self.backend.set_opt_level(level);
135    }
136
137    /// Sets whether to enable debug assertions.
138    ///
139    /// These are useful for debugging, but they do a moderate performance penalty due to the
140    /// insertion of extra checks and removal of certain assumptions.
141    ///
142    /// Defaults to `cfg!(debug_assertions)`.
143    pub fn debug_assertions(&mut self, yes: bool) {
144        self.backend.set_debug_assertions(yes);
145        self.config.debug_assertions = yes;
146    }
147
148    /// Sets whether to enable frame pointers.
149    ///
150    /// This is useful for profiling and debugging, but it incurs a very slight performance penalty.
151    ///
152    /// Defaults to `cfg!(debug_assertions)`.
153    pub fn frame_pointers(&mut self, yes: bool) {
154        self.config.frame_pointers = yes;
155    }
156
157    /// Sets whether to allocate the stack locally.
158    ///
159    /// If this is set to `true`, the stack pointer argument will be ignored and the stack will be
160    /// allocated in the function.
161    ///
162    /// This setting will fail at runtime if the bytecode suspends execution, as it cannot be
163    /// restored afterwards.
164    ///
165    /// Defaults to `false`.
166    pub fn local_stack(&mut self, yes: bool) {
167        self.config.local_stack = yes;
168    }
169
170    /// Sets whether to treat the stack length as observable outside the function.
171    ///
172    /// This also implies that the length is loaded in the beginning of the function, meaning
173    /// that a function can be executed with an initial stack.
174    ///
175    /// If this is set to `true`, the stack length must be passed in the arguments.
176    ///
177    /// This is useful to inspect the stack length after the function has been executed, but it does
178    /// incur a performance penalty as the length will be stored at all return sites.
179    ///
180    /// Defaults to `false`.
181    pub fn inspect_stack_length(&mut self, yes: bool) {
182        self.config.inspect_stack_length = yes;
183    }
184
185    /// Sets whether to enable stack bound checks.
186    ///
187    /// Defaults to `true`.
188    ///
189    /// # Safety
190    ///
191    /// Removing stack length checks may improve compilation speed and performance, but will result
192    /// in **undefined behavior** if the stack length overflows at runtime, rather than a
193    /// [`StackUnderflow`]/[`StackOverflow`] result.
194    ///
195    /// [`StackUnderflow`]: crate::interpreter::InstructionResult::StackUnderflow
196    /// [`StackOverflow`]: crate::interpreter::InstructionResult::StackOverflow
197    pub unsafe fn stack_bound_checks(&mut self, yes: bool) {
198        self.config.stack_bound_checks = yes;
199    }
200
201    /// Sets whether to track gas costs.
202    ///
203    /// Disabling this will greatly improves compilation speed and performance, at the cost of not
204    /// being able to check for gas exhaustion.
205    ///
206    /// Note that this does not disable gas usage in certain instructions, mainly the ones that
207    /// are implemented as builtins.
208    ///
209    /// Use with care, as executing a function with gas disabled may result in an infinite loop.
210    ///
211    /// Defaults to `true`.
212    pub fn gas_metering(&mut self, yes: bool) {
213        self.config.gas_metering = yes;
214    }
215
216    /// Translates the given EVM bytecode into an internal function.
217    ///
218    /// NOTE: `name` must be unique for each function, as it is used as the name of the final
219    /// symbol.
220    pub fn translate<'a>(
221        &mut self,
222        name: &str,
223        input: impl Into<EvmCompilerInput<'a>>,
224        spec_id: SpecId,
225    ) -> Result<B::FuncId> {
226        ensure!(cfg!(target_endian = "little"), "only little-endian is supported");
227        ensure!(!self.finalized, "cannot compile more functions after finalizing the module");
228        let bytecode = self.parse(input.into(), spec_id)?;
229        self.translate_inner(name, &bytecode)
230    }
231
232    /// (JIT) Compiles the given EVM bytecode into a JIT function.
233    ///
234    /// See [`translate`](Self::translate) for more information.
235    ///
236    /// # Safety
237    ///
238    /// The returned function pointer is owned by the module, and must not be called after the
239    /// module is cleared or the function is freed.
240    pub unsafe fn jit<'a>(
241        &mut self,
242        name: &str,
243        bytecode: impl Into<EvmCompilerInput<'a>>,
244        spec_id: SpecId,
245    ) -> Result<EvmCompilerFn> {
246        let id = self.translate(name, bytecode.into(), spec_id)?;
247        unsafe { self.jit_function(id) }
248    }
249
250    /// (JIT) Finalizes the module and JITs the given function.
251    ///
252    /// # Safety
253    ///
254    /// The returned function pointer is owned by the module, and must not be called after the
255    /// module is cleared or the function is freed.
256    pub unsafe fn jit_function(&mut self, id: B::FuncId) -> Result<EvmCompilerFn> {
257        ensure!(self.is_jit(), "cannot JIT functions during AOT compilation");
258        self.finalize()?;
259        let addr = self.backend.jit_function(id)?;
260        debug_assert!(addr != 0);
261        Ok(EvmCompilerFn::new(unsafe { std::mem::transmute::<usize, RawEvmCompilerFn>(addr) }))
262    }
263
264    /// (AOT) Writes the compiled object to the given file.
265    pub fn write_object_to_file(&mut self, path: &Path) -> Result<()> {
266        let file = fs::File::create(path)?;
267        let mut writer = io::BufWriter::new(file);
268        self.write_object(&mut writer)?;
269        writer.flush()?;
270        Ok(())
271    }
272
273    /// (AOT) Finalizes the module and writes the compiled object to the given writer.
274    pub fn write_object<W: io::Write>(&mut self, w: W) -> Result<()> {
275        ensure!(self.is_aot(), "cannot write AOT object during JIT compilation");
276        self.finalize()?;
277        self.backend.write_object(w)
278    }
279
280    /// (JIT) Frees the memory associated with a single function.
281    ///
282    /// Note that this will not reset the state of the internal module even if all functions are
283    /// freed with this function. Use [`clear`] to reset the module.
284    ///
285    /// [`clear`]: EvmCompiler::clear
286    ///
287    /// # Safety
288    ///
289    /// Because this function invalidates any pointers retrieved from the corresponding module, it
290    /// should only be used when none of the functions from that module are currently executing and
291    /// none of the `fn` pointers are called afterwards.
292    pub unsafe fn free_function(&mut self, id: B::FuncId) -> Result<()> {
293        self.backend.free_function(id)
294    }
295
296    /// Frees all functions and resets the state of the internal module, allowing for new functions
297    /// to be compiled.
298    ///
299    /// # Safety
300    ///
301    /// Because this function invalidates any pointers retrieved from the corresponding module, it
302    /// should only be used when none of the functions from that module are currently executing and
303    /// none of the `fn` pointers are called afterwards.
304    pub unsafe fn clear(&mut self) -> Result<()> {
305        self.builtins.clear();
306        self.finalized = false;
307        self.backend.free_all_functions()
308    }
309
310    /// Parses the given EVM bytecode. Not public API.
311    #[doc(hidden)] // Not public API.
312    pub fn parse<'a>(
313        &mut self,
314        input: EvmCompilerInput<'a>,
315        spec_id: SpecId,
316    ) -> Result<Bytecode<'a>> {
317        let EvmCompilerInput::Code(bytecode) = input;
318
319        let mut bytecode = Bytecode::new(bytecode, spec_id);
320        bytecode.analyze()?;
321        if let Some(dump_dir) = &self.dump_dir() {
322            Self::dump_bytecode(dump_dir, &bytecode)?;
323        }
324        Ok(bytecode)
325    }
326
327    #[instrument(name = "translate", level = "debug", skip_all)]
328    fn translate_inner(&mut self, name: &str, bytecode: &Bytecode<'_>) -> Result<B::FuncId> {
329        ensure!(self.backend.function_name_is_unique(name), "function name `{name}` is not unique");
330        let linkage = Linkage::Public;
331        let (bcx, id) = Self::make_builder(&mut self.backend, &self.config, name, linkage)?;
332        FunctionCx::translate(bcx, self.config, &mut self.builtins, bytecode)?;
333        Ok(id)
334    }
335
336    #[instrument(level = "debug", skip_all)]
337    fn finalize(&mut self) -> Result<()> {
338        if self.finalized {
339            return Ok(());
340        }
341        self.finalized = true;
342
343        if let Some(dump_dir) = &self.dump_dir() {
344            let path = dump_dir.join("unopt").with_extension(self.backend.ir_extension());
345            self.dump_ir(&path)?;
346
347            // Dump IR before verifying for better debugging.
348            self.verify_module()?;
349
350            if self.dump_assembly && self.dump_unopt_assembly {
351                let path = dump_dir.join("unopt.s");
352                self.dump_disasm(&path)?;
353            }
354        } else {
355            self.verify_module()?;
356        }
357
358        self.optimize_module()?;
359
360        if let Some(dump_dir) = &self.dump_dir() {
361            let path = dump_dir.join("opt").with_extension(self.backend.ir_extension());
362            self.dump_ir(&path)?;
363
364            if self.dump_assembly {
365                let path = dump_dir.join("opt.s");
366                self.dump_disasm(&path)?;
367            }
368        }
369
370        Ok(())
371    }
372
373    #[instrument(level = "debug", skip_all)]
374    fn make_builder<'a>(
375        backend: &'a mut B,
376        config: &FcxConfig,
377        name: &str,
378        linkage: Linkage,
379    ) -> Result<(B::Builder<'a>, B::FuncId)> {
380        fn size_align<T>(i: usize) -> (usize, usize, usize) {
381            (i, mem::size_of::<T>(), mem::align_of::<T>())
382        }
383
384        let i8 = backend.type_int(8);
385        let ptr = backend.type_ptr();
386        let (ret, params, param_names, ptr_attrs) = (
387            Some(i8),
388            &[ptr, ptr, ptr, ptr, ptr],
389            &[
390                "arg.gas.addr",
391                "arg.stack.addr",
392                "arg.stack_len.addr",
393                "arg.input.addr",
394                "arg.ecx.addr",
395            ],
396            &[
397                size_align::<Gas>(0),
398                size_align::<EvmStack>(1),
399                size_align::<usize>(2),
400                size_align::<InputsImpl>(3),
401                size_align::<EvmContext<'_>>(4),
402            ],
403        );
404        debug_assert_eq!(params.len(), param_names.len());
405        let (mut bcx, id) = backend.build_function(name, ret, params, param_names, linkage)?;
406
407        // Function attributes.
408        let function_attributes = default_attrs::for_fn()
409            .chain(config.frame_pointers.then_some(Attribute::AllFramePointers))
410            // We can unwind in panics, which are present only in debug assertions.
411            .chain((!config.debug_assertions).then_some(Attribute::NoUnwind));
412        for attr in function_attributes {
413            bcx.add_function_attribute(None, attr, FunctionAttributeLocation::Function);
414        }
415
416        // Pointer argument attributes.
417        if !config.debug_assertions {
418            for &(i, size, align) in ptr_attrs {
419                let attrs = default_attrs::for_sized_ptr((size, align))
420                    // `Gas` is aliased in `EvmContext`.
421                    .chain((i != 0).then_some(Attribute::NoAlias));
422                for attr in attrs {
423                    let loc = FunctionAttributeLocation::Param(i as _);
424                    bcx.add_function_attribute(None, attr, loc);
425                }
426            }
427        }
428
429        Ok((bcx, id))
430    }
431
432    #[instrument(level = "debug", skip_all)]
433    fn dump_ir(&mut self, path: &Path) -> Result<()> {
434        self.backend.dump_ir(path)
435    }
436
437    #[instrument(level = "debug", skip_all)]
438    fn dump_disasm(&mut self, path: &Path) -> Result<()> {
439        self.backend.dump_disasm(path)
440    }
441
442    #[instrument(level = "debug", skip_all)]
443    fn verify_module(&mut self) -> Result<()> {
444        self.backend.verify_module()
445    }
446
447    #[instrument(level = "debug", skip_all)]
448    fn optimize_module(&mut self) -> Result<()> {
449        self.backend.optimize_module()
450    }
451
452    #[instrument(level = "debug", skip_all)]
453    fn dump_bytecode(dump_dir: &Path, bytecode: &Bytecode<'_>) -> Result<()> {
454        {
455            let file = fs::File::create(dump_dir.join("bytecode.txt"))?;
456            let mut writer = io::BufWriter::new(file);
457            write!(writer, "{bytecode}")?;
458            writer.flush()?;
459        }
460
461        {
462            let file = fs::File::create(dump_dir.join("bytecode.dbg.txt"))?;
463            let mut writer = io::BufWriter::new(file);
464            writeln!(writer, "{bytecode:#?}")?;
465            writer.flush()?;
466        }
467
468        Ok(())
469    }
470
471    fn dump_dir(&self) -> Option<PathBuf> {
472        let mut dump_dir = self.out_dir.clone()?;
473        if let Some(name) = &self.name {
474            dump_dir.push(name.replace(char::is_whitespace, "_"));
475        }
476        if !dump_dir.exists() {
477            let _ = fs::create_dir_all(&dump_dir);
478        }
479        Some(dump_dir)
480    }
481}
482
/// [`EvmCompiler`] input.
///
/// This is a cheap, borrowed view of the code to compile, so it is `Copy` and can be printed
/// with `Debug`.
#[derive(Clone, Copy, Debug)]
pub enum EvmCompilerInput<'a> {
    /// EVM bytecode.
    Code(&'a [u8]),
}
489
490impl<'a> From<&'a [u8]> for EvmCompilerInput<'a> {
491    fn from(code: &'a [u8]) -> Self {
492        EvmCompilerInput::Code(code)
493    }
494}
495
496impl<'a> From<&'a Vec<u8>> for EvmCompilerInput<'a> {
497    fn from(code: &'a Vec<u8>) -> Self {
498        EvmCompilerInput::Code(code)
499    }
500}
501
502impl<'a> From<&'a Bytes> for EvmCompilerInput<'a> {
503    fn from(code: &'a Bytes) -> Self {
504        EvmCompilerInput::Code(code)
505    }
506}
507
508#[allow(dead_code)]
509mod default_attrs {
510    use revmc_backend::Attribute;
511
512    pub(crate) fn for_fn() -> impl Iterator<Item = Attribute> {
513        [
514            Attribute::WillReturn,      // Always returns.
515            Attribute::NoSync,          // No thread synchronization.
516            Attribute::NativeTargetCpu, // Optimization.
517            Attribute::Speculatable,    // No undefined behavior.
518            Attribute::NoRecurse,       // Revm is not recursive.
519        ]
520        .into_iter()
521    }
522
523    pub(crate) fn for_param() -> impl Iterator<Item = Attribute> {
524        [Attribute::NoUndef].into_iter()
525    }
526
527    pub(crate) fn for_ptr() -> impl Iterator<Item = Attribute> {
528        for_param().chain([Attribute::NoCapture])
529    }
530
531    pub(crate) fn for_sized_ptr((size, align): (usize, usize)) -> impl Iterator<Item = Attribute> {
532        for_ptr().chain([Attribute::Dereferenceable(size as u64), Attribute::Align(align as u64)])
533    }
534
535    pub(crate) fn for_ptr_t<T>() -> impl Iterator<Item = Attribute> {
536        for_sized_ptr(size_align::<T>())
537    }
538
539    pub(crate) fn for_ref() -> impl Iterator<Item = Attribute> {
540        for_ptr().chain([Attribute::NonNull, Attribute::NoAlias])
541    }
542
543    pub(crate) fn for_sized_ref((size, align): (usize, usize)) -> impl Iterator<Item = Attribute> {
544        for_ref().chain([Attribute::Dereferenceable(size as u64), Attribute::Align(align as u64)])
545    }
546
547    pub(crate) fn for_ref_t<T>() -> impl Iterator<Item = Attribute> {
548        for_sized_ref(size_align::<T>())
549    }
550
551    pub(crate) fn size_align<T>() -> (usize, usize) {
552        (std::mem::size_of::<T>(), std::mem::align_of::<T>())
553    }
554}