Skip to main content

revmc/compiler/
mod.rs

1//! EVM bytecode compiler implementation.
2
3use crate::{Backend, Builder, Bytecode, EvmCompilerFn, EvmContext, EvmStack, Result};
4use revm_interpreter::{Gas, InputsImpl};
5use revm_primitives::{Bytes, hardfork::SpecId};
6use revmc_backend::{
7    Attribute, FunctionAttributeLocation, Linkage, OptimizationLevel, eyre::ensure,
8};
9use revmc_builtins::Builtins;
10use revmc_context::RawEvmCompilerFn;
11use std::{
12    fs,
13    io::{self, Write},
14    mem,
15    path::{Path, PathBuf},
16};
17
18// TODO: Somehow have a config to tell the backend to assume that stack stores are unobservable,
19// making it eliminate redundant stores for values outside the stack length when optimized away.
20// E.g. `PUSH0 POP` gets fully optimized away, but the `store i256 0, ptr %stack` will still get
21// emitted.
22// Use this when `stack` is passed in arguments.
23
24// TODO: Get rid of `cfg!(target_endian)` calls.
25
26// TODO: Test on big-endian hardware.
27// It probably doesn't work when loading Rust U256 into native endianness.
28
29mod translate;
30use translate::{FcxConfig, FunctionCx};
31
32/// EVM bytecode compiler.
33///
34/// This currently represents one single-threaded IR context and module, which can be used to
35/// compile multiple functions as JIT or AOT.
36///
37/// Functions can be incrementally added with [`translate`], and then either written to an object
38/// file with [`write_object`] when in AOT mode, or JIT-compiled with [`jit_function`].
39///
40/// Performing either of these operations finalizes the module, and no more functions can be added
41/// afterwards until [`clear`] is called, which will reset the module to its initial state.
42///
43/// [`translate`]: EvmCompiler::translate
44/// [`write_object`]: EvmCompiler::write_object
45/// [`jit_function`]: EvmCompiler::jit_function
46/// [`clear`]: EvmCompiler::clear
47#[allow(missing_debug_implementations)]
48pub struct EvmCompiler<B: Backend> {
49    name: Option<String>,
50    backend: B,
51    out_dir: Option<PathBuf>,
52    config: FcxConfig,
53    builtins: Builtins<B>,
54
55    dump_assembly: bool,
56    dump_unopt_assembly: bool,
57
58    finalized: bool,
59}
60
61impl<B: Backend> EvmCompiler<B> {
62    /// Creates a new instance of the compiler with the given backend.
63    pub fn new(backend: B) -> Self {
64        Self {
65            name: None,
66            backend,
67            out_dir: None,
68            config: FcxConfig::default(),
69            builtins: Builtins::new(),
70            dump_assembly: true,
71            dump_unopt_assembly: false,
72            finalized: false,
73        }
74    }
75
76    /// Sets the name of the module.
77    pub fn set_module_name(&mut self, name: impl Into<String>) {
78        let name = name.into();
79        self.backend.set_module_name(&name);
80        self.name = Some(name);
81    }
82
83    fn is_aot(&self) -> bool {
84        self.backend.is_aot()
85    }
86
87    fn is_jit(&self) -> bool {
88        !self.is_aot()
89    }
90
91    /// Returns the output directory.
92    pub fn out_dir(&self) -> Option<&Path> {
93        self.out_dir.as_deref()
94    }
95
96    /// Dumps intermediate outputs and other debug info to the given directory after compilation.
97    ///
98    /// Disables dumping if `output_dir` is `None`.
99    pub fn set_dump_to(&mut self, output_dir: Option<PathBuf>) {
100        self.backend.set_is_dumping(output_dir.is_some());
101        self.config.comments = output_dir.is_some();
102        self.out_dir = output_dir;
103    }
104
105    /// Dumps assembly to the output directory.
106    ///
107    /// This can be quite slow.
108    ///
109    /// Defaults to `true`.
110    pub fn dump_assembly(&mut self, yes: bool) {
111        self.dump_assembly = yes;
112    }
113
114    /// Dumps the unoptimized assembly to the output directory.
115    ///
116    /// This can be quite slow.
117    ///
118    /// Defaults to `false`.
119    pub fn dump_unopt_assembly(&mut self, yes: bool) {
120        self.dump_unopt_assembly = yes;
121    }
122
123    /// Returns the optimization level.
124    pub fn opt_level(&self) -> OptimizationLevel {
125        self.backend.opt_level()
126    }
127
128    /// Sets the optimization level.
129    ///
130    /// Note that some backends may not support setting the optimization level after initialization.
131    ///
132    /// Defaults to the backend's initial optimization level.
133    pub fn set_opt_level(&mut self, level: OptimizationLevel) {
134        self.backend.set_opt_level(level);
135    }
136
137    /// Sets whether to enable debug assertions.
138    ///
139    /// These are useful for debugging, but they do a moderate performance penalty due to the
140    /// insertion of extra checks and removal of certain assumptions.
141    ///
142    /// Defaults to `cfg!(debug_assertions)`.
143    pub fn debug_assertions(&mut self, yes: bool) {
144        self.backend.set_debug_assertions(yes);
145        self.config.debug_assertions = yes;
146    }
147
148    /// Sets whether to enable frame pointers.
149    ///
150    /// This is useful for profiling and debugging, but it incurs a very slight performance penalty.
151    ///
152    /// Defaults to `cfg!(debug_assertions)`.
153    pub fn frame_pointers(&mut self, yes: bool) {
154        self.config.frame_pointers = yes;
155    }
156
157    /// Sets whether to allocate the stack locally.
158    ///
159    /// If this is set to `true`, the stack pointer argument will be ignored and the stack will be
160    /// allocated in the function.
161    ///
162    /// This setting will fail at runtime if the bytecode suspends execution, as it cannot be
163    /// restored afterwards.
164    ///
165    /// Defaults to `false`.
166    pub fn local_stack(&mut self, yes: bool) {
167        self.config.local_stack = yes;
168    }
169
170    /// Sets whether to treat the stack length as observable outside the function.
171    ///
172    /// This also implies that the length is loaded in the beginning of the function, meaning
173    /// that a function can be executed with an initial stack.
174    ///
175    /// If this is set to `true`, the stack length must be passed in the arguments.
176    ///
177    /// This is useful to inspect the stack length after the function has been executed, but it does
178    /// incur a performance penalty as the length will be stored at all return sites.
179    ///
180    /// Defaults to `false`.
181    pub fn inspect_stack_length(&mut self, yes: bool) {
182        self.config.inspect_stack_length = yes;
183    }
184
185    /// Sets whether to enable stack bound checks.
186    ///
187    /// Defaults to `true`.
188    ///
189    /// # Safety
190    ///
191    /// Removing stack length checks may improve compilation speed and performance, but will result
192    /// in **undefined behavior** if the stack length overflows at runtime, rather than a
193    /// [`StackUnderflow`]/[`StackOverflow`] result.
194    ///
195    /// [`StackUnderflow`]: crate::interpreter::InstructionResult::StackUnderflow
196    /// [`StackOverflow`]: crate::interpreter::InstructionResult::StackOverflow
197    pub unsafe fn stack_bound_checks(&mut self, yes: bool) {
198        self.config.stack_bound_checks = yes;
199    }
200
201    /// Sets whether to track gas costs.
202    ///
203    /// Disabling this will greatly improves compilation speed and performance, at the cost of not
204    /// being able to check for gas exhaustion.
205    ///
206    /// Note that this does not disable gas usage in certain instructions, mainly the ones that
207    /// are implemented as builtins.
208    ///
209    /// Use with care, as executing a function with gas disabled may result in an infinite loop.
210    ///
211    /// Defaults to `true`.
212    pub fn gas_metering(&mut self, yes: bool) {
213        self.config.gas_metering = yes;
214    }
215
216    /// Translates the given EVM bytecode into an internal function.
217    ///
218    /// NOTE: `name` must be unique for each function, as it is used as the name of the final
219    /// symbol.
220    pub fn translate<'a>(
221        &mut self,
222        name: &str,
223        input: impl Into<EvmCompilerInput<'a>>,
224        spec_id: SpecId,
225    ) -> Result<B::FuncId> {
226        ensure!(cfg!(target_endian = "little"), "only little-endian is supported");
227        ensure!(!self.finalized, "cannot compile more functions after finalizing the module");
228        let bytecode = self.parse(input.into(), spec_id)?;
229        self.translate_inner(name, &bytecode)
230    }
231
232    /// (JIT) Compiles the given EVM bytecode into a JIT function.
233    ///
234    /// See [`translate`](Self::translate) for more information.
235    ///
236    /// # Safety
237    ///
238    /// The returned function pointer is owned by the module, and must not be called after the
239    /// module is cleared or the function is freed.
240    pub unsafe fn jit<'a>(
241        &mut self,
242        name: &str,
243        bytecode: impl Into<EvmCompilerInput<'a>>,
244        spec_id: SpecId,
245    ) -> Result<EvmCompilerFn> {
246        let id = self.translate(name, bytecode.into(), spec_id)?;
247        unsafe { self.jit_function(id) }
248    }
249
250    /// (JIT) Finalizes the module and JITs the given function.
251    ///
252    /// # Safety
253    ///
254    /// The returned function pointer is owned by the module, and must not be called after the
255    /// module is cleared or the function is freed.
256    pub unsafe fn jit_function(&mut self, id: B::FuncId) -> Result<EvmCompilerFn> {
257        ensure!(self.is_jit(), "cannot JIT functions during AOT compilation");
258        self.finalize()?;
259        let addr = self.backend.jit_function(id)?;
260        debug_assert!(addr != 0);
261        Ok(EvmCompilerFn::new(unsafe { std::mem::transmute::<usize, RawEvmCompilerFn>(addr) }))
262    }
263
264    /// (AOT) Writes the compiled object to the given file.
265    pub fn write_object_to_file(&mut self, path: &Path) -> Result<()> {
266        let file = fs::File::create(path)?;
267        let mut writer = io::BufWriter::new(file);
268        self.write_object(&mut writer)?;
269        writer.flush()?;
270        Ok(())
271    }
272
273    /// (AOT) Finalizes the module and writes the compiled object to the given writer.
274    pub fn write_object<W: io::Write>(&mut self, w: W) -> Result<()> {
275        ensure!(self.is_aot(), "cannot write AOT object during JIT compilation");
276        self.finalize()?;
277        self.backend.write_object(w)
278    }
279
280    /// (JIT) Frees the memory associated with a single function.
281    ///
282    /// Note that this will not reset the state of the internal module even if all functions are
283    /// freed with this function. Use [`clear`] to reset the module.
284    ///
285    /// [`clear`]: EvmCompiler::clear
286    ///
287    /// # Safety
288    ///
289    /// Because this function invalidates any pointers retrieved from the corresponding module, it
290    /// should only be used when none of the functions from that module are currently executing and
291    /// none of the `fn` pointers are called afterwards.
292    pub unsafe fn free_function(&mut self, id: B::FuncId) -> Result<()> {
293        self.backend.free_function(id)
294    }
295
296    /// Clears the IR module, freeing memory used by IR representations.
297    ///
298    /// This does **not** free JIT-compiled machine code, so previously obtained function pointers
299    /// remain valid. The module is left in a state where new functions can be translated.
300    pub fn clear_ir(&mut self) -> Result<()> {
301        self.builtins.clear();
302        self.finalized = false;
303        self.backend.clear_ir()
304    }
305
306    /// Frees all functions and resets the state of the internal module, allowing for new functions
307    /// to be compiled.
308    ///
309    /// # Safety
310    ///
311    /// Because this function invalidates any pointers retrieved from the corresponding module, it
312    /// should only be used when none of the functions from that module are currently executing and
313    /// none of the `fn` pointers are called afterwards.
314    pub unsafe fn clear(&mut self) -> Result<()> {
315        self.builtins.clear();
316        self.finalized = false;
317        self.backend.free_all_functions()
318    }
319
320    /// Parses the given EVM bytecode. Not public API.
321    #[doc(hidden)] // Not public API.
322    pub fn parse<'a>(
323        &mut self,
324        input: EvmCompilerInput<'a>,
325        spec_id: SpecId,
326    ) -> Result<Bytecode<'a>> {
327        let EvmCompilerInput::Code(bytecode) = input;
328
329        let mut bytecode = Bytecode::new(bytecode, spec_id);
330        bytecode.analyze()?;
331        if let Some(dump_dir) = &self.dump_dir() {
332            Self::dump_bytecode(dump_dir, &bytecode)?;
333        }
334        Ok(bytecode)
335    }
336
337    #[instrument(name = "translate", level = "debug", skip_all)]
338    #[doc(hidden)] // Not public API.
339    pub fn translate_inner(&mut self, name: &str, bytecode: &Bytecode<'_>) -> Result<B::FuncId> {
340        ensure!(self.backend.function_name_is_unique(name), "function name `{name}` is not unique");
341        let linkage = Linkage::Public;
342        let (bcx, id) = Self::make_builder(&mut self.backend, &self.config, name, linkage)?;
343        FunctionCx::translate(bcx, self.config, &mut self.builtins, bytecode)?;
344        Ok(id)
345    }
346
347    #[instrument(level = "debug", skip_all)]
348    fn finalize(&mut self) -> Result<()> {
349        if self.finalized {
350            return Ok(());
351        }
352        self.finalized = true;
353
354        if let Some(dump_dir) = &self.dump_dir() {
355            let path = dump_dir.join("unopt").with_extension(self.backend.ir_extension());
356            self.dump_ir(&path)?;
357
358            // Dump IR before verifying for better debugging.
359            self.verify_module()?;
360
361            if self.dump_assembly && self.dump_unopt_assembly {
362                let path = dump_dir.join("unopt.s");
363                self.dump_disasm(&path)?;
364            }
365        } else {
366            self.verify_module()?;
367        }
368
369        self.optimize_module()?;
370
371        if let Some(dump_dir) = &self.dump_dir() {
372            let path = dump_dir.join("opt").with_extension(self.backend.ir_extension());
373            self.dump_ir(&path)?;
374
375            if self.dump_assembly {
376                let path = dump_dir.join("opt.s");
377                self.dump_disasm(&path)?;
378            }
379        }
380
381        Ok(())
382    }
383
384    #[instrument(level = "debug", skip_all)]
385    fn make_builder<'a>(
386        backend: &'a mut B,
387        config: &FcxConfig,
388        name: &str,
389        linkage: Linkage,
390    ) -> Result<(B::Builder<'a>, B::FuncId)> {
391        fn size_align<T>(i: usize) -> (usize, usize, usize) {
392            (i, mem::size_of::<T>(), mem::align_of::<T>())
393        }
394
395        let i8 = backend.type_int(8);
396        let ptr = backend.type_ptr();
397        let (ret, params, param_names, ptr_attrs) = (
398            Some(i8),
399            &[ptr, ptr, ptr, ptr, ptr],
400            &[
401                "arg.gas.addr",
402                "arg.stack.addr",
403                "arg.stack_len.addr",
404                "arg.input.addr",
405                "arg.ecx.addr",
406            ],
407            &[
408                size_align::<Gas>(0),
409                size_align::<EvmStack>(1),
410                size_align::<usize>(2),
411                size_align::<InputsImpl>(3),
412                size_align::<EvmContext<'_>>(4),
413            ],
414        );
415        debug_assert_eq!(params.len(), param_names.len());
416        let (mut bcx, id) = backend.build_function(name, ret, params, param_names, linkage)?;
417
418        // Function attributes.
419        let function_attributes = default_attrs::for_fn()
420            .chain(config.frame_pointers.then_some(Attribute::AllFramePointers))
421            // We can unwind in panics, which are present only in debug assertions.
422            .chain((!config.debug_assertions).then_some(Attribute::NoUnwind));
423        for attr in function_attributes {
424            bcx.add_function_attribute(None, attr, FunctionAttributeLocation::Function);
425        }
426
427        // Pointer argument attributes.
428        if !config.debug_assertions {
429            for &(i, size, align) in ptr_attrs {
430                let attrs = default_attrs::for_sized_ptr((size, align))
431                    // `Gas` and `InputsImpl` are reachable through `EvmContext` and can alias
432                    // parameters 0 and 3. Keep `noalias` only for stack and stack_len.
433                    .chain(matches!(i, 1 | 2).then_some(Attribute::NoAlias));
434                for attr in attrs {
435                    let loc = FunctionAttributeLocation::Param(i as _);
436                    bcx.add_function_attribute(None, attr, loc);
437                }
438            }
439        }
440
441        Ok((bcx, id))
442    }
443
444    #[instrument(level = "debug", skip_all)]
445    fn dump_ir(&mut self, path: &Path) -> Result<()> {
446        self.backend.dump_ir(path)
447    }
448
449    #[instrument(level = "debug", skip_all)]
450    fn dump_disasm(&mut self, path: &Path) -> Result<()> {
451        self.backend.dump_disasm(path)
452    }
453
454    #[instrument(level = "debug", skip_all)]
455    fn verify_module(&mut self) -> Result<()> {
456        self.backend.verify_module()
457    }
458
459    #[instrument(level = "debug", skip_all)]
460    fn optimize_module(&mut self) -> Result<()> {
461        self.backend.optimize_module()
462    }
463
464    #[instrument(level = "debug", skip_all)]
465    fn dump_bytecode(dump_dir: &Path, bytecode: &Bytecode<'_>) -> Result<()> {
466        {
467            let file = fs::File::create(dump_dir.join("bytecode.txt"))?;
468            let mut writer = io::BufWriter::new(file);
469            write!(writer, "{bytecode}")?;
470            writer.flush()?;
471        }
472
473        {
474            let file = fs::File::create(dump_dir.join("bytecode.dbg.txt"))?;
475            let mut writer = io::BufWriter::new(file);
476            writeln!(writer, "{bytecode:#?}")?;
477            writer.flush()?;
478        }
479
480        {
481            let file = fs::File::create(dump_dir.join("bytecode.dot"))?;
482            let mut writer = io::BufWriter::new(file);
483            let mut dot = String::new();
484            bytecode.write_dot(&mut dot).map_err(|e| revmc_backend::eyre::eyre!("{e}"))?;
485            writer.write_all(dot.as_bytes())?;
486            writer.flush()?;
487        }
488
489        Ok(())
490    }
491
492    fn dump_dir(&self) -> Option<PathBuf> {
493        let mut dump_dir = self.out_dir.clone()?;
494        if let Some(name) = &self.name {
495            dump_dir.push(name.replace(char::is_whitespace, "_"));
496        }
497        if !dump_dir.exists() {
498            let _ = fs::create_dir_all(&dump_dir);
499        }
500        Some(dump_dir)
501    }
502}
503
/// [`EvmCompiler`] input.
///
/// Values are usually created through the `From` conversions rather than by naming a variant
/// directly.
#[derive(Debug)]
pub enum EvmCompilerInput<'a> {
    /// EVM bytecode.
    Code(&'a [u8]),
}
510
511impl<'a> From<&'a [u8]> for EvmCompilerInput<'a> {
512    fn from(code: &'a [u8]) -> Self {
513        EvmCompilerInput::Code(code)
514    }
515}
516
517impl<'a> From<&'a Vec<u8>> for EvmCompilerInput<'a> {
518    fn from(code: &'a Vec<u8>) -> Self {
519        EvmCompilerInput::Code(code)
520    }
521}
522
523impl<'a> From<&'a Bytes> for EvmCompilerInput<'a> {
524    fn from(code: &'a Bytes) -> Self {
525        EvmCompilerInput::Code(code)
526    }
527}
528
529#[allow(dead_code)]
530mod default_attrs {
531    use revmc_backend::Attribute;
532
533    pub(crate) fn for_fn() -> impl Iterator<Item = Attribute> {
534        [
535            Attribute::WillReturn,      // Always returns.
536            Attribute::NoSync,          // No thread synchronization.
537            Attribute::NativeTargetCpu, // Optimization.
538            Attribute::NoRecurse,       // Revm is not recursive.
539        ]
540        .into_iter()
541    }
542
543    pub(crate) fn for_param() -> impl Iterator<Item = Attribute> {
544        [Attribute::NoUndef].into_iter()
545    }
546
547    pub(crate) fn for_ptr() -> impl Iterator<Item = Attribute> {
548        for_param().chain([Attribute::NoCapture])
549    }
550
551    pub(crate) fn for_sized_ptr((size, align): (usize, usize)) -> impl Iterator<Item = Attribute> {
552        for_ptr().chain([Attribute::Dereferenceable(size as u64), Attribute::Align(align as u64)])
553    }
554
555    pub(crate) fn for_ptr_t<T>() -> impl Iterator<Item = Attribute> {
556        for_sized_ptr(size_align::<T>())
557    }
558
559    pub(crate) fn for_ref() -> impl Iterator<Item = Attribute> {
560        for_ptr().chain([Attribute::NonNull, Attribute::NoAlias])
561    }
562
563    pub(crate) fn for_sized_ref((size, align): (usize, usize)) -> impl Iterator<Item = Attribute> {
564        for_ref().chain([Attribute::Dereferenceable(size as u64), Attribute::Align(align as u64)])
565    }
566
567    pub(crate) fn for_ref_t<T>() -> impl Iterator<Item = Attribute> {
568        for_sized_ref(size_align::<T>())
569    }
570
571    pub(crate) fn size_align<T>() -> (usize, usize) {
572        (std::mem::size_of::<T>(), std::mem::align_of::<T>())
573    }
574}