Skip to main content

revmc_codegen/bytecode/
mod.rs

1//! Internal EVM bytecode and opcode representation.
2
3use crate::FxHashMap;
4use bitvec::vec::BitVec;
5use oxc_index::IndexVec;
6use revm_bytecode::opcode as op;
7use revm_primitives::{U256, hardfork::SpecId};
8use revmc_backend::Result;
9use smallvec::SmallVec;
10use std::{borrow::Cow, cell::RefCell};
11
12pub(crate) use revm_context_interface::cfg::GasParams;
13
14mod passes;
15pub(crate) use passes::{
16    Block, Cfg, GasSection, MemorySection, MemorySectionAnalysis, SectionsAnalysis, Snapshots,
17    StackSection,
18};
19
20mod asm;
21pub use asm::parse_asm;
22
23mod fmt;
24
25mod interner;
26pub(crate) use interner::Interner;
27
28mod info;
29pub use info::*;
30
31mod opcode;
32pub use opcode::*;
33
/// Noop opcode used to test suspend-resume.
///
/// Only compiled for fuzzing and tests; 0x25 is presumably an otherwise-unassigned
/// opcode value — TODO confirm against the opcode table.
#[cfg(any(feature = "__fuzzing", test))]
pub(crate) const TEST_SUSPEND: u8 = 0x25;
37
/// Implements [`std::fmt::Display`] for a nonmax index type, rendering the result of
/// `self.index()` through the given format string.
macro_rules! impl_index_display {
    ($ty:ty, $fmt:literal) => {
        impl std::fmt::Display for $ty {
            fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                write!(formatter, $fmt, self.index())
            }
        }
    };
}
pub(crate) use impl_index_display;
49
oxc_index::define_nonmax_u32_index_type! {
    /// An EVM instruction index into [`Bytecode`] instructions.
    ///
    /// Also known as `ic`, or instruction counter; not to be confused with SSA `inst`s.
    pub(crate) struct Inst;
}
// Displayed as e.g. `ic42`.
impl_index_display!(Inst, "ic{}");

oxc_index::define_nonmax_u32_index_type! {
    /// Index into the deduplicated U256 constant pool.
    pub(crate) struct U256Idx;
}
// Displayed as the bare index.
impl_index_display!(U256Idx, "{}");
63
/// Type alias for the bytecode-level U256 constant interner.
pub(crate) type U256Interner = Interner<U256Idx, U256, alloy_primitives::map::FbBuildHasher<32>>;

/// Compact handle to a `U256` constant.
///
/// Either an inline value that fits in 31 bits, or a [`U256Idx`] reference into a
/// [`U256Interner`]; the high bit distinguishes the two (see `U256Imm::INLINE_TAG`).
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct U256Imm(u32);
72
impl U256Imm {
    /// High bit set means the low 31 bits are an inline value; high bit clear means the
    /// handle is an interner index.
    const INLINE_TAG: u32 = 1 << 31;

    /// Decodes a handle previously produced by [`Self::to_raw`].
    #[inline]
    pub(crate) fn from_raw(raw: u32) -> Self {
        Self(raw)
    }

    /// Encodes this handle as a `u32` for storage in [`InstData::data`].
    #[inline]
    pub(crate) fn to_raw(self) -> u32 {
        self.0
    }

    /// Constructs a handle for `value`, inlining values that fit in 31 bits and
    /// otherwise interning them.
    #[inline]
    pub(crate) fn new(value: U256, interner: &mut U256Interner) -> Self {
        // Inline only when the value fits in `u32` *and* its high bit is clear, so the
        // tag bit cannot be confused with payload.
        if let Ok(x) = u32::try_from(value)
            && x & Self::INLINE_TAG == 0
        {
            Self(Self::INLINE_TAG | x)
        } else {
            let idx = interner.intern(value).index() as u32;
            debug_assert!(
                idx & Self::INLINE_TAG == 0,
                "U256 interner exceeded 2^31 entries; bump `U256Imm` encoding",
            );
            Self(idx)
        }
    }

    /// Returns the underlying `U256` value, resolving interned handles through `interner`.
    #[inline]
    pub(crate) fn get(self, interner: &U256Interner) -> U256 {
        if self.0 & Self::INLINE_TAG != 0 {
            U256::from(self.0 & !Self::INLINE_TAG)
        } else {
            *interner.get(U256Idx::from_usize(self.0 as usize))
        }
    }

    /// Returns the inline `u32` value, if any; `None` for interned handles.
    #[inline]
    pub(crate) fn get_inline(self) -> Option<u32> {
        (self.0 & Self::INLINE_TAG != 0).then_some(self.0 & !Self::INLINE_TAG)
    }
}
122
123impl std::fmt::Debug for U256Imm {
124    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
125        match self.get_inline() {
126            Some(x) => write!(f, "U256Imm::Inline({x})"),
127            None => write!(f, "U256Imm::Idx({})", self.0),
128        }
129    }
130}
131
bitflags::bitflags! {
    /// Controls which analysis passes run during [`Bytecode::analyze`].
    #[derive(Clone, Copy, Debug)]
    pub(crate) struct AnalysisConfig: u8 {
        /// Run block deduplication.
        const DEDUP = 1 << 0;
        /// The stack is observable outside the function (`inspect_stack` mode).
        /// When set, DSE must not assume diverging terminators kill the stack.
        const INSPECT_STACK = 1 << 1;
        /// Run dead store elimination.
        const DSE = 1 << 2;

        /// All passes enabled.
        ///
        /// `INSPECT_STACK` is a behavior flag rather than a pass, so it is deliberately
        /// not part of `ALL`.
        const ALL = Self::DEDUP.bits() | Self::DSE.bits();
    }
}

impl Default for AnalysisConfig {
    /// Defaults to all passes enabled ([`AnalysisConfig::ALL`]).
    #[inline]
    fn default() -> Self {
        Self::ALL
    }
}

/// Default compiler gas limit for compile-time evaluation (100k gas).
///
/// See [`Bytecode::compiler_gas_limit`] for the rationale behind the budget.
pub(crate) const DEFAULT_COMPILER_GAS_LIMIT: u64 = 100_000;
158
/// EVM bytecode.
#[doc(hidden)] // Not public API.
pub struct Bytecode<'a> {
    /// The original bytecode. May be borrowed from the caller or owned.
    pub(crate) code: Cow<'a, [u8]>,
    /// The instructions, one per decoded opcode (plus possible `STOP` padding).
    insts: IndexVec<Inst, InstData>,
    /// `JUMPDEST` opcode map. `jumpdests[pc]` is `true` if `code[pc] == op::JUMPDEST`.
    jumpdests: BitVec,
    /// The [`SpecId`].
    pub(crate) spec_id: SpecId,
    /// Gas parameters for dynamic gas folding. Defaults to `GasParams::new_spec(spec_id)`.
    pub(crate) gas_params: GasParams,
    /// Whether the bytecode contains dynamic jumps.
    has_dynamic_jumps: bool,
    /// Whether the bytecode may suspend execution.
    may_suspend: bool,
    /// Mapping from program counter to instruction.
    pc_to_inst: FxHashMap<u32, Inst>,
    /// Mapping from instruction index to its program counter (`code[pc]` is the opcode byte).
    ///
    /// `JUMPDEST` and `PC` mirror this in `data` for hot codegen access; everything else
    /// reads it through [`Bytecode::pc`].
    inst_to_pc: IndexVec<Inst, u32>,

    /// Deduplicated constant pool for U256 values.
    ///
    /// Wrapped in a `RefCell` so read-only queries (e.g. [`Bytecode::const_output`]) can
    /// resolve constants through `&self`.
    u256_interner: RefCell<U256Interner>,

    /// Per-instruction operand snapshots computed by block analysis.
    snapshots: Snapshots,
    /// Per-instruction memory sections computed by section analysis.
    memory_sections: IndexVec<Inst, MemorySection>,
    /// Multi-target jump table: maps a JUMP/JUMPI instruction to its set of known targets.
    /// Only populated for jumps resolved to multiple targets by block analysis.
    multi_jump_targets: FxHashMap<Inst, SmallVec<[Inst; 4]>>,

    /// Instruction index to 1-based line number in the formatted dump, built during formatting.
    inst_lines: RefCell<IndexVec<Inst, u32>>,

    /// Dead-block redirects: maps the first instruction of a dead/merged block to the target
    /// instruction. Used mainly for fallthrough jumps.
    pub(crate) redirects: FxHashMap<Inst, Inst>,
    /// Basic-block CFG, rebuilt by [`Bytecode::rebuild_cfg`].
    cfg: Cfg,
    /// Controls which analysis passes are enabled.
    pub(crate) config: AnalysisConfig,
    /// Gas budget for compile-time evaluation of user-supplied bytecode.
    ///
    /// The compiler evaluates EVM operations at compile time during analysis passes. Without a
    /// budget, adversarial bytecode (e.g. thousands of `EXP(U256::MAX, U256::MAX)`) can make
    /// compilation arbitrarily slow. This limit uses the EVM gas schedule to bound work.
    ///
    /// When exhausted, further compile-time evaluation is skipped (values remain dynamic).
    /// Defaults to [`DEFAULT_COMPILER_GAS_LIMIT`].
    pub(crate) compiler_gas_limit: u64,
    /// Cumulative compiler gas consumed so far.
    pub(crate) compiler_gas_used: u64,
}
217
218impl<'a> Bytecode<'a> {
219    pub(crate) fn new(
220        code: impl Into<Cow<'a, [u8]>>,
221        spec_id: SpecId,
222        gas_params: Option<GasParams>,
223    ) -> Self {
224        Self::new_mono(code.into(), spec_id, gas_params)
225    }
226
    /// Test-only constructor using the default test [`SpecId`] and default gas params.
    #[cfg(test)]
    pub(crate) fn test(code: impl Into<Cow<'a, [u8]>>) -> Self {
        Self::new(code, crate::tests::DEF_SPEC, None)
    }
231
232    #[instrument(name = "Bytecode::new", level = "debug", skip_all)]
233    fn new_mono(code: Cow<'a, [u8]>, spec_id: SpecId, gas_params: Option<GasParams>) -> Self {
234        let gas_params = gas_params.unwrap_or_else(|| GasParams::new_spec(spec_id));
235        let mut insts = IndexVec::with_capacity(code.len() + 8);
236        let mut inst_to_pc = IndexVec::with_capacity(code.len() + 8);
237        let mut jumpdests = BitVec::repeat(false, code.len());
238        let mut pc_to_inst = FxHashMap::with_capacity_and_hasher(code.len(), Default::default());
239        let mut u256_interner = U256Interner::new();
240        let op_infos = op_info_map(spec_id);
241        let mut iter = OpcodesIter::new(&code, spec_id).with_pc();
242        while let Some((pc, Opcode { opcode, immediate })) = iter.next() {
243            let inst: Inst = insts.next_idx();
244            pc_to_inst.insert(pc as u32, inst);
245            inst_to_pc.push(pc as u32);
246
247            if opcode == op::JUMPDEST {
248                jumpdests.set(pc, true)
249            }
250
251            let mut flags = InstFlags::empty();
252            let info = op_infos[opcode as usize];
253            if info.is_unknown() {
254                flags |= InstFlags::UNKNOWN;
255            }
256            if info.is_disabled() {
257                flags |= InstFlags::DISABLED;
258            }
259            let (base_gas, stack_io) = if info.is_unknown() || info.is_disabled() {
260                (0, (0, 0))
261            } else {
262                (info.base_gas(), compute_stack_io(opcode, immediate))
263            };
264
265            let data = match opcode {
266                op::PUSH0..=op::PUSH32 | op::DUPN | op::SWAPN | op::EXCHANGE => {
267                    let imm_len = min_imm_len(opcode) as usize;
268                    // `OpcodesIter` returns `None` for truncated EOF immediates; recover
269                    // the available bytes directly from `code` and right-pad with zeros
270                    // per EVM spec.
271                    let start = pc + 1;
272                    let avail = code.get(start..).map(|s| &s[..s.len().min(imm_len)]);
273                    let value = match avail {
274                        Some(slice) if slice.len() == imm_len => U256::from_be_slice(slice),
275                        Some(slice) if !slice.is_empty() => {
276                            let mut padded = [0u8; 32];
277                            padded[..slice.len()].copy_from_slice(slice);
278                            U256::from_be_slice(&padded[..imm_len])
279                        }
280                        _ => U256::ZERO,
281                    };
282                    U256Imm::new(value, &mut u256_interner).to_raw()
283                }
284                op::JUMPDEST | op::PC => {
285                    let pc = pc as u32;
286                    debug_assert!(pc & InstData::JUMPDEST_REACHABLE == 0, "pc too large");
287                    pc
288                }
289                _ => 0,
290            };
291
292            let gas_section = GasSection::default();
293            let stack_section = StackSection::default();
294
295            insts.push(InstData {
296                opcode,
297                flags,
298                base_gas,
299                stack_io,
300                data,
301                gas_section,
302                stack_section,
303            });
304
305            // EIP-8024: JUMPDEST analysis is unchanged by DUPN/SWAPN/EXCHANGE. Their
306            // immediate byte is NOT masked, so 0x5b in that position is a valid jump
307            // target. When the immediate is 0x5b, rewind the iterator so it is re-yielded
308            // and processed as a normal JUMPDEST by the loop.
309            if matches!(opcode, op::DUPN | op::SWAPN | op::EXCHANGE)
310                && !info.is_unknown()
311                && !info.is_disabled()
312                && immediate == Some(&[op::JUMPDEST])
313            {
314                // SAFETY: we just consumed the 1-byte immediate.
315                unsafe { iter.rewind(1) };
316            }
317        }
318
319        // Pad code to ensure there is at least one diverging instruction.
320        if insts.last().is_none_or(|last| last.can_fall_through()) {
321            trace!("adding STOP padding");
322            insts.push(InstData::new(op::STOP));
323            inst_to_pc.push(code.len() as u32);
324        }
325
326        Self {
327            code,
328            insts,
329            jumpdests,
330            spec_id,
331            gas_params,
332            has_dynamic_jumps: false,
333            may_suspend: false,
334            snapshots: Snapshots::default(),
335            memory_sections: IndexVec::new(),
336            u256_interner: RefCell::new(u256_interner),
337            multi_jump_targets: FxHashMap::default(),
338            pc_to_inst,
339            inst_to_pc,
340            inst_lines: RefCell::new(IndexVec::new()),
341            redirects: FxHashMap::default(),
342            cfg: Cfg::default(),
343            config: AnalysisConfig::default(),
344            compiler_gas_limit: 100_000,
345            compiler_gas_used: 0,
346        }
347    }
348
349    /// Takes the instruction-to-line map built during formatting.
350    ///
351    /// If the bytecode has not been formatted yet, formats it first to build the map.
352    pub(crate) fn take_inst_lines(&self) -> IndexVec<Inst, u32> {
353        if self.inst_lines.borrow().is_empty() {
354            self.collect_lines();
355        }
356        self.inst_lines.take()
357    }
358
    /// Returns an iterator over the opcodes of the original bytecode.
    #[inline]
    #[allow(dead_code)]
    pub(crate) fn opcodes(&self) -> OpcodesIter<'_> {
        OpcodesIter::new(&self.code, self.spec_id)
    }

    /// Returns the instruction at the given instruction counter.
    #[inline]
    #[track_caller]
    pub(crate) fn inst(&self, inst: Inst) -> &InstData {
        &self.insts[inst]
    }

    /// Returns a mutable reference to the instruction at the given instruction counter.
    #[inline]
    #[track_caller]
    #[allow(dead_code)]
    pub(crate) fn inst_mut(&mut self, inst: Inst) -> &mut InstData {
        &mut self.insts[inst]
    }
380
381    /// Returns the opcode + immediate slice for the given instruction.
382    #[inline]
383    pub(crate) fn opcode(&self, inst: Inst) -> Opcode<'_> {
384        Opcode { opcode: self.inst(inst).opcode, immediate: self.get_imm(inst) }
385    }
386
    /// Returns an iterator over the instructions, skipping dead code.
    #[inline]
    pub(crate) fn iter_insts(&self) -> impl DoubleEndedIterator<Item = (Inst, &InstData)> + Clone {
        self.iter_all_insts().filter(|(_, data)| !data.is_dead_code())
    }

    /// Returns a mutable iterator over the instructions, skipping dead code.
    #[inline]
    #[allow(dead_code)]
    pub(crate) fn iter_mut_insts(
        &mut self,
    ) -> impl DoubleEndedIterator<Item = (Inst, &mut InstData)> {
        self.iter_mut_all_insts().filter(|(_, data)| !data.is_dead_code())
    }

    /// Returns an iterator over all the instructions, including dead code.
    #[inline]
    pub(crate) fn iter_all_insts(
        &self,
    ) -> impl DoubleEndedIterator<Item = (Inst, &InstData)> + ExactSizeIterator + Clone {
        self.insts.iter_enumerated()
    }

    /// Returns a mutable iterator over all the instructions, including dead code.
    #[inline]
    #[allow(dead_code)]
    pub(crate) fn iter_mut_all_insts(
        &mut self,
    ) -> impl DoubleEndedIterator<Item = (Inst, &mut InstData)> + ExactSizeIterator {
        self.insts.iter_mut_enumerated()
    }
418
    /// Runs a list of analysis passes on the instructions.
    ///
    /// Each transforming step is followed by [`Self::mark_dead_code`] and
    /// [`Self::rebuild_cfg`] so later passes always observe an up-to-date instruction
    /// set and CFG.
    #[instrument(level = "debug", skip_all)]
    pub(crate) fn analyze(&mut self) -> Result<()> {
        self.recompute_has_dynamic_jumps();
        self.mark_dead_code();
        self.rebuild_cfg();

        self.block_analysis_local();
        self.mark_dead_code();
        self.rebuild_cfg();

        // Keep the local (per-block) snapshots around: both the global block analysis
        // and deduplication consume them.
        let local_snapshots = self.snapshots.clone();

        self.block_analysis(&local_snapshots);
        self.mark_dead_code();
        self.rebuild_cfg();

        if self.config.contains(AnalysisConfig::DEDUP) {
            self.dedup_blocks(&local_snapshots);
            self.mark_dead_code();
            self.rebuild_cfg();
        }
        drop(local_snapshots);

        if self.config.contains(AnalysisConfig::DSE) {
            self.dead_store_elim();
        }

        self.calc_may_suspend();

        self.construct_sections();
        self.construct_memory_sections();

        debug!(
            compiler_gas_used = self.compiler_gas_used,
            compiler_gas_limit = self.compiler_gas_limit,
            "constant folding gas budget",
        );

        // Stats collection walks all instructions; only pay for it when tracing.
        if tracing::enabled!(tracing::Level::TRACE) {
            self.log_ir_stats();
            self.log_const_input_stats();
        }

        Ok(())
    }
465
    /// Mark unreachable instructions as `DEAD_CODE` to not generate any code for them.
    ///
    /// This pass is technically unnecessary as the backend will very likely optimize any
    /// unreachable code that we generate, but this is trivial for us to do and significantly speeds
    /// up code generation.
    ///
    /// We can simply mark all instructions that are between diverging instructions and
    /// `JUMPDEST`s.
    #[instrument(name = "dce", level = "debug", skip_all)]
    fn mark_dead_code(&mut self) {
        let mut iter = self.insts.iter_mut_enumerated();
        while let Some((i, data)) = iter.next() {
            if !data.can_fall_through() {
                // Everything after a diverging instruction is dead until the next
                // reachable jump destination.
                let mut end = i;
                // Track whether this call marked anything new, so repeated invocations
                // (analyze runs this pass several times) don't re-log the same ranges.
                let mut any_new = false;
                for (j, data) in &mut iter {
                    end = j;
                    if data.is_reachable_jumpdest(self.has_dynamic_jumps) {
                        break;
                    }
                    if !data.flags.contains(InstFlags::DEAD_CODE) {
                        any_new = true;
                    }
                    data.flags |= InstFlags::DEAD_CODE;
                }
                let start = i + 1;
                if any_new && end > start {
                    debug!("found dead code: {start}..{end}");
                }
            }
        }
    }
498
    /// Calculates whether the bytecode may suspend execution.
    ///
    /// This can only happen if the bytecode contains `*CALL*` or `*CREATE*` instructions.
    #[instrument(name = "suspend", level = "debug", skip_all)]
    fn calc_may_suspend(&mut self) {
        // Two statements so the immutable borrow from `iter_insts` ends before the
        // field write.
        let may_suspend = self.iter_insts().any(|(_, data)| data.may_suspend());
        self.may_suspend = may_suspend;
    }
507
    /// Constructs the sections in the bytecode.
    #[instrument(name = "sections", level = "debug", skip_all)]
    fn construct_sections(&mut self) {
        let mut analysis = SectionsAnalysis::default();
        // Iterate by index so `self` can be passed to `process` without holding a
        // borrow of `self.insts` across the call.
        for inst in self.insts.indices() {
            if !self.inst(inst).is_dead_code() {
                analysis.process(self, inst);
            }
        }
        analysis.finish(self);
    }
519
    /// Constructs the memory sections in the bytecode, replacing any previous result.
    #[instrument(name = "memory_sections", level = "debug", skip_all)]
    fn construct_memory_sections(&mut self) {
        self.memory_sections = MemorySectionAnalysis::new(self).run(self);
    }
525
    /// Returns the immediate value of the given instruction, if any.
    ///
    /// For truncated immediates at EOF, returns the available bytes (which may be shorter than
    /// `imm_len`). Returns `None` only when `imm_len` is 0 or `start` is completely out of bounds.
    pub(crate) fn get_imm(&self, inst: Inst) -> Option<&[u8]> {
        let data = self.inst(inst);
        let imm_len = data.imm_len() as usize;
        if imm_len == 0 {
            return None;
        }
        // The immediate starts right after the opcode byte; clamp the end to the code
        // length to tolerate immediates truncated by end-of-code.
        let start = self.pc(inst) as usize + 1;
        let end = (start + imm_len).min(self.code.len());
        if start >= end {
            return None;
        }
        Some(&self.code[start..end])
    }
543
    /// Returns the program counter (offset into the original bytecode) of the given instruction.
    #[inline]
    pub(crate) fn pc(&self, inst: Inst) -> u32 {
        self.inst_to_pc[inst]
    }

    /// Returns the value of a PUSH instruction, right-padding truncated EOF immediates with zeros
    /// per EVM spec.
    ///
    /// Test-only helper; debug-asserts that `data` is actually a PUSH.
    #[cfg(test)]
    pub(crate) fn get_push_value(&self, data: &InstData) -> U256 {
        debug_assert!(matches!(data.opcode, op::PUSH0..=op::PUSH32));
        data.imm().get(&self.u256_interner.borrow())
    }

    /// Returns the length of the original bytecode in bytes.
    #[inline]
    pub(crate) fn codesize(&self) -> usize {
        self.code.len()
    }
563
564    /// Returns `true` if the given program counter is a valid jump destination.
565    fn is_valid_jump(&self, pc: usize) -> bool {
566        self.jumpdests.get(pc).as_deref().copied() == Some(true)
567    }
568
    /// Returns `true` if the bytecode has dynamic jumps.
    pub(crate) fn has_dynamic_jumps(&self) -> bool {
        self.has_dynamic_jumps
    }

    /// Returns `true` if the bytecode may suspend execution, to be resumed later.
    pub(crate) fn may_suspend(&self) -> bool {
        self.may_suspend
    }

    /// Returns `true` if the stack argument's contents are observed by the caller after the
    /// function returns (either through `inspect_stack` mode or because execution may suspend).
    pub(crate) fn stack_observed(&self) -> bool {
        self.config.contains(AnalysisConfig::INSPECT_STACK) || self.may_suspend()
    }

    /// Returns `true` if any dead-block redirects exist.
    pub(crate) fn has_redirects(&self) -> bool {
        !self.redirects.is_empty()
    }

    /// Returns `true` if the instruction is diverging (cannot fall through).
    pub(crate) fn is_instr_diverging(&self, inst: Inst) -> bool {
        self.insts[inst].is_diverging()
    }
594
595    /// Converts a program counter (`self.code[pc]`) to an instruction (`self.inst(inst)`).
596    #[inline]
597    pub(crate) fn pc_to_inst(&self, pc: usize) -> Inst {
598        match self.pc_to_inst.get(&(pc as u32)) {
599            Some(&inst) => inst,
600            None => panic!("pc out of bounds: {pc}"),
601        }
602    }
603
604    /// Returns the multi-target jump table entry for the given instruction, if any.
605    pub(crate) fn multi_jump_targets(&self, inst: Inst) -> Option<&[Inst]> {
606        self.multi_jump_targets.get(&inst).map(|v| v.as_slice())
607    }
608
    /// Returns the known constant value of a stack operand at the given instruction.
    ///
    /// `depth` 0 is TOS (first popped by this instruction), 1 is second, etc.
    /// Returns `None` if the value is unknown or the analysis didn't cover this instruction.
    #[allow(dead_code)]
    pub(crate) fn const_operand(&self, inst: Inst, depth: usize) -> Option<U256> {
        let snap = &self.snapshots.inputs[inst];
        // The snapshot is ordered bottom-to-top, so TOS is the last element; `checked_sub`
        // bails out (returns `None`) when the snapshot is shallower than `depth`.
        let imm = snap.get(snap.len().checked_sub(1 + depth)?)?.as_const()?;
        Some(imm.get(&self.u256_interner.borrow()))
    }
619
620    /// Returns the known constant output value of the given instruction.
621    ///
622    /// Returns `None` if the output is unknown, the instruction has no output, or the
623    /// analysis didn't cover this instruction.
624    pub(crate) fn const_output(&self, inst: Inst) -> Option<U256> {
625        let imm = self.snapshots.outputs[inst]?.as_const()?;
626        Some(imm.get(&self.u256_interner.borrow()))
627    }
628
629    /// Returns the first constant EVM memory access `(offset, len)` for the given instruction.
630    #[allow(dead_code)]
631    pub(crate) fn const_memory_access(&self, inst: Inst) -> Option<(Option<u64>, Option<u64>)> {
632        self.const_memory_accesses(inst).into_iter().flatten().next()
633    }
634
    /// Returns the constant EVM memory accesses `(offset, len)` for the given instruction.
    ///
    /// Up to two accesses are returned (CALL-family opcodes have both an args region and a
    /// return-data region); `None` entries in the tuple mean the operand is not a known
    /// constant (or does not fit in `u64`).
    pub(crate) fn const_memory_accesses(
        &self,
        inst: Inst,
    ) -> [Option<(Option<u64>, Option<u64>)>; 2] {
        let data = self.inst(inst);
        if data.flags.intersects(InstFlags::DISABLED | InstFlags::UNKNOWN) {
            return [None, None];
        }

        // Resolve a stack operand to a `u64` if it is a known constant that fits.
        let operand =
            |depth| self.const_operand(inst, depth).and_then(|value| u64::try_from(value).ok());
        match data.opcode {
            op::KECCAK256 | op::RETURN | op::REVERT | op::LOG0 => {
                [Some((operand(0), operand(1))), None]
            }
            // MLOAD/MSTORE always touch exactly one word.
            op::MLOAD | op::MSTORE => [Some((operand(0), Some(32))), None],
            op::MSTORE8 => [Some((operand(0), Some(1))), None],
            op::CALLDATACOPY | op::CODECOPY | op::RETURNDATACOPY => {
                [Some((operand(0), operand(2))), None]
            }
            op::EXTCODECOPY => [Some((operand(1), operand(3))), None],
            // MCOPY reads `src` and writes `dst`; the larger offset conservatively covers
            // both regions.
            op::MCOPY => [
                Some((operand(0).zip(operand(1)).map(|(dst, src)| dst.max(src)), operand(2))),
                None,
            ],
            // LOGn: the data (offset, len) sit below the n topic operands.
            op::LOG1 => [Some((operand(1), operand(2))), None],
            op::LOG2 => [Some((operand(2), operand(3))), None],
            op::LOG3 => [Some((operand(3), operand(4))), None],
            op::LOG4 => [Some((operand(4), operand(5))), None],
            op::CREATE | op::CREATE2 => [Some((operand(1), operand(2))), None],
            // CALL-family: args region and return-data region.
            op::CALL | op::CALLCODE => {
                [Some((operand(3), operand(4))), Some((operand(5), operand(6)))]
            }
            op::DELEGATECALL | op::STATICCALL => {
                [Some((operand(2), operand(3))), Some((operand(4), operand(5)))]
            }
            _ => [None, None],
        }
    }
675
676    /// Returns the memory section for the given instruction.
677    #[allow(dead_code)]
678    pub(crate) fn memory_section(&self, inst: Inst) -> MemorySection {
679        self.memory_sections.get(inst).copied().unwrap_or_default()
680    }
681
    /// Logs per-opcode constant-input statistics at trace level.
    #[inline(never)]
    fn log_const_input_stats(&self) {
        use op::*;
        use std::fmt::Write;

        // per_input[depth] = [total, const_count, fits_usize_count].
        struct Entry {
            inputs: usize,
            outputs: usize,
            total: u32,
            all_const: u32,
            const_output: u32,
            per_input: Vec<[u32; 3]>,
        }
        // One optional entry per opcode byte.
        let mut stats = [const { None }; 256];
        for (inst, data) in self.iter_insts() {
            let (inputs, outputs) = data.stack_io();
            // Skip pure push/stack-shuffling opcodes; their const-ness is not informative.
            if matches!(data.opcode, PUSH0..=PUSH32 | DUP1..=DUP16 | SWAP1..=SWAP16 | DUPN | SWAPN)
            {
                continue;
            }
            let entry = stats[data.opcode as usize].get_or_insert_with(|| Entry {
                inputs: inputs as usize,
                outputs: outputs as usize,
                total: 0,
                all_const: 0,
                const_output: 0,
                per_input: vec![[0u32; 3]; inputs as usize],
            });
            entry.total += 1;
            let mut all = true;
            for depth in 0..inputs as usize {
                entry.per_input[depth][0] += 1;
                if let Some(val) = self.const_operand(inst, depth) {
                    entry.per_input[depth][1] += 1;
                    if val <= usize::MAX {
                        entry.per_input[depth][2] += 1;
                    }
                } else {
                    all = false;
                }
            }
            if all {
                entry.all_const += 1;
            }
            if self.const_output(inst).is_some() {
                entry.const_output += 1;
            }
        }

        // Render everything into one buffer so it becomes a single trace event.
        let mut buf = String::from("const input stats:");
        for (opcode, entry) in stats.iter().enumerate() {
            let Some(entry) = entry else { continue };
            if entry.total == 0 {
                continue;
            }
            let name = OpCode::new(opcode as u8)
                .map_or_else(|| format!("0x{opcode:02x}"), |o| o.as_str().to_string());
            let all_pct = entry.all_const as f64 / entry.total as f64 * 100.0;
            let _ = write!(
                buf,
                "\n  {name:<16} 0x{opcode:02x}, {total} occ, {inputs} inputs, all_const={ac}/{total} ({all_pct:.0}%)",
                opcode = opcode,
                total = entry.total,
                inputs = entry.inputs,
                ac = entry.all_const,
            );
            if entry.outputs > 0 {
                let co = entry.const_output;
                let co_pct = co as f64 / entry.total as f64 * 100.0;
                let _ = write!(buf, ", const_output={co}/{t} ({co_pct:.0}%)", t = entry.total);
            }
            for (i, [t, c, usize_c]) in entry.per_input.iter().enumerate() {
                if *t > 0 {
                    let pct = *c as f64 / *t as f64 * 100.0;
                    let usize_pct = *usize_c as f64 / *t as f64 * 100.0;
                    let _ = write!(
                        buf,
                        "\n    [{i}]: const={c}/{t} ({pct:.0}%), fits_usize={usize_c}/{t} ({usize_pct:.0}%)",
                    );
                }
            }
        }
        trace!("{buf}");
    }
768
    /// Collects IR statistics: instruction counts and block size distribution.
    fn ir_stats(&self) -> IrStats {
        let total = self.insts.len();
        let mut live = 0usize;
        let mut noops = 0usize;
        let mut dead = 0usize;
        let mut suspends = 0usize;
        for (_inst, data) in self.iter_all_insts() {
            if data.is_dead_code() {
                dead += 1;
            } else {
                live += 1;
                // Noop/suspend counts are only meaningful among live instructions.
                if data.flags.contains(InstFlags::NOOP) {
                    noops += 1;
                }
                if data.may_suspend() {
                    suspends += 1;
                }
            }
        }

        let mut lens: Vec<usize> = self.cfg.blocks.iter().map(|data| data.insts().len()).collect();
        let n = lens.len();
        lens.sort_unstable();
        let block_min = lens.first().copied().unwrap_or(0);
        let block_max = lens.last().copied().unwrap_or(0);
        let sum: usize = lens.iter().sum();
        let block_avg = if n > 0 { sum as f64 / n as f64 } else { 0.0 };
        // Upper median for even `n`; exact median for odd `n`.
        let block_median = if n > 0 { lens[n / 2] } else { 0 };

        IrStats {
            total,
            live,
            dead,
            noops,
            suspends,
            blocks: n,
            block_min,
            block_max,
            block_avg,
            block_median,
        }
    }
812
813    /// Logs IR statistics and top 5 longest blocks at trace level.
814    #[inline(never)]
815    fn log_ir_stats(&self) {
816        use std::fmt::Write;
817
818        let s = self.ir_stats();
819        trace!(
820            total_insts = s.total,
821            live = s.live,
822            dead = s.dead,
823            noops = s.noops,
824            suspends = s.suspends,
825            blocks = s.blocks,
826            block_min = s.block_min,
827            block_max = s.block_max,
828            block_avg = format_args!("{:.1}", s.block_avg),
829            block_median = s.block_median,
830            "ir stats",
831        );
832
833        let mut lens: Vec<(Block, usize)> = self
834            .cfg
835            .blocks
836            .iter_enumerated()
837            .map(|(block, data)| (block, data.insts().len()))
838            .collect();
839        lens.sort_unstable_by_key(|b| std::cmp::Reverse(b.1));
840        lens.truncate(5);
841        let mut buf = String::from("top 5 longest blocks:");
842        for (block, len) in &lens {
843            let data = &self.cfg.blocks[*block];
844            let _ = write!(buf, "\n  {block} (pc={}, len={len})", self.pc(data.insts.start));
845        }
846        trace!("{buf}");
847    }
848
849    /// Returns the name for a basic block.
850    pub(crate) fn op_block_name(&self, inst: Option<Inst>, name: &str) -> String {
851        use std::fmt::Write;
852
853        let Some(inst) = inst else {
854            return format!("entry.{name}");
855        };
856        let data = self.inst(inst);
857
858        let mut s = String::with_capacity(64);
859        if let Some(block) = self.cfg.inst_to_block[inst] {
860            let _ = write!(s, "{block}.");
861        }
862        let _ = write!(s, "{inst}.{}", data.to_op());
863        if !name.is_empty() {
864            let _ = write!(s, ".{name}");
865        }
866        s
867    }
868}
869
/// Summary IR statistics for a compiled bytecode.
///
/// Produced by `ir_stats` and logged by `log_ir_stats`; all counts are in instructions.
pub(crate) struct IrStats {
    /// Total number of instructions, including dead code.
    pub(crate) total: usize,
    /// Instructions that are not dead code.
    pub(crate) live: usize,
    /// Instructions marked as dead code ([`InstFlags::DEAD_CODE`]).
    pub(crate) dead: usize,
    /// Live instructions flagged as no-ops ([`InstFlags::NOOP`]).
    pub(crate) noops: usize,
    /// Live instructions that may suspend execution ([`InstData::may_suspend`]).
    pub(crate) suspends: usize,
    /// Number of basic blocks in the CFG.
    pub(crate) blocks: usize,
    /// Smallest block length (0 if there are no blocks).
    pub(crate) block_min: usize,
    /// Largest block length (0 if there are no blocks).
    pub(crate) block_max: usize,
    /// Mean block length (0.0 if there are no blocks).
    pub(crate) block_avg: f64,
    /// Upper-median block length (0 if there are no blocks).
    pub(crate) block_median: usize,
}
883
/// A single instruction in the bytecode.
#[derive(Clone, Default)]
pub(crate) struct InstData {
    /// The opcode byte.
    pub(crate) opcode: u8,
    /// Analysis and codegen flags; see [`InstFlags`].
    pub(crate) flags: InstFlags,
    /// The base gas cost of the opcode.
    ///
    /// This may not be the final/full gas cost of the opcode as it may also have a dynamic cost.
    base_gas: u16,
    /// Stack inputs and outputs; queried via [`InstData::stack_io`], initialized from the
    /// free function `stack_io(opcode)` in [`InstData::new`].
    stack_io: (u8, u8),
    /// Instruction-specific data, populated at parse time and refined by analysis.
    /// Interpretation depends on `opcode`; access via the typed methods on [`InstData`]:
    /// - `PUSH0..=PUSH32`, `DUPN | SWAPN | EXCHANGE`: encoded [`U256Imm`].
    /// - `PC`: program counter of this instruction.
    /// - `JUMPDEST`: program counter in low 31 bits, reachable flag in high bit
    ///   ([`InstData::JUMPDEST_REACHABLE`]).
    /// - `JUMP | JUMPI` with [`InstFlags::STATIC_JUMP`]: target instruction index.
    /// - otherwise: unused.
    pub(crate) data: u32,
    /// The gas section this instruction belongs to.
    pub(crate) gas_section: GasSection,
    /// The stack section this instruction belongs to.
    pub(crate) stack_section: StackSection,
}
911
// Allows comparing an instruction directly against an opcode byte, e.g. `data == op::JUMP`.
impl PartialEq<u8> for InstData {
    #[inline]
    fn eq(&self, other: &u8) -> bool {
        self.opcode == *other
    }
}
918
// Mirror impl so the comparison also works with the opcode byte on the left-hand side.
impl PartialEq<InstData> for u8 {
    #[inline]
    fn eq(&self, other: &InstData) -> bool {
        *self == other.opcode
    }
}
925
926impl InstData {
927    /// Creates a new instruction data with the given opcode byte.
928    /// Note that this may not be a valid instruction.
929    #[inline]
930    fn new(opcode: u8) -> Self {
931        Self { opcode, stack_io: stack_io(opcode), ..Default::default() }
932    }
933
934    /// Returns the length of the immediate data of this instruction.
935    #[inline]
936    pub(crate) const fn imm_len(&self) -> u8 {
937        min_imm_len(self.opcode)
938    }
939
940    /// Returns the number of input and output stack elements of this instruction.
941    #[inline]
942    pub(crate) fn stack_io(&self) -> (u8, u8) {
943        self.stack_io
944    }
945
946    /// Converts this instruction to a raw opcode. Note that the immediate data is not resolved.
947    #[inline]
948    pub(crate) const fn to_op(&self) -> Opcode<'static> {
949        Opcode { opcode: self.opcode, immediate: None }
950    }
951
952    /// Returns `true` if this instruction is a jump instruction (`JUMP`/`JUMPI`).
953    #[inline]
954    pub(crate) fn is_jump(&self) -> bool {
955        matches!(self.opcode, op::JUMP | op::JUMPI)
956    }
957
958    /// Returns `true` if this instruction is a jump instruction (`JUMP`/`JUMPI`), and the
959    /// target known statically.
960    #[inline]
961    pub(crate) fn is_static_jump(&self) -> bool {
962        self.is_jump() && self.flags.contains(InstFlags::STATIC_JUMP)
963    }
964
965    /// Returns `true` if this instruction is a `JUMPDEST`.
966    #[inline]
967    pub(crate) const fn is_jumpdest(&self) -> bool {
968        self.opcode == op::JUMPDEST
969    }
970
971    /// Returns `true` if this instruction is a reachable `JUMPDEST`.
972    #[inline]
973    pub(crate) fn is_reachable_jumpdest(&self, has_dynamic_jumps: bool) -> bool {
974        self.is_jumpdest() && (has_dynamic_jumps || self.is_jumpdest_reachable())
975    }
976
977    /// Returns the [`U256Imm`] of a `PUSH*`/`DUPN`/`SWAPN`/`EXCHANGE` instruction.
978    #[inline]
979    pub(crate) fn imm(&self) -> U256Imm {
980        debug_assert!(matches!(
981            self.opcode,
982            op::PUSH0..=op::PUSH32 | op::DUPN | op::SWAPN | op::EXCHANGE
983        ));
984        U256Imm::from_raw(self.data)
985    }
986
987    /// Returns the inline `u8` immediate of a `DUPN`/`SWAPN`/`EXCHANGE` instruction.
988    #[inline]
989    pub(crate) fn imm_byte(&self) -> u8 {
990        debug_assert!(matches!(self.opcode, op::DUPN | op::SWAPN | op::EXCHANGE));
991        // Parse-time encoding for these opcodes always produces an inline `u8`.
992        self.imm().get_inline().unwrap().try_into().unwrap()
993    }
994
995    /// Returns the program counter stored inline for a `PC` instruction.
996    #[inline]
997    pub(crate) fn pc_imm(&self) -> u32 {
998        debug_assert_eq!(self.opcode, op::PC);
999        self.data
1000    }
1001
1002    /// High bit of `data` set when a `JUMPDEST` is a reachable static jump target.
1003    pub(crate) const JUMPDEST_REACHABLE: u32 = 1 << 31;
1004
1005    /// Returns the program counter of a `JUMPDEST`.
1006    #[inline]
1007    pub(crate) fn jumpdest_pc(&self) -> u32 {
1008        debug_assert_eq!(self.opcode, op::JUMPDEST);
1009        self.data & !Self::JUMPDEST_REACHABLE
1010    }
1011
1012    /// Returns `true` if this `JUMPDEST` is the target of a resolved static jump.
1013    #[inline]
1014    pub(crate) fn is_jumpdest_reachable(&self) -> bool {
1015        debug_assert_eq!(self.opcode, op::JUMPDEST);
1016        self.data & Self::JUMPDEST_REACHABLE != 0
1017    }
1018
1019    /// Marks this `JUMPDEST` as a reachable static jump target.
1020    #[inline]
1021    pub(crate) fn set_jumpdest_reachable(&mut self) {
1022        debug_assert_eq!(self.opcode, op::JUMPDEST);
1023        self.data |= Self::JUMPDEST_REACHABLE;
1024    }
1025
1026    /// Returns the static target of a `JUMP`/`JUMPI` with [`InstFlags::STATIC_JUMP`].
1027    #[inline]
1028    pub(crate) fn static_jump_target(&self) -> Inst {
1029        debug_assert!(self.is_static_jump());
1030        Inst::from_usize(self.data as usize)
1031    }
1032
1033    /// Sets the static jump target. Caller must also set [`InstFlags::STATIC_JUMP`].
1034    #[inline]
1035    pub(crate) fn set_static_jump_target(&mut self, target: Inst) {
1036        debug_assert!(self.is_jump());
1037        self.data = target.index() as u32;
1038    }
1039
1040    /// Returns `true` if this instruction starts a new stack section.
1041    #[inline]
1042    pub(crate) fn is_stack_section_head(&self) -> bool {
1043        self.flags.contains(InstFlags::STACK_SECTION_HEAD)
1044    }
1045
1046    /// Returns `true` if this instruction is dead code.
1047    pub(crate) fn is_dead_code(&self) -> bool {
1048        self.flags.contains(InstFlags::DEAD_CODE)
1049    }
1050
1051    /// Returns `true` if this instruction requires to know `gasleft()`.
1052    /// Note that this does not include CALL and CREATE.
1053    #[inline]
1054    pub(crate) fn requires_gasleft(&self, spec_id: SpecId) -> bool {
1055        // For SSTORE, see `revm_interpreter::gas::sstore_cost`.
1056        self.opcode == op::GAS
1057            || (self.opcode == op::SSTORE && spec_id.is_enabled_in(SpecId::ISTANBUL))
1058    }
1059
1060    /// Returns `true` if execution can fall through to the next sequential instruction.
1061    #[inline]
1062    pub(crate) fn can_fall_through(&self) -> bool {
1063        !self.is_diverging() && self.opcode != op::JUMP
1064    }
1065
1066    /// Returns `true` if we know that this instruction will branch or stop execution.
1067    #[inline]
1068    pub(crate) fn is_branching(&self) -> bool {
1069        self.is_jump() || self.is_diverging()
1070    }
1071
1072    /// Returns `true` if we know that this instruction will stop execution.
1073    #[inline]
1074    pub(crate) fn is_diverging(&self) -> bool {
1075        #[cfg(test)]
1076        if self.opcode == TEST_SUSPEND {
1077            return false;
1078        }
1079
1080        (self.opcode == op::JUMP && self.flags.contains(InstFlags::INVALID_JUMP))
1081            || self.flags.contains(InstFlags::DISABLED)
1082            || self.flags.contains(InstFlags::UNKNOWN)
1083            || matches!(
1084                self.opcode,
1085                op::STOP | op::RETURN | op::REVERT | op::INVALID | op::SELFDESTRUCT
1086            )
1087    }
1088
1089    /// Returns `true` if this instruction may suspend execution.
1090    #[inline]
1091    pub(crate) const fn may_suspend(&self) -> bool {
1092        #[cfg(test)]
1093        if self.opcode == TEST_SUSPEND {
1094            return true;
1095        }
1096
1097        matches!(
1098            self.opcode,
1099            op::CALL | op::CALLCODE | op::DELEGATECALL | op::STATICCALL | op::CREATE | op::CREATE2
1100        )
1101    }
1102}
1103
bitflags::bitflags! {
    /// [`InstData`] flags.
    #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
    pub(crate) struct InstFlags: u8 {
        /// The `JUMP`/`JUMPI` target is known at compile time.
        const STATIC_JUMP = 1 << 0;
        /// The jump target is known to be invalid.
        /// Always returns [`InstructionResult::InvalidJump`] at runtime.
        const INVALID_JUMP = 1 << 1;
        /// The jump has multiple known targets (see `Bytecode::multi_jump_targets`).
        /// The target value is still on the stack and must be popped and switched on at runtime.
        const MULTI_JUMP = 1 << 2;

        /// The instruction is disabled in this EVM version.
        /// Always returns [`InstructionResult::NotActivated`] at runtime.
        const DISABLED = 1 << 3;
        /// The instruction is unknown.
        /// Always returns [`InstructionResult::NotFound`] at runtime.
        const UNKNOWN = 1 << 4;

        /// Instruction is a no-op: skip generating logic, but keep the gas calculation.
        const NOOP = 1 << 5;
        /// This instruction starts a new stack section.
        const STACK_SECTION_HEAD = 1 << 6;
        /// Don't generate any code.
        const DEAD_CODE = 1 << 7;
    }
}
1132
/// Returns the backing storage of a [`BitVec`] as a raw byte slice.
fn bitvec_as_bytes<T: bitvec::store::BitStore, O: bitvec::order::BitOrder>(
    bitvec: &BitVec<T, O>,
) -> &[u8] {
    slice_as_bytes(bitvec.as_raw_slice())
}
1138
/// Reinterprets a slice of `T` as its underlying bytes.
fn slice_as_bytes<T>(a: &[T]) -> &[u8] {
    // SAFETY: `a.as_ptr()` points to a valid allocation of exactly `size_of_val(a)` bytes for
    // the lifetime of the returned slice, and `u8` has alignment 1 so any pointer is suitably
    // aligned. NOTE(review): this assumes `T` contains no padding bytes — reading padding as
    // `u8` would be undefined behavior. The only caller here passes integer bit-store slices
    // (no padding); confirm before adding callers with arbitrary `T`.
    unsafe { std::slice::from_raw_parts(a.as_ptr().cast(), std::mem::size_of_val(a)) }
}
1142
#[cfg(test)]
mod tests {
    use super::*;
    use revm_bytecode::opcode::OPCODE_INFO;

    #[test]
    fn test_suspend_is_free() {
        // The suspend test opcode must not collide with a real opcode.
        assert_eq!(OPCODE_INFO[TEST_SUSPEND as usize], None);
    }

    #[test]
    fn truncated_push_imm_right_pads_with_zeros() {
        let first = Inst::from_usize(0);

        // PUSH2 with only one immediate byte available — truncated at EOF.
        // EVM spec: missing bytes read as zero, so the value is 0x4200.
        let bytecode = Bytecode::test(&[op::PUSH2, 0x42]);
        assert_eq!(bytecode.get_imm(first), Some([0x42].as_slice()));
        assert_eq!(bytecode.get_push_value(bytecode.inst(first)), U256::from(0x4200));

        // PUSH3 with two of its three immediate bytes — truncated at EOF.
        let bytecode = Bytecode::test(&[op::PUSH3, 0xAB, 0xCD]);
        assert_eq!(bytecode.get_imm(first), Some([0xAB, 0xCD].as_slice()));
        assert_eq!(bytecode.get_push_value(bytecode.inst(first)), U256::from(0xABCD00));

        // PUSH1 with no immediate bytes at all.
        let bytecode = Bytecode::test(&[op::PUSH1]);
        assert_eq!(bytecode.get_imm(first), None);
        assert_eq!(bytecode.get_push_value(bytecode.inst(first)), U256::ZERO);

        // Non-truncated PUSH2 — the full immediate is present.
        let bytecode = Bytecode::test(&[op::PUSH2, 0x42, 0xFF]);
        assert_eq!(bytecode.get_push_value(bytecode.inst(first)), U256::from(0x42FF));
    }
}
1186}