revmc_codegen/bytecode/passes/memory_sections.rs

use crate::bytecode::{Block, Bytecode, Inst, InstFlags};
use bitvec::vec::BitVec;
use core::fmt;
use oxc_index::{Idx, IndexVec, index_vec};
use std::collections::VecDeque;

/// Memory-size facts before a memory access.
#[derive(Clone, Copy, Default, PartialEq, Eq)]
pub(crate) struct MemorySection {
    /// A lower bound on memory size before the instruction executes.
    pub(crate) known_size: u64,
    /// The exact memory size required by this instruction; `0` when no exact size is known.
    pub(crate) required_size: u64,
    /// The exact size to pass directly to `mresize`.
    ///
    /// This is non-zero only when analysis proves that memory is smaller than this instruction's
    /// exact required size before the instruction executes, so codegen can call `mresize` without
    /// first loading and comparing `ecx.mem_len`.
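    ///
    /// For example, if memory is known to be exactly 0 bytes on entry and an `MSTORE` requires
    /// exactly 96 bytes, this is set to 96 so codegen can resize unconditionally.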
    pub(crate) direct_resize_size: u64,
}

impl fmt::Debug for MemorySection {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.is_empty() {
            f.write_str("MemorySection::EMPTY")
        } else {
            f.debug_struct("MemorySection")
                .field("known_size", &self.known_size)
                .field("required_size", &self.required_size)
                .field("direct_resize_size", &self.direct_resize_size)
                .finish()
        }
    }
}

impl MemorySection {
    /// Returns `true` if the section is empty.
    #[inline]
    pub(crate) fn is_empty(self) -> bool {
        self == Self::default()
    }
}

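/// A fixed-capacity bit set keyed by a typed index, mirroring `IndexVec`'s index types.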
#[derive(Clone)]
struct IndexBitSet<I: Idx> {
    bits: BitVec,
    _marker: std::marker::PhantomData<fn() -> I>,
}

impl<I: Idx> IndexBitSet<I> {
    fn new(len: usize) -> Self {
        Self { bits: BitVec::repeat(false, len), _marker: std::marker::PhantomData }
    }

    fn insert(&mut self, index: I) {
        self.bits.set(index.index(), true);
    }

    fn remove(&mut self, index: I) {
        self.bits.set(index.index(), false);
    }

    fn contains(&self, index: I) -> bool {
        self.bits[index.index()]
    }

    fn iter(&self) -> impl Iterator<Item = I> + '_ {
        self.bits.iter_ones().map(I::from_usize)
    }
}

/// FIFO worklist with deduplication.
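///
/// `push` is a no-op for a block that is already queued, so a block appears in the queue at
/// most once at a time during the fixpoint iteration.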
struct Worklist {
    queue: VecDeque<Block>,
    in_queue: IndexBitSet<Block>,
}

impl Worklist {
    fn new(size: usize) -> Self {
        Self { queue: VecDeque::new(), in_queue: IndexBitSet::new(size) }
    }

    fn push(&mut self, id: Block) {
        if !self.in_queue.contains(id) {
            self.in_queue.insert(id);
            self.queue.push_back(id);
        }
    }

    fn pop(&mut self) -> Option<Block> {
        let id = self.queue.pop_front()?;
        self.in_queue.remove(id);
        Some(id)
    }
}

#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
struct BlockMemoryState {
    /// Lower-bound memory size known at block entry.
    known_size: Option<u64>,
    /// Exact memory size known at block entry.
    ///
    /// This drops to `None` at joins where predecessor sizes differ (including loop back-edges)
    /// and after accesses of unknown size. It is used only to prove direct `mresize` calls.
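    ///
    /// For example, joining predecessors whose exact sizes are 96 and 160 keeps
    /// `known_size = Some(96)` but drops `exact_known_size` to `None`.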
    exact_known_size: Option<u64>,
}

impl BlockMemoryState {
    fn join(&mut self, incoming: Self) -> bool {
        let old = *self;
        *self = join_state(*self, incoming);
        // Report whether anything changed so the caller can requeue the block.
        *self != old
    }
}

/// Memory section analysis state.
pub(crate) struct MemorySectionAnalysis {
    /// Minimum memory size required by all constant-size accesses in each block.
    block_min_required_sizes: IndexVec<Block, u64>,
    /// Exact memory size required by all accesses in each block, if all are known.
    block_exact_required_sizes: IndexVec<Block, Option<u64>>,
    /// Memory facts known at each block entry.
    block_entry_states: IndexVec<Block, BlockMemoryState>,
    /// Blocks terminated by a valid dynamic jump.
    dynamic_jump_blocks: IndexBitSet<Block>,
    /// Blocks that start with a reachable `JUMPDEST`, i.e. potential dynamic jump targets.
    dynamic_jump_targets: IndexBitSet<Block>,
    /// The computed per-instruction sections.
    sections: IndexVec<Inst, MemorySection>,
}

impl MemorySectionAnalysis {
    pub(crate) fn new(bytecode: &Bytecode<'_>) -> Self {
        let mut dynamic_jump_blocks = IndexBitSet::new(bytecode.cfg.blocks.len());
        let mut dynamic_jump_targets = IndexBitSet::new(bytecode.cfg.blocks.len());
        if bytecode.has_dynamic_jumps() {
            for bid in bytecode.cfg.blocks.indices() {
                let block = &bytecode.cfg.blocks[bid];
                let head = bytecode.inst(block.insts.start);
                if head.is_reachable_jumpdest(true) {
                    dynamic_jump_targets.insert(bid);
                }

                let term = bytecode.inst(block.terminator());
                if term.is_jump()
                    && !term.is_static_jump()
                    && !term.flags.contains(InstFlags::INVALID_JUMP)
                {
                    dynamic_jump_blocks.insert(bid);
                }
            }
        }

        Self {
            block_min_required_sizes: index_vec![0; bytecode.cfg.blocks.len()],
            block_exact_required_sizes: index_vec![Some(0); bytecode.cfg.blocks.len()],
            block_entry_states: index_vec![BlockMemoryState::default(); bytecode.cfg.blocks.len()],
            dynamic_jump_blocks,
            dynamic_jump_targets,
            sections: index_vec![MemorySection::default(); bytecode.insts.len()],
        }
    }

    /// Runs memory section analysis.
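    ///
    /// Three phases: summarize each block's constant-size accesses, propagate entry states to a
    /// fixpoint across the CFG, then record a [`MemorySection`] per memory-accessing instruction.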
    pub(crate) fn run(mut self, bytecode: &Bytecode<'_>) -> IndexVec<Inst, MemorySection> {
        self.compute_block_memory_summaries(bytecode);
        self.compute_block_entry_states(bytecode);
        self.save_sections(bytecode);
        self.sections
    }

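    /// Computes, per block, the minimum memory size its constant-size accesses require and,
    /// when every access is fully constant, the exact size.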
    fn compute_block_memory_summaries(&mut self, bytecode: &Bytecode<'_>) {
        for bid in bytecode.cfg.blocks.indices() {
            let block = &bytecode.cfg.blocks[bid];
            let mut min_required_size = 0;
            let mut exact_required_size = Some(0);
            for inst in block.insts() {
                for (offset, len) in bytecode.const_memory_accesses(inst).into_iter().flatten() {
                    min_required_size =
                        min_required_size.max(min_memory_size_for_access(offset, len));
                    exact_required_size = exact_required_size
                        .zip(exact_memory_size_for_access(offset, len))
                        .map(|(block_size, access_size)| block_size.max(access_size));
                }
            }
            self.block_min_required_sizes[bid] = min_required_size;
            self.block_exact_required_sizes[bid] = exact_required_size;
        }
    }

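    /// Propagates block entry states across the CFG to a fixpoint.
    ///
    /// Dynamic jumps are approximated with a single shared state: every dynamic-jump block's
    /// exit state is joined into `dynamic_exit_state`, which flows into every reachable
    /// `JUMPDEST` block.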
    fn compute_block_entry_states(&mut self, bytecode: &Bytecode<'_>) {
        let entry = Block::from_usize(0);
        self.block_entry_states[entry] =
            BlockMemoryState { known_size: Some(0), exact_known_size: Some(0) };

        let dynamic_jump_targets = self.dynamic_jump_targets.clone();
        let mut dynamic_exit_state = BlockMemoryState::default();

        let mut worklist = Worklist::new(bytecode.cfg.blocks.len());
        worklist.push(entry);

        while let Some(bid) = worklist.pop() {
            let exit_state = self.block_exit_state(bid);
            if exit_state.known_size.is_none() {
                continue;
            }

            for &succ in &bytecode.cfg.blocks[bid].succs {
                self.update_entry_state(succ, exit_state, &mut worklist);
            }
            if self.dynamic_jump_blocks.contains(bid) && dynamic_exit_state.join(exit_state) {
                for succ in dynamic_jump_targets.iter() {
                    self.update_entry_state(succ, dynamic_exit_state, &mut worklist);
                }
            }
        }
    }

    fn update_entry_state(
        &mut self,
        bid: Block,
        incoming: BlockMemoryState,
        worklist: &mut Worklist,
    ) {
        if self.block_entry_states[bid].join(incoming) {
            worklist.push(bid);
        }
    }

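    /// The entry state of `bid` advanced past all of the block's own constant-size accesses.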
    fn block_exit_state(&self, bid: Block) -> BlockMemoryState {
        let state = self.block_entry_states[bid];
        let known_size =
            state.known_size.map(|known_size| known_size.max(self.block_min_required_sizes[bid]));
        let exact_known_size = state
            .exact_known_size
            .zip(self.block_exact_required_sizes[bid])
            .map(|(exact_known_size, required_size)| exact_known_size.max(required_size));
        BlockMemoryState { known_size, exact_known_size }
    }

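    /// Walks each block with its entry facts and records a [`MemorySection`] for every
    /// instruction whose memory-resize check can be skipped or simplified.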
    fn save_sections(&mut self, bytecode: &Bytecode<'_>) {
        for bid in bytecode.cfg.blocks.indices() {
            let state = self.block_entry_states[bid];
            let Some(mut known_size) = state.known_size else { continue };
            let mut exact_known_size = state.exact_known_size;
            let block = &bytecode.cfg.blocks[bid];

            for inst in block.insts() {
                for (offset, len) in bytecode.const_memory_accesses(inst).into_iter().flatten() {
                    let exact_required_size = exact_memory_size_for_access(offset, len);
                    let min_required_size = min_memory_size_for_access(offset, len);
                    trace!(
                        %bid,
                        %inst,
                        pc = bytecode.pc(inst),
                        opcode = %bytecode.inst(inst).to_op(),
                        ?offset,
                        ?len,
                        known_size,
                        min_required_size,
                        ?exact_required_size,
                        "memory access"
                    );
                    // Only record non-trivial facts; a default section means "no information".
                    if known_size != 0 || exact_required_size.is_some_and(|size| size != 0) {
                        let section = &mut self.sections[inst];
                        // An instruction can have several accesses (e.g. `CALL` input and
                        // output); keep the first, pre-instruction lower bound.
                        if section.known_size == 0 && section.required_size == 0 {
                            section.known_size = known_size;
                        }
                        if let Some(exact_required_size) = exact_required_size {
                            section.required_size = section.required_size.max(exact_required_size);
                            if exact_known_size.is_some_and(|size| size < exact_required_size) {
                                section.direct_resize_size =
                                    section.direct_resize_size.max(exact_required_size);
                            }
                        }
                    }
                    // Advance the running facts past this access.
                    known_size = known_size.max(min_required_size);
                    exact_known_size = exact_known_size.zip(exact_required_size).map(
                        |(exact_known_size, exact_required_size)| {
                            exact_known_size.max(exact_required_size)
                        },
                    );
                }
            }
        }
    }
}

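/// Joins a predecessor's exit state into a block's entry state.
///
/// Lower bounds meet at the minimum; the exact size survives only when both sides agree. A side
/// with no information at all is absorbed by the other.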
fn join_state(
    entry_state: BlockMemoryState,
    pred_exit_state: BlockMemoryState,
) -> BlockMemoryState {
    let Some(pred_known_size) = pred_exit_state.known_size else { return entry_state };
    let Some(entry_known_size) = entry_state.known_size else { return pred_exit_state };

    let known_size = Some(entry_known_size.min(pred_known_size));
    let exact_known_size = match (entry_state.exact_known_size, pred_exit_state.exact_known_size) {
        (Some(entry_size), Some(pred_size)) if entry_size == pred_size => Some(entry_size),
        _ => None,
    };
    BlockMemoryState { known_size, exact_known_size }
}

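/// The exact word-aligned memory size implied by an access, or `None` if the offset or length
/// is unknown. A zero-length access touches no memory, so it is exactly 0 whatever the offset.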
#[inline]
fn exact_memory_size_for_access(offset: Option<u64>, len: Option<u64>) -> Option<u64> {
    if matches!(len, Some(0)) {
        return Some(0);
    }
    let size = offset?.saturating_add(len?);
    Some(round_memory_size(size))
}

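/// A conservative lower bound on the memory size an access requires. With an unknown offset,
/// only the length is guaranteed: `len` bytes must fit somewhere, so at least `len` rounded up.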
fn min_memory_size_for_access(offset: Option<u64>, len: Option<u64>) -> u64 {
    match (offset, len) {
        (_, Some(0) | None) => 0,
        (Some(offset), Some(len)) => round_memory_size(offset.saturating_add(len)),
        (None, Some(len)) => round_memory_size(len),
    }
}

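/// Rounds `size` up to a multiple of the 32-byte EVM word: `1 -> 32`, `32 -> 32`, `33 -> 64`.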
#[inline]
fn round_memory_size(size: u64) -> u64 {
    size.saturating_add(31) / 32 * 32
}

#[cfg(test)]
mod tests {
    use super::{MemorySection, exact_memory_size_for_access, min_memory_size_for_access};
    use crate::bytecode::{
        Bytecode, Inst,
        passes::block_analysis::tests::{analyze_asm, analyze_asm_spec},
    };
    use revm_bytecode::opcode as op;
    use revm_primitives::hardfork::SpecId;

    fn nth_inst(bytecode: &Bytecode<'_>, opcode: u8, n: usize) -> Inst {
        bytecode
            .iter_all_insts()
            .filter_map(|(inst, data)| (data.opcode == opcode).then_some(inst))
            .nth(n)
            .unwrap()
    }

    fn section(bytecode: &Bytecode<'_>, inst: Inst) -> MemorySection {
        bytecode.memory_section(inst)
    }

    fn assert_section(bytecode: &Bytecode<'_>, inst: Inst, known_size: u64, required_size: u64) {
        let direct_resize_size = if known_size < required_size { required_size } else { 0 };
        assert_eq!(
            section(bytecode, inst),
            MemorySection { known_size, required_size, direct_resize_size }
        );
    }

    #[test]
    fn exact_memory_size_rounds_up_to_words() {
        assert_eq!(exact_memory_size_for_access(Some(0), Some(0)), Some(0));
        assert_eq!(exact_memory_size_for_access(Some(0), Some(1)), Some(32));
        assert_eq!(exact_memory_size_for_access(Some(1), Some(32)), Some(64));
        assert_eq!(exact_memory_size_for_access(Some(64), Some(32)), Some(96));
    }

    #[test]
    fn unknown_offset_or_len_has_no_exact_required_size() {
        assert_eq!(exact_memory_size_for_access(None, Some(32)), None);
        assert_eq!(exact_memory_size_for_access(Some(32), None), None);
        assert_eq!(exact_memory_size_for_access(None, None), None);
        assert_eq!(exact_memory_size_for_access(None, Some(0)), Some(0));
    }

    #[test]
    fn unknown_accesses_have_conservative_min_required_size() {
        assert_eq!(min_memory_size_for_access(None, Some(32)), 32);
        assert_eq!(min_memory_size_for_access(Some(32), None), 0);
        assert_eq!(min_memory_size_for_access(None, None), 0);
        assert_eq!(min_memory_size_for_access(None, Some(0)), 0);
    }
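
    // Sanity checks for the word-rounding helper shared by both size functions; the expected
    // values follow directly from `round_memory_size`'s definition.
    #[test]
    fn round_memory_size_word_aligns_and_saturates() {
        use super::round_memory_size;
        assert_eq!(round_memory_size(0), 0);
        assert_eq!(round_memory_size(1), 32);
        assert_eq!(round_memory_size(32), 32);
        assert_eq!(round_memory_size(33), 64);
        // `saturating_add` keeps the helper well-defined near `u64::MAX`.
        assert_eq!(round_memory_size(u64::MAX), u64::MAX / 32 * 32);
    }

    // Exercises the join lattice directly: lower bounds meet at the minimum, and exact sizes
    // survive only when both sides agree.
    #[test]
    fn join_keeps_minimum_and_drops_disagreeing_exact_sizes() {
        use super::{BlockMemoryState, join_state};
        let a = BlockMemoryState { known_size: Some(96), exact_known_size: Some(96) };
        let b = BlockMemoryState { known_size: Some(160), exact_known_size: Some(160) };
        assert_eq!(
            join_state(a, b),
            BlockMemoryState { known_size: Some(96), exact_known_size: None }
        );
        // A side with no information is absorbed by the other.
        assert_eq!(join_state(BlockMemoryState::default(), b), b);
        assert_eq!(join_state(a, BlockMemoryState::default()), a);
    }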

    #[test]
    fn same_block_accesses_use_previous_known_size() {
        let bytecode = analyze_asm("PUSH0 PUSH 64 MSTORE PUSH0 MLOAD STOP");
        let mstore = nth_inst(&bytecode, op::MSTORE, 0);
        let mload = nth_inst(&bytecode, op::MLOAD, 0);

        assert_section(&bytecode, mstore, 0, 96);
        assert_section(&bytecode, mload, 96, 32);
    }

    #[test]
    fn known_size_propagates_across_blocks() {
        let bytecode = analyze_asm(
            "
            PUSH0
            PUSH 64
            MSTORE
            PUSH %target
            JUMP
        target:
            JUMPDEST
            PUSH0
            MLOAD
            STOP
        ",
        );
        let mload = nth_inst(&bytecode, op::MLOAD, 0);
        assert_section(&bytecode, mload, 96, 32);
    }

    #[test]
    fn exact_size_propagates_across_blocks() {
        let bytecode = analyze_asm(
            "
            PUSH0
            PUSH 64
            MSTORE
            PUSH %target
            JUMP
        target:
            JUMPDEST
            PUSH0
            PUSH 128
            MSTORE
            STOP
        ",
        );
        let second_mstore = nth_inst(&bytecode, op::MSTORE, 1);
        assert_section(&bytecode, second_mstore, 96, 160);
    }

    #[test]
    fn branch_join_uses_minimum_predecessor_size() {
        let bytecode = analyze_asm(
            "
            PUSH0
            PUSH 64
            MSTORE
            PUSH 1
            PUSH %large
            JUMPI
            PUSH %join
            JUMP
        large:
            JUMPDEST
            PUSH0
            PUSH 128
            MSTORE
        join:
            JUMPDEST
            PUSH0
            MLOAD
            STOP
        ",
        );
        let mload = nth_inst(&bytecode, op::MLOAD, 0);
        assert_section(&bytecode, mload, 96, 32);
    }

    #[test]
    fn loop_backedge_does_not_inflate_entry_known_size() {
        let bytecode = analyze_asm(
            "
        loop:
            JUMPDEST
            PUSH 128
            MLOAD
            PUSH0
            PUSH 49984
            MSTORE
            PUSH %loop
            JUMP
        ",
        );
        let mload = nth_inst(&bytecode, op::MLOAD, 0);
        let mstore = nth_inst(&bytecode, op::MSTORE, 0);

        assert_eq!(
            section(&bytecode, mload),
            MemorySection { known_size: 0, required_size: 160, direct_resize_size: 0 }
        );
        assert_eq!(
            section(&bytecode, mstore),
            MemorySection { known_size: 160, required_size: 50016, direct_resize_size: 0 }
        );
    }

    #[test]
    fn unknown_nonzero_access_does_not_create_exact_section() {
        let bytecode = analyze_asm(
            "
            PUSH0
            PUSH 64
            MSTORE
            PUSH0
            CALLDATALOAD
            PUSH0
            SWAP1
            MSTORE
            STOP
        ",
        );
        let dynamic_mstore = nth_inst(&bytecode, op::MSTORE, 1);

        assert_eq!(bytecode.const_memory_access(dynamic_mstore), Some((None, Some(32))));
        assert_section(&bytecode, dynamic_mstore, 96, 0);
    }

    #[test]
    fn unknown_offset_known_len_propagates_minimum_size() {
        let bytecode = analyze_asm(
            "
            PUSH0
            CALLDATALOAD
            PUSH0
            SWAP1
            MSTORE
            PUSH0
            MLOAD
            STOP
        ",
        );
        let mload = nth_inst(&bytecode, op::MLOAD, 0);

        assert_section(&bytecode, mload, 32, 32);
    }

    #[test]
    fn zero_len_access_is_exact_even_with_unknown_offset() {
        let bytecode = analyze_asm("PUSH0 PUSH0 PUSH0 CALLDATALOAD CALLDATACOPY STOP");
        let copy = nth_inst(&bytecode, op::CALLDATACOPY, 0);

        assert_eq!(bytecode.const_memory_access(copy), Some((None, Some(0))));
        assert_eq!(section(&bytecode, copy), MemorySection::default());
    }

    #[test]
    fn log0_uses_memory_offset_and_len() {
        let bytecode = analyze_asm("PUSH 32 PUSH 64 LOG0 STOP");
        let log0 = nth_inst(&bytecode, op::LOG0, 0);

        assert_eq!(bytecode.const_memory_access(log0), Some((Some(64), Some(32))));
        assert_section(&bytecode, log0, 0, 96);
    }

    #[test]
    fn call_tracks_input_and_output_memory_ranges() {
        let bytecode = analyze_asm(
            "
            PUSH 64     ; out len
            PUSH 96     ; out offset
            PUSH 32     ; in len
            PUSH 0      ; in offset
            PUSH 0      ; value
            PUSH 1      ; to
            PUSH 50000  ; gas
            CALL
            STOP
        ",
        );
        let call = nth_inst(&bytecode, op::CALL, 0);

        assert_eq!(
            bytecode.const_memory_accesses(call),
            [Some((Some(0), Some(32))), Some((Some(96), Some(64)))]
        );
        assert_section(&bytecode, call, 0, 160);
    }

    #[test]
    fn staticcall_tracks_input_and_output_memory_ranges() {
        let bytecode = analyze_asm(
            "
            PUSH 32     ; out len
            PUSH 128    ; out offset
            PUSH 64     ; in len
            PUSH 0      ; in offset
            PUSH 1      ; to
            PUSH 50000  ; gas
            STATICCALL
            STOP
        ",
        );
        let call = nth_inst(&bytecode, op::STATICCALL, 0);

        assert_eq!(
            bytecode.const_memory_accesses(call),
            [Some((Some(0), Some(64))), Some((Some(128), Some(32)))]
        );
        assert_section(&bytecode, call, 0, 160);
    }

    #[test]
    fn create_tracks_initcode_memory_range() {
        let bytecode = analyze_asm("PUSH 32 PUSH 64 PUSH0 CREATE STOP");
        let create = nth_inst(&bytecode, op::CREATE, 0);

        assert_eq!(bytecode.const_memory_access(create), Some((Some(64), Some(32))));
        assert_section(&bytecode, create, 0, 96);
    }

    #[test]
    fn disabled_memory_opcode_is_ignored() {
        let bytecode = analyze_asm_spec("PUSH 32 PUSH0 PUSH0 MCOPY STOP", SpecId::SHANGHAI);
        let mcopy = nth_inst(&bytecode, op::MCOPY, 0);

        assert_eq!(bytecode.const_memory_access(mcopy), None);
        assert_eq!(section(&bytecode, mcopy), MemorySection::default());
    }
}