revmc_codegen/bytecode/passes/dedup.rs
//! Block deduplication pass.
//!
//! Identifies structurally identical non-fallthrough blocks (same opcode + immediate sequence)
//! and eliminates duplicates by marking them as dead code and redirecting predecessors to a
//! single canonical copy.
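//!
//! # Example
//!
//! A minimal illustration (assembly sketch, not tied to concrete instruction indices): two
//! branches that end in the same revert sequence collapse onto one canonical block.
//!
//! ```text
//! ; before                          ; after
//! a: PUSH1 0x00; DUP1; REVERT       a: PUSH1 0x00; DUP1; REVERT
//! b: PUSH1 0x00; DUP1; REVERT       b: (dead code, redirected to `a`)
//! ```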

use super::block_analysis::{Block, BlockData, Snapshots};
use crate::bytecode::{Bytecode, Inst, InstData, InstFlags};
use alloy_primitives::map::HashMap;
use oxc_index::IndexVec;
use revm_bytecode::opcode as op;
use smallvec::SmallVec;
use tracing::{debug, instrument, trace};

/// Dedup key: instruction-level structural fingerprint, CFG successor blocks, and terminator
/// jump kind.
///
/// The fingerprint encodes each instruction's opcode plus its (interned) immediate so that
/// PUSH operations with the same value, and DUPN/SWAPN/EXCHANGE with the same operand, hash
/// equal across blocks. Raw bytes are intentionally not consulted — all immediate data is
/// pre-interned at parse time on `InstData`.
///
/// Structural equality alone is insufficient for JUMP-terminated blocks because block
/// analysis may resolve identical copies to different static targets depending on incoming
/// stack context (e.g. different return-address values), so successors are part of the key.
///
/// The `is_multi_jump` discriminator prevents MULTI_JUMP dispatcher blocks from colliding
/// with STATIC_JUMP blocks that happen to share the same fingerprint and successor set.
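///
/// # Example (illustrative)
///
/// Assuming two blocks that both lower to `PUSH <target>; JUMP` with the same interned
/// immediate and the same resolved successor (`bb7` is a hypothetical block), their keys
/// compare equal and the blocks become merge candidates:
///
/// ```text
/// key_a = { fingerprint: [PUSH2, imm_idx, JUMP], succs: [bb7], is_multi_jump: false }
/// key_b = { fingerprint: [PUSH2, imm_idx, JUMP], succs: [bb7], is_multi_jump: false }
/// ```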
#[derive(Clone, PartialEq, Eq, Hash)]
struct DedupKey {
    /// Instruction fingerprint: per-instruction `(opcode, immediate)` tuples encoded as bytes.
    fingerprint: SmallVec<[u8; 32]>,
    /// CFG successors for this block. Two structurally identical blocks are only merged when
    /// their successor sets also match.
    succs: SmallVec<[Block; 4]>,
    /// Whether the block's terminator is a MULTI_JUMP.
    is_multi_jump: bool,
}

impl<'a> Bytecode<'a> {
    /// Deduplicate structurally identical non-fallthrough blocks.
    ///
    /// Eligible blocks are those whose terminator cannot fall through (diverging instructions
    /// like REVERT/STOP/RETURN, or unconditional JUMP). For every group of structurally
    /// identical blocks, keeps one canonical copy and marks the duplicates as dead code.
    /// A redirect entry is stored in [`Bytecode::redirects`] so the translator can map
    /// dead instructions to the canonical block's IR block. The CFG is rebuilt after each
    /// iteration so successor/predecessor lists stay consistent.
    ///
    /// `local_snapshots` are the block-local snapshots computed by `block_analysis_local`
    /// (before the global fixpoint). After merging, the canonical block's snapshots are
    /// restored to these local values because the global snapshots may be context-specific
    /// and stale once multiple predecessors are merged.
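    ///
    /// # Example (illustrative)
    ///
    /// After merging two identical revert tails whose first instructions are `i17` and
    /// `i42` (hypothetical indices), the pass records a single redirect entry:
    ///
    /// ```text
    /// redirects = { i42 -> i17 }   // translate.rs resolves i42's IR entry to i17's
    /// ```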
    #[instrument(name = "dedup", level = "debug", skip_all)]
    pub(crate) fn dedup_blocks(&mut self, local_snapshots: &Snapshots) {
        let mut key_to_blocks = HashMap::<DedupKey, SmallVec<[Block; 4]>>::default();
        let mut total_deduped = 0usize;
        loop {
            key_to_blocks.clear();
            let deduped = self.dedup_blocks_once(&mut key_to_blocks, local_snapshots);
            total_deduped += deduped;
            if deduped == 0 {
                break;
            }
            // Compress redirect chains so that earlier redirects (e.g. t2 -> t1) are
            // updated when their target gets deduped in a later round (t1 -> t0).
            // Without this, rebuild_cfg resolves edges only one hop, leaving stale
            // intermediate targets in DedupKey.succs (preventing valid merges) and
            // in translate.rs inst_entries (causing InvalidJump on valid bytecode).
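            // Worked trace (illustrative): round 1 leaves { t2 -> t1 }; round 2 dedupes
            // t1 and adds { t1 -> t0 }. The walk below follows t2's chain
            // (t2 -> t1 -> t0) and rewrites the entry to { t2 -> t0 }, so every
            // redirect points directly at a live canonical block.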
            for inst in self.redirects.keys().copied().collect::<Vec<_>>() {
                let mut target = self.redirects[&inst];
                let original = target;
                while let Some(&next) = self.redirects.get(&target) {
                    target = next;
                }
                if target != original {
                    self.redirects.insert(inst, target);
                }
            }
            self.rebuild_cfg();
        }
        debug!(deduped = total_deduped, "finished");
    }

    /// Single dedup iteration. Returns the number of blocks deduped.
    fn dedup_blocks_once(
        &mut self,
        key_to_blocks: &mut HashMap<DedupKey, SmallVec<[Block; 4]>>,
        local_snapshots: &Snapshots,
    ) -> usize {
        for bid in self.cfg.blocks.indices() {
            let block = &self.cfg.blocks[bid];
            let term = &self.insts[block.terminator()];

            // Only dedup blocks whose terminator cannot fall through (diverging or
            // unconditional JUMP). JUMPI blocks fall through to position-dependent targets.
            if term.can_fall_through() {
                continue;
            }

            // Reachable JUMPDESTs with dynamic jumps cannot be deduped because any
            // JUMPDEST could be reached from an unresolved dynamic JUMP with an
            // arbitrary stack context.
            let first = &self.insts[block.insts.start];
            if self.has_dynamic_jumps && first.is_reachable_jumpdest(true) {
                continue;
            }

            // Skip blocks containing PC — it returns the current program counter,
            // so identical bytecode at different positions produces different results.
            if block.insts().any(|i| self.insts[i].opcode == op::PC) {
                continue;
            }

            // Skip unresolved dynamic JUMPs — any target is possible at runtime.
            if term.opcode == op::JUMP && !term.flags.contains(InstFlags::STATIC_JUMP) {
                continue;
            }

            let fingerprint = block_fingerprint(&self.insts, block);
            // A fully dead block yields an empty fingerprint (its instructions are
            // skipped), so it cannot be grouped for dedup again.
            if fingerprint.is_empty() {
                continue;
            }

            let is_multi_jump = term.flags.contains(InstFlags::MULTI_JUMP);
            key_to_blocks
                .entry(DedupKey { fingerprint, succs: block.succs.clone(), is_multi_jump })
                .or_default()
                .push(bid);
        }

        let mut deduped = 0usize;
        for group in key_to_blocks.values() {
            if group.len() < 2 {
                continue;
            }
            let (&canonical, dups) = group.split_first().unwrap();
            let canonical_first_inst = self.cfg.blocks[canonical].insts.start;

            // Restore block-local snapshots for the canonical block. The global
            // fixpoint may have recorded context-specific constants that become stale
            // once the canonical block serves multiple predecessors after merging.
            // Block-local constants (computed without incoming stack state) remain valid.
            self.snapshots.restore_from(self.cfg.blocks[canonical].insts(), local_snapshots);

            for &dup in dups {
                deduped += 1;
                trace!("deduped: {from} -> {to}", from = dup, to = canonical);

                // Mark all instructions in the duplicate block as dead.
                let dup_block = &self.cfg.blocks[dup];
                for i in dup_block.insts() {
                    self.insts[i].flags |= InstFlags::DEAD_CODE;
                }

                let dup_first_inst = dup_block.insts.start;
                self.redirects.insert(dup_first_inst, canonical_first_inst);

                // If the duplicate was a reachable JUMPDEST, the canonical must be too
                // so that sections/gas-checks treat it as a jump target entry point.
                if self.insts[dup_first_inst].is_jumpdest()
                    && self.insts[dup_first_inst].is_jumpdest_reachable()
                    && self.insts[canonical_first_inst].is_jumpdest()
                {
                    self.insts[canonical_first_inst].set_jumpdest_reachable();
                }

                // Merge multi-jump targets from the duplicate into the canonical block.
                // Without this, valid case PCs unique to the duplicate dispatcher would
                // be lost and fall into the default InvalidJump path at runtime.
                let canonical_term = self.cfg.blocks[canonical].terminator();
                let dup_term = dup_block.terminator();
                if self.insts[dup_term].flags.contains(InstFlags::MULTI_JUMP)
                    && let Some(dup_targets) = self.multi_jump_targets.remove(&dup_term)
                {
                    let canonical_targets =
                        self.multi_jump_targets.get_mut(&canonical_term).unwrap();
                    for t in dup_targets {
                        if !canonical_targets.contains(&t) {
                            canonical_targets.push(t);
                        }
                    }
                }

                // Redirect predecessors that reach the duplicate via static jumps.
                for &pred in &dup_block.preds {
                    let term_inst = self.cfg.blocks[pred].terminator();
                    let term = &self.insts[term_inst];
                    let is_static = term.is_static_jump()
                        && !term.flags.contains(InstFlags::INVALID_JUMP)
                        && !term.flags.contains(InstFlags::MULTI_JUMP)
                        && term.static_jump_target() == dup_first_inst;
                    if is_static {
                        self.insts[term_inst].set_static_jump_target(canonical_first_inst);
                    }
                    // Multi-jump target PCs are not rewritten here: each carries a
                    // distinct PC that callers push as a return address. The targets
                    // were already merged into the canonical block above, and
                    // `inst_entries` redirects (translate.rs) map the dead
                    // instruction's IR entry to the canonical one.
                }
            }
        }

        deduped
    }
}

/// Builds a structural fingerprint of a block from instruction data, without consulting the
/// raw bytecode.
///
/// For each non-dead instruction in the block, encodes the opcode followed by the immediate
/// payload (interned U256Idx for `PUSH*`, immediate byte for `DUPN`/`SWAPN`/`EXCHANGE`).
/// JUMP/JUMPI carry no immediate and so contribute only their opcode; jump targets are
/// already factored into the surrounding `DedupKey` via the block's CFG successors.
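///
/// # Example (illustrative)
///
/// A `PUSH1 0x60; DUP1; REVERT` block encodes as the PUSH1 opcode byte, the interned
/// immediate index with leading zero bytes stripped, then the DUP1 and REVERT opcode
/// bytes. A second block with the same sequence (and thus the same interned index)
/// produces an identical byte string:
///
/// ```text
/// [0x60 (PUSH1), <imm idx bytes>, 0x80 (DUP1), 0xFD (REVERT)]
/// ```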
fn block_fingerprint(insts: &IndexVec<Inst, InstData>, block: &BlockData) -> SmallVec<[u8; 32]> {
    let mut buf: SmallVec<[u8; 32]> = SmallVec::new();
    for i in block.insts() {
        let data = &insts[i];
        // Dead instructions don't survive translation, so they must not affect the
        // fingerprint; a fully dead block fingerprints as empty and is skipped.
        if data.is_dead_code() {
            continue;
        }
        buf.push(data.opcode);
        if data.imm_len() > 0 {
            // Strip leading zero bytes so short immediates stay compact; equal
            // immediates still encode identically.
            let mut imm = &data.data.to_ne_bytes()[..];
            while let [0, rest @ ..] = imm {
                imm = rest;
            }
            buf.extend_from_slice(imm);
        }
    }
    buf
}

#[cfg(test)]
mod tests {
    use crate::bytecode::{
        AnalysisConfig, InstFlags, passes::block_analysis::tests::analyze_asm_with,
    };

    #[test]
    fn dedup_identical_revert_blocks() {
        // Two JUMPI branches each fall through to an identical `PUSH1 0x00 / DUP1 / REVERT`.
        let bytecode = analyze_asm_with(
            "
            PUSH0
            CALLDATALOAD
            PUSH %bb2
            JUMPI
            ; revert A
            PUSH1 0x00
            DUP1
            REVERT
        bb2:
            JUMPDEST
            PUSH0
            CALLDATALOAD
            PUSH %bb4
            JUMPI
            ; revert B (identical to A)
            PUSH1 0x00
            DUP1
            REVERT
        bb4:
            JUMPDEST
            STOP
            ",
            AnalysisConfig::DEDUP,
        );

        // One of the two `PUSH1 0x00 / DUP1 / REVERT` blocks should be redirected.
        assert_eq!(
            bytecode.redirects.len(),
            1,
            "expected exactly 1 redirect for 2 identical revert blocks",
        );
    }

    #[test]
    fn dedup_preserves_unique_blocks() {
        // Two different diverging blocks — they should NOT be deduplicated.
        let bytecode = analyze_asm_with(
            "
            PUSH0
            CALLDATALOAD
            PUSH %target
            JUMPI
            ; revert with 0
            PUSH1 0x00
            DUP1
            REVERT
        target:
            JUMPDEST
            STOP
            ",
            AnalysisConfig::DEDUP,
        );

        assert!(bytecode.redirects.is_empty(), "should not dedup different blocks");
    }

    #[test]
    fn dedup_three_identical_blocks() {
        // Three identical `PUSH1 0x00 / DUP1 / REVERT` blocks.
        let bytecode = analyze_asm_with(
            "
            PUSH0
            CALLDATALOAD
            PUSH %bb2
            JUMPI
            ; revert A
            PUSH1 0x00
            DUP1
            REVERT
        bb2:
            JUMPDEST
            PUSH0
            CALLDATALOAD
            PUSH %bb4
            JUMPI
            ; revert B
            PUSH1 0x00
            DUP1
            REVERT
        bb4:
            JUMPDEST
            PUSH0
            CALLDATALOAD
            PUSH %bb6
            JUMPI
            ; revert C
            PUSH1 0x00
            DUP1
            REVERT
        bb6:
            JUMPDEST
            STOP
            ",
            AnalysisConfig::DEDUP,
        );

        let redirect_count = bytecode.redirects.len();
        assert_eq!(redirect_count, 2, "expected 2 redirects, got {redirect_count}");
    }

    #[test]
    fn dedup_skips_pc_opcode() {
        // Two identical `PC / PUSH1 0x00 / REVERT` blocks — PC is position-dependent,
        // so they must NOT be deduplicated.
        let bytecode = analyze_asm_with(
            "
            PUSH0
            CALLDATALOAD
            PUSH %bb2
            JUMPI
            ; PC + revert A
            PC
            PUSH1 0x00
            REVERT
        bb2:
            JUMPDEST
            PUSH0
            CALLDATALOAD
            PUSH %bb4
            JUMPI
            ; PC + revert B (same bytes, different PC value)
            PC
            PUSH1 0x00
            REVERT
        bb4:
            JUMPDEST
            STOP
            ",
            AnalysisConfig::DEDUP,
        );

        assert!(bytecode.redirects.is_empty(), "should not dedup blocks containing PC");
    }

    #[test]
    fn dedup_jump_same_target() {
        // Two byte-identical non-JUMPDEST JUMP tails (entered by fallthrough) that resolve
        // to the same static target.
        let bytecode = analyze_asm_with(
            "
            CALLDATASIZE
            PUSH %path1
            JUMPI
            PUSH0
            PUSH1 0xFF
            JUMPI
            PUSH %target
            JUMP
        path1:
            JUMPDEST
            PUSH0
            PUSH1 0xFF
            JUMPI
            PUSH %target
            JUMP
        target:
            JUMPDEST
            STOP
            ",
            AnalysisConfig::DEDUP,
        );
        assert_eq!(bytecode.redirects.len(), 1, "same-target JUMP tails should be deduped");
    }

    #[test]
    fn dedup_jump_different_targets() {
        // Two byte-identical non-JUMPDEST JUMP tails with different resolved static targets.
        // Must NOT be merged — the JUMP target is context-sensitive.
        // Regression test for:
        // revmc-dedup-non-jumpdest-fallthrough-jump-tail-static-target-confusion
        let bytecode = analyze_asm_with(
            "
            CALLDATASIZE
            PUSH %path1
            JUMPI

            PUSH %ret0
            PUSH0
            PUSH1 0xFF
            JUMPI
            PUSH1 0x42
            SWAP1
            JUMP

        path1:
            JUMPDEST
            PUSH %ret1
            PUSH0
            PUSH1 0xFF
            JUMPI
            PUSH1 0x42
            SWAP1
            JUMP

        ret0:
            JUMPDEST
            POP
            PUSH1 0xAA
            STOP

        ret1:
            JUMPDEST
            POP
            PUSH1 0xBB
            STOP
            ",
            AnalysisConfig::DEDUP,
        );

        assert!(
            bytecode.redirects.is_empty(),
            "different-target JUMP tails must not be deduped, got {} redirect(s)",
            bytecode.redirects.len(),
        );
    }

    #[test]
    fn dedup_weth() {
        let code =
            revm_primitives::hex::decode(include_str!("../../../../../data/weth.rt.hex").trim())
                .unwrap();
        let mut bytecode = crate::Bytecode::test(code);
        bytecode.config = AnalysisConfig::DEDUP;
        bytecode.analyze().unwrap();

        assert_eq!(bytecode.redirects.len(), 13);
    }

    #[test]
    fn dedup_skips_dynamic_jump_sstore_tail() {
        // Two non-JUMPDEST byte-identical `SSTORE ; JUMP` tails with unresolved dynamic
        // JUMP terminators must NOT be deduped.
        let bytecode = analyze_asm_with(
            "
            PUSH0
            CALLDATALOAD
            PUSH1 0x20
            CALLDATALOAD
            PUSH %path_b
            JUMPI

            ; path A
            PUSH1 0x11
            PUSH1 0x01
            PUSH0
            PUSH %not_taken_a
            JUMPI
            SSTORE
            JUMP

        not_taken_a:
            JUMPDEST
            INVALID

        path_b:
            JUMPDEST
            PUSH1 0x22
            PUSH1 0x02
            PUSH0
            PUSH %not_taken_b
            JUMPI
            SSTORE
            JUMP

        not_taken_b:
            JUMPDEST
            INVALID

        done:
            JUMPDEST
            STOP
            ",
            AnalysisConfig::DEDUP,
        );

        assert!(
            bytecode.redirects.is_empty(),
            "should not dedup blocks ending in unresolved dynamic JUMP",
        );
    }

    #[test]
    fn dedup_multi_jump_source_merges_targets() {
        // Two byte-identical MULTI_JUMP dispatcher blocks (dispatcher_a, dispatcher_b)
        // each route to two identical JUMPDEST+STOP return blocks. After the return
        // blocks are deduped, the two dispatchers become identical in (bytes, succs).
        // Dedup must merge their multi_jump_targets so the canonical switch emits cases
        // for ALL valid return PCs.
        let bytecode = analyze_asm_with(
            "
            ; Branch on first calldata word.
            PUSH0
            CALLDATALOAD
            PUSH %path_b
            JUMPI

            ; Branch on second calldata word.
            PUSH1 0x20
            CALLDATALOAD
            PUSH %path_a1
            JUMPI
            PUSH %ret0
            PUSH %dispatcher_a
            JUMP
        path_a1:
            JUMPDEST
            PUSH %ret1
            PUSH %dispatcher_a
            JUMP

        path_b:
            JUMPDEST
            PUSH1 0x20
            CALLDATALOAD
            PUSH %path_b1
            JUMPI
            PUSH %ret2
            PUSH %dispatcher_b
            JUMP
        path_b1:
            JUMPDEST
            PUSH %ret3
            PUSH %dispatcher_b
            JUMP

        ret0:
            JUMPDEST
            STOP
        ret1:
            JUMPDEST
            STOP
        ret2:
            JUMPDEST
            STOP
        ret3:
            JUMPDEST
            STOP

        dispatcher_a:
            JUMPDEST
            JUMP
        dispatcher_b:
            JUMPDEST
            JUMP
            ",
            AnalysisConfig::DEDUP,
        );

        // The four STOP blocks are deduped (3 redirects), and one dispatcher is deduped
        // (1 redirect) = 4 total.
        assert_eq!(bytecode.redirects.len(), 4);

        // Find the canonical dispatcher's multi_jump_targets — it must contain all 4
        // original return PCs (possibly redirected to the canonical STOP inst).
        let canonical_dispatcher_term = bytecode
            .multi_jump_targets
            .keys()
            .find(|&&inst| !bytecode.insts[inst].is_dead_code())
            .expect("should have a live MULTI_JUMP dispatcher");
        let targets = &bytecode.multi_jump_targets[canonical_dispatcher_term];
        assert_eq!(
            targets.len(),
            4,
            "canonical dispatcher should have 4 multi-jump targets (merged), got {targets:?}",
        );
    }

    #[test]
    fn dedup_clears_stale_noop_flags() {
        // Regression test for stale NOOP flags surviving dedup.
        //
        // Two byte-identical RETURN tails reached via different paths:
        // - Path 0 pushes known constants (0x11, 0x22) → DSE can mark ADD as NOOP because the
        //   output is a known constant (0x33).
        // - Path 1 pushes CALLDATASIZE twice → ADD output is dynamic.
        //
        // After dedup merges the tails, the canonical block's snapshots are restored
        // to local (which lack the incoming constants), but DSE re-runs and must NOT
        // mark ADD as NOOP since its output is no longer a known constant.
        let bytecode = analyze_asm_with(
            "
            CALLDATASIZE
            PUSH %path1
            JUMPI

            ; Path 0: known constants.
            PUSH1 0x11
            PUSH1 0x22
            PUSH %tail0
            JUMP

        path1:
            JUMPDEST
            ; Path 1: dynamic values.
            CALLDATASIZE
            CALLDATASIZE
            PUSH %tail1
            JUMP

        tail0:
            JUMPDEST
            ADD
            PUSH0
            MSTORE
            PUSH1 0x20
            PUSH0
            RETURN

        tail1:
            JUMPDEST
            ADD
            PUSH0
            MSTORE
            PUSH1 0x20
            PUSH0
            RETURN
            ",
            AnalysisConfig::DEDUP,
        );

        // Dedup should have merged the two tails.
        assert_eq!(bytecode.redirects.len(), 1);

        // The canonical block's ADD must NOT be marked NOOP — its output is dynamic
        // under the merged (local) snapshots.
        let canonical_start = *bytecode.redirects.values().next().unwrap();
        let add_inst = canonical_start + 1; // JUMPDEST, ADD
        assert!(
            !bytecode.insts[add_inst].flags.contains(InstFlags::NOOP),
            "canonical ADD must not be NOOP after dedup (stale flag from context-specific DSE)",
        );
    }

    #[test]
    fn dedup_invalidates_stale_const_snapshots() {
        // Two byte-identical RETURN tails reached with different incoming constants.
        // After dedup the canonical block must NOT retain stale const snapshots.
        let bytecode = analyze_asm_with(
            "
            CALLDATASIZE
            PUSH %path1
            JUMPI

            PUSH1 0xAA
            PUSH %tail0
            JUMP

        path1:
            JUMPDEST
            PUSH1 0xBB
            PUSH %tail1
            JUMP

        tail0:
            JUMPDEST
            PUSH1 0x01
            ADD
            PUSH0
            MSTORE
            PUSH1 0x20
            PUSH0
            RETURN

        tail1:
            JUMPDEST
            PUSH1 0x01
            ADD
            PUSH0
            MSTORE
            PUSH1 0x20
            PUSH0
            RETURN
            ",
            AnalysisConfig::DEDUP,
        );

        // Dedup should have merged the two tails.
        assert_eq!(bytecode.redirects.len(), 1);

        // The canonical block's ADD must not carry a stale const_output
        // (it would be 0xAB from the first context, but the second expects 0xBC).
        let canonical_start = *bytecode.redirects.values().next().unwrap();
        let add_inst = canonical_start + 2; // JUMPDEST, PUSH1, ADD
        assert!(
            bytecode.const_output(add_inst).is_none(),
            "canonical ADD should have no const_output after dedup (was stale)",
        );
    }
}