revmc_codegen/bytecode/passes/dedup.rs
//! Block deduplication pass.
//!
//! Identifies structurally identical non-fallthrough blocks (same opcode + immediate sequence)
//! and eliminates duplicates by marking them as dead code and redirecting predecessors to a
//! single canonical copy.
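//!
//! # Example
//!
//! A minimal illustration (assembly sketch, not tied to concrete instruction indices): two
//! branches that end in the same revert sequence collapse onto one canonical block.
//!
//! ```text
//! ; before                          ; after
//! a: PUSH1 0x00; DUP1; REVERT       a: PUSH1 0x00; DUP1; REVERT
//! b: PUSH1 0x00; DUP1; REVERT       b: (dead code, redirected to `a`)
//! ```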

use super::block_analysis::{Block, BlockData, Snapshots};
use crate::bytecode::{Bytecode, Inst, InstData, InstFlags};
use alloy_primitives::map::HashMap;
use oxc_index::IndexVec;
use revm_bytecode::opcode as op;
use smallvec::SmallVec;
use tracing::{debug, instrument, trace};

/// Dedup key: instruction-level structural fingerprint, CFG successor blocks, and terminator
/// jump kind.
///
/// The fingerprint encodes each instruction's opcode plus its (interned) immediate so that
/// PUSH operations with the same value, and DUPN/SWAPN/EXCHANGE with the same operand, hash
/// equal across blocks. Raw bytes are intentionally not consulted — all immediate data is
/// pre-interned at parse time on `InstData`.
///
/// Structural equality alone is insufficient for JUMP-terminated blocks because block
/// analysis may resolve identical copies to different static targets depending on incoming
/// stack context (e.g. different return-address values), so successors are part of the key.
///
/// The `is_multi_jump` discriminator prevents MULTI_JUMP dispatcher blocks from colliding
/// with STATIC_JUMP blocks that happen to share the same fingerprint and successor set.
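///
/// # Example (illustrative)
///
/// Assuming two blocks that both lower to `PUSH <target>; JUMP` with the same interned
/// immediate and the same resolved successor (`bb7` is a hypothetical block), their keys
/// compare equal and the blocks become merge candidates:
///
/// ```text
/// key_a = { fingerprint: [PUSH2, imm_idx, JUMP], succs: [bb7], is_multi_jump: false }
/// key_b = { fingerprint: [PUSH2, imm_idx, JUMP], succs: [bb7], is_multi_jump: false }
/// ```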
#[derive(Clone, PartialEq, Eq, Hash)]
struct DedupKey {
    /// Instruction fingerprint: per-instruction `(opcode, immediate)` tuples encoded as bytes.
    fingerprint: SmallVec<[u8; 32]>,
    /// CFG successors for this block. Two structurally identical blocks are only merged when
    /// their successor sets also match.
    succs: SmallVec<[Block; 4]>,
    /// Whether the block's terminator is a MULTI_JUMP.
    is_multi_jump: bool,
}

impl<'a> Bytecode<'a> {
    /// Deduplicate structurally identical non-fallthrough blocks.
    ///
    /// Eligible blocks are those whose terminator cannot fall through (diverging instructions
    /// like REVERT/STOP/RETURN, or unconditional JUMP). For every group of structurally
    /// identical blocks, keeps one canonical copy and marks the duplicates as dead code.
    /// A redirect entry is stored in [`Bytecode::redirects`] so the translator can map
    /// dead instructions to the canonical block's IR block. The CFG is rebuilt after each
    /// iteration so successor/predecessor lists stay consistent.
    ///
    /// `local_snapshots` are the block-local snapshots computed by `block_analysis_local`
    /// (before the global fixpoint). After merging, the canonical block's snapshots are
    /// restored to these local values because the global snapshots may be context-specific
    /// and stale once multiple predecessors are merged.
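    ///
    /// # Example (illustrative)
    ///
    /// After merging two identical revert tails whose first instructions are `i17` and
    /// `i42` (hypothetical indices), the pass records a single redirect entry:
    ///
    /// ```text
    /// redirects = { i42 -> i17 }   // translate.rs resolves i42's IR entry to i17's
    /// ```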
    #[instrument(name = "dedup", level = "debug", skip_all)]
    pub(crate) fn dedup_blocks(&mut self, local_snapshots: &Snapshots) {
        let mut key_to_blocks = HashMap::<DedupKey, SmallVec<[Block; 4]>>::default();
        let mut total_deduped = 0usize;
        loop {
            key_to_blocks.clear();
            let deduped = self.dedup_blocks_once(&mut key_to_blocks, local_snapshots);
            total_deduped += deduped;
            if deduped == 0 {
                break;
            }
            // Compress redirect chains so that earlier redirects (e.g. t2 -> t1) are
            // updated when their target gets deduped in a later round (t1 -> t0).
            // Without this, rebuild_cfg resolves edges only one hop, leaving stale
            // intermediate targets in DedupKey.succs (preventing valid merges) and
            // in translate.rs inst_entries (causing InvalidJump on valid bytecode).
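            // Worked trace (illustrative): round 1 leaves { t2 -> t1 }; round 2 dedupes
            // t1 and adds { t1 -> t0 }. The walk below follows t2's chain
            // (t2 -> t1 -> t0) and rewrites the entry to { t2 -> t0 }, so every
            // redirect points directly at a live canonical block.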
            for inst in self.redirects.keys().copied().collect::<Vec<_>>() {
                let mut target = self.redirects[&inst];
                let original = target;
                while let Some(&next) = self.redirects.get(&target) {
                    target = next;
                }
                if target != original {
                    self.redirects.insert(inst, target);
                }
            }
            self.rebuild_cfg();
        }
        debug!(deduped = total_deduped, "finished");
    }

    /// Single dedup iteration. Returns the number of blocks deduped.
    fn dedup_blocks_once(
        &mut self,
        key_to_blocks: &mut HashMap<DedupKey, SmallVec<[Block; 4]>>,
        local_snapshots: &Snapshots,
    ) -> usize {
        for bid in self.cfg.blocks.indices() {
            let block = &self.cfg.blocks[bid];
            let term = &self.insts[block.terminator()];

            // Only dedup blocks whose terminator cannot fall through (diverging or
            // unconditional JUMP). JUMPI blocks fall through to position-dependent targets.
            if term.can_fall_through() {
                continue;
            }

            // Reachable JUMPDESTs with dynamic jumps cannot be deduped because any
            // JUMPDEST could be reached from an unresolved dynamic JUMP with an
            // arbitrary stack context.
            let first = &self.insts[block.insts.start];
            if self.has_dynamic_jumps && first.is_reachable_jumpdest(true) {
                continue;
            }

            // Skip blocks containing PC — it returns the current program counter,
            // so identical bytecode at different positions produces different results.
            if block.insts().any(|i| self.insts[i].opcode == op::PC) {
                continue;
            }

            // Skip unresolved dynamic JUMPs — any target is possible at runtime.
            if term.opcode == op::JUMP && !term.flags.contains(InstFlags::STATIC_JUMP) {
                continue;
            }

            let fingerprint = block_fingerprint(&self.insts, block);
            // A fully dead block yields an empty fingerprint (its instructions are
            // skipped), so it cannot be grouped for dedup again.
            if fingerprint.is_empty() {
                continue;
            }

            let is_multi_jump = term.flags.contains(InstFlags::MULTI_JUMP);
            key_to_blocks
                .entry(DedupKey { fingerprint, succs: block.succs.clone(), is_multi_jump })
                .or_default()
                .push(bid);
        }

        let mut deduped = 0usize;
        for group in key_to_blocks.values() {
            if group.len() < 2 {
                continue;
            }
            let (&canonical, dups) = group.split_first().unwrap();
            let canonical_first_inst = self.cfg.blocks[canonical].insts.start;

            // Restore block-local snapshots for the canonical block. The global
            // fixpoint may have recorded context-specific constants that become stale
            // once the canonical block serves multiple predecessors after merging.
            // Block-local constants (computed without incoming stack state) remain valid.
            self.snapshots.restore_from(self.cfg.blocks[canonical].insts(), local_snapshots);

            for &dup in dups {
                deduped += 1;
                trace!("deduped: {from} -> {to}", from = dup, to = canonical);

                // Mark all instructions in the duplicate block as dead.
                let dup_block = &self.cfg.blocks[dup];
                for i in dup_block.insts() {
                    self.insts[i].flags |= InstFlags::DEAD_CODE;
                }

                let dup_first_inst = dup_block.insts.start;
                self.redirects.insert(dup_first_inst, canonical_first_inst);

                // If the duplicate was a reachable JUMPDEST, the canonical must be too
                // so that sections/gas-checks treat it as a jump target entry point.
                if self.insts[dup_first_inst].is_jumpdest()
                    && self.insts[dup_first_inst].is_jumpdest_reachable()
                    && self.insts[canonical_first_inst].is_jumpdest()
                {
                    self.insts[canonical_first_inst].set_jumpdest_reachable();
                }

                // Merge multi-jump targets from the duplicate into the canonical block.
                // Without this, valid case PCs unique to the duplicate dispatcher would
                // be lost and fall into the default InvalidJump path at runtime.
                let canonical_term = self.cfg.blocks[canonical].terminator();
                let dup_term = dup_block.terminator();
                if self.insts[dup_term].flags.contains(InstFlags::MULTI_JUMP)
                    && let Some(dup_targets) = self.multi_jump_targets.remove(&dup_term)
                {
                    let canonical_targets =
                        self.multi_jump_targets.get_mut(&canonical_term).unwrap();
                    for t in dup_targets {
                        if !canonical_targets.contains(&t) {
                            canonical_targets.push(t);
                        }
                    }
                }

                // Redirect predecessors that reach the duplicate via static jumps.
                for &pred in &dup_block.preds {
                    let term_inst = self.cfg.blocks[pred].terminator();
                    let term = &self.insts[term_inst];
                    let is_static = term.is_static_jump()
                        && !term.flags.contains(InstFlags::INVALID_JUMP)
                        && !term.flags.contains(InstFlags::MULTI_JUMP)
                        && term.static_jump_target() == dup_first_inst;
                    if is_static {
                        self.insts[term_inst].set_static_jump_target(canonical_first_inst);
                    }
                    // Multi-jump target PCs are not rewritten here: each carries a
                    // distinct PC that callers push as a return address. The targets
                    // were already merged into the canonical block above, and
                    // `inst_entries` redirects (translate.rs) map the dead
                    // instruction's IR entry to the canonical one.
                }
            }
        }

        deduped
    }
}

/// Builds a structural fingerprint of a block from instruction data, without consulting the
/// raw bytecode.
///
/// For each non-dead instruction in the block, encodes the opcode followed by the immediate
/// payload (interned U256Idx for `PUSH*`, immediate byte for `DUPN`/`SWAPN`/`EXCHANGE`).
/// JUMP/JUMPI carry no immediate and so contribute only their opcode; jump targets are
/// already factored into the surrounding `DedupKey` via the block's CFG successors.
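///
/// # Example (illustrative)
///
/// A `PUSH1 0x60; DUP1; REVERT` block encodes as the PUSH1 opcode byte, the interned
/// immediate index with leading zero bytes stripped, then the DUP1 and REVERT opcode
/// bytes. A second block with the same sequence (and thus the same interned index)
/// produces an identical byte string:
///
/// ```text
/// [0x60 (PUSH1), <imm idx bytes>, 0x80 (DUP1), 0xFD (REVERT)]
/// ```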
fn block_fingerprint(insts: &IndexVec<Inst, InstData>, block: &BlockData) -> SmallVec<[u8; 32]> {
    let mut buf: SmallVec<[u8; 32]> = SmallVec::new();
    for i in block.insts() {
        let data = &insts[i];
        // Dead instructions don't survive translation, so they must not affect the
        // fingerprint; a fully dead block fingerprints as empty and is skipped.
        if data.is_dead_code() {
            continue;
        }
        buf.push(data.opcode);
        if data.imm_len() > 0 {
            // Strip leading zero bytes so short immediates stay compact; equal
            // immediates still encode identically.
            let mut imm = &data.data.to_ne_bytes()[..];
            while let [0, rest @ ..] = imm {
                imm = rest;
            }
            buf.extend_from_slice(imm);
        }
    }
    buf
}

#[cfg(test)]
mod tests {
    use crate::bytecode::{
        AnalysisConfig, InstFlags, passes::block_analysis::tests::analyze_asm_with,
    };

    #[test]
    fn dedup_identical_revert_blocks() {
        // Two JUMPI branches each fall through to an identical `PUSH1 0x00 / DUP1 / REVERT`.
        let bytecode = analyze_asm_with(
            "
            PUSH0
            CALLDATALOAD
            PUSH %bb2
            JUMPI
            ; revert A
            PUSH1 0x00
            DUP1
            REVERT
        bb2:
            JUMPDEST
            PUSH0
            CALLDATALOAD
            PUSH %bb4
            JUMPI
            ; revert B (identical to A)
            PUSH1 0x00
            DUP1
            REVERT
        bb4:
            JUMPDEST
            STOP
            ",
            AnalysisConfig::DEDUP,
        );

        // One of the two `PUSH1 0x00 / DUP1 / REVERT` blocks should be redirected.
        assert_eq!(
            bytecode.redirects.len(),
            1,
            "expected exactly 1 redirect for 2 identical revert blocks",
        );
    }

    #[test]
    fn dedup_preserves_unique_blocks() {
        // Two different diverging blocks — they should NOT be deduplicated.
        let bytecode = analyze_asm_with(
            "
            PUSH0
            CALLDATALOAD
            PUSH %target
            JUMPI
            ; revert with 0
            PUSH1 0x00
            DUP1
            REVERT
        target:
            JUMPDEST
            STOP
            ",
            AnalysisConfig::DEDUP,
        );

        assert!(bytecode.redirects.is_empty(), "should not dedup different blocks");
    }

    #[test]
    fn dedup_three_identical_blocks() {
        // Three identical `PUSH1 0x00 / DUP1 / REVERT` blocks.
        let bytecode = analyze_asm_with(
            "
            PUSH0
            CALLDATALOAD
            PUSH %bb2
            JUMPI
            ; revert A
            PUSH1 0x00
            DUP1
            REVERT
        bb2:
            JUMPDEST
            PUSH0
            CALLDATALOAD
            PUSH %bb4
            JUMPI
            ; revert B
            PUSH1 0x00
            DUP1
            REVERT
        bb4:
            JUMPDEST
            PUSH0
            CALLDATALOAD
            PUSH %bb6
            JUMPI
            ; revert C
            PUSH1 0x00
            DUP1
            REVERT
        bb6:
            JUMPDEST
            STOP
            ",
            AnalysisConfig::DEDUP,
        );

        let redirect_count = bytecode.redirects.len();
        assert_eq!(redirect_count, 2, "expected 2 redirects, got {redirect_count}");
    }

    #[test]
    fn dedup_skips_pc_opcode() {
        // Two identical `PC / PUSH1 0x00 / REVERT` blocks — PC is position-dependent,
        // so they must NOT be deduplicated.
        let bytecode = analyze_asm_with(
            "
            PUSH0
            CALLDATALOAD
            PUSH %bb2
            JUMPI
            ; PC + revert A
            PC
            PUSH1 0x00
            REVERT
        bb2:
            JUMPDEST
            PUSH0
            CALLDATALOAD
            PUSH %bb4
            JUMPI
            ; PC + revert B (same bytes, different PC value)
            PC
            PUSH1 0x00
            REVERT
        bb4:
            JUMPDEST
            STOP
            ",
            AnalysisConfig::DEDUP,
        );

        assert!(bytecode.redirects.is_empty(), "should not dedup blocks containing PC");
    }

    #[test]
    fn dedup_jump_same_target() {
        // Two byte-identical non-JUMPDEST JUMP tails (entered by fallthrough) that resolve
        // to the same static target.
        let bytecode = analyze_asm_with(
            "
            CALLDATASIZE
            PUSH %path1
            JUMPI
            PUSH0
            PUSH1 0xFF
            JUMPI
            PUSH %target
            JUMP
        path1:
            JUMPDEST
            PUSH0
            PUSH1 0xFF
            JUMPI
            PUSH %target
            JUMP
        target:
            JUMPDEST
            STOP
            ",
            AnalysisConfig::DEDUP,
        );
        assert_eq!(bytecode.redirects.len(), 1, "same-target JUMP tails should be deduped");
    }

    #[test]
    fn dedup_jump_different_targets() {
        // Two byte-identical non-JUMPDEST JUMP tails with different resolved static targets.
        // Must NOT be merged — the JUMP target is context-sensitive.
        // Regression test for:
        // revmc-dedup-non-jumpdest-fallthrough-jump-tail-static-target-confusion
        let bytecode = analyze_asm_with(
            "
            CALLDATASIZE
            PUSH %path1
            JUMPI

            PUSH %ret0
            PUSH0
            PUSH1 0xFF
            JUMPI
            PUSH1 0x42
            SWAP1
            JUMP

        path1:
            JUMPDEST
            PUSH %ret1
            PUSH0
            PUSH1 0xFF
            JUMPI
            PUSH1 0x42
            SWAP1
            JUMP

        ret0:
            JUMPDEST
            POP
            PUSH1 0xAA
            STOP

        ret1:
            JUMPDEST
            POP
            PUSH1 0xBB
            STOP
            ",
            AnalysisConfig::DEDUP,
        );

        assert!(
            bytecode.redirects.is_empty(),
            "different-target JUMP tails must not be deduped, got {} redirect(s)",
            bytecode.redirects.len(),
        );
    }

    #[test]
    fn dedup_weth() {
        let code =
            revm_primitives::hex::decode(include_str!("../../../../../data/weth.rt.hex").trim())
                .unwrap();
        let mut bytecode = crate::Bytecode::test(code);
        bytecode.config = AnalysisConfig::DEDUP;
        bytecode.analyze().unwrap();

        assert_eq!(bytecode.redirects.len(), 13);
    }

    #[test]
    fn dedup_skips_dynamic_jump_sstore_tail() {
        // Two non-JUMPDEST byte-identical `SSTORE ; JUMP` tails with unresolved dynamic
        // JUMP terminators must NOT be deduped.
        let bytecode = analyze_asm_with(
            "
            PUSH0
            CALLDATALOAD
            PUSH1 0x20
            CALLDATALOAD
            PUSH %path_b
            JUMPI

            ; path A
            PUSH1 0x11
            PUSH1 0x01
            PUSH0
            PUSH %not_taken_a
            JUMPI
            SSTORE
            JUMP

        not_taken_a:
            JUMPDEST
            INVALID

        path_b:
            JUMPDEST
            PUSH1 0x22
            PUSH1 0x02
            PUSH0
            PUSH %not_taken_b
            JUMPI
            SSTORE
            JUMP

        not_taken_b:
            JUMPDEST
            INVALID

        done:
            JUMPDEST
            STOP
            ",
            AnalysisConfig::DEDUP,
        );

        assert!(
            bytecode.redirects.is_empty(),
            "should not dedup blocks ending in unresolved dynamic JUMP",
        );
    }

    #[test]
    fn dedup_multi_jump_source_merges_targets() {
        // Two byte-identical MULTI_JUMP dispatcher blocks (dispatcher_a, dispatcher_b)
        // each route to two identical JUMPDEST+STOP return blocks. After the return
        // blocks are deduped, the two dispatchers become identical in (bytes, succs).
        // Dedup must merge their multi_jump_targets so the canonical switch emits cases
        // for ALL valid return PCs.
        let bytecode = analyze_asm_with(
            "
            ; Branch on first calldata word.
            PUSH0
            CALLDATALOAD
            PUSH %path_b
            JUMPI

            ; Branch on second calldata word.
            PUSH1 0x20
            CALLDATALOAD
            PUSH %path_a1
            JUMPI
            PUSH %ret0
            PUSH %dispatcher_a
            JUMP
        path_a1:
            JUMPDEST
            PUSH %ret1
            PUSH %dispatcher_a
            JUMP

        path_b:
            JUMPDEST
            PUSH1 0x20
            CALLDATALOAD
            PUSH %path_b1
            JUMPI
            PUSH %ret2
            PUSH %dispatcher_b
            JUMP
        path_b1:
            JUMPDEST
            PUSH %ret3
            PUSH %dispatcher_b
            JUMP

        ret0:
            JUMPDEST
            STOP
        ret1:
            JUMPDEST
            STOP
        ret2:
            JUMPDEST
            STOP
        ret3:
            JUMPDEST
            STOP

        dispatcher_a:
            JUMPDEST
            JUMP
        dispatcher_b:
            JUMPDEST
            JUMP
            ",
            AnalysisConfig::DEDUP,
        );

        // The four STOP blocks are deduped (3 redirects), and one dispatcher is deduped
        // (1 redirect) = 4 total.
        assert_eq!(bytecode.redirects.len(), 4);

        // Find the canonical dispatcher's multi_jump_targets — it must contain all 4
        // original return PCs (possibly redirected to the canonical STOP inst).
        let canonical_dispatcher_term = bytecode
            .multi_jump_targets
            .keys()
            .find(|&&inst| !bytecode.insts[inst].is_dead_code())
            .expect("should have a live MULTI_JUMP dispatcher");
        let targets = &bytecode.multi_jump_targets[canonical_dispatcher_term];
        assert_eq!(
            targets.len(),
            4,
            "canonical dispatcher should have 4 multi-jump targets (merged), got {targets:?}",
        );
    }

    #[test]
    fn dedup_clears_stale_noop_flags() {
        // Regression test for stale NOOP flags surviving dedup.
        //
        // Two byte-identical RETURN tails reached via different paths:
        // - Path 0 pushes known constants (0x11, 0x22) → DSE can mark ADD as NOOP because the
        //   output is a known constant (0x33).
        // - Path 1 pushes CALLDATASIZE twice → ADD output is dynamic.
        //
        // After dedup merges the tails, the canonical block's snapshots are restored
        // to local (which lack the incoming constants), but DSE re-runs and must NOT
        // mark ADD as NOOP since its output is no longer a known constant.
        let bytecode = analyze_asm_with(
            "
            CALLDATASIZE
            PUSH %path1
            JUMPI

            ; Path 0: known constants.
            PUSH1 0x11
            PUSH1 0x22
            PUSH %tail0
            JUMP

        path1:
            JUMPDEST
            ; Path 1: dynamic values.
            CALLDATASIZE
            CALLDATASIZE
            PUSH %tail1
            JUMP

        tail0:
            JUMPDEST
            ADD
            PUSH0
            MSTORE
            PUSH1 0x20
            PUSH0
            RETURN

        tail1:
            JUMPDEST
            ADD
            PUSH0
            MSTORE
            PUSH1 0x20
            PUSH0
            RETURN
            ",
            AnalysisConfig::DEDUP,
        );

        // Dedup should have merged the two tails.
        assert_eq!(bytecode.redirects.len(), 1);

        // The canonical block's ADD must NOT be marked NOOP — its output is dynamic
        // under the merged (local) snapshots.
        let canonical_start = *bytecode.redirects.values().next().unwrap();
        let add_inst = canonical_start + 1; // JUMPDEST, ADD
        assert!(
            !bytecode.insts[add_inst].flags.contains(InstFlags::NOOP),
            "canonical ADD must not be NOOP after dedup (stale flag from context-specific DSE)",
        );
    }

    #[test]
    fn dedup_invalidates_stale_const_snapshots() {
        // Two byte-identical RETURN tails reached with different incoming constants.
        // After dedup the canonical block must NOT retain stale const snapshots.
        let bytecode = analyze_asm_with(
            "
            CALLDATASIZE
            PUSH %path1
            JUMPI

            PUSH1 0xAA
            PUSH %tail0
            JUMP

        path1:
            JUMPDEST
            PUSH1 0xBB
            PUSH %tail1
            JUMP

        tail0:
            JUMPDEST
            PUSH1 0x01
            ADD
            PUSH0
            MSTORE
            PUSH1 0x20
            PUSH0
            RETURN

        tail1:
            JUMPDEST
            PUSH1 0x01
            ADD
            PUSH0
            MSTORE
            PUSH1 0x20
            PUSH0
            RETURN
            ",
            AnalysisConfig::DEDUP,
        );

        // Dedup should have merged the two tails.
        assert_eq!(bytecode.redirects.len(), 1);

        // The canonical block's ADD must not carry a stale const_output
        // (it would be 0xAB from the first context, but the second expects 0xBC).
        let canonical_start = *bytecode.redirects.values().next().unwrap();
        let add_inst = canonical_start + 2; // JUMPDEST, PUSH1, ADD
        assert!(
            bytecode.const_output(add_inst).is_none(),
            "canonical ADD should have no const_output after dedup (was stale)",
        );
    }
}