Skip to main content

revmc_codegen/bytecode/
opcode.rs

1use crate::{OpcodeInfo, op_info_map};
2use revm_bytecode::{opcode as op, opcode::OPCODE_INFO};
3use revm_primitives::hardfork::SpecId;
4use std::{fmt, slice};
5
6/// A bytecode iterator that yields opcodes and their immediate data, alongside the program counter.
7///
8/// Created by calling [`OpcodesIter::with_pc`].
9#[derive(Debug)]
10pub struct OpcodesIterWithPc<'a> {
11    iter: OpcodesIter<'a>,
12    pc: usize,
13}
14
15impl OpcodesIterWithPc<'_> {
16    /// Rewinds the iterator by `n` bytes so that the next call to [`Iterator::next`] re-yields
17    /// them as fresh opcodes.
18    ///
19    /// # Safety
20    ///
21    /// `n` must not exceed the number of bytes already consumed.
22    pub(crate) unsafe fn rewind(&mut self, n: usize) {
23        // SAFETY: the caller guarantees n ≤ bytes consumed, so `remaining` was originally
24        // part of a larger contiguous slice that started at least `n` bytes earlier.
25        unsafe { self.iter.rewind(n) };
26        self.pc -= n;
27    }
28}
29
30impl<'a> Iterator for OpcodesIterWithPc<'a> {
31    type Item = (usize, Opcode<'a>);
32
33    #[inline]
34    fn next(&mut self) -> Option<Self::Item> {
35        self.iter.next().map(|opcode| {
36            let pc = self.pc;
37            self.pc += 1;
38            if let Some(imm) = opcode.immediate {
39                self.pc += imm.len();
40            }
41            (pc, opcode)
42        })
43    }
44
45    #[inline]
46    fn size_hint(&self) -> (usize, Option<usize>) {
47        self.iter.size_hint()
48    }
49}
50
51impl std::iter::FusedIterator for OpcodesIterWithPc<'_> {}
52
53/// An iterator that yields opcodes and their immediate data.
54///
55/// If the bytecode is not well-formed, the iterator will still yield opcodes, but the immediate
56/// data may be incorrect. For example, if the bytecode is `PUSH2 0x69`, the iterator will yield
57/// `PUSH2, None`.
58#[derive(Clone, Debug)]
59pub struct OpcodesIter<'a> {
60    iter: slice::Iter<'a, u8>,
61    info: &'static [OpcodeInfo; 256],
62}
63
64impl fmt::Display for OpcodesIter<'_> {
65    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
66        for (i, op) in self.clone().enumerate() {
67            if i > 0 {
68                f.write_str(" ")?;
69            }
70            write!(f, "{op}")?;
71        }
72        Ok(())
73    }
74}
75
76impl<'a> OpcodesIter<'a> {
77    /// Create a new iterator over the given bytecode slice.
78    #[inline]
79    pub fn new(slice: &'a [u8], spec_id: SpecId) -> Self {
80        Self { iter: slice.iter(), info: op_info_map(spec_id) }
81    }
82
83    /// Returns a new iterator that also yields the program counter alongside the opcode and
84    /// immediate data.
85    #[inline]
86    pub fn with_pc(self) -> OpcodesIterWithPc<'a> {
87        OpcodesIterWithPc { iter: self, pc: 0 }
88    }
89
90    /// Returns the inner iterator.
91    #[inline]
92    pub fn inner(&self) -> &slice::Iter<'a, u8> {
93        &self.iter
94    }
95
96    /// Returns the inner iterator.
97    #[inline]
98    pub fn inner_mut(&mut self) -> &mut slice::Iter<'a, u8> {
99        &mut self.iter
100    }
101
102    /// Returns the inner iterator.
103    #[inline]
104    pub fn into_inner(self) -> slice::Iter<'a, u8> {
105        self.iter
106    }
107
108    /// Rewinds the iterator by `n` bytes so that the next call to [`Iterator::next`] re-yields
109    /// them as fresh opcodes.
110    ///
111    /// # Safety
112    ///
113    /// `n` must not exceed the number of bytes already consumed.
114    pub(crate) unsafe fn rewind(&mut self, n: usize) {
115        let inner = self.inner_mut();
116        let remaining = inner.as_slice();
117        // SAFETY: the caller guarantees n ≤ bytes consumed, so `remaining` was originally
118        // part of a larger contiguous slice that started at least `n` bytes earlier.
119        let rewound =
120            unsafe { std::slice::from_raw_parts(remaining.as_ptr().sub(n), remaining.len() + n) };
121        *inner = rewound.iter();
122    }
123}
124
125impl<'a> Iterator for OpcodesIter<'a> {
126    type Item = Opcode<'a>;
127
128    #[inline]
129    fn next(&mut self) -> Option<Self::Item> {
130        self.iter.next().map(|&opcode| {
131            let info = self.info[opcode as usize];
132            if info.is_unknown() || info.is_disabled() {
133                return Opcode { opcode, immediate: None };
134            }
135
136            let len = min_imm_len(opcode) as usize;
137            let immediate = if len > 0 {
138                let r = self.iter.as_slice().get(..len);
139                // TODO: Use `advance_by` when stable.
140                self.iter.by_ref().take(len).for_each(drop);
141                r
142            } else {
143                None
144            };
145            Opcode { opcode, immediate }
146        })
147    }
148
149    #[inline]
150    fn size_hint(&self) -> (usize, Option<usize>) {
151        let len = self.iter.len();
152        ((len != 0) as usize, Some(len))
153    }
154}
155
156impl std::iter::FusedIterator for OpcodesIter<'_> {}
157
158/// An opcode and its immediate data. Returned by [`OpcodesIter`].
159#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
160pub struct Opcode<'a> {
161    /// The opcode.
162    pub opcode: u8,
163    /// The immediate data, if any.
164    pub immediate: Option<&'a [u8]>,
165}
166
167impl fmt::Debug for Opcode<'_> {
168    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
169        fmt::Display::fmt(self, f)
170    }
171}
172
173impl fmt::Display for Opcode<'_> {
174    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
175        match OPCODE_INFO[self.opcode as usize] {
176            Some(s) => f.write_str(s.name()),
177            None => write!(f, "UNKNOWN(0x{:02x})", self.opcode),
178        }?;
179        match self.immediate {
180            Some(imm) => write!(f, " {}", revm_primitives::hex::encode_prefixed(imm)),
181            None => Ok(()),
182        }
183    }
184}
185
186/// Returns the length of the immediate data for the given opcode, or `0` if none.
187#[inline]
188pub const fn min_imm_len(op: u8) -> u8 {
189    if let Some(info) = &OPCODE_INFO[op as usize] { info.immediate_size() } else { 0 }
190}
191
192/// Returns the number of input and output stack elements of the given opcode.
193#[inline]
194pub const fn stack_io(op: u8) -> (u8, u8) {
195    if let Some(info) = &OPCODE_INFO[op as usize] {
196        (info.inputs(), info.outputs())
197    } else {
198        (0, 0)
199    }
200}
201
202/// Returns the real `(inputs, outputs)` stack I/O for an opcode, decoding the immediate for
203/// `DUPN`, `SWAPN`, and `EXCHANGE` whose opcode-table entries are placeholders.
204pub(crate) fn compute_stack_io(op: u8, immediate: Option<&[u8]>) -> (u8, u8) {
205    let imm_u8 = || immediate.map_or(0, |b| b[0]);
206    match op {
207        op::DUPN => decode_single(imm_u8()).map(|n| (n, n + 1)),
208        op::SWAPN => decode_single(imm_u8()).map(|n| (n + 1, n + 1)),
209        op::EXCHANGE => decode_pair(imm_u8()).map(|(_n, m)| (m + 1, m + 1)),
210        _ => return stack_io(op),
211    }
212    .unwrap_or((0, 0)) // Invalid immediate.
213}
214
215/// Decodes a DUPN/SWAPN immediate byte into a stack index.
216///
217/// Returns `None` if the immediate is in the invalid range `[91, 127]`.
218pub fn decode_single(x: u8) -> Option<u8> {
219    if x <= 90 || x >= 128 { Some(x.wrapping_add(145)) } else { None }
220}
221
222/// Encodes a stack index into a DUPN/SWAPN immediate byte.
223///
224/// Returns `None` if the value is outside the valid range.
225pub fn encode_single(n: u8) -> Option<u8> {
226    let x = n.wrapping_sub(145);
227    if decode_single(x) == Some(n) { Some(x) } else { None }
228}
229
230/// Decodes an EXCHANGE immediate byte into a pair of stack indices `(n, m)`.
231///
232/// Returns `None` if the value is outside the valid range.
233pub fn decode_pair(x: u8) -> Option<(u8, u8)> {
234    if x > 81 && x < 128 {
235        return None;
236    }
237    let k = (x ^ 143) as u16;
238    let q = (k / 16) as u8;
239    let r = (k % 16) as u8;
240    if q < r { Some((q + 1, r + 1)) } else { Some((r + 1, 29 - q)) }
241}
242
243/// Encodes a pair of stack indices into an EXCHANGE immediate byte.
244///
245/// Returns `None` if the pair cannot be encoded.
246pub fn encode_pair(n: u8, m: u8) -> Option<u8> {
247    if n == 0 || m == 0 {
248        return None;
249    }
250    // Try both (q,r) orderings and check round-trip.
251    let try_k = |q: u8, r: u8| -> Option<u8> {
252        if q >= 16 || r >= 16 {
253            return None;
254        }
255        let k = q as u16 * 16 + r as u16;
256        let x = (k as u8) ^ 143;
257        if decode_pair(x) == Some((n, m)) { Some(x) } else { None }
258    };
259
260    // Case 1: q < r → n = q+1, m = r+1.
261    if n < m {
262        try_k(n - 1, m - 1)
263    } else {
264        // Case 2: q >= r → n = r+1, m = 29-q.
265        if let Some(q) = 29u8.checked_sub(m) { try_k(q, n - 1) } else { None }
266    }
267}
268
269/// Returns a string representation of the given bytecode.
270pub fn format_bytecode(bytecode: &[u8], spec_id: SpecId) -> String {
271    let mut w = String::new();
272    format_bytecode_to(bytecode, spec_id, &mut w).unwrap();
273    w
274}
275
276/// Formats an EVM bytecode to the given writer.
277pub fn format_bytecode_to<W: fmt::Write + ?Sized>(
278    bytecode: &[u8],
279    spec_id: SpecId,
280    w: &mut W,
281) -> fmt::Result {
282    write!(w, "{}", OpcodesIter::new(bytecode, spec_id))
283}
284
285#[cfg(test)]
286mod tests {
287    use super::*;
288    use revm_bytecode::opcode as op;
289
290    const DEF_SPEC: SpecId = SpecId::ARROW_GLACIER;
291
292    #[test]
293    fn iter_basic() {
294        let bytecode = [0x01, 0x02, 0x03, 0x04, 0x05];
295        let mut iter = OpcodesIter::new(&bytecode, DEF_SPEC);
296
297        assert_eq!(iter.next(), Some(Opcode { opcode: 0x01, immediate: None }));
298        assert_eq!(iter.next(), Some(Opcode { opcode: 0x02, immediate: None }));
299        assert_eq!(iter.next(), Some(Opcode { opcode: 0x03, immediate: None }));
300        assert_eq!(iter.next(), Some(Opcode { opcode: 0x04, immediate: None }));
301        assert_eq!(iter.next(), Some(Opcode { opcode: 0x05, immediate: None }));
302        assert_eq!(iter.next(), None);
303    }
304
305    #[test]
306    fn iter_with_imm() {
307        let bytecode = [op::PUSH0, op::PUSH1, 0x69, op::PUSH2, 0x01, 0x02];
308        let mut iter = OpcodesIter::new(&bytecode, DEF_SPEC);
309
310        assert_eq!(iter.next(), Some(Opcode { opcode: op::PUSH0, immediate: None }));
311        assert_eq!(iter.next(), Some(Opcode { opcode: op::PUSH1, immediate: Some(&[0x69]) }));
312        assert_eq!(iter.next(), Some(Opcode { opcode: op::PUSH2, immediate: Some(&[0x01, 0x02]) }));
313        assert_eq!(iter.next(), None);
314    }
315
316    #[test]
317    fn iter_with_imm_too_short() {
318        let bytecode = [op::PUSH2, 0x69];
319        let mut iter = OpcodesIter::new(&bytecode, DEF_SPEC);
320
321        assert_eq!(iter.next(), Some(Opcode { opcode: op::PUSH2, immediate: None }));
322        assert_eq!(iter.next(), None);
323    }
324
325    #[test]
326    fn display() {
327        let bytecode = [op::PUSH0, op::PUSH1, 0x69, op::PUSH2, 0x01, 0x02];
328        let s = format_bytecode(&bytecode, DEF_SPEC);
329        assert_eq!(s, "PUSH0 PUSH1 0x69 PUSH2 0x0102");
330    }
331
332    #[test]
333    fn encode_decode_single_roundtrip() {
334        for x in 0..=255u8 {
335            if let Some(n) = decode_single(x) {
336                assert_eq!(encode_single(n), Some(x), "roundtrip failed for x={x}, n={n}");
337            }
338        }
339        for n in 0..=255u8 {
340            if let Some(x) = encode_single(n) {
341                assert_eq!(decode_single(x), Some(n), "roundtrip failed for n={n}, x={x}");
342            }
343        }
344        // Out-of-range: values in [236,255] ∪ [0,16] have no valid encoding.
345        assert_eq!(encode_single(0), None);
346        assert_eq!(encode_single(16), None);
347        assert_eq!(encode_single(236), None);
348        // Invalid raw bytes.
349        assert_eq!(decode_single(91), None);
350        assert_eq!(decode_single(127), None);
351    }
352
353    #[test]
354    fn encode_decode_pair_roundtrip() {
355        // encode → decode is always exact.
356        for n in 1..=30u8 {
357            for m in 1..=30u8 {
358                if let Some(x) = encode_pair(n, m) {
359                    assert_eq!(
360                        decode_pair(x),
361                        Some((n, m)),
362                        "roundtrip failed for (n,m)=({n},{m}), x={x}"
363                    );
364                }
365            }
366        }
367        // decode → encode: some pairs have multiple encodings, so the raw byte may differ,
368        // but decoding the result must produce the same pair.
369        // Some decoded pairs may have values too large to re-encode.
370        for x in 0..=255u8 {
371            if let Some((n, m)) = decode_pair(x)
372                && let Some(re) = encode_pair(n, m)
373            {
374                assert_eq!(
375                    decode_pair(re),
376                    Some((n, m)),
377                    "decode roundtrip failed for x={x}, re={re}, (n,m)=({n},{m})"
378                );
379            }
380        }
381        // Zero indices are invalid.
382        assert_eq!(encode_pair(0, 1), None);
383        assert_eq!(encode_pair(1, 0), None);
384        // Invalid raw bytes: [82, 127].
385        assert_eq!(decode_pair(82), None);
386        assert_eq!(decode_pair(127), None);
387        // Edge: 80 and 81 are now valid.
388        assert!(decode_pair(80).is_some());
389        assert!(decode_pair(81).is_some());
390    }
391}