diff --git a/hashes/zkevm-keccak/src/keccak_circuit.rs b/hashes/zkevm-keccak/src/keccak_circuit.rs
new file mode 100644
index 00000000..524e8bf9
--- /dev/null
+++ b/hashes/zkevm-keccak/src/keccak_circuit.rs
@@ -0,0 +1,858 @@
+mod cell_manager;
+pub mod keccak_packed_multi;
+mod keccak_table;
+mod param;
+mod table;
+#[cfg(test)]
+mod test;
+pub mod util;
+
+use keccak_table::KeccakTable;
+use std::marker::PhantomData;
+
+use self::{cell_manager::*, keccak_packed_multi::*, param::*, table::*};
+use param::{
+    CHI_BASE_LOOKUP_TABLE, NUM_BYTES_PER_WORD, NUM_ROUNDS, NUM_WORDS_TO_ABSORB,
+    NUM_WORDS_TO_SQUEEZE, RHO_MATRIX,
+};
+use util::{
+    constraint_builder::BaseConstraintBuilder,
+    eth_types::Field,
+    expression::{and, not, select, Expr},
+    get_absorb_positions, get_num_bits_per_lookup, rotate, scatter, target_part_sizes,
+};
+
+use crate::halo2_proofs::{
+    circuit::{Layouter, Region, Value},
+    plonk::{
+        Advice, Challenge, Column, ConstraintSystem, Error, Expression, Fixed, TableColumn,
+        VirtualCells,
+    },
+    poly::Rotation,
+};
+use halo2_base::halo2_proofs::{circuit::AssignedCell, plonk::Assigned};
+use itertools::Itertools;
+use log::info;
+
+/// Cell type returned by `assign_advice_custom`.
+///
+/// With the `halo2-axiom` feature enabled, the assigned cell carries a borrowed
+/// `&'v Assigned<F>` value.
+#[cfg(feature = "halo2-axiom")]
+type KeccakAssignedValue<'v, F> = AssignedCell<&'v Assigned<F>, F>;
+/// Cell type returned by `assign_advice_custom`.
+///
+/// Without the `halo2-axiom` feature, the assigned cell carries an owned `F`;
+/// the `'v` parameter is kept only so both variants share one signature.
+#[cfg(not(feature = "halo2-axiom"))]
+type KeccakAssignedValue<'v, F> = AssignedCell<F, F>;
+
+/// Assigns `value` to the advice cell at (`column`, `offset`) of `region` and
+/// returns the assigned cell.
+///
+/// The call is dispatched on the enabled halo2 backend feature:
+/// - `halo2-axiom`: calls `region.assign_advice(column, offset, value)` as is.
+/// - `halo2-pse`: additionally passes an annotation of the form
+///   `"assign advice {offset}"` and unwraps the returned `Result`.
+///
+/// # Panics
+///
+/// With `halo2-pse`, panics if the underlying assignment returns an error.
+pub fn assign_advice_custom<'v, F: Field>(
+    region: &mut Region<F>,
+    column: Column<Advice>,
+    offset: usize,
+    value: Value<F>,
+) -> KeccakAssignedValue<'v, F> {
+    #[cfg(feature = "halo2-axiom")]
+    let assigned = region.assign_advice(column, offset, value);
+
+    #[cfg(feature = "halo2-pse")]
+    let assigned = {
+        // The PSE backend wants a lazily-built annotation and a value closure.
+        let annotation = || format!("assign advice {}", offset);
+        let outcome = region.assign_advice(annotation, column, offset, || value);
+        outcome.unwrap()
+    };
+
+    assigned
+}
+
+/// Assigns the constant `value` to the fixed cell at (`column`, `offset`) of
+/// `region`. Nothing is returned to the caller.
+///
+/// The call is dispatched on the enabled halo2 backend feature:
+/// - `halo2-axiom`: calls `region.assign_fixed(column, offset, value)` as is.
+/// - `halo2-pse`: additionally passes an annotation of the form
+///   `"assign fixed {offset}"`, wraps the value in `Value::known`, and unwraps
+///   the returned `Result`.
+///
+/// # Panics
+///
+/// With `halo2-pse`, panics if the underlying assignment returns an error.
+pub fn assign_fixed_custom<F: Field>(
+    region: &mut Region<F>,
+    column: Column<Fixed>,
+    offset: usize,
+    value: F,
+) {
+    #[cfg(feature = "halo2-axiom")]
+    region.assign_fixed(column, offset, value);
+
+    #[cfg(feature = "halo2-pse")]
+    {
+        // The PSE backend wants a lazily-built annotation and a value closure.
+        let annotation = || format!("assign fixed {}", offset);
+        let outcome = region.assign_fixed(annotation, column, offset, || Value::known(value));
+        outcome.unwrap();
+    }
+}
+
+/// Columns, lookup tables and cell layout that make up the Keccak circuit.
+///
+/// The per-field notes describe how `KeccakCircuitConfig::new` uses the local
+/// of the same name while building the gates and lookups.
+// NOTE(review): the struct literal at the end of `new` is outside the reviewed
+// range; these notes assume each field is filled from the same-named local —
+// confirm.
+#[derive(Clone, Debug)]
+pub struct KeccakCircuitConfig<F> {
+    /// Challenge queried when building the input-data RLC and the hash RLC.
+    challenge: Challenge,
+    /// Enables the "input checks" gate (boolean `is_final`) and the boolean
+    /// checks on the padding flags.
+    q_enable: Column<Fixed>,
+    // q_enable_row: Column<Fixed>,
+    /// Enables the "first row" gate, which forces `is_final` to zero.
+    q_first: Column<Fixed>,
+    /// Enables the "round" gate holding the per-round constraints.
+    q_round: Column<Fixed>,
+    /// Enables the "absorb" gate.
+    q_absorb: Column<Fixed>,
+    /// Enables the "squeeze" gate, including the hash RLC check.
+    q_round_last: Column<Fixed>,
+    /// Conditions the padding transition checks and the data RLC update.
+    q_padding: Column<Fixed>,
+    /// Selects the last padding row, where the padding start/end byte is checked.
+    q_padding_last: Column<Fixed>,
+
+    /// Keccak table; `new` reads its `is_enabled` (used as `is_final`),
+    /// `input_rlc` and `output_rlc` columns.
+    pub keccak_table: KeccakTable,
+
+    /// Cell allocator, created in `new` with `get_num_rows_per_round()`.
+    cell_manager: CellManager<F>,
+    /// Fixed column holding the round constant added to `s[0][0]` in the iota step.
+    round_cst: Column<Fixed>,
+    /// Lookup table pair used by the "absorb" and "iota" transforms.
+    normalize_3: [TableColumn; 2],
+    /// Lookup table pair used by the "rho/pi" transform; its first column also
+    /// backs the pi part range checks.
+    normalize_4: [TableColumn; 2],
+    /// Lookup table pair used by the "theta c" transform.
+    normalize_6: [TableColumn; 2],
+    /// Lookup table pair (input, output) used by the "chi base" lookups.
+    chi_base_table: [TableColumn; 2],
+    /// Lookup table pair used, with its columns reversed, to unpack words into bytes.
+    pack_table: [TableColumn; 2],
+    /// Marker for the field type `F`.
+    _marker: PhantomData<F>,
+}
+
+impl<F: Field> KeccakCircuitConfig<F> {
+    /// Returns the `challenge` stored in this config.
+    pub fn challenge(&self) -> Challenge {
+        self.challenge
+    }
+    /// Return a new KeccakCircuitConfig
+    pub fn new(meta: &mut ConstraintSystem<F>, challenge: Challenge) -> Self {
+        let q_enable = meta.fixed_column();
+        // let q_enable_row = meta.fixed_column();
+        let q_first = meta.fixed_column();
+        let q_round = meta.fixed_column();
+        let q_absorb = meta.fixed_column();
+        let q_round_last = meta.fixed_column();
+        let q_padding = meta.fixed_column();
+        let q_padding_last = meta.fixed_column();
+        let round_cst = meta.fixed_column();
+        let keccak_table = KeccakTable::construct(meta);
+
+        let is_final = keccak_table.is_enabled;
+        // let length = keccak_table.input_len;
+        let data_rlc = keccak_table.input_rlc;
+        let hash_rlc = keccak_table.output_rlc;
+
+        let normalize_3 = array_init::array_init(|_| meta.lookup_table_column());
+        let normalize_4 = array_init::array_init(|_| meta.lookup_table_column());
+        let normalize_6 = array_init::array_init(|_| meta.lookup_table_column());
+        let chi_base_table = array_init::array_init(|_| meta.lookup_table_column());
+        let pack_table = array_init::array_init(|_| meta.lookup_table_column());
+
+        let num_rows_per_round = get_num_rows_per_round();
+        let mut cell_manager = CellManager::new(get_num_rows_per_round());
+        let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
+        let mut total_lookup_counter = 0;
+
+        let start_new_hash = |meta: &mut VirtualCells<F>, rot| {
+            // A new hash is started when the previous hash is done or on the first row
+            meta.query_fixed(q_first, rot) + meta.query_advice(is_final, rot)
+        };
+
+        // Round constant
+        let mut round_cst_expr = 0.expr();
+        meta.create_gate("Query round cst", |meta| {
+            round_cst_expr = meta.query_fixed(round_cst, Rotation::cur());
+            vec![0u64.expr()]
+        });
+        // State data
+        let mut s = vec![vec![0u64.expr(); 5]; 5];
+        let mut s_next = vec![vec![0u64.expr(); 5]; 5];
+        for i in 0..5 {
+            for j in 0..5 {
+                let cell = cell_manager.query_cell(meta);
+                s[i][j] = cell.expr();
+                s_next[i][j] = cell.at_offset(meta, num_rows_per_round as i32).expr();
+            }
+        }
+        // Absorb data
+        let absorb_from = cell_manager.query_cell(meta);
+        let absorb_data = cell_manager.query_cell(meta);
+        let absorb_result = cell_manager.query_cell(meta);
+        let mut absorb_from_next = vec![0u64.expr(); NUM_WORDS_TO_ABSORB];
+        let mut absorb_data_next = vec![0u64.expr(); NUM_WORDS_TO_ABSORB];
+        let mut absorb_result_next = vec![0u64.expr(); NUM_WORDS_TO_ABSORB];
+        for i in 0..NUM_WORDS_TO_ABSORB {
+            let rot = ((i + 1) * num_rows_per_round) as i32;
+            absorb_from_next[i] = absorb_from.at_offset(meta, rot).expr();
+            absorb_data_next[i] = absorb_data.at_offset(meta, rot).expr();
+            absorb_result_next[i] = absorb_result.at_offset(meta, rot).expr();
+        }
+
+        // Store the pre-state
+        let pre_s = s.clone();
+
+        // Absorb
+        // The absorption happening at the start of the 24 rounds is done spread out
+        // over those 24 rounds. In a single round (in 17 of the 24 rounds) a
+        // single word is absorbed so the work is spread out. The absorption is
+        // done simply by doing state + data and then normalizing the result to [0,1].
+        // We also need to convert the input data into bytes to calculate the input data
+        // rlc.
+        cell_manager.start_region();
+        let mut lookup_counter = 0;
+        let part_size = get_num_bits_per_absorb_lookup();
+        let input = absorb_from.expr() + absorb_data.expr();
+        let absorb_fat =
+            split::expr(meta, &mut cell_manager, &mut cb, input, 0, part_size, false, None);
+        cell_manager.start_region();
+        let absorb_res = transform::expr(
+            "absorb",
+            meta,
+            &mut cell_manager,
+            &mut lookup_counter,
+            absorb_fat,
+            normalize_3,
+            true,
+        );
+        cb.require_equal("absorb result", decode::expr(absorb_res), absorb_result.expr());
+        info!("- Post absorb:");
+        info!("Lookups: {}", lookup_counter);
+        info!("Columns: {}", cell_manager.get_width());
+        total_lookup_counter += lookup_counter;
+
+        // Squeeze
+        // The squeezing happening at the end of the 24 rounds is done spread out
+        // over those 24 rounds. In a single round (in 4 of the 24 rounds) a
+        // single word is converted to bytes.
+        cell_manager.start_region();
+        let mut lookup_counter = 0;
+        // Potential optimization: could do multiple bytes per lookup
+        let packed_parts =
+            split::expr(meta, &mut cell_manager, &mut cb, absorb_data.expr(), 0, 8, false, None);
+        cell_manager.start_region();
+        // input_bytes.len() = packed_parts.len() = 64 / 8 = 8 = NUM_BYTES_PER_WORD
+        let input_bytes = transform::expr(
+            "squeeze unpack",
+            meta,
+            &mut cell_manager,
+            &mut lookup_counter,
+            packed_parts,
+            pack_table.into_iter().rev().collect::<Vec<_>>().try_into().unwrap(),
+            true,
+        );
+        debug_assert_eq!(input_bytes.len(), NUM_BYTES_PER_WORD);
+
+        // Padding data
+        cell_manager.start_region();
+        let is_paddings = input_bytes.iter().map(|_| cell_manager.query_cell(meta)).collect_vec();
+        info!("- Post padding:");
+        info!("Lookups: {}", lookup_counter);
+        info!("Columns: {}", cell_manager.get_width());
+        total_lookup_counter += lookup_counter;
+
+        // Theta
+        // Calculate
+        // - `c[i] = s[i][0] + s[i][1] + s[i][2] + s[i][3] + s[i][4]`
+        // - `bc[i] = normalize(c)`.
+        // - `t[i] = bc[(i + 4) % 5] + rot(bc[(i + 1)% 5], 1)`
+        // This is done by splitting the bc values in parts in a way
+        // that allows us to also calculate the rotated value "for free".
+        cell_manager.start_region();
+        let mut lookup_counter = 0;
+        let part_size_c = get_num_bits_per_theta_c_lookup();
+        let mut c_parts = Vec::new();
+        for s in s.iter() {
+            // Calculate c and split into parts
+            let c = s[0].clone() + s[1].clone() + s[2].clone() + s[3].clone() + s[4].clone();
+            c_parts.push(split::expr(
+                meta,
+                &mut cell_manager,
+                &mut cb,
+                c,
+                1,
+                part_size_c,
+                false,
+                None,
+            ));
+        }
+        // Now calculate `bc` by normalizing `c`
+        cell_manager.start_region();
+        let mut bc = Vec::new();
+        for c in c_parts {
+            // Normalize c
+            bc.push(transform::expr(
+                "theta c",
+                meta,
+                &mut cell_manager,
+                &mut lookup_counter,
+                c,
+                normalize_6,
+                true,
+            ));
+        }
+        // Now do `bc[(i + 4) % 5] + rot(bc[(i + 1) % 5], 1)` using just expressions.
+        // We don't normalize the result here. We do it as part of the rho/pi step, even
+        // though we would only have to normalize 5 values instead of 25, because of the
+        // way the rho/pi and chi steps can be combined it's more efficient to
+        // do it there (the max value for chi is 4 already so that's the
+        // limiting factor).
+        let mut os = vec![vec![0u64.expr(); 5]; 5];
+        for i in 0..5 {
+            let t = decode::expr(bc[(i + 4) % 5].clone())
+                + decode::expr(rotate(bc[(i + 1) % 5].clone(), 1, part_size_c));
+            for j in 0..5 {
+                os[i][j] = s[i][j].clone() + t.clone();
+            }
+        }
+        s = os.clone();
+        info!("- Post theta:");
+        info!("Lookups: {}", lookup_counter);
+        info!("Columns: {}", cell_manager.get_width());
+        total_lookup_counter += lookup_counter;
+
+        // Rho/Pi
+        // For the rotation of rho/pi we split up the words like expected, but in a way
+        // that allows reusing the same parts in an optimal way for the chi step.
+        // We can save quite a few columns by not recombining the parts after rho/pi and
+        // re-splitting the words again before chi. Instead we do chi directly
+        // on the output parts of rho/pi. For rho/pi specifically we do
+        // `s[j][(2 * i + 3 * j) % 5] = normalize(rot(s[i][j], RHOM[i][j]))`.
+        cell_manager.start_region();
+        let mut lookup_counter = 0;
+        let part_size = get_num_bits_per_base_chi_lookup();
+        // To combine the rho/pi/chi steps we have to ensure a specific layout so
+        // query those cells here first.
+        // For chi we have to do `s[i][j] ^ ((~s[(i+1)%5][j]) & s[(i+2)%5][j])`. `j`
+        // remains static but `i` is accessed in a wrap around manner. To do this using
+        // multiple rows with lookups in a way that doesn't require any
+        // extra additional cells or selectors we have to put all `s[i]`'s on the same
+        // row. This isn't that strong of a requirement actually because the
+        // words are split into multiple parts, and so only the parts at the same
+        // position of those words need to be on the same row.
+        let target_word_sizes = target_part_sizes(part_size);
+        let num_word_parts = target_word_sizes.len();
+        let mut rho_pi_chi_cells: [[[Vec<Cell<F>>; 5]; 5]; 3] = array_init::array_init(|_| {
+            array_init::array_init(|_| array_init::array_init(|_| Vec::new()))
+        });
+        let mut num_columns = 0;
+        let mut column_starts = [0usize; 3];
+        for p in 0..3 {
+            column_starts[p] = cell_manager.start_region();
+            let mut row_idx = 0;
+            num_columns = 0;
+            for j in 0..5 {
+                for _ in 0..num_word_parts {
+                    for i in 0..5 {
+                        rho_pi_chi_cells[p][i][j]
+                            .push(cell_manager.query_cell_at_row(meta, row_idx));
+                    }
+                    if row_idx == 0 {
+                        num_columns += 1;
+                    }
+                    row_idx = (((row_idx as usize) + 1) % num_rows_per_round) as i32;
+                }
+            }
+        }
+        // Do the transformation, resulting in the word parts also being normalized.
+        let pi_region_start = cell_manager.start_region();
+        let mut os_parts = vec![vec![Vec::new(); 5]; 5];
+        for (j, os_part) in os_parts.iter_mut().enumerate() {
+            for i in 0..5 {
+                // Split s into parts
+                let s_parts = split_uniform::expr(
+                    meta,
+                    &rho_pi_chi_cells[0][j][(2 * i + 3 * j) % 5],
+                    &mut cell_manager,
+                    &mut cb,
+                    s[i][j].clone(),
+                    RHO_MATRIX[i][j],
+                    part_size,
+                    true,
+                );
+                // Normalize the data to the target cells
+                let s_parts = transform_to::expr(
+                    "rho/pi",
+                    meta,
+                    &rho_pi_chi_cells[1][j][(2 * i + 3 * j) % 5],
+                    &mut lookup_counter,
+                    s_parts.clone(),
+                    normalize_4,
+                    true,
+                );
+                os_part[(2 * i + 3 * j) % 5] = s_parts.clone();
+            }
+        }
+        let pi_region_end = cell_manager.start_region();
+        // Pi parts range checks
+        // To make the uniform stuff work we had to combine some parts together
+        // in new cells (see split_uniform). Here we make sure those parts are range
+        // checked. Potential improvement: Could combine multiple smaller parts
+        // in a single lookup but doesn't save that much.
+        for c in pi_region_start..pi_region_end {
+            meta.lookup("pi part range check", |_| {
+                vec![(cell_manager.columns()[c].expr.clone(), normalize_4[0])]
+            });
+            lookup_counter += 1;
+        }
+        info!("- Post rho/pi:");
+        info!("Lookups: {}", lookup_counter);
+        info!("Columns: {}", cell_manager.get_width());
+        total_lookup_counter += lookup_counter;
+
+        // Chi
+        // In groups of 5 columns, we have to do `s[i][j] ^ ((~s[(i+1)%5][j]) &
+        // s[(i+2)%5][j])` five times, on each row (no selector needed).
+        // This is calculated by making use of `CHI_BASE_LOOKUP_TABLE`.
+        let mut lookup_counter = 0;
+        let part_size_base = get_num_bits_per_base_chi_lookup();
+        for idx in 0..num_columns {
+            // First fetch the cells we want to use
+            let mut input: [Expression<F>; 5] = array_init::array_init(|_| 0.expr());
+            let mut output: [Expression<F>; 5] = array_init::array_init(|_| 0.expr());
+            for c in 0..5 {
+                input[c] = cell_manager.columns()[column_starts[1] + idx * 5 + c].expr.clone();
+                output[c] = cell_manager.columns()[column_starts[2] + idx * 5 + c].expr.clone();
+            }
+            // Now calculate `a ^ ((~b) & c)` by doing `lookup[3 - 2*a + b - c]`
+            for i in 0..5 {
+                let input = scatter::expr(3, part_size_base) - 2.expr() * input[i].clone()
+                    + input[(i + 1) % 5].clone()
+                    - input[(i + 2) % 5].clone().clone();
+                let output = output[i].clone();
+                meta.lookup("chi base", |_| {
+                    vec![(input.clone(), chi_base_table[0]), (output.clone(), chi_base_table[1])]
+                });
+                lookup_counter += 1;
+            }
+        }
+        // Now just decode the parts after the chi transformation done with the lookups
+        // above.
+        let mut os = vec![vec![0u64.expr(); 5]; 5];
+        for (i, os) in os.iter_mut().enumerate() {
+            for (j, os) in os.iter_mut().enumerate() {
+                let mut parts = Vec::new();
+                for idx in 0..num_word_parts {
+                    parts.push(Part {
+                        num_bits: part_size_base,
+                        cell: rho_pi_chi_cells[2][i][j][idx].clone(),
+                        expr: rho_pi_chi_cells[2][i][j][idx].expr(),
+                    });
+                }
+                *os = decode::expr(parts);
+            }
+        }
+        s = os.clone();
+
+        // iota
+        // Simply do the single xor on state [0][0].
+        cell_manager.start_region();
+        let part_size = get_num_bits_per_absorb_lookup();
+        let input = s[0][0].clone() + round_cst_expr.clone();
+        let iota_parts =
+            split::expr(meta, &mut cell_manager, &mut cb, input, 0, part_size, false, None);
+        cell_manager.start_region();
+        // Could share columns with absorb which may end up using 1 lookup/column
+        // fewer...
+        s[0][0] = decode::expr(transform::expr(
+            "iota",
+            meta,
+            &mut cell_manager,
+            &mut lookup_counter,
+            iota_parts,
+            normalize_3,
+            true,
+        ));
+        // Final results stored in the next row
+        for i in 0..5 {
+            for j in 0..5 {
+                cb.require_equal("next row check", s[i][j].clone(), s_next[i][j].clone());
+            }
+        }
+        info!("- Post chi:");
+        info!("Lookups: {}", lookup_counter);
+        info!("Columns: {}", cell_manager.get_width());
+        total_lookup_counter += lookup_counter;
+
+        let mut lookup_counter = 0;
+        cell_manager.start_region();
+
+        // Squeeze data
+        let squeeze_from = cell_manager.query_cell(meta);
+        let mut squeeze_from_prev = vec![0u64.expr(); NUM_WORDS_TO_SQUEEZE];
+        for (idx, squeeze_from_prev) in squeeze_from_prev.iter_mut().enumerate() {
+            let rot = (-(idx as i32) - 1) * num_rows_per_round as i32;
+            *squeeze_from_prev = squeeze_from.at_offset(meta, rot).expr();
+        }
+        // Squeeze
+        // The squeeze happening at the end of the 24 rounds is done spread out
+        // over those 24 rounds. In a single round (in 4 of the 24 rounds) a
+        // single word is converted to bytes.
+        // Potential optimization: could do multiple bytes per lookup
+        cell_manager.start_region();
+        // Unpack a single word into bytes (for the squeeze)
+        // Potential optimization: could do multiple bytes per lookup
+        let squeeze_from_parts =
+            split::expr(meta, &mut cell_manager, &mut cb, squeeze_from.expr(), 0, 8, false, None);
+        cell_manager.start_region();
+        let squeeze_bytes = transform::expr(
+            "squeeze unpack",
+            meta,
+            &mut cell_manager,
+            &mut lookup_counter,
+            squeeze_from_parts,
+            pack_table.into_iter().rev().collect::<Vec<_>>().try_into().unwrap(),
+            true,
+        );
+        info!("- Post squeeze:");
+        info!("Lookups: {}", lookup_counter);
+        info!("Columns: {}", cell_manager.get_width());
+        total_lookup_counter += lookup_counter;
+
+        // The round constraints that we've been building up till now
+        meta.create_gate("round", |meta| cb.gate(meta.query_fixed(q_round, Rotation::cur())));
+
+        // Absorb
+        meta.create_gate("absorb", |meta| {
+            let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
+            let continue_hash = not::expr(start_new_hash(meta, Rotation::cur()));
+            let absorb_positions = get_absorb_positions();
+            let mut a_slice = 0;
+            for j in 0..5 {
+                for i in 0..5 {
+                    if absorb_positions.contains(&(i, j)) {
+                        cb.condition(continue_hash.clone(), |cb| {
+                            cb.require_equal(
+                                "absorb verify input",
+                                absorb_from_next[a_slice].clone(),
+                                pre_s[i][j].clone(),
+                            );
+                        });
+                        cb.require_equal(
+                            "absorb result copy",
+                            select::expr(
+                                continue_hash.clone(),
+                                absorb_result_next[a_slice].clone(),
+                                absorb_data_next[a_slice].clone(),
+                            ),
+                            s_next[i][j].clone(),
+                        );
+                        a_slice += 1;
+                    } else {
+                        cb.require_equal(
+                            "absorb state copy",
+                            pre_s[i][j].clone() * continue_hash.clone(),
+                            s_next[i][j].clone(),
+                        );
+                    }
+                }
+            }
+            cb.gate(meta.query_fixed(q_absorb, Rotation::cur()))
+        });
+
+        // Collect the bytes that are spread out over previous rows
+        let mut hash_bytes = Vec::new();
+        for i in 0..NUM_WORDS_TO_SQUEEZE {
+            for byte in squeeze_bytes.iter() {
+                let rot = (-(i as i32) - 1) * num_rows_per_round as i32;
+                hash_bytes.push(byte.cell.at_offset(meta, rot).expr());
+            }
+        }
+
+        // Squeeze
+        meta.create_gate("squeeze", |meta| {
+            let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
+            let start_new_hash = start_new_hash(meta, Rotation::cur());
+            // The words to squeeze
+            let hash_words: Vec<_> =
+                pre_s.into_iter().take(4).map(|a| a[0].clone()).take(4).collect();
+            // Verify if we converted the correct words to bytes on previous rows
+            for (idx, word) in hash_words.iter().enumerate() {
+                cb.condition(start_new_hash.clone(), |cb| {
+                    cb.require_equal(
+                        "squeeze verify packed",
+                        word.clone(),
+                        squeeze_from_prev[idx].clone(),
+                    );
+                });
+            }
+
+            let challenge_expr = meta.query_challenge(challenge);
+            let rlc =
+                hash_bytes.into_iter().reduce(|rlc, x| rlc * challenge_expr.clone() + x).unwrap();
+            cb.require_equal("hash rlc check", rlc, meta.query_advice(hash_rlc, Rotation::cur()));
+            cb.gate(meta.query_fixed(q_round_last, Rotation::cur()))
+        });
+
+        // Some general input checks
+        meta.create_gate("input checks", |meta| {
+            let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
+            cb.require_boolean("boolean is_final", meta.query_advice(is_final, Rotation::cur()));
+            cb.gate(meta.query_fixed(q_enable, Rotation::cur()))
+        });
+
+        // Enforce fixed values on the first row
+        meta.create_gate("first row", |meta| {
+            let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
+            cb.require_zero(
+                "is_final needs to be disabled on the first row",
+                meta.query_advice(is_final, Rotation::cur()),
+            );
+            cb.gate(meta.query_fixed(q_first, Rotation::cur()))
+        });
+
+        // Enforce logic for when this block is the last block for a hash
+        let last_is_padding_in_block = is_paddings.last().unwrap().at_offset(
+            meta,
+            -(((NUM_ROUNDS + 1 - NUM_WORDS_TO_ABSORB) * num_rows_per_round) as i32),
+        );
+        meta.create_gate("is final", |meta| {
+            let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
+            // All absorb rows except the first row
+            cb.condition(
+                meta.query_fixed(q_absorb, Rotation::cur())
+                    - meta.query_fixed(q_first, Rotation::cur()),
+                |cb| {
+                    cb.require_equal(
+                        "is_final needs to be the same as the last is_padding in the block",
+                        meta.query_advice(is_final, Rotation::cur()),
+                        last_is_padding_in_block.expr(),
+                    );
+                },
+            );
+            // For all the rows of a round, only the first row can have `is_final == 1`.
+            cb.condition(
+                (1..num_rows_per_round as i32)
+                    .map(|i| meta.query_fixed(q_enable, Rotation(-i)))
+                    .fold(0.expr(), |acc, elem| acc + elem),
+                |cb| {
+                    cb.require_zero(
+                        "is_final only when q_enable",
+                        meta.query_advice(is_final, Rotation::cur()),
+                    );
+                },
+            );
+            cb.gate(1.expr())
+        });
+
+        // Padding
+        // May be cleaner to do this padding logic in the byte conversion lookup but
+        // currently easier to do it like this.
+        let prev_is_padding =
+            is_paddings.last().unwrap().at_offset(meta, -(num_rows_per_round as i32));
+        meta.create_gate("padding", |meta| {
+            let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
+            let q_padding = meta.query_fixed(q_padding, Rotation::cur());
+            let q_padding_last = meta.query_fixed(q_padding_last, Rotation::cur());
+
+            // All padding selectors need to be boolean
+            for is_padding in is_paddings.iter() {
+                cb.condition(meta.query_fixed(q_enable, Rotation::cur()), |cb| {
+                    cb.require_boolean("is_padding boolean", is_padding.expr());
+                });
+            }
+            // This last padding selector will be used on the first round row so needs to be
+            // zero
+            cb.condition(meta.query_fixed(q_absorb, Rotation::cur()), |cb| {
+                cb.require_zero(
+                    "last is_padding should be zero on absorb rows",
+                    is_paddings.last().unwrap().expr(),
+                );
+            });
+            // Now for each padding selector
+            for idx in 0..is_paddings.len() {
+                // Previous padding selector can be on the previous row
+                let is_padding_prev =
+                    if idx == 0 { prev_is_padding.expr() } else { is_paddings[idx - 1].expr() };
+                let is_first_padding = is_paddings[idx].expr() - is_padding_prev.clone();
+
+                // Check padding transition 0 -> 1 done only once
+                cb.condition(q_padding.expr(), |cb| {
+                    cb.require_boolean("padding step boolean", is_first_padding.clone());
+                });
+
+                // Padding start/intermediate/end byte checks
+                if idx == is_paddings.len() - 1 {
+                    // These can be combined in the future, but currently this would increase the
+                    // degree by one.
+                    // Padding start/intermediate byte, all padding rows except the last one
+                    cb.condition(
+                        and::expr([
+                            q_padding.expr() - q_padding_last.expr(),
+                            is_paddings[idx].expr(),
+                        ]),
+                        |cb| {
+                            // Input bytes need to be zero, or one if this is the first padding byte
+                            cb.require_equal(
+                                "padding start/intermediate byte last byte",
+                                input_bytes[idx].expr.clone(),
+                                is_first_padding.expr(),
+                            );
+                        },
+                    );
+                    // Padding start/end byte, only on the last padding row
+                    cb.condition(
+                        and::expr([q_padding_last.expr(), is_paddings[idx].expr()]),
+                        |cb| {
+                            // The input byte needs to be 128, unless it's also the first padding
+                            // byte then it's 129
+                            cb.require_equal(
+                                "padding start/end byte",
+                                input_bytes[idx].expr.clone(),
+                                is_first_padding.expr() + 128.expr(),
+                            );
+                        },
+                    );
+                } else {
+                    // Padding start/intermediate byte
+                    cb.condition(and::expr([q_padding.expr(), is_paddings[idx].expr()]), |cb| {
+                        // Input bytes need to be zero, or one if this is the first padding byte
+                        cb.require_equal(
+                            "padding start/intermediate byte",
+                            input_bytes[idx].expr.clone(),
+                            is_first_padding.expr(),
+                        );
+                    });
+                }
+            }
+            cb.gate(1.expr())
+        });
+
+        assert!(num_rows_per_round > NUM_BYTES_PER_WORD, "We require enough rows per round to hold the running RLC of the bytes from the one keccak word absorbed per round");
+        // TODO: there is probably a way to only require NUM_BYTES_PER_WORD instead of
+        // NUM_BYTES_PER_WORD + 1 rows per round, but for simplicity and to keep the
+        // gate degree at 3, we just do the obvious thing for now.
+        // Input data rlc
+        meta.create_gate("data rlc", |meta| {
+            let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
+
+            let q_padding = meta.query_fixed(q_padding, Rotation::cur());
+            let start_new_hash_prev = start_new_hash(meta, Rotation(-(num_rows_per_round as i32)));
+            let data_rlc_prev = meta.query_advice(data_rlc, Rotation(-(num_rows_per_round as i32)));
+
+            // Update the length/data_rlc on rows where we absorb data
+            cb.condition(q_padding.expr(), |cb| {
+                let challenge_expr = meta.query_challenge(challenge);
+                // Use intermediate cells to keep the degree low
+                let mut new_data_rlc =
+                    data_rlc_prev.clone() * not::expr(start_new_hash_prev.expr());
+                let mut data_rlcs = (0..NUM_BYTES_PER_WORD)
+                    .map(|i| meta.query_advice(data_rlc, Rotation(i as i32 + 1)));
+                let intermed_rlc = data_rlcs.next().unwrap();
+                cb.require_equal("initial data rlc", intermed_rlc.clone(), new_data_rlc);
+                new_data_rlc = intermed_rlc;
+                for (byte, is_padding) in input_bytes.iter().zip(is_paddings.iter()) {
+                    new_data_rlc = select::expr(
+                        is_padding.expr(),
+                        new_data_rlc.clone(),
+                        new_data_rlc * challenge_expr.clone() + byte.expr.clone(),
+                    );
+                    if let Some(intermed_rlc) = data_rlcs.next() {
+                        cb.require_equal(
+                            "intermediate data rlc",
+                            intermed_rlc.clone(),
+                            new_data_rlc,
+                        );
+                        new_data_rlc = intermed_rlc;
+                    }
+                }
+                cb.require_equal(
+                    "update data rlc",
+                    meta.query_advice(data_rlc, Rotation::cur()),
+                    new_data_rlc,
+                );
+            });
+            // Keep length/data_rlc the same on rows where we don't absorb data
+            cb.condition(
+                and::expr([
+                    meta.query_fixed(q_enable, Rotation::cur())
+                        - meta.query_fixed(q_first, Rotation::cur()),
+                    not::expr(q_padding),
+                ]),
+                |cb| {
+                    cb.require_equal(
+                        "data_rlc equality check",
+                        meta.query_advice(data_rlc, Rotation::cur()),
+                        data_rlc_prev.clone(),
+                    );
+                },
+            );
+            cb.gate(1.expr())
+        });
+
+        info!("Degree: {}", meta.degree());
+        info!("Minimum rows: {}", meta.minimum_rows());
+        info!("Total Lookups: {}", total_lookup_counter);
+        #[cfg(feature = "display")]
+        {
+            println!("Total Keccak Columns: {}", cell_manager.get_width());
+            std::env::set_var("KECCAK_ADVICE_COLUMNS", cell_manager.get_width().to_string());
+        }
+        #[cfg(not(feature = "display"))]
+        info!("Total Keccak Columns: {}", cell_manager.get_width());
+        info!("num unused cells: {}", cell_manager.get_num_unused_cells());
+        info!("part_size absorb: {}", get_num_bits_per_absorb_lookup());
+        info!("part_size theta: {}", get_num_bits_per_theta_c_lookup());
+        info!("part_size theta c: {}", get_num_bits_per_lookup(THETA_C_LOOKUP_RANGE));
+        info!("part_size theta t: {}", get_num_bits_per_lookup(4));
+        info!("part_size rho/pi: {}", get_num_bits_per_rho_pi_lookup());
+        info!("part_size chi base: {}", get_num_bits_per_base_chi_lookup());
+        info!("uniform part sizes: {:?}", target_part_sizes(get_num_bits_per_theta_c_lookup()));
+
+        KeccakCircuitConfig {
+            challenge,
+            q_enable,
+            // q_enable_row,
+            q_first,
+            q_round,
+            q_absorb,
+            q_round_last,
+            q_padding,
+            q_padding_last,
+            keccak_table,
+            cell_manager,
+            round_cst,
+            normalize_3,
+            normalize_4,
+            normalize_6,
+            chi_base_table,
+            pack_table,
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<F: Field> KeccakCircuitConfig<F> {
+    /// Writes every row of `witness` into `region`; the row at index `i` lands at offset `i`.
+    pub fn assign(&self, region: &mut Region<'_, F>, witness: &[KeccakRow<F>]) {
+        witness
+            .iter()
+            .enumerate()
+            .for_each(|(offset, keccak_row)| self.set_row(region, offset, keccak_row));
+    }
+
+    /// Assigns a single witness row at `offset`: the fixed selectors, the `is_enabled` column of
+    /// the keccak table (taken from `row.is_final`), the cell-manager advice columns and the
+    /// round constant.
+    pub fn set_row(&self, region: &mut Region<'_, F>, offset: usize, row: &KeccakRow<F>) {
+        // Fixed selectors. `q_first` is derived from the offset instead of being read from `row`.
+        let fixed_selectors = [
+            ("q_enable", self.q_enable, F::from(row.q_enable)),
+            ("q_first", self.q_first, F::from(offset == 0)),
+            ("q_round", self.q_round, F::from(row.q_round)),
+            ("q_round_last", self.q_round_last, F::from(row.q_round_last)),
+            ("q_absorb", self.q_absorb, F::from(row.q_absorb)),
+            ("q_padding", self.q_padding, F::from(row.q_padding)),
+            ("q_padding_last", self.q_padding_last, F::from(row.q_padding_last)),
+        ];
+        fixed_selectors.iter().for_each(|&(_, column, value)| {
+            assign_fixed_custom(region, column, offset, value);
+        });
+
+        let is_final = Value::known(F::from(row.is_final));
+        assign_advice_custom(region, self.keccak_table.is_enabled, offset, is_final);
+
+        // Cell values: one value per cell-manager column, paired positionally.
+        for (column, cell_value) in self.cell_manager.columns().iter().zip(&row.cell_values) {
+            assign_advice_custom(region, column.advice, offset, Value::known(*cell_value));
+        }
+
+        // Round constant
+        assign_fixed_custom(region, self.round_cst, offset, row.round_cst);
+    }
+
+    /// Loads the normalize, chi-base and pack lookup tables, stopping at the first error.
+    pub fn load_aux_tables(&self, layouter: &mut impl Layouter<F>) -> Result<(), Error> {
+        load_normalize_table(layouter, "normalize_6", &self.normalize_6, 6u64)?;
+        load_normalize_table(layouter, "normalize_4", &self.normalize_4, 4u64)?;
+        load_normalize_table(layouter, "normalize_3", &self.normalize_3, 3u64)?;
+
+        let chi_part_size = get_num_bits_per_base_chi_lookup();
+        load_lookup_table(
+            layouter,
+            "chi base",
+            &self.chi_base_table,
+            chi_part_size,
+            &CHI_BASE_LOOKUP_TABLE,
+        )?;
+
+        load_pack_table(layouter, &self.pack_table)?;
+        Ok(())
+    }
+}
diff --git a/hashes/zkevm-keccak/src/keccak_circuit/cell_manager.rs b/hashes/zkevm-keccak/src/keccak_circuit/cell_manager.rs
new file mode 100644
index 00000000..efc60257
--- /dev/null
+++ b/hashes/zkevm-keccak/src/keccak_circuit/cell_manager.rs
@@ -0,0 +1,200 @@
+use super::util::expression::Expr;
+use crate::halo2_proofs::{
+    arithmetic::FieldExt,
+    plonk::{Advice, Column, ConstraintSystem, Expression, VirtualCells},
+    poly::Rotation,
+};
+use crate::keccak_circuit::KeccakRegion;
+
+/// A cell of the keccak layout, identified by a column index and a row rotation.
+#[derive(Clone, Debug)]
+pub(crate) struct Cell<F> {
+    /// Query of the backing column at `rotation` (the constant `0` for cells built with
+    /// `new_value`).
+    pub(crate) expression: Expression<F>,
+    /// Query of the backing column at the current row (the constant `0` for cells built with
+    /// `new_value`).
+    pub(crate) column_expression: Expression<F>,
+    /// Backing advice column; `None` for cells built with `new_value`.
+    pub(crate) column: Option<Column<Advice>>,
+    /// Index of the column, used as the column coordinate when assigning into a `KeccakRegion`.
+    pub(crate) column_idx: usize,
+    /// Row rotation of the cell relative to the offset passed to `assign`.
+    pub(crate) rotation: i32,
+}
+
+impl<F: FieldExt> Cell<F> {
+    /// Builds a cell backed by `column`, querying it at `rotation` and at the current row.
+    pub(crate) fn new(
+        meta: &mut VirtualCells<F>,
+        column: Column<Advice>,
+        column_idx: usize,
+        rotation: i32,
+    ) -> Self {
+        // The rotated query is issued before the current-row query; keep this order.
+        let expression = meta.query_advice(column, Rotation(rotation));
+        let column_expression = meta.query_advice(column, Rotation::cur());
+        Self { expression, column_expression, column: Some(column), column_idx, rotation }
+    }
+
+    /// Builds a cell that only carries a position (no advice column); both expressions are the
+    /// constant `0`.
+    pub(crate) fn new_value(column_idx: usize, rotation: i32) -> Self {
+        let zero: Expression<F> = 0.expr();
+        Self {
+            expression: zero.clone(),
+            column_expression: zero,
+            column: None,
+            column_idx,
+            rotation,
+        }
+    }
+
+    /// Returns a copy of this cell shifted by `offset` rows.
+    ///
+    /// # Panics
+    /// Panics when the cell has no backing column (i.e. it was built with `new_value`).
+    pub(crate) fn at_offset(&self, meta: &mut ConstraintSystem<F>, offset: i32) -> Self {
+        let shifted_rotation = self.rotation + offset;
+        // A gate whose only polynomial is the constant `0` is created just to get access to
+        // `VirtualCells` for the query.
+        let mut shifted_query = 0.expr();
+        meta.create_gate("Query cell", |meta| {
+            shifted_query = meta.query_advice(self.column.unwrap(), Rotation(shifted_rotation));
+            vec![0.expr()]
+        });
+
+        Self {
+            expression: shifted_query,
+            column_expression: self.column_expression.clone(),
+            column: self.column,
+            column_idx: self.column_idx,
+            rotation: shifted_rotation,
+        }
+    }
+
+    /// Stores `value` in `region` at this cell's column index and row `offset + rotation`.
+    pub(crate) fn assign(&self, region: &mut KeccakRegion<F>, offset: i32, value: F) {
+        let target_row = (offset + self.rotation) as usize;
+        region.assign(self.column_idx, target_row, value);
+    }
+}
+
+/// Lets a `Cell` be used wherever an expression is expected.
+impl<F: FieldExt> Expr<F> for Cell<F> {
+    /// Returns a clone of the cell's `expression` field.
+    fn expr(&self) -> Expression<F> {
+        self.expression.clone()
+    }
+}
+
+/// Same as the impl for `Cell`, but for references to a cell.
+impl<F: FieldExt> Expr<F> for &Cell<F> {
+    /// Returns a clone of the referenced cell's `expression` field.
+    fn expr(&self) -> Expression<F> {
+        self.expression.clone()
+    }
+}
+
+/// An advice column allocated by the `CellManager`, together with its current-row query.
+#[derive(Clone, Debug)]
+pub(crate) struct CellColumn<F> {
+    /// The advice column itself.
+    pub(crate) advice: Column<Advice>,
+    /// Query of `advice` at the current row.
+    pub(crate) expr: Expression<F>,
+}
+
+/// Hands out cells over a fixed number of rows, allocating advice columns on demand.
+#[derive(Clone, Debug)]
+pub(crate) struct CellManager<F> {
+    // Number of rows the cells are laid out over
+    height: usize,
+    // Largest number of columns used in any row so far
+    width: usize,
+    // Row that the next positional query will use
+    current_row: usize,
+    // Advice columns allocated so far, indexed by column index
+    columns: Vec<CellColumn<F>>,
+    // rows[i] gives the number of columns already used in row `i`
+    rows: Vec<usize>,
+    // Cells skipped by `start_region`; only accumulated when debug assertions are enabled
+    num_unused_cells: usize,
+}
+
+impl<F: FieldExt> CellManager<F> {
+    /// Creates an empty manager that lays cells out over `height` rows.
+    pub(crate) fn new(height: usize) -> Self {
+        let rows = vec![0; height];
+        Self { height, width: 0, current_row: 0, columns: Vec::new(), rows, num_unused_cells: 0 }
+    }
+
+    /// Queries the next free cell in the current row and advances to the following row.
+    pub(crate) fn query_cell(&mut self, meta: &mut ConstraintSystem<F>) -> Cell<F> {
+        let (row, column) = self.get_position();
+        self.query_cell_at_pos(meta, row as i32, column)
+    }
+
+    /// Queries the next free cell in row `row_idx`; the current row becomes the one after it.
+    pub(crate) fn query_cell_at_row(
+        &mut self,
+        meta: &mut ConstraintSystem<F>,
+        row_idx: i32,
+    ) -> Cell<F> {
+        let column = self.reserve_in_row(row_idx as usize);
+        self.query_cell_at_pos(meta, row_idx, column)
+    }
+
+    /// Like `query_cell`, but returns a position-only cell and touches no constraint system.
+    pub(crate) fn query_cell_value(&mut self) -> Cell<F> {
+        let (row, column) = self.get_position();
+        self.query_cell_value_at_pos(row as i32, column)
+    }
+
+    /// Like `query_cell_at_row`, but returns a position-only cell.
+    pub(crate) fn query_cell_value_at_row(&mut self, row_idx: i32) -> Cell<F> {
+        let column = self.reserve_in_row(row_idx as usize);
+        self.query_cell_value_at_pos(row_idx, column)
+    }
+
+    /// Aligns all rows so that they start at the same column and returns that column (the
+    /// current width).
+    pub(crate) fn start_region(&mut self) -> usize {
+        let width = self.get_width();
+        // Bookkeeping of the skipped cells is only done when debug assertions are enabled.
+        #[cfg(debug_assertions)]
+        {
+            let skipped: usize = self.rows.iter().map(|&used| width - used).sum();
+            self.num_unused_cells += skipped;
+        }
+        self.rows.clear();
+        self.rows.resize(self.height, width);
+        width
+    }
+
+    /// Largest number of columns used in any row so far.
+    pub(crate) fn get_width(&self) -> usize {
+        self.width
+    }
+
+    /// The advice columns allocated so far.
+    pub(crate) fn columns(&self) -> &[CellColumn<F>] {
+        &self.columns
+    }
+
+    /// Number of cells skipped by `start_region` (only tracked with debug assertions enabled).
+    pub(crate) fn get_num_unused_cells(&self) -> usize {
+        self.num_unused_cells
+    }
+
+    /// Builds the cell at (`row_idx`, `column_idx`), allocating a new advice column when
+    /// `column_idx` is not backed by an existing one.
+    pub(crate) fn query_cell_at_pos(
+        &mut self,
+        meta: &mut ConstraintSystem<F>,
+        row_idx: i32,
+        column_idx: usize,
+    ) -> Cell<F> {
+        let existing = self.columns.get(column_idx).map(|known| known.advice);
+        let column = match existing {
+            Some(advice) => advice,
+            None => {
+                let advice = meta.advice_column();
+                // Gates with the constant `0` polynomial are created only to be able to query.
+                let mut expr = 0.expr();
+                meta.create_gate("Query column", |meta| {
+                    expr = meta.query_advice(advice, Rotation::cur());
+                    vec![0.expr()]
+                });
+                self.columns.push(CellColumn { advice, expr });
+                advice
+            }
+        };
+
+        let mut queried = None;
+        meta.create_gate("Query cell", |meta| {
+            queried = Some(Cell::new(meta, column, column_idx, row_idx));
+            vec![0.expr()]
+        });
+        queried.unwrap()
+    }
+
+    /// Builds the position-only cell at (`row_idx`, `column_idx`).
+    pub(crate) fn query_cell_value_at_pos(&mut self, row_idx: i32, column_idx: usize) -> Cell<F> {
+        Cell::new_value(column_idx, row_idx)
+    }
+
+    /// Reserves the next free column of the current row and returns `(row, column)`.
+    fn get_position(&mut self) -> (usize, usize) {
+        let row = self.current_row;
+        let column = self.reserve_in_row(row);
+        (row, column)
+    }
+
+    /// Takes the next free column in `row`, widens the layout if needed and moves the current
+    /// row to the one after `row` (wrapping at `height`). Returns the reserved column index.
+    fn reserve_in_row(&mut self, row: usize) -> usize {
+        let column = self.rows[row];
+        self.rows[row] = column + 1;
+        self.width = self.width.max(column + 1);
+        self.current_row = (row + 1) % self.height;
+        column
+    }
+}
diff --git a/hashes/zkevm-keccak/src/keccak_circuit/keccak_packed_multi.rs b/hashes/zkevm-keccak/src/keccak_circuit/keccak_packed_multi.rs
new file mode 100644
index 00000000..ae675514
--- /dev/null
+++ b/hashes/zkevm-keccak/src/keccak_circuit/keccak_packed_multi.rs
@@ -0,0 +1,983 @@
+use super::keccak_table::KeccakTable;
+use super::param::{
+    CHI_BASE_LOOKUP_TABLE, NUM_BYTES_PER_WORD, NUM_ROUNDS, NUM_WORDS_TO_ABSORB,
+    NUM_WORDS_TO_SQUEEZE, RATE, RATE_IN_BITS, RHO_MATRIX, ROUND_CST,
+};
+use super::util::{
+    constraint_builder::BaseConstraintBuilder, eth_types::Field, expression::Expr, field_xor,
+    get_absorb_positions, get_num_bits_per_lookup, into_bits, pack, pack_u64, pack_with_base,
+    rotate, target_part_sizes, to_bytes, unpack,
+};
+use super::{assign_advice_custom, KeccakAssignedValue};
+use super::{cell_manager::*, param::*};
+
+use crate::halo2_proofs::{
+    arithmetic::FieldExt,
+    circuit::{Region, Value},
+    plonk::{Error, Expression},
+};
+use log::debug;
+use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
+use std::env::var;
+
+/// Number of circuit rows used per keccak round.
+///
+/// Read from the `KECCAK_ROWS` environment variable; falls back to 25 when the variable cannot
+/// be read.
+///
+/// # Panics
+/// Panics when the value cannot be parsed as `usize`.
+pub(crate) fn get_num_rows_per_round() -> usize {
+    let raw = match var("KECCAK_ROWS") {
+        Ok(configured) => configured,
+        Err(_) => String::from("25"),
+    };
+    raw.parse().expect("Cannot parse KECCAK_ROWS env var as usize")
+}
+
+/// Part size for the absorb lookup: `get_num_bits_per_lookup` applied to `ABSORB_LOOKUP_RANGE`.
+pub(crate) fn get_num_bits_per_absorb_lookup() -> usize {
+    get_num_bits_per_lookup(ABSORB_LOOKUP_RANGE)
+}
+
+/// Part size for the theta c lookup: `get_num_bits_per_lookup` applied to
+/// `THETA_C_LOOKUP_RANGE`.
+pub(crate) fn get_num_bits_per_theta_c_lookup() -> usize {
+    get_num_bits_per_lookup(THETA_C_LOOKUP_RANGE)
+}
+
+/// Part size for the rho/pi lookup, derived from the larger of `CHI_BASE_LOOKUP_RANGE` and
+/// `RHO_PI_LOOKUP_RANGE`.
+// NOTE(review): same formula as `get_num_bits_per_base_chi_lookup`; presumably the two steps
+// must share a part size - confirm.
+pub(crate) fn get_num_bits_per_rho_pi_lookup() -> usize {
+    get_num_bits_per_lookup(CHI_BASE_LOOKUP_RANGE.max(RHO_PI_LOOKUP_RANGE))
+}
+
+/// Part size for the chi base lookup, derived from the larger of `CHI_BASE_LOOKUP_RANGE` and
+/// `RHO_PI_LOOKUP_RANGE`.
+// NOTE(review): same formula as `get_num_bits_per_rho_pi_lookup`; presumably the two steps
+// must share a part size - confirm.
+pub(crate) fn get_num_bits_per_base_chi_lookup() -> usize {
+    get_num_bits_per_lookup(CHI_BASE_LOOKUP_RANGE.max(RHO_PI_LOOKUP_RANGE))
+}
+
+/// The number of keccak_f's that can be done in this circuit
+///
+/// `num_rows` should be number of usable rows without blinding factors.
+///
+/// Returns 0 when `num_rows` is too small to hold even the reserved rounds, instead of
+/// underflowing.
+///
+/// # Panics
+/// Panics if the configured number of rows per round is 0 (division by zero).
+pub fn get_keccak_capacity(num_rows: usize) -> usize {
+    // Rounds that are reserved and cannot host a keccak_f:
+    // - 1 because we have a dummy round at the very beginning of multi_keccak
+    // - NUM_WORDS_TO_ABSORB because `absorb_data_next` and `absorb_result_next` query
+    //   `NUM_WORDS_TO_ABSORB * get_num_rows_per_round()` beyond any row where `q_absorb == 1`
+    let reserved_rounds = 1 + NUM_WORDS_TO_ABSORB;
+    let available_rounds = num_rows / get_num_rows_per_round();
+    // A plain subtraction would panic in debug builds and wrap to a huge capacity in release
+    // builds when the circuit is smaller than the reserved rounds.
+    available_rounds.saturating_sub(reserved_rounds) / (NUM_ROUNDS + 1)
+}
+
+/// Number of keccak_f permutations needed to hash an input of `byte_length` bytes.
+///
+/// Equals `ceil((byte_length + 1) / RATE)`.
+pub fn get_num_keccak_f(byte_length: usize) -> usize {
+    let full_blocks = byte_length / RATE;
+    // One more block than the number of completely filled ones.
+    full_blocks + 1
+}
+
+/// Values tracked for one absorbed word.
+// NOTE(review): field meanings below are inferred from the names only - confirm against the
+// witness generation code.
+#[derive(Clone, Default, Debug, PartialEq)]
+pub(crate) struct AbsorbData<F: FieldExt> {
+    // presumably the state word before absorbing
+    from: F,
+    // presumably the input word being absorbed
+    absorb: F,
+    // presumably the state word after absorbing
+    result: F,
+}
+
+/// Value tracked for one squeezed word.
+#[derive(Clone, Default, Debug, PartialEq)]
+pub(crate) struct SqueezeData<F: FieldExt> {
+    // presumably the squeezed word in packed form - TODO confirm
+    packed: F,
+}
+
+/// Witness data for a single circuit row, as consumed by `KeccakCircuitConfig::set_row`.
+#[derive(Clone, Debug)]
+pub struct KeccakRow<F: FieldExt> {
+    /// Value of the `q_enable` fixed selector on this row.
+    pub(crate) q_enable: bool,
+    // q_enable_row: bool,
+    /// Value of the `q_round` fixed selector on this row.
+    pub(crate) q_round: bool,
+    /// Value of the `q_absorb` fixed selector on this row.
+    pub(crate) q_absorb: bool,
+    /// Value of the `q_round_last` fixed selector on this row.
+    pub(crate) q_round_last: bool,
+    /// Value of the `q_padding` fixed selector on this row.
+    pub(crate) q_padding: bool,
+    /// Value of the `q_padding_last` fixed selector on this row.
+    pub(crate) q_padding_last: bool,
+    /// Value of the round constant fixed column on this row.
+    pub(crate) round_cst: F,
+    /// Assigned to the `is_enabled` column of the keccak table.
+    pub(crate) is_final: bool,
+    /// Values for the cell-manager advice columns, in column order.
+    pub(crate) cell_values: Vec<F>,
+    // We have no need for length as RLC equality checks length implicitly
+    // length: usize,
+    // SecondPhase values will be assigned separately
+    // data_rlc: Value<F>,
+    // hash_rlc: Value<F>,
+}
+
+impl<F: FieldExt> KeccakRow<F> {
+    /// Builds `num_rows` placeholder rows without cell values.
+    ///
+    /// Only the first row has `q_enable` and `q_absorb` set; every other selector is off and
+    /// the round constant is zero.
+    pub fn dummy_rows(num_rows: usize) -> Vec<Self> {
+        let mut rows = Vec::with_capacity(num_rows);
+        for idx in 0..num_rows {
+            let is_first = idx == 0;
+            rows.push(KeccakRow {
+                q_enable: is_first,
+                q_round: false,
+                q_absorb: is_first,
+                q_round_last: false,
+                q_padding: false,
+                q_padding_last: false,
+                round_cst: F::zero(),
+                is_final: false,
+                cell_values: Vec::new(),
+            });
+        }
+        rows
+    }
+}
+
+/// One part of a split word on the constraint side.
+#[derive(Clone, Debug)]
+pub(crate) struct Part<F: FieldExt> {
+    /// Cell that holds the part.
+    pub(crate) cell: Cell<F>,
+    /// Expression of the part (the cell's expression where parts are built in this file).
+    pub(crate) expr: Expression<F>,
+    /// Number of bits of the word covered by this part.
+    pub(crate) num_bits: usize,
+}
+
+/// One part of a split word on the witness side.
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct PartValue<F: FieldExt> {
+    // Value of the part
+    value: F,
+    // Rotation of the cell the part was assigned to
+    rot: i32,
+    // Number of bits of the word covered by this part
+    num_bits: usize,
+}
+
+/// In-memory grid of witness values, addressed as `rows[offset][column]`.
+#[derive(Clone, Debug)]
+pub(crate) struct KeccakRegion<F> {
+    /// One vector of column values per row; rows and columns are grown on demand by `assign`.
+    pub(crate) rows: Vec<Vec<F>>,
+}
+
+impl<F: FieldExt> KeccakRegion<F> {
+    /// Creates a region without any rows.
+    pub(crate) fn new() -> Self {
+        Self { rows: Vec::new() }
+    }
+
+    /// Stores `value` at row `offset`, column `column`.
+    ///
+    /// Missing rows are added as empty rows and missing columns of the target row are filled
+    /// with zero, so that the position exists before it is written.
+    pub(crate) fn assign(&mut self, column: usize, offset: usize, value: F) {
+        if self.rows.len() <= offset {
+            self.rows.resize_with(offset + 1, Vec::new);
+        }
+        let target_row = &mut self.rows[offset];
+        if target_row.len() <= column {
+            target_row.resize(column + 1, F::zero());
+        }
+        target_row[column] = value;
+    }
+}
+
+/// Recombines parts back together
+pub(crate) mod decode {
+    use super::super::param::BIT_COUNT;
+    use super::{Expr, FieldExt, Part, PartValue};
+    use crate::halo2_proofs::plonk::Expression;
+
+    /// Folds `parts` into a single expression, treating the last part as the most significant
+    /// one; every part occupies `BIT_COUNT * num_bits` binary digits.
+    pub(crate) fn expr<F: FieldExt>(parts: Vec<Part<F>>) -> Expression<F> {
+        let mut combined: Expression<F> = 0.expr();
+        for part in parts.iter().rev() {
+            let shift = F::from(1u64 << (BIT_COUNT * part.num_bits));
+            combined = combined * shift + part.expr.clone();
+        }
+        combined
+    }
+
+    /// Witness counterpart of `expr`: folds the part values into a single field element.
+    pub(crate) fn value<F: FieldExt>(parts: Vec<PartValue<F>>) -> F {
+        let mut combined = F::zero();
+        for part in parts.iter().rev() {
+            let shift = F::from(1u64 << (BIT_COUNT * part.num_bits));
+            combined = combined * shift + part.value;
+        }
+        combined
+    }
+}
+
+/// Splits a word into parts
+pub(crate) mod split {
+    use super::super::util::{pack, pack_part, unpack, WordParts};
+    use super::{
+        decode, BaseConstraintBuilder, CellManager, Expr, Field, FieldExt, KeccakRegion, Part,
+        PartValue,
+    };
+    use crate::halo2_proofs::plonk::{ConstraintSystem, Expression};
+
+    /// Allocates one cell per word part and constrains the recombined parts to equal `input`.
+    ///
+    /// Cells are taken from row `row` when it is given, otherwise from the cell manager's
+    /// current position.
+    #[allow(clippy::too_many_arguments)]
+    pub(crate) fn expr<F: FieldExt>(
+        meta: &mut ConstraintSystem<F>,
+        cell_manager: &mut CellManager<F>,
+        cb: &mut BaseConstraintBuilder<F>,
+        input: Expression<F>,
+        rot: usize,
+        target_part_size: usize,
+        normalize: bool,
+        row: Option<usize>,
+    ) -> Vec<Part<F>> {
+        let word = WordParts::new(target_part_size, rot, normalize);
+        let parts: Vec<Part<F>> = word
+            .parts
+            .into_iter()
+            .map(|word_part| {
+                let cell = match row {
+                    Some(row) => cell_manager.query_cell_at_row(meta, row as i32),
+                    None => cell_manager.query_cell(meta),
+                };
+                Part { num_bits: word_part.bits.len(), expr: cell.expr(), cell }
+            })
+            .collect();
+        // Input parts need to equal original input expression
+        cb.require_equal("split", decode::expr(parts.clone()), input);
+        parts
+    }
+
+    /// Witness counterpart of `expr`: packs each word part of `input`, assigns it to the next
+    /// cell and returns the part values.
+    pub(crate) fn value<F: Field>(
+        cell_manager: &mut CellManager<F>,
+        region: &mut KeccakRegion<F>,
+        input: F,
+        rot: usize,
+        target_part_size: usize,
+        normalize: bool,
+        row: Option<usize>,
+    ) -> Vec<PartValue<F>> {
+        let input_bits = unpack(input);
+        debug_assert_eq!(pack::<F>(&input_bits), input);
+        let word = WordParts::new(target_part_size, rot, normalize);
+        let parts: Vec<PartValue<F>> = word
+            .parts
+            .into_iter()
+            .map(|word_part| {
+                let value = F::from(pack_part(&input_bits, &word_part));
+                let cell = match row {
+                    Some(row) => cell_manager.query_cell_value_at_row(row as i32),
+                    None => cell_manager.query_cell_value(),
+                };
+                cell.assign(region, 0, value);
+                PartValue { num_bits: word_part.bits.len(), rot: cell.rotation, value }
+            })
+            .collect();
+        debug_assert_eq!(decode::value(parts.clone()), input);
+        parts
+    }
+}
+
+// Split into parts, but storing the parts in a specific way to have the same
+// table layout in `output_cells` regardless of rotation.
+pub(crate) mod split_uniform {
+    use super::super::param::BIT_SIZE;
+    use super::super::util::{
+        eth_types::Field, pack, pack_part, rotate, rotate_rev, unpack, WordParts,
+    };
+    use super::{
+        decode, target_part_sizes, BaseConstraintBuilder, Cell, CellManager, Expr, FieldExt,
+        KeccakRegion, Part, PartValue,
+    };
+    use crate::halo2_proofs::plonk::{ConstraintSystem, Expression};
+
+    /// Splits `input` into parts and returns them laid out uniformly in `output_cells`.
+    ///
+    /// A word part that already has the target size is stored directly in its output cell.
+    /// Otherwise two consecutive word parts are stored in two freshly queried cells and are
+    /// constrained to combine to the output cell. The parts, put back in their original order,
+    /// are constrained to decode to `input`.
+    ///
+    /// # Panics
+    /// Panics when a word part smaller than its target size is not followed by another part.
+    #[allow(clippy::too_many_arguments)]
+    pub(crate) fn expr<F: FieldExt>(
+        meta: &mut ConstraintSystem<F>,
+        output_cells: &[Cell<F>],
+        cell_manager: &mut CellManager<F>,
+        cb: &mut BaseConstraintBuilder<F>,
+        input: Expression<F>,
+        rot: usize,
+        target_part_size: usize,
+        normalize: bool,
+    ) -> Vec<Part<F>> {
+        let mut input_parts = Vec::new();
+        let mut output_parts = Vec::new();
+        let word = WordParts::new(target_part_size, rot, normalize);
+
+        let word = rotate(word.parts, rot, target_part_size);
+
+        let target_sizes = target_part_sizes(target_part_size);
+        // `while let` (not `for`) because the second branch pulls an extra part from the iterator
+        let mut word_iter = word.iter();
+        let mut counter = 0;
+        while let Some(word_part) = word_iter.next() {
+            if word_part.bits.len() == target_sizes[counter] {
+                // Input and output part are the same
+                let part = Part {
+                    num_bits: target_sizes[counter],
+                    cell: output_cells[counter].clone(),
+                    expr: output_cells[counter].expr(),
+                };
+                input_parts.push(part.clone());
+                output_parts.push(part);
+                counter += 1;
+            } else if let Some(extra_part) = word_iter.next() {
+                // The two parts combined need to have the expected combined length
+                debug_assert_eq!(
+                    word_part.bits.len() + extra_part.bits.len(),
+                    target_sizes[counter]
+                );
+
+                // Needs two cells here to store the parts
+                // These still need to be range checked elsewhere!
+                let part_a = cell_manager.query_cell(meta);
+                let part_b = cell_manager.query_cell(meta);
+
+                // Make sure the parts combined equal the value in the uniform output.
+                // The multiplier is computed in `u64`, exactly like in `value`, so that the
+                // constraint and the witness cannot disagree because of a narrower overflow.
+                let expr = part_a.expr()
+                    + part_b.expr() * F::from((BIT_SIZE as u64).pow(word_part.bits.len() as u32));
+                cb.require_equal("rot part", expr, output_cells[counter].expr());
+
+                // Input needs the two parts because it needs to be able to undo the rotation
+                input_parts.push(Part {
+                    num_bits: word_part.bits.len(),
+                    cell: part_a.clone(),
+                    expr: part_a.expr(),
+                });
+                input_parts.push(Part {
+                    num_bits: extra_part.bits.len(),
+                    cell: part_b.clone(),
+                    expr: part_b.expr(),
+                });
+                // Output only has the combined cell
+                output_parts.push(Part {
+                    num_bits: target_sizes[counter],
+                    cell: output_cells[counter].clone(),
+                    expr: output_cells[counter].expr(),
+                });
+                counter += 1;
+            } else {
+                unreachable!();
+            }
+        }
+        let input_parts = rotate_rev(input_parts, rot, target_part_size);
+        // Input parts need to equal original input expression
+        cb.require_equal("split", decode::expr(input_parts), input);
+        // Uniform output
+        output_parts
+    }
+
+    /// Witness counterpart of `expr`: assigns the part values into `region` and returns the
+    /// values stored in the uniform output cells.
+    ///
+    /// # Panics
+    /// Panics when a word part smaller than its target size is not followed by another part.
+    pub(crate) fn value<F: Field>(
+        output_cells: &[Cell<F>],
+        cell_manager: &mut CellManager<F>,
+        region: &mut KeccakRegion<F>,
+        input: F,
+        rot: usize,
+        target_part_size: usize,
+        normalize: bool,
+    ) -> Vec<PartValue<F>> {
+        let input_bits = unpack(input);
+        debug_assert_eq!(pack::<F>(&input_bits), input);
+
+        let mut input_parts = Vec::new();
+        let mut output_parts = Vec::new();
+        let word = WordParts::new(target_part_size, rot, normalize);
+
+        let word = rotate(word.parts, rot, target_part_size);
+
+        let target_sizes = target_part_sizes(target_part_size);
+        // `while let` (not `for`) because the second branch pulls an extra part from the iterator
+        let mut word_iter = word.iter();
+        let mut counter = 0;
+        while let Some(word_part) = word_iter.next() {
+            if word_part.bits.len() == target_sizes[counter] {
+                // The part fits its output cell as is
+                let value = pack_part(&input_bits, word_part);
+                output_cells[counter].assign(region, 0, F::from(value));
+                input_parts.push(PartValue {
+                    num_bits: word_part.bits.len(),
+                    rot: output_cells[counter].rotation,
+                    value: F::from(value),
+                });
+                output_parts.push(PartValue {
+                    num_bits: word_part.bits.len(),
+                    rot: output_cells[counter].rotation,
+                    value: F::from(value),
+                });
+                counter += 1;
+            } else if let Some(extra_part) = word_iter.next() {
+                debug_assert_eq!(
+                    word_part.bits.len() + extra_part.bits.len(),
+                    target_sizes[counter]
+                );
+
+                // Two extra cells hold the two halves; the output cell holds their combination
+                let part_a = cell_manager.query_cell_value();
+                let part_b = cell_manager.query_cell_value();
+
+                let value_a = pack_part(&input_bits, word_part);
+                let value_b = pack_part(&input_bits, extra_part);
+
+                part_a.assign(region, 0, F::from(value_a));
+                part_b.assign(region, 0, F::from(value_b));
+
+                let value = value_a + value_b * (BIT_SIZE as u64).pow(word_part.bits.len() as u32);
+
+                output_cells[counter].assign(region, 0, F::from(value));
+
+                input_parts.push(PartValue {
+                    num_bits: word_part.bits.len(),
+                    value: F::from(value_a),
+                    rot: part_a.rotation,
+                });
+                input_parts.push(PartValue {
+                    num_bits: extra_part.bits.len(),
+                    value: F::from(value_b),
+                    rot: part_b.rotation,
+                });
+                output_parts.push(PartValue {
+                    num_bits: target_sizes[counter],
+                    value: F::from(value),
+                    rot: output_cells[counter].rotation,
+                });
+                counter += 1;
+            } else {
+                unreachable!();
+            }
+        }
+        let input_parts = rotate_rev(input_parts, rot, target_part_size);
+        debug_assert_eq!(decode::value(input_parts), input);
+        output_parts
+    }
+}
+
+// Transform values using a lookup table
+pub(crate) mod transform {
+    use super::{transform_to, CellManager, Field, FieldExt, KeccakRegion, Part, PartValue};
+    use crate::halo2_proofs::plonk::{ConstraintSystem, TableColumn};
+
+    /// Allocates one output cell per input part and hands over to `transform_to::expr`.
+    ///
+    /// With `uniform_lookup` each output cell is taken from the row of its input cell,
+    /// otherwise from the cell manager's current position.
+    #[allow(clippy::too_many_arguments)]
+    pub(crate) fn expr<F: FieldExt>(
+        name: &'static str,
+        meta: &mut ConstraintSystem<F>,
+        cell_manager: &mut CellManager<F>,
+        lookup_counter: &mut usize,
+        input: Vec<Part<F>>,
+        transform_table: [TableColumn; 2],
+        uniform_lookup: bool,
+    ) -> Vec<Part<F>> {
+        let mut cells = Vec::with_capacity(input.len());
+        for input_part in &input {
+            let cell = if uniform_lookup {
+                cell_manager.query_cell_at_row(meta, input_part.cell.rotation)
+            } else {
+                cell_manager.query_cell(meta)
+            };
+            cells.push(cell);
+        }
+        transform_to::expr(
+            name,
+            meta,
+            &cells,
+            lookup_counter,
+            input,
+            transform_table,
+            uniform_lookup,
+        )
+    }
+
+    /// Witness counterpart of `expr`: allocates the output cells the same way and hands over
+    /// to `transform_to::value`.
+    pub(crate) fn value<F: Field>(
+        cell_manager: &mut CellManager<F>,
+        region: &mut KeccakRegion<F>,
+        input: Vec<PartValue<F>>,
+        do_packing: bool,
+        f: fn(&u8) -> u8,
+        uniform_lookup: bool,
+    ) -> Vec<PartValue<F>> {
+        let mut cells = Vec::with_capacity(input.len());
+        for input_part in &input {
+            let cell = if uniform_lookup {
+                cell_manager.query_cell_value_at_row(input_part.rot)
+            } else {
+                cell_manager.query_cell_value()
+            };
+            cells.push(cell);
+        }
+        transform_to::value(&cells, region, input, do_packing, f)
+    }
+}
+
+// Transforms values to cells
+pub(crate) mod transform_to {
+    use super::super::util::{pack, to_bytes, unpack};
+    use super::{Cell, Expr, Field, FieldExt, KeccakRegion, Part, PartValue};
+    use crate::halo2_proofs::plonk::{ConstraintSystem, TableColumn};
+
+    /// Pairs every input part with the output cell at the same index and returns the output
+    /// parts.
+    ///
+    /// A lookup of (input, output) into `transform_table` is added for every part, except that
+    /// with `uniform_lookup` only parts whose input cell has rotation 0 get one. Each added
+    /// lookup increments `lookup_counter`.
+    #[allow(clippy::too_many_arguments)]
+    pub(crate) fn expr<F: FieldExt>(
+        name: &'static str,
+        meta: &mut ConstraintSystem<F>,
+        cells: &[Cell<F>],
+        lookup_counter: &mut usize,
+        input: Vec<Part<F>>,
+        transform_table: [TableColumn; 2],
+        uniform_lookup: bool,
+    ) -> Vec<Part<F>> {
+        input
+            .iter()
+            .enumerate()
+            .map(|(idx, input_part)| {
+                let output_cell = &cells[idx];
+                let needs_lookup = !uniform_lookup || input_part.cell.rotation == 0;
+                if needs_lookup {
+                    meta.lookup(name, |_| {
+                        vec![
+                            (input_part.expr.clone(), transform_table[0]),
+                            (output_cell.expr(), transform_table[1]),
+                        ]
+                    });
+                    *lookup_counter += 1;
+                }
+                Part {
+                    num_bits: input_part.num_bits,
+                    cell: output_cell.clone(),
+                    expr: output_cell.expr(),
+                }
+            })
+            .collect()
+    }
+
+    /// Witness counterpart of `expr`: applies `f` to the first `num_bits` unpacked bits of
+    /// every input part and assigns the result to the cell at the same index.
+    ///
+    /// The transformed bits are packed with `pack` when `do_packing` is set; otherwise the
+    /// first byte produced by `to_bytes::value` is used.
+    pub(crate) fn value<F: Field>(
+        cells: &[Cell<F>],
+        region: &mut KeccakRegion<F>,
+        input: Vec<PartValue<F>>,
+        do_packing: bool,
+        f: fn(&u8) -> u8,
+    ) -> Vec<PartValue<F>> {
+        input
+            .iter()
+            .enumerate()
+            .map(|(idx, input_part)| {
+                let unpacked = unpack(input_part.value);
+                let output_bits: Vec<_> =
+                    unpacked[..input_part.num_bits].iter().map(f).collect();
+                let value = if do_packing {
+                    pack(&output_bits)
+                } else {
+                    F::from(to_bytes::value(&output_bits)[0] as u64)
+                };
+                let output_cell = &cells[idx];
+                output_cell.assign(region, 0, value);
+                PartValue { num_bits: input_part.num_bits, rot: output_cell.rotation, value }
+            })
+            .collect()
+    }
+}
+
+/// Computes and assigns the input RLC values (but not the output RLC values:
+/// see `multi_keccak_phase1`).
+///
+/// For every round that absorbs a word, the running RLC before each of the word's bytes is
+/// written to the rows following the round's first row, and the RLC after the round is written
+/// to the round's first row. Bytes past the end of `bytes` leave the RLC unchanged. The cell
+/// assigned in the last round of every chunk is pushed to `input_rlcs`, and `offset` is
+/// advanced by one round's worth of rows per round.
+pub(crate) fn keccak_phase1<'v, F: Field>(
+    region: &mut Region<F>,
+    keccak_table: &KeccakTable,
+    bytes: &[u8],
+    challenge: Value<F>,
+    input_rlcs: &mut Vec<KeccakAssignedValue<'v, F>>,
+    offset: &mut usize,
+) {
+    let num_chunks = get_num_keccak_f(bytes.len());
+    let rows_per_round = get_num_rows_per_round();
+    let rlc_column = keccak_table.input_rlc;
+
+    let mut pending_bytes = bytes.iter();
+    let mut running_rlc = Value::known(F::zero());
+
+    for _ in 0..num_chunks {
+        for round in 0..=NUM_ROUNDS {
+            let round_start = *offset;
+            if round < NUM_WORDS_TO_ABSORB {
+                for row in 1..=NUM_BYTES_PER_WORD {
+                    assign_advice_custom(region, rlc_column, round_start + row, running_rlc);
+                    if let Some(&byte) = pending_bytes.next() {
+                        running_rlc =
+                            running_rlc * challenge + Value::known(F::from(byte as u64));
+                    }
+                }
+            }
+            let round_rlc = assign_advice_custom(region, rlc_column, round_start, running_rlc);
+            if round == NUM_ROUNDS {
+                input_rlcs.push(round_rlc);
+            }
+
+            *offset = round_start + rows_per_round;
+        }
+    }
+}
+
+/// Witness generation in `FirstPhase` for one input: pads `bytes` and, per rate-sized chunk,
+/// pushes `NUM_ROUNDS + 1` rounds of rows to `rows` and a digest to `squeeze_digests`; no RLCs.
+pub(crate) fn keccak_phase0<F: Field>(
+    rows: &mut Vec<KeccakRow<F>>,
+    squeeze_digests: &mut Vec<[F; NUM_WORDS_TO_SQUEEZE]>,
+    bytes: &[u8],
+) {
+    let mut bits = into_bits(bytes);
+    let mut s = [[F::zero(); 5]; 5]; // 5x5 state words, all zero before the first chunk
+    let absorb_positions = get_absorb_positions();
+    let num_bytes_in_last_block = bytes.len() % RATE; // real input bytes in the final chunk
+    let num_rows_per_round = get_num_rows_per_round();
+    let two = F::from(2u64);
+
+    // Padding: append a 1 bit, zeros up to one short of a RATE_IN_BITS multiple, then a final 1
+    bits.push(1);
+    while (bits.len() + 1) % RATE_IN_BITS != 0 {
+        bits.push(0);
+    }
+    bits.push(1);
+
+    let chunks = bits.chunks(RATE_IN_BITS);
+    let num_chunks = chunks.len();
+
+    let mut cell_managers = Vec::with_capacity(NUM_ROUNDS + 1);
+    let mut regions = Vec::with_capacity(NUM_ROUNDS + 1);
+    let mut hash_words = [F::zero(); NUM_WORDS_TO_SQUEEZE];
+
+    for (idx, chunk) in chunks.enumerate() {
+        let is_final_block = idx == num_chunks - 1;
+
+        let mut absorb_rows = Vec::new();
+        // Absorb: combine each 64-bit word of the chunk into the state via `field_xor`
+        for (idx, &(i, j)) in absorb_positions.iter().enumerate() {
+            let absorb = pack(&chunk[idx * 64..(idx + 1) * 64]);
+            let from = s[i][j];
+            s[i][j] = field_xor(s[i][j], absorb);
+            absorb_rows.push(AbsorbData { from, absorb, result: s[i][j] });
+        }
+
+        // Reuse the Vec allocations from the previous chunk instead of creating new ones
+        cell_managers.clear();
+        regions.clear();
+
+        for round in 0..NUM_ROUNDS + 1 {
+            let mut cell_manager = CellManager::new(num_rows_per_round);
+            let mut region = KeccakRegion::new();
+
+            let mut absorb_row = AbsorbData::default();
+            if round < NUM_WORDS_TO_ABSORB {
+                absorb_row = absorb_rows[round].clone();
+            }
+
+            // State data: one cell per state word, holding `s` as it is before this round
+            for s in &s {
+                for s in s {
+                    let cell = cell_manager.query_cell_value();
+                    cell.assign(&mut region, 0, *s);
+                }
+            }
+
+            // Absorb data (from, absorbed word, result); defaults once round >= NUM_WORDS_TO_ABSORB
+            let absorb_from = cell_manager.query_cell_value();
+            let absorb_data = cell_manager.query_cell_value();
+            let absorb_result = cell_manager.query_cell_value();
+            absorb_from.assign(&mut region, 0, absorb_row.from);
+            absorb_data.assign(&mut region, 0, absorb_row.absorb);
+            absorb_result.assign(&mut region, 0, absorb_row.result);
+
+            // Absorb: split `from + absorb` into parts, then map every bit with `v & 1`
+            cell_manager.start_region();
+            let part_size = get_num_bits_per_absorb_lookup();
+            let input = absorb_row.from + absorb_row.absorb;
+            let absorb_fat =
+                split::value(&mut cell_manager, &mut region, input, 0, part_size, false, None);
+            cell_manager.start_region();
+            let _absorb_result = transform::value(
+                &mut cell_manager,
+                &mut region,
+                absorb_fat.clone(),
+                true,
+                |v| v & 1,
+                true,
+            );
+
+            // Padding: flag which of this round's absorbed bytes are padding
+            cell_manager.start_region();
+            // Unpack a single word into bytes (for the absorption)
+            // Potential optimization: could do multiple bytes per lookup
+            let packed =
+                split::value(&mut cell_manager, &mut region, absorb_row.absorb, 0, 8, false, None);
+            cell_manager.start_region();
+            let input_bytes =
+                transform::value(&mut cell_manager, &mut region, packed, false, |v| *v, true);
+            cell_manager.start_region();
+            let is_paddings =
+                input_bytes.iter().map(|_| cell_manager.query_cell_value()).collect::<Vec<_>>();
+            debug_assert_eq!(is_paddings.len(), NUM_BYTES_PER_WORD);
+            if round < NUM_WORDS_TO_ABSORB {
+                for (padding_idx, is_padding) in is_paddings.iter().enumerate() {
+                    let byte_idx = round * NUM_BYTES_PER_WORD + padding_idx;
+                    let padding = is_final_block && byte_idx >= num_bytes_in_last_block;
+                    is_padding.assign(&mut region, 0, F::from(padding));
+                }
+            }
+            cell_manager.start_region();
+
+            if round != NUM_ROUNDS {
+                // Theta: sum the five words of each `s[i]`, then map the sum's bits with `v & 1`
+                let part_size = get_num_bits_per_theta_c_lookup();
+                let mut bcf = Vec::new();
+                for s in &s {
+                    let c = s[0] + s[1] + s[2] + s[3] + s[4];
+                    let bc_fat =
+                        split::value(&mut cell_manager, &mut region, c, 1, part_size, false, None);
+                    bcf.push(bc_fat);
+                }
+                cell_manager.start_region();
+                let mut bc = Vec::new();
+                for bc_fat in bcf {
+                    let bc_norm = transform::value(
+                        &mut cell_manager,
+                        &mut region,
+                        bc_fat.clone(),
+                        true,
+                        |v| v & 1,
+                        true,
+                    );
+                    bc.push(bc_norm);
+                }
+                cell_manager.start_region();
+                let mut os = [[F::zero(); 5]; 5];
+                for i in 0..5 {
+                    let t = decode::value(bc[(i + 4) % 5].clone())
+                        + decode::value(rotate(bc[(i + 1) % 5].clone(), 1, part_size));
+                    for j in 0..5 {
+                        os[i][j] = s[i][j] + t;
+                    }
+                }
+                s = os;
+                cell_manager.start_region();
+
+                // Rho/Pi: reserve cells for three passes, rotate by RHO_MATRIX, permute positions
+                let part_size = get_num_bits_per_base_chi_lookup();
+                let target_word_sizes = target_part_sizes(part_size);
+                let num_word_parts = target_word_sizes.len();
+                let mut rho_pi_chi_cells: [[[Vec<Cell<F>>; 5]; 5]; 3] =
+                    array_init::array_init(|_| {
+                        array_init::array_init(|_| array_init::array_init(|_| Vec::new()))
+                    });
+                let mut column_starts = [0usize; 3];
+                for p in 0..3 {
+                    column_starts[p] = cell_manager.start_region();
+                    let mut row_idx = 0;
+                    for j in 0..5 {
+                        for _ in 0..num_word_parts {
+                            for i in 0..5 {
+                                rho_pi_chi_cells[p][i][j]
+                                    .push(cell_manager.query_cell_value_at_row(row_idx as i32));
+                            }
+                            row_idx = (row_idx + 1) % num_rows_per_round;
+                        }
+                    }
+                }
+                cell_manager.start_region();
+                let mut os_parts: [[Vec<PartValue<F>>; 5]; 5] =
+                    array_init::array_init(|_| array_init::array_init(|_| Vec::new()));
+                for (j, os_part) in os_parts.iter_mut().enumerate() {
+                    for i in 0..5 {
+                        let s_parts = split_uniform::value(
+                            &rho_pi_chi_cells[0][j][(2 * i + 3 * j) % 5],
+                            &mut cell_manager,
+                            &mut region,
+                            s[i][j],
+                            RHO_MATRIX[i][j],
+                            part_size,
+                            true,
+                        );
+
+                        let s_parts = transform_to::value(
+                            &rho_pi_chi_cells[1][j][(2 * i + 3 * j) % 5],
+                            &mut region,
+                            s_parts.clone(),
+                            true,
+                            |v| v & 1,
+                        );
+                        os_part[(2 * i + 3 * j) % 5] = s_parts.clone();
+                    }
+                }
+                cell_manager.start_region();
+
+                // Chi: map `3 - 2*a + b - c` per part through CHI_BASE_LOOKUP_TABLE
+                let part_size_base = get_num_bits_per_base_chi_lookup();
+                let three_packed = pack::<F>(&vec![3u8; part_size_base]);
+                let mut os = [[F::zero(); 5]; 5];
+                for j in 0..5 {
+                    for i in 0..5 {
+                        let mut s_parts = Vec::new();
+                        for ((part_a, part_b), part_c) in os_parts[i][j]
+                            .iter()
+                            .zip(os_parts[(i + 1) % 5][j].iter())
+                            .zip(os_parts[(i + 2) % 5][j].iter())
+                        {
+                            let value =
+                                three_packed - two * part_a.value + part_b.value - part_c.value;
+                            s_parts.push(PartValue {
+                                num_bits: part_size_base,
+                                rot: j as i32,
+                                value,
+                            });
+                        }
+                        os[i][j] = decode::value(transform_to::value(
+                            &rho_pi_chi_cells[2][i][j],
+                            &mut region,
+                            s_parts.clone(),
+                            true,
+                            |v| CHI_BASE_LOOKUP_TABLE[*v as usize],
+                        ));
+                    }
+                }
+                s = os;
+                cell_manager.start_region();
+
+                // Iota: add this round's constant to s[0][0], then map every bit with `v & 1`
+                let part_size = get_num_bits_per_absorb_lookup();
+                let input = s[0][0] + pack_u64::<F>(ROUND_CST[round]);
+                let iota_parts = split::value::<F>(
+                    &mut cell_manager,
+                    &mut region,
+                    input,
+                    0,
+                    part_size,
+                    false,
+                    None,
+                );
+                cell_manager.start_region();
+                s[0][0] = decode::value(transform::value(
+                    &mut cell_manager,
+                    &mut region,
+                    iota_parts.clone(),
+                    true,
+                    |v| v & 1,
+                    true,
+                ));
+            }
+
+            // The words to squeeze out: this is the hash digest as words with
+            // NUM_BYTES_PER_WORD (=8) bytes each
+            for (hash_word, a) in hash_words.iter_mut().zip(s.iter()) {
+                *hash_word = a[0];
+            }
+
+            cell_managers.push(cell_manager);
+            regions.push(region);
+        }
+
+        // With the final state known, store squeeze word `idx` in round `num_rounds - 2 - idx`
+        let num_rounds = cell_managers.len();
+        for (idx, word) in hash_words.iter().enumerate() {
+            let cell_manager = &mut cell_managers[num_rounds - 2 - idx];
+            let region = &mut regions[num_rounds - 2 - idx];
+
+            cell_manager.start_region();
+            let squeeze_packed = cell_manager.query_cell_value();
+            squeeze_packed.assign(region, 0, *word);
+
+            cell_manager.start_region();
+            let packed = split::value(cell_manager, region, *word, 0, 8, false, None);
+            cell_manager.start_region();
+            transform::value(cell_manager, region, packed, false, |v| *v, true);
+        }
+        squeeze_digests.push(hash_words);
+
+        for round in 0..NUM_ROUNDS + 1 {
+            let round_cst = pack_u64(ROUND_CST[round]);
+
+            for row_idx in 0..num_rows_per_round {
+                rows.push(KeccakRow {
+                    q_enable: row_idx == 0,
+                    // q_enable_row: true,
+                    q_round: row_idx == 0 && round < NUM_ROUNDS,
+                    q_absorb: row_idx == 0 && round == NUM_ROUNDS,
+                    q_round_last: row_idx == 0 && round == NUM_ROUNDS,
+                    q_padding: row_idx == 0 && round < NUM_WORDS_TO_ABSORB,
+                    q_padding_last: row_idx == 0 && round == NUM_WORDS_TO_ABSORB - 1,
+                    round_cst,
+                    is_final: is_final_block && round == NUM_ROUNDS && row_idx == 0,
+                    cell_values: regions[round].rows.get(row_idx).unwrap_or(&vec![]).clone(),
+                });
+                #[cfg(debug_assertions)]
+                {
+                    let mut r = rows.last().unwrap().clone();
+                    r.cell_values.clear();
+                    log::trace!("offset {:?} row idx {} row {:?}", rows.len() - 1, row_idx, r);
+                }
+            }
+            log::trace!(" = = = = = = round {} end", round);
+        }
+        log::trace!(" ====================== chunk {} end", idx);
+    }
+
+    #[cfg(debug_assertions)]
+    {
+        let hash_bytes = s
+            .into_iter()
+            .take(4)
+            .map(|a| {
+                pack_with_base::<F>(&unpack(a[0]), 2)
+                    .to_repr()
+                    .into_iter()
+                    .take(8)
+                    .collect::<Vec<_>>()
+                    .to_vec()
+            })
+            .collect::<Vec<_>>();
+        debug!("hash: {:x?}", &(hash_bytes[0..4].concat()));
+        // debug!("data rlc: {:x?}", data_rlc);
+    }
+}
+
+/// Assigns the input RLCs (via `keccak_phase1`) and the digest RLCs for every
+/// keccak-f permutation, returning `(input_rlcs, output_rlcs)`. Inputs are padded
+/// with empty byte strings until there is one input RLC per squeeze digest.
+pub(crate) fn multi_keccak_phase1<'a, 'v, F: Field>(
+    region: &mut Region<F>,
+    keccak_table: &KeccakTable,
+    bytes: impl IntoIterator<Item = &'a [u8]>,
+    challenge: Value<F>,
+    squeeze_digests: Vec<[F; NUM_WORDS_TO_SQUEEZE]>,
+) -> (Vec<KeccakAssignedValue<'v, F>>, Vec<KeccakAssignedValue<'v, F>>) {
+    let mut input_rlcs = Vec::with_capacity(squeeze_digests.len());
+    let mut output_rlcs = Vec::with_capacity(squeeze_digests.len());
+    let rows_per_round = get_num_rows_per_round();
+
+    // The leading round (rows 0..rows_per_round) carries zero in both RLC columns.
+    for row in 0..rows_per_round {
+        for column in [keccak_table.input_rlc, keccak_table.output_rlc] {
+            assign_advice_custom(region, column, row, Value::known(F::zero()));
+        }
+    }
+
+    let mut offset = rows_per_round;
+    for input in bytes {
+        keccak_phase1(region, keccak_table, input, challenge, &mut input_rlcs, &mut offset);
+    }
+    debug_assert!(input_rlcs.len() <= squeeze_digests.len());
+    while input_rlcs.len() < squeeze_digests.len() {
+        keccak_phase1(region, keccak_table, &[], challenge, &mut input_rlcs, &mut offset);
+    }
+
+    // Each digest RLC sits on the first row of its permutation's final round.
+    for (digest_idx, hash_words) in squeeze_digests.into_iter().enumerate() {
+        let row = rows_per_round * (1 + digest_idx * (NUM_ROUNDS + 1) + NUM_ROUNDS);
+        let digest_bytes = hash_words.into_iter().flat_map(|word| to_bytes::value(&unpack(word)));
+        let hash_rlc = digest_bytes
+            .map(|byte| Value::known(F::from(byte as u64)))
+            .reduce(|rlc, byte| rlc * challenge + byte)
+            .unwrap();
+        output_rlcs.push(assign_advice_custom(region, keccak_table.output_rlc, row, hash_rlc));
+    }
+
+    (input_rlcs, output_rlcs)
+}
+
+/// Generates the first-phase witness for a batch of inputs.
+///
+/// Returns the `KeccakRow`s (one dummy round followed by `NUM_ROUNDS + 1` rounds
+/// per keccak-f permutation) and one squeeze digest per permutation. When
+/// `capacity` is `Some(n)`, empty-input hashes are appended until the row count
+/// matches `n` permutations.
+///
+/// # Panics
+/// Panics (reporting `Error::BoundsFailure`) if the inputs need more rows than
+/// `capacity` permutations provide.
+pub(crate) fn multi_keccak_phase0<F: Field>(
+    bytes: &[Vec<u8>],
+    capacity: Option<usize>,
+) -> (Vec<KeccakRow<F>>, Vec<[F; NUM_WORDS_TO_SQUEEZE]>) {
+    let num_rows_per_round = get_num_rows_per_round();
+    // Row budget implied by `capacity`; computed once instead of on every
+    // iteration of the padding loop below.
+    let max_rows = capacity.map(|c| (1 + c * (NUM_ROUNDS + 1)) * num_rows_per_round);
+    let mut rows = Vec::with_capacity(max_rows.unwrap_or(num_rows_per_round));
+    // Dummy first row so that the initial data is absorbed
+    // The initial data doesn't really matter, `is_final` just needs to be disabled.
+    rows.extend(KeccakRow::dummy_rows(num_rows_per_round));
+    // Actual keccaks: each input's witness is generated independently in parallel.
+    let artifacts = bytes
+        .par_iter()
+        .map(|input| {
+            let num_keccak_f = get_num_keccak_f(input.len());
+            let mut squeeze_digests = Vec::with_capacity(num_keccak_f);
+            let mut rows = Vec::with_capacity(num_keccak_f * (NUM_ROUNDS + 1) * num_rows_per_round);
+            keccak_phase0(&mut rows, &mut squeeze_digests, input);
+            (rows, squeeze_digests)
+        })
+        .collect::<Vec<_>>();
+
+    // Concatenate the per-input results in input order.
+    let mut squeeze_digests = Vec::with_capacity(capacity.unwrap_or(0));
+    for (rows_part, squeezes) in artifacts {
+        rows.extend(rows_part);
+        squeeze_digests.extend(squeezes);
+    }
+
+    if let Some(max_rows) = max_rows {
+        // Pad with no data hashes to the expected capacity
+        while rows.len() < max_rows {
+            keccak_phase0(&mut rows, &mut squeeze_digests, &[]);
+        }
+        // Check that we are not over capacity
+        if rows.len() > max_rows {
+            panic!("{:?}", Error::BoundsFailure);
+        }
+    }
+    (rows, squeeze_digests)
+}
diff --git a/hashes/zkevm-keccak/src/keccak_circuit/keccak_table.rs b/hashes/zkevm-keccak/src/keccak_circuit/keccak_table.rs
new file mode 100644
index 00000000..3aa27503
--- /dev/null
+++ b/hashes/zkevm-keccak/src/keccak_circuit/keccak_table.rs
@@ -0,0 +1,31 @@
+use super::util::eth_types::Field;
+use crate::halo2_proofs::plonk::{Advice, Column, ConstraintSystem, SecondPhase};
+
+/// Keccak Table, used to verify keccak hashing from RLC'ed input.
+#[derive(Clone, Debug)]
+pub struct KeccakTable {
+    /// True when the row is enabled
+    pub is_enabled: Column<Advice>,
+    /// RLC of the input bytes, folded in input order as `rlc = rlc * challenge + byte`
+    pub input_rlc: Column<Advice>, // allocated in `SecondPhase` with equality enabled
+    // Byte array input length (column currently disabled)
+    // pub input_len: Column<Advice>,
+    /// RLC of the hash digest bytes, folded with the same challenge
+    pub output_rlc: Column<Advice>, // allocated in `SecondPhase` with equality enabled
+}
+
+impl KeccakTable {
+    /// Allocates the table's columns in `meta`: both RLC columns are created in
+    /// `SecondPhase` and equality-enabled, then `is_enabled` is created with
+    /// `advice_column()`.
+    pub fn construct<F: Field>(meta: &mut ConstraintSystem<F>) -> Self {
+        // The allocation order below is preserved deliberately.
+        let input_rlc = meta.advice_column_in(SecondPhase);
+        let output_rlc = meta.advice_column_in(SecondPhase);
+        for rlc_column in [input_rlc, output_rlc] {
+            meta.enable_equality(rlc_column);
+        }
+        let is_enabled = meta.advice_column();
+        Self { is_enabled, input_rlc, output_rlc }
+    }
+}
diff --git a/hashes/zkevm-keccak/src/keccak_circuit/param.rs b/hashes/zkevm-keccak/src/keccak_circuit/param.rs
new file mode 100644
index 00000000..09220ae6
--- /dev/null
+++ b/hashes/zkevm-keccak/src/keccak_circuit/param.rs
@@ -0,0 +1,67 @@
+pub(crate) const MAX_DEGREE: usize = 3;
+pub(crate) const ABSORB_LOOKUP_RANGE: usize = 3;
+pub(crate) const THETA_C_LOOKUP_RANGE: usize = 6;
+pub(crate) const RHO_PI_LOOKUP_RANGE: usize = 4;
+pub(crate) const CHI_BASE_LOOKUP_RANGE: usize = 5;
+
+pub(crate) const NUM_BITS_PER_BYTE: usize = 8;
+pub(crate) const NUM_BYTES_PER_WORD: usize = 8;
+pub(crate) const NUM_BITS_PER_WORD: usize = NUM_BYTES_PER_WORD * NUM_BITS_PER_BYTE;
+pub(crate) const KECCAK_WIDTH: usize = 5 * 5;
+pub(crate) const KECCAK_WIDTH_IN_BITS: usize = KECCAK_WIDTH * NUM_BITS_PER_WORD;
+pub(crate) const NUM_ROUNDS: usize = 24;
+pub(crate) const NUM_WORDS_TO_ABSORB: usize = 17;
+pub(crate) const NUM_BYTES_TO_ABSORB: usize = NUM_WORDS_TO_ABSORB * NUM_BYTES_PER_WORD;
+pub(crate) const NUM_WORDS_TO_SQUEEZE: usize = 4; // words in one output digest
+pub(crate) const NUM_BYTES_TO_SQUEEZE: usize = NUM_WORDS_TO_SQUEEZE * NUM_BYTES_PER_WORD;
+pub(crate) const ABSORB_WIDTH_PER_ROW: usize = NUM_BITS_PER_WORD;
+pub(crate) const ABSORB_WIDTH_PER_ROW_BYTES: usize = ABSORB_WIDTH_PER_ROW / NUM_BITS_PER_BYTE;
+pub(crate) const RATE: usize = NUM_WORDS_TO_ABSORB * NUM_BYTES_PER_WORD; // bytes per chunk
+pub(crate) const RATE_IN_BITS: usize = RATE * NUM_BITS_PER_BYTE;
+// pub(crate) const THETA_C_WIDTH: usize = 5 * NUM_BITS_PER_WORD;
+pub(crate) const RHO_MATRIX: [[usize; 5]; 5] = [
+    [0, 36, 3, 41, 18],
+    [1, 44, 10, 45, 2],
+    [62, 6, 43, 15, 61],
+    [28, 55, 25, 21, 56],
+    [27, 20, 39, 8, 14],
+];
+pub(crate) const ROUND_CST: [u64; NUM_ROUNDS + 1] = [
+    0x0000000000000001,
+    0x0000000000008082,
+    0x800000000000808a,
+    0x8000000080008000,
+    0x000000000000808b,
+    0x0000000080000001,
+    0x8000000080008081,
+    0x8000000000008009,
+    0x000000000000008a,
+    0x0000000000000088,
+    0x0000000080008009,
+    0x000000008000000a,
+    0x000000008000808b,
+    0x800000000000008b,
+    0x8000000000008089,
+    0x8000000000008003,
+    0x8000000000008002,
+    0x8000000000000080,
+    0x000000000000800a,
+    0x800000008000000a,
+    0x8000000080008081,
+    0x8000000000008080,
+    0x0000000080000001,
+    0x8000000080008008,
+    0x0000000000000000, // extra entry for the absorb round (index NUM_ROUNDS)
+];
+// Bit positions that can have a non-zero value in `ROUND_CST` (constant currently disabled).
+// pub(crate) const ROUND_CST_BIT_POS: [usize; 7] = [0, 1, 3, 7, 15, 31, 63];
+
+/// The number of bits used in the sparse word representation per bit
+pub(crate) const BIT_COUNT: usize = 3;
+/// The base of the bit in the sparse word representation (`2^BIT_COUNT`)
+pub(crate) const BIT_SIZE: usize = 2usize.pow(BIT_COUNT as u32);
+
+/// `a ^ ((~b) & c)` is calculated by doing `lookup[3 - 2*a + b - c]`
+pub(crate) const CHI_BASE_LOOKUP_TABLE: [u8; 5] = [0, 1, 1, 0, 0];
+// `a ^ ((~b) & c) ^ d` is calculated by doing `lookup[5 - 2*a - b + c - 2*d]` (table disabled)
+// pub(crate) const CHI_EXT_LOOKUP_TABLE: [u8; 7] = [0, 0, 1, 1, 0, 0, 1];
diff --git a/hashes/zkevm-keccak/src/keccak_circuit/table.rs b/hashes/zkevm-keccak/src/keccak_circuit/table.rs
new file mode 100644
index 00000000..0be78f3d
--- /dev/null
+++ b/hashes/zkevm-keccak/src/keccak_circuit/table.rs
@@ -0,0 +1,112 @@
+use super::util::eth_types::Field;
+use super::{param::*, util::*};
+use crate::halo2_proofs::{
+    circuit::{Layouter, Value},
+    plonk::{Error, TableColumn},
+};
+use itertools::Itertools;
+
+/// Loads a normalization table with the given parameters: every combination of
+/// `part_size` digits in `0..range` is packed in base `BIT_SIZE` as the input, and
+/// the output packs each digit's lowest bit at the same position.
+pub(crate) fn load_normalize_table<F: Field>(
+    layouter: &mut impl Layouter<F>,
+    name: &str,
+    tables: &[TableColumn; 2],
+    range: u64,
+) -> Result<(), Error> {
+    let part_size = get_num_bits_per_lookup(range as usize);
+    let [input_column, output_column] = *tables;
+    layouter.assign_table(
+        || format!("{name} table"),
+        |mut table| {
+            let digit_combinations =
+                (0..part_size).map(|_| 0u64..range).multi_cartesian_product();
+            for (row, digits) in digit_combinations.enumerate() {
+                // Accumulate (packed input, packed output, weight of the next digit).
+                let (input, output, _) =
+                    digits.iter().fold((0u64, 0u64, 1u64), |(input, output, weight), digit| {
+                        (
+                            input + digit * weight,
+                            output + (digit & 1) * weight,
+                            weight * BIT_SIZE as u64,
+                        )
+                    });
+                table.assign_cell(
+                    || format!("{name} input"),
+                    input_column,
+                    row,
+                    || Value::known(F::from(input)),
+                )?;
+                table.assign_cell(
+                    || format!("{name} output"),
+                    output_column,
+                    row,
+                    || Value::known(F::from(output)),
+                )?;
+            }
+            Ok(())
+        },
+    )
+}
+
+/// Loads the byte packing table: row `b` pairs the byte value `b` with the packed
+/// form of its bits.
+pub(crate) fn load_pack_table<F: Field>(
+    layouter: &mut impl Layouter<F>,
+    tables: &[TableColumn; 2],
+) -> Result<(), Error> {
+    let [unpacked_col, packed_col] = *tables;
+    layouter.assign_table(
+        || "pack table",
+        |mut table| {
+            for byte in 0u8..=u8::MAX {
+                let row = usize::from(byte);
+                let unpacked = F::from(u64::from(byte));
+                let packed: F = pack(&into_bits(&[byte]));
+                table.assign_cell(|| "unpacked", unpacked_col, row, || Value::known(unpacked))?;
+                table.assign_cell(|| "packed", packed_col, row, || Value::known(packed))?;
+            }
+            Ok(())
+        },
+    )
+}
+
+/// Loads a lookup table: every combination of `part_size` digits in
+/// `0..lookup_table.len()` is packed in base `BIT_SIZE` as the input, and the output
+/// packs `lookup_table[digit]` at the same position.
+pub(crate) fn load_lookup_table<F: Field>(
+    layouter: &mut impl Layouter<F>,
+    name: &str,
+    tables: &[TableColumn; 2],
+    part_size: usize,
+    lookup_table: &[u8],
+) -> Result<(), Error> {
+    let [input_column, output_column] = *tables;
+    layouter.assign_table(
+        || format!("{name} table"),
+        |mut table| {
+            let digit_combinations = (0..part_size)
+                .map(|_| 0..lookup_table.len() as u64)
+                .multi_cartesian_product();
+            for (row, digits) in digit_combinations.enumerate() {
+                // Accumulate (packed input, packed output, weight of the next digit).
+                let (input, output, _) =
+                    digits.iter().fold((0u64, 0u64, 1u64), |(input, output, weight), digit| {
+                        (
+                            input + digit * weight,
+                            output + (lookup_table[*digit as usize] as u64) * weight,
+                            weight * BIT_SIZE as u64,
+                        )
+                    });
+                table.assign_cell(
+                    || format!("{name} input"),
+                    input_column,
+                    row,
+                    || Value::known(F::from(input)),
+                )?;
+                table.assign_cell(
+                    || format!("{name} output"),
+                    output_column,
+                    row,
+                    || Value::known(F::from(output)),
+                )?;
+            }
+            Ok(())
+        },
+    )
+}
diff --git a/hashes/zkevm-keccak/src/keccak_packed_multi/tests.rs b/hashes/zkevm-keccak/src/keccak_circuit/test.rs
similarity index 99%
rename from hashes/zkevm-keccak/src/keccak_packed_multi/tests.rs
rename to hashes/zkevm-keccak/src/keccak_circuit/test.rs
index 4619a197..c87fd5d6 100644
--- a/hashes/zkevm-keccak/src/keccak_packed_multi/tests.rs
+++ b/hashes/zkevm-keccak/src/keccak_circuit/test.rs
@@ -19,6 +19,7 @@ use crate::halo2_proofs::{
     },
 };
 use rand_core::OsRng;
+use std::env::var;
 
 /// KeccakCircuit
 #[derive(Default, Clone, Debug)]
diff --git a/hashes/zkevm-keccak/src/util.rs b/hashes/zkevm-keccak/src/keccak_circuit/util.rs
similarity index 57%
rename from hashes/zkevm-keccak/src/util.rs
rename to hashes/zkevm-keccak/src/keccak_circuit/util.rs
index 868c366c..76601696 100644
--- a/hashes/zkevm-keccak/src/util.rs
+++ b/hashes/zkevm-keccak/src/keccak_circuit/util.rs
@@ -1,9 +1,11 @@
 //! Utility traits, functions used in the crate.
 
+use super::param::*;
 use crate::halo2_proofs::{
     circuit::{Layouter, Value},
     plonk::{Error, TableColumn},
 };
+use eth_types::{Field, ToScalar, Word};
 use itertools::Itertools;
 use std::env::var;
 
@@ -11,70 +13,6 @@ pub mod constraint_builder;
 pub mod eth_types;
 pub mod expression;
 
-use eth_types::{Field, ToScalar, Word};
-
-pub const NUM_BITS_PER_BYTE: usize = 8;
-pub const NUM_BYTES_PER_WORD: usize = 8;
-pub const NUM_BITS_PER_WORD: usize = NUM_BYTES_PER_WORD * NUM_BITS_PER_BYTE;
-pub const KECCAK_WIDTH: usize = 5 * 5;
-pub const KECCAK_WIDTH_IN_BITS: usize = KECCAK_WIDTH * NUM_BITS_PER_WORD;
-pub const NUM_ROUNDS: usize = 24;
-pub const NUM_WORDS_TO_ABSORB: usize = 17;
-pub const NUM_BYTES_TO_ABSORB: usize = NUM_WORDS_TO_ABSORB * NUM_BYTES_PER_WORD;
-pub const NUM_WORDS_TO_SQUEEZE: usize = 4;
-pub const NUM_BYTES_TO_SQUEEZE: usize = NUM_WORDS_TO_SQUEEZE * NUM_BYTES_PER_WORD;
-pub const ABSORB_WIDTH_PER_ROW: usize = NUM_BITS_PER_WORD;
-pub const ABSORB_WIDTH_PER_ROW_BYTES: usize = ABSORB_WIDTH_PER_ROW / NUM_BITS_PER_BYTE;
-pub const RATE: usize = NUM_WORDS_TO_ABSORB * NUM_BYTES_PER_WORD;
-pub const RATE_IN_BITS: usize = RATE * NUM_BITS_PER_BYTE;
-// pub(crate) const THETA_C_WIDTH: usize = 5 * NUM_BITS_PER_WORD;
-pub(crate) const RHO_MATRIX: [[usize; 5]; 5] = [
-    [0, 36, 3, 41, 18],
-    [1, 44, 10, 45, 2],
-    [62, 6, 43, 15, 61],
-    [28, 55, 25, 21, 56],
-    [27, 20, 39, 8, 14],
-];
-pub(crate) const ROUND_CST: [u64; NUM_ROUNDS + 1] = [
-    0x0000000000000001,
-    0x0000000000008082,
-    0x800000000000808a,
-    0x8000000080008000,
-    0x000000000000808b,
-    0x0000000080000001,
-    0x8000000080008081,
-    0x8000000000008009,
-    0x000000000000008a,
-    0x0000000000000088,
-    0x0000000080008009,
-    0x000000008000000a,
-    0x000000008000808b,
-    0x800000000000008b,
-    0x8000000000008089,
-    0x8000000000008003,
-    0x8000000000008002,
-    0x8000000000000080,
-    0x000000000000800a,
-    0x800000008000000a,
-    0x8000000080008081,
-    0x8000000000008080,
-    0x0000000080000001,
-    0x8000000080008008,
-    0x0000000000000000, // absorb round
-];
-// Bit positions that have a non-zero value in `IOTA_ROUND_CST`.
-// pub(crate) const ROUND_CST_BIT_POS: [usize; 7] = [0, 1, 3, 7, 15, 31, 63];
-
-// The number of bits used in the sparse word representation per bit
-pub const BIT_COUNT: usize = 3;
-// The base of the bit in the sparse word representation
-pub const BIT_SIZE: usize = 2usize.pow(BIT_COUNT as u32);
-
-// `a ^ ((~b) & c)` is calculated by doing `lookup[3 - 2*a + b - c]`
-pub(crate) const CHI_BASE_LOOKUP_TABLE: [u8; 5] = [0, 1, 1, 0, 0];
-// `a ^ ((~b) & c) ^ d` is calculated by doing `lookup[5 - 2*a - b + c - 2*d]`
-// pub(crate) const CHI_EXT_LOOKUP_TABLE: [u8; 7] = [0, 0, 1, 1, 0, 0, 1];
-
 /// Description of which bits (positions) a part contains
 #[derive(Clone, Debug)]
 pub struct PartInfo {
@@ -89,19 +27,66 @@ pub struct WordParts {
     pub parts: Vec<PartInfo>,
 }
 
-/// Packs bits into bytes
-pub mod to_bytes {
-    pub(crate) fn value(bits: &[u8]) -> Vec<u8> {
-        debug_assert!(bits.len() % 8 == 0, "bits not a multiple of 8");
-        let mut bytes = Vec::new();
-        for byte_bits in bits.chunks(8) {
-            let mut value = 0u8;
-            for (idx, bit) in byte_bits.iter().enumerate() {
-                value += *bit << idx;
+impl WordParts {
+    /// Returns a description of how a word will be split into parts
+    pub fn new(part_size: usize, rot: usize, normalize: bool) -> Self {
+        let mut bits = (0usize..64).collect::<Vec<_>>();
+        bits.rotate_right(rot);
+
+        let mut parts = Vec::new();
+        let mut rot_idx = 0;
+
+        let mut idx = 0;
+        let target_sizes = if normalize {
+            // After the rotation we want the parts of all the words to be at the same
+            // positions
+            target_part_sizes(part_size)
+        } else {
+            // Here we only care about minimizing the number of parts
+            let num_parts_a = rot / part_size;
+            let partial_part_a = rot % part_size;
+
+            let num_parts_b = (64 - rot) / part_size;
+            let partial_part_b = (64 - rot) % part_size;
+
+            let mut part_sizes = vec![part_size; num_parts_a];
+            if partial_part_a > 0 {
+                part_sizes.push(partial_part_a);
+            }
+
+            part_sizes.extend(vec![part_size; num_parts_b]);
+            if partial_part_b > 0 {
+                part_sizes.push(partial_part_b);
+            }
+
+            part_sizes
+        };
+        // Split into parts bit by bit
+        for part_size in target_sizes {
+            let mut num_consumed = 0;
+            while num_consumed < part_size {
+                let mut part_bits: Vec<usize> = Vec::new();
+                while num_consumed < part_size {
+                    if !part_bits.is_empty() && bits[idx] == 0 {
+                        break;
+                    }
+                    if bits[idx] == 0 {
+                        rot_idx = parts.len();
+                    }
+                    part_bits.push(bits[idx]);
+                    idx += 1;
+                    num_consumed += 1;
+                }
+                parts.push(PartInfo { bits: part_bits });
             }
-            bytes.push(value);
         }
-        bytes
+
+        debug_assert_eq!(get_rotate_count(rot, part_size), rot_idx);
+
+        parts.rotate_left(rot_idx);
+        debug_assert_eq!(parts[0].bits[0], 0);
+
+        Self { parts }
     }
 }
 
@@ -126,16 +111,6 @@ pub fn rotate_left(bits: &[u8], count: usize) -> [u8; NUM_BITS_PER_WORD] {
     rotated.try_into().unwrap()
 }
 
-/// Scatters a value into a packed word constant
-pub mod scatter {
-    use super::{eth_types::Field, pack};
-    use crate::halo2_proofs::plonk::Expression;
-
-    pub(crate) fn expr<F: Field>(value: u8, count: usize) -> Expression<F> {
-        Expression::Constant(pack(&vec![value; count]))
-    }
-}
-
 /// The words that absorb data
 pub fn get_absorb_positions() -> Vec<(usize, usize)> {
     let mut absorb_positions = Vec::new();
@@ -223,69 +198,6 @@ pub fn get_rotate_count(count: usize, part_size: usize) -> usize {
     (count + part_size - 1) / part_size
 }
 
-impl WordParts {
-    /// Returns a description of how a word will be split into parts
-    pub fn new(part_size: usize, rot: usize, normalize: bool) -> Self {
-        let mut bits = (0usize..64).collect::<Vec<_>>();
-        bits.rotate_right(rot);
-
-        let mut parts = Vec::new();
-        let mut rot_idx = 0;
-
-        let mut idx = 0;
-        let target_sizes = if normalize {
-            // After the rotation we want the parts of all the words to be at the same
-            // positions
-            target_part_sizes(part_size)
-        } else {
-            // Here we only care about minimizing the number of parts
-            let num_parts_a = rot / part_size;
-            let partial_part_a = rot % part_size;
-
-            let num_parts_b = (64 - rot) / part_size;
-            let partial_part_b = (64 - rot) % part_size;
-
-            let mut part_sizes = vec![part_size; num_parts_a];
-            if partial_part_a > 0 {
-                part_sizes.push(partial_part_a);
-            }
-
-            part_sizes.extend(vec![part_size; num_parts_b]);
-            if partial_part_b > 0 {
-                part_sizes.push(partial_part_b);
-            }
-
-            part_sizes
-        };
-        // Split into parts bit by bit
-        for part_size in target_sizes {
-            let mut num_consumed = 0;
-            while num_consumed < part_size {
-                let mut part_bits: Vec<usize> = Vec::new();
-                while num_consumed < part_size {
-                    if !part_bits.is_empty() && bits[idx] == 0 {
-                        break;
-                    }
-                    if bits[idx] == 0 {
-                        rot_idx = parts.len();
-                    }
-                    part_bits.push(bits[idx]);
-                    idx += 1;
-                    num_consumed += 1;
-                }
-                parts.push(PartInfo { bits: part_bits });
-            }
-        }
-
-        debug_assert_eq!(get_rotate_count(rot, part_size), rot_idx);
-
-        parts.rotate_left(rot_idx);
-        debug_assert_eq!(parts[0].bits[0], 0);
-
-        Self { parts }
-    }
-}
-
 /// Get the degree of the circuit from the KECCAK_DEGREE env variable
 pub fn get_degree() -> usize {
     var("KECCAK_DEGREE")
@@ -306,107 +218,28 @@ pub fn get_num_bits_per_lookup(range: usize) -> usize {
     num_bits as usize
 }
 
-/// Loads a normalization table with the given parameters
-pub(crate) fn load_normalize_table<F: Field>(
-    layouter: &mut impl Layouter<F>,
-    name: &str,
-    tables: &[TableColumn; 2],
-    range: u64,
-) -> Result<(), Error> {
-    let part_size = get_num_bits_per_lookup(range as usize);
-    layouter.assign_table(
-        || format!("{name} table"),
-        |mut table| {
-            for (offset, perm) in
-                (0..part_size).map(|_| 0u64..range).multi_cartesian_product().enumerate()
-            {
-                let mut input = 0u64;
-                let mut output = 0u64;
-                let mut factor = 1u64;
-                for input_part in perm.iter() {
-                    input += input_part * factor;
-                    output += (input_part & 1) * factor;
-                    factor *= BIT_SIZE as u64;
-                }
-                table.assign_cell(
-                    || format!("{name} input"),
-                    tables[0],
-                    offset,
-                    || Value::known(F::from(input)),
-                )?;
-                table.assign_cell(
-                    || format!("{name} output"),
-                    tables[1],
-                    offset,
-                    || Value::known(F::from(output)),
-                )?;
-            }
-            Ok(())
-        },
-    )
-}
+/// Scatters a value into a packed word constant
+pub mod scatter {
+    use super::{eth_types::Field, pack};
+    use crate::halo2_proofs::plonk::Expression;
 
-/// Loads the byte packing table
-pub(crate) fn load_pack_table<F: Field>(
-    layouter: &mut impl Layouter<F>,
-    tables: &[TableColumn; 2],
-) -> Result<(), Error> {
-    layouter.assign_table(
-        || "pack table",
-        |mut table| {
-            for (offset, idx) in (0u64..256).enumerate() {
-                table.assign_cell(
-                    || "unpacked",
-                    tables[0],
-                    offset,
-                    || Value::known(F::from(idx)),
-                )?;
-                let packed: F = pack(&into_bits(&[idx as u8]));
-                table.assign_cell(|| "packed", tables[1], offset, || Value::known(packed))?;
-            }
-            Ok(())
-        },
-    )
+    pub(crate) fn expr<F: Field>(value: u8, count: usize) -> Expression<F> {
+        Expression::Constant(pack(&vec![value; count]))
+    }
 }
 
-/// Loads a lookup table
-pub(crate) fn load_lookup_table<F: Field>(
-    layouter: &mut impl Layouter<F>,
-    name: &str,
-    tables: &[TableColumn; 2],
-    part_size: usize,
-    lookup_table: &[u8],
-) -> Result<(), Error> {
-    layouter.assign_table(
-        || format!("{name} table"),
-        |mut table| {
-            for (offset, perm) in (0..part_size)
-                .map(|_| 0..lookup_table.len() as u64)
-                .multi_cartesian_product()
-                .enumerate()
-            {
-                let mut input = 0u64;
-                let mut output = 0u64;
-                let mut factor = 1u64;
-                for input_part in perm.iter() {
-                    input += input_part * factor;
-                    output += (lookup_table[*input_part as usize] as u64) * factor;
-                    factor *= BIT_SIZE as u64;
-                }
-                table.assign_cell(
-                    || format!("{name} input"),
-                    tables[0],
-                    offset,
-                    || Value::known(F::from(input)),
-                )?;
-                table.assign_cell(
-                    || format!("{name} output"),
-                    tables[1],
-                    offset,
-                    || Value::known(F::from(output)),
-                )?;
+/// Packs bits into bytes
+pub mod to_bytes {
+    pub(crate) fn value(bits: &[u8]) -> Vec<u8> {
+        debug_assert!(bits.len() % 8 == 0, "bits not a multiple of 8");
+        let mut bytes = Vec::new();
+        for byte_bits in bits.chunks(8) {
+            let mut value = 0u8;
+            for (idx, bit) in byte_bits.iter().enumerate() {
+                value += *bit << idx;
             }
-            Ok(())
-        },
-    )
+            bytes.push(value);
+        }
+        bytes
+    }
 }
diff --git a/hashes/zkevm-keccak/src/util/constraint_builder.rs b/hashes/zkevm-keccak/src/keccak_circuit/util/constraint_builder.rs
similarity index 100%
rename from hashes/zkevm-keccak/src/util/constraint_builder.rs
rename to hashes/zkevm-keccak/src/keccak_circuit/util/constraint_builder.rs
diff --git a/hashes/zkevm-keccak/src/util/eth_types.rs b/hashes/zkevm-keccak/src/keccak_circuit/util/eth_types.rs
similarity index 100%
rename from hashes/zkevm-keccak/src/util/eth_types.rs
rename to hashes/zkevm-keccak/src/keccak_circuit/util/eth_types.rs
diff --git a/hashes/zkevm-keccak/src/util/expression.rs b/hashes/zkevm-keccak/src/keccak_circuit/util/expression.rs
similarity index 100%
rename from hashes/zkevm-keccak/src/util/expression.rs
rename to hashes/zkevm-keccak/src/keccak_circuit/util/expression.rs
diff --git a/hashes/zkevm-keccak/src/keccak_packed_multi.rs b/hashes/zkevm-keccak/src/keccak_packed_multi.rs
deleted file mode 100644
index 3edc2e1a..00000000
--- a/hashes/zkevm-keccak/src/keccak_packed_multi.rs
+++ /dev/null
@@ -1,2039 +0,0 @@
-use super::util::{
-    constraint_builder::BaseConstraintBuilder,
-    eth_types::Field,
-    expression::{and, not, select, Expr},
-    field_xor, get_absorb_positions, get_num_bits_per_lookup, into_bits, load_lookup_table,
-    load_normalize_table, load_pack_table, pack, pack_u64, pack_with_base, rotate, scatter,
-    target_part_sizes, to_bytes, unpack, CHI_BASE_LOOKUP_TABLE, NUM_BYTES_PER_WORD, NUM_ROUNDS,
-    NUM_WORDS_TO_ABSORB, NUM_WORDS_TO_SQUEEZE, RATE, RATE_IN_BITS, RHO_MATRIX, ROUND_CST,
-};
-use crate::halo2_proofs::{
-    arithmetic::FieldExt,
-    circuit::{Layouter, Region, Value},
-    plonk::{
-        Advice, Challenge, Column, ConstraintSystem, Error, Expression, Fixed, SecondPhase,
-        TableColumn, VirtualCells,
-    },
-    poly::Rotation,
-};
-use halo2_base::halo2_proofs::{circuit::AssignedCell, plonk::Assigned};
-use itertools::Itertools;
-use log::{debug, info};
-use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
-use std::env::var;
-use std::marker::PhantomData;
-
-#[cfg(test)]
-mod tests;
-
-const MAX_DEGREE: usize = 3;
-const ABSORB_LOOKUP_RANGE: usize = 3;
-const THETA_C_LOOKUP_RANGE: usize = 6;
-const RHO_PI_LOOKUP_RANGE: usize = 4;
-const CHI_BASE_LOOKUP_RANGE: usize = 5;
-
-pub fn get_num_rows_per_round() -> usize {
-    var("KECCAK_ROWS")
-        .unwrap_or_else(|_| "25".to_string())
-        .parse()
-        .expect("Cannot parse KECCAK_ROWS env var as usize")
-}
-
-fn get_num_bits_per_absorb_lookup() -> usize {
-    get_num_bits_per_lookup(ABSORB_LOOKUP_RANGE)
-}
-
-fn get_num_bits_per_theta_c_lookup() -> usize {
-    get_num_bits_per_lookup(THETA_C_LOOKUP_RANGE)
-}
-
-fn get_num_bits_per_rho_pi_lookup() -> usize {
-    get_num_bits_per_lookup(CHI_BASE_LOOKUP_RANGE.max(RHO_PI_LOOKUP_RANGE))
-}
-
-fn get_num_bits_per_base_chi_lookup() -> usize {
-    get_num_bits_per_lookup(CHI_BASE_LOOKUP_RANGE.max(RHO_PI_LOOKUP_RANGE))
-}
-
-/// The number of keccak_f's that can be done in this circuit
-///
-/// `num_rows` should be number of usable rows without blinding factors
-pub fn get_keccak_capacity(num_rows: usize) -> usize {
-    // - 1 because we have a dummy round at the very beginning of multi_keccak
-    // - NUM_WORDS_TO_ABSORB because `absorb_data_next` and `absorb_result_next` query `NUM_WORDS_TO_ABSORB * get_num_rows_per_round()` beyond any row where `q_absorb == 1`
-    (num_rows / get_num_rows_per_round() - 1 - NUM_WORDS_TO_ABSORB) / (NUM_ROUNDS + 1)
-}
-
-pub fn get_num_keccak_f(byte_length: usize) -> usize {
-    // ceil( (byte_length + 1) / RATE )
-    byte_length / RATE + 1
-}
-
-/// AbsorbData
-#[derive(Clone, Default, Debug, PartialEq)]
-pub(crate) struct AbsorbData<F: FieldExt> {
-    from: F,
-    absorb: F,
-    result: F,
-}
-
-/// SqueezeData
-#[derive(Clone, Default, Debug, PartialEq)]
-pub(crate) struct SqueezeData<F: FieldExt> {
-    packed: F,
-}
-
-/// KeccakRow
-#[derive(Clone, Debug)]
-pub struct KeccakRow<F: FieldExt> {
-    q_enable: bool,
-    // q_enable_row: bool,
-    q_round: bool,
-    q_absorb: bool,
-    q_round_last: bool,
-    q_padding: bool,
-    q_padding_last: bool,
-    round_cst: F,
-    is_final: bool,
-    cell_values: Vec<F>,
-    // We have no need for length as RLC equality checks length implicitly
-    // length: usize,
-    // SecondPhase values will be assigned separately
-    // data_rlc: Value<F>,
-    // hash_rlc: Value<F>,
-}
-
-impl<F: FieldExt> KeccakRow<F> {
-    pub fn dummy_rows(num_rows: usize) -> Vec<Self> {
-        (0..num_rows)
-            .map(|idx| KeccakRow {
-                q_enable: idx == 0,
-                // q_enable_row: true,
-                q_round: false,
-                q_absorb: idx == 0,
-                q_round_last: false,
-                q_padding: false,
-                q_padding_last: false,
-                round_cst: F::zero(),
-                is_final: false,
-                cell_values: Vec::new(),
-            })
-            .collect()
-    }
-}
-
-/// Part
-#[derive(Clone, Debug)]
-pub(crate) struct Part<F: FieldExt> {
-    cell: Cell<F>,
-    expr: Expression<F>,
-    num_bits: usize,
-}
-
-/// Part Value
-#[derive(Clone, Copy, Debug)]
-pub(crate) struct PartValue<F: FieldExt> {
-    value: F,
-    rot: i32,
-    num_bits: usize,
-}
-
-#[derive(Clone, Debug)]
-pub(crate) struct KeccakRegion<F> {
-    pub(crate) rows: Vec<Vec<F>>,
-}
-
-impl<F: FieldExt> KeccakRegion<F> {
-    pub(crate) fn new() -> Self {
-        Self { rows: Vec::new() }
-    }
-
-    pub(crate) fn assign(&mut self, column: usize, offset: usize, value: F) {
-        while offset >= self.rows.len() {
-            self.rows.push(Vec::new());
-        }
-        let row = &mut self.rows[offset];
-        while column >= row.len() {
-            row.push(F::zero());
-        }
-        row[column] = value;
-    }
-}
-
-#[derive(Clone, Debug)]
-pub(crate) struct Cell<F> {
-    expression: Expression<F>,
-    column_expression: Expression<F>,
-    column: Option<Column<Advice>>,
-    column_idx: usize,
-    rotation: i32,
-}
-
-impl<F: FieldExt> Cell<F> {
-    pub(crate) fn new(
-        meta: &mut VirtualCells<F>,
-        column: Column<Advice>,
-        column_idx: usize,
-        rotation: i32,
-    ) -> Self {
-        Self {
-            expression: meta.query_advice(column, Rotation(rotation)),
-            column_expression: meta.query_advice(column, Rotation::cur()),
-            column: Some(column),
-            column_idx,
-            rotation,
-        }
-    }
-
-    pub(crate) fn new_value(column_idx: usize, rotation: i32) -> Self {
-        Self {
-            expression: 0.expr(),
-            column_expression: 0.expr(),
-            column: None,
-            column_idx,
-            rotation,
-        }
-    }
-
-    pub(crate) fn at_offset(&self, meta: &mut ConstraintSystem<F>, offset: i32) -> Self {
-        let mut expression = 0.expr();
-        meta.create_gate("Query cell", |meta| {
-            expression = meta.query_advice(self.column.unwrap(), Rotation(self.rotation + offset));
-            vec![0.expr()]
-        });
-
-        Self {
-            expression,
-            column_expression: self.column_expression.clone(),
-            column: self.column,
-            column_idx: self.column_idx,
-            rotation: self.rotation + offset,
-        }
-    }
-
-    pub(crate) fn assign(&self, region: &mut KeccakRegion<F>, offset: i32, value: F) {
-        region.assign(self.column_idx, (offset + self.rotation) as usize, value);
-    }
-}
-
-impl<F: FieldExt> Expr<F> for Cell<F> {
-    fn expr(&self) -> Expression<F> {
-        self.expression.clone()
-    }
-}
-
-impl<F: FieldExt> Expr<F> for &Cell<F> {
-    fn expr(&self) -> Expression<F> {
-        self.expression.clone()
-    }
-}
-
-/// CellColumn
-#[derive(Clone, Debug)]
-pub(crate) struct CellColumn<F> {
-    advice: Column<Advice>,
-    expr: Expression<F>,
-}
-
-/// CellManager
-#[derive(Clone, Debug)]
-pub(crate) struct CellManager<F> {
-    height: usize,
-    width: usize,
-    current_row: usize,
-    columns: Vec<CellColumn<F>>,
-    // rows[i] gives the number of columns already used in row `i`
-    rows: Vec<usize>,
-    num_unused_cells: usize,
-}
-
-impl<F: FieldExt> CellManager<F> {
-    pub(crate) fn new(height: usize) -> Self {
-        Self {
-            height,
-            width: 0,
-            current_row: 0,
-            columns: Vec::new(),
-            rows: vec![0; height],
-            num_unused_cells: 0,
-        }
-    }
-
-    pub(crate) fn query_cell(&mut self, meta: &mut ConstraintSystem<F>) -> Cell<F> {
-        let (row_idx, column_idx) = self.get_position();
-        self.query_cell_at_pos(meta, row_idx as i32, column_idx)
-    }
-
-    pub(crate) fn query_cell_at_row(
-        &mut self,
-        meta: &mut ConstraintSystem<F>,
-        row_idx: i32,
-    ) -> Cell<F> {
-        let column_idx = self.rows[row_idx as usize];
-        self.rows[row_idx as usize] += 1;
-        self.width = self.width.max(column_idx + 1);
-        self.current_row = (row_idx as usize + 1) % self.height;
-        self.query_cell_at_pos(meta, row_idx, column_idx)
-    }
-
-    pub(crate) fn query_cell_at_pos(
-        &mut self,
-        meta: &mut ConstraintSystem<F>,
-        row_idx: i32,
-        column_idx: usize,
-    ) -> Cell<F> {
-        let column = if column_idx < self.columns.len() {
-            self.columns[column_idx].advice
-        } else {
-            let advice = meta.advice_column();
-            let mut expr = 0.expr();
-            meta.create_gate("Query column", |meta| {
-                expr = meta.query_advice(advice, Rotation::cur());
-                vec![0.expr()]
-            });
-            self.columns.push(CellColumn { advice, expr });
-            advice
-        };
-
-        let mut cells = Vec::new();
-        meta.create_gate("Query cell", |meta| {
-            cells.push(Cell::new(meta, column, column_idx, row_idx));
-            vec![0.expr()]
-        });
-        cells[0].clone()
-    }
-
-    pub(crate) fn query_cell_value(&mut self) -> Cell<F> {
-        let (row_idx, column_idx) = self.get_position();
-        self.query_cell_value_at_pos(row_idx as i32, column_idx)
-    }
-
-    pub(crate) fn query_cell_value_at_row(&mut self, row_idx: i32) -> Cell<F> {
-        let column_idx = self.rows[row_idx as usize];
-        self.rows[row_idx as usize] += 1;
-        self.width = self.width.max(column_idx + 1);
-        self.current_row = (row_idx as usize + 1) % self.height;
-        self.query_cell_value_at_pos(row_idx, column_idx)
-    }
-
-    pub(crate) fn query_cell_value_at_pos(&mut self, row_idx: i32, column_idx: usize) -> Cell<F> {
-        Cell::new_value(column_idx, row_idx)
-    }
-
-    fn get_position(&mut self) -> (usize, usize) {
-        let best_row_idx = self.current_row;
-        let best_row_pos = self.rows[best_row_idx];
-        self.rows[best_row_idx] += 1;
-        self.width = self.width.max(best_row_pos + 1);
-        self.current_row = (best_row_idx + 1) % self.height;
-        (best_row_idx, best_row_pos)
-    }
-
-    pub(crate) fn get_width(&self) -> usize {
-        self.width
-    }
-
-    pub(crate) fn start_region(&mut self) -> usize {
-        // Make sure all rows start at the same column
-        let width = self.get_width();
-        #[cfg(debug_assertions)]
-        for row in self.rows.iter_mut() {
-            self.num_unused_cells += width - *row;
-        }
-        self.rows = vec![width; self.height];
-        width
-    }
-
-    pub(crate) fn columns(&self) -> &[CellColumn<F>] {
-        &self.columns
-    }
-
-    pub(crate) fn get_num_unused_cells(&self) -> usize {
-        self.num_unused_cells
-    }
-}
-
-/// Keccak Table, used to verify keccak hashing from RLC'ed input.
-#[derive(Clone, Debug)]
-pub struct KeccakTable {
-    /// True when the row is enabled
-    pub is_enabled: Column<Advice>,
-    /// Byte array input as `RLC(reversed(input))`
-    pub input_rlc: Column<Advice>, // RLC of input bytes
-    // Byte array input length
-    // pub input_len: Column<Advice>,
-    /// RLC of the hash result
-    pub output_rlc: Column<Advice>, // RLC of hash of input bytes
-}
-
-impl KeccakTable {
-    /// Construct a new KeccakTable
-    pub fn construct<F: Field>(meta: &mut ConstraintSystem<F>) -> Self {
-        let input_rlc = meta.advice_column_in(SecondPhase);
-        let output_rlc = meta.advice_column_in(SecondPhase);
-        meta.enable_equality(input_rlc);
-        meta.enable_equality(output_rlc);
-        Self {
-            is_enabled: meta.advice_column(),
-            input_rlc,
-            // input_len: meta.advice_column(),
-            output_rlc,
-        }
-    }
-}
-
-#[cfg(feature = "halo2-axiom")]
-type KeccakAssignedValue<'v, F> = AssignedCell<&'v Assigned<F>, F>;
-#[cfg(not(feature = "halo2-axiom"))]
-type KeccakAssignedValue<'v, F> = AssignedCell<F, F>;
-
-pub fn assign_advice_custom<'v, F: Field>(
-    region: &mut Region<F>,
-    column: Column<Advice>,
-    offset: usize,
-    value: Value<F>,
-) -> KeccakAssignedValue<'v, F> {
-    #[cfg(feature = "halo2-axiom")]
-    {
-        region.assign_advice(column, offset, value)
-    }
-    #[cfg(feature = "halo2-pse")]
-    {
-        region
-            .assign_advice(|| format!("assign advice {}", offset), column, offset, || value)
-            .unwrap()
-    }
-}
-
-pub fn assign_fixed_custom<F: Field>(
-    region: &mut Region<F>,
-    column: Column<Fixed>,
-    offset: usize,
-    value: F,
-) {
-    #[cfg(feature = "halo2-axiom")]
-    {
-        region.assign_fixed(column, offset, value);
-    }
-    #[cfg(feature = "halo2-pse")]
-    {
-        region
-            .assign_fixed(
-                || format!("assign fixed {}", offset),
-                column,
-                offset,
-                || Value::known(value),
-            )
-            .unwrap();
-    }
-}
-
-/// Recombines parts back together
-mod decode {
-    use super::{Expr, FieldExt, Part, PartValue};
-    use crate::halo2_proofs::plonk::Expression;
-    use crate::util::BIT_COUNT;
-
-    pub(crate) fn expr<F: FieldExt>(parts: Vec<Part<F>>) -> Expression<F> {
-        parts.iter().rev().fold(0.expr(), |acc, part| {
-            acc * F::from(1u64 << (BIT_COUNT * part.num_bits)) + part.expr.clone()
-        })
-    }
-
-    pub(crate) fn value<F: FieldExt>(parts: Vec<PartValue<F>>) -> F {
-        parts.iter().rev().fold(F::zero(), |acc, part| {
-            acc * F::from(1u64 << (BIT_COUNT * part.num_bits)) + part.value
-        })
-    }
-}
-
-/// Splits a word into parts
-mod split {
-    use super::{
-        decode, BaseConstraintBuilder, CellManager, Expr, Field, FieldExt, KeccakRegion, Part,
-        PartValue,
-    };
-    use crate::halo2_proofs::plonk::{ConstraintSystem, Expression};
-    use crate::util::{pack, pack_part, unpack, WordParts};
-
-    #[allow(clippy::too_many_arguments)]
-    pub(crate) fn expr<F: FieldExt>(
-        meta: &mut ConstraintSystem<F>,
-        cell_manager: &mut CellManager<F>,
-        cb: &mut BaseConstraintBuilder<F>,
-        input: Expression<F>,
-        rot: usize,
-        target_part_size: usize,
-        normalize: bool,
-        row: Option<usize>,
-    ) -> Vec<Part<F>> {
-        let word = WordParts::new(target_part_size, rot, normalize);
-        let mut parts = Vec::with_capacity(word.parts.len());
-        for word_part in word.parts {
-            let cell = if let Some(row) = row {
-                cell_manager.query_cell_at_row(meta, row as i32)
-            } else {
-                cell_manager.query_cell(meta)
-            };
-            parts.push(Part {
-                num_bits: word_part.bits.len(),
-                cell: cell.clone(),
-                expr: cell.expr(),
-            });
-        }
-        // Input parts need to equal original input expression
-        cb.require_equal("split", decode::expr(parts.clone()), input);
-        parts
-    }
-
-    pub(crate) fn value<F: Field>(
-        cell_manager: &mut CellManager<F>,
-        region: &mut KeccakRegion<F>,
-        input: F,
-        rot: usize,
-        target_part_size: usize,
-        normalize: bool,
-        row: Option<usize>,
-    ) -> Vec<PartValue<F>> {
-        let input_bits = unpack(input);
-        debug_assert_eq!(pack::<F>(&input_bits), input);
-        let word = WordParts::new(target_part_size, rot, normalize);
-        let mut parts = Vec::with_capacity(word.parts.len());
-        for word_part in word.parts {
-            let value = pack_part(&input_bits, &word_part);
-            let cell = if let Some(row) = row {
-                cell_manager.query_cell_value_at_row(row as i32)
-            } else {
-                cell_manager.query_cell_value()
-            };
-            cell.assign(region, 0, F::from(value));
-            parts.push(PartValue {
-                num_bits: word_part.bits.len(),
-                rot: cell.rotation,
-                value: F::from(value),
-            });
-        }
-        debug_assert_eq!(decode::value(parts.clone()), input);
-        parts
-    }
-}
-
-// Split into parts, but storing the parts in a specific way to have the same
-// table layout in `output_cells` regardless of rotation.
-mod split_uniform {
-    use super::{
-        decode, target_part_sizes, BaseConstraintBuilder, Cell, CellManager, Expr, FieldExt,
-        KeccakRegion, Part, PartValue,
-    };
-    use crate::halo2_proofs::plonk::{ConstraintSystem, Expression};
-    use crate::util::{
-        eth_types::Field, pack, pack_part, rotate, rotate_rev, unpack, WordParts, BIT_SIZE,
-    };
-
-    #[allow(clippy::too_many_arguments)]
-    pub(crate) fn expr<F: FieldExt>(
-        meta: &mut ConstraintSystem<F>,
-        output_cells: &[Cell<F>],
-        cell_manager: &mut CellManager<F>,
-        cb: &mut BaseConstraintBuilder<F>,
-        input: Expression<F>,
-        rot: usize,
-        target_part_size: usize,
-        normalize: bool,
-    ) -> Vec<Part<F>> {
-        let mut input_parts = Vec::new();
-        let mut output_parts = Vec::new();
-        let word = WordParts::new(target_part_size, rot, normalize);
-
-        let word = rotate(word.parts, rot, target_part_size);
-
-        let target_sizes = target_part_sizes(target_part_size);
-        let mut word_iter = word.iter();
-        let mut counter = 0;
-        while let Some(word_part) = word_iter.next() {
-            if word_part.bits.len() == target_sizes[counter] {
-                // Input and output part are the same
-                let part = Part {
-                    num_bits: target_sizes[counter],
-                    cell: output_cells[counter].clone(),
-                    expr: output_cells[counter].expr(),
-                };
-                input_parts.push(part.clone());
-                output_parts.push(part);
-                counter += 1;
-            } else if let Some(extra_part) = word_iter.next() {
-                // The two parts combined need to have the expected combined length
-                debug_assert_eq!(
-                    word_part.bits.len() + extra_part.bits.len(),
-                    target_sizes[counter]
-                );
-
-                // Needs two cells here to store the parts
-                // These still need to be range checked elsewhere!
-                let part_a = cell_manager.query_cell(meta);
-                let part_b = cell_manager.query_cell(meta);
-
-                // Make sure the parts combined equal the value in the uniform output
-                let expr = part_a.expr()
-                    + part_b.expr()
-                        * F::from((BIT_SIZE as u32).pow(word_part.bits.len() as u32) as u64);
-                cb.require_equal("rot part", expr, output_cells[counter].expr());
-
-                // Input needs the two parts because it needs to be able to undo the rotation
-                input_parts.push(Part {
-                    num_bits: word_part.bits.len(),
-                    cell: part_a.clone(),
-                    expr: part_a.expr(),
-                });
-                input_parts.push(Part {
-                    num_bits: extra_part.bits.len(),
-                    cell: part_b.clone(),
-                    expr: part_b.expr(),
-                });
-                // Output only has the combined cell
-                output_parts.push(Part {
-                    num_bits: target_sizes[counter],
-                    cell: output_cells[counter].clone(),
-                    expr: output_cells[counter].expr(),
-                });
-                counter += 1;
-            } else {
-                unreachable!();
-            }
-        }
-        let input_parts = rotate_rev(input_parts, rot, target_part_size);
-        // Input parts need to equal original input expression
-        cb.require_equal("split", decode::expr(input_parts), input);
-        // Uniform output
-        output_parts
-    }
-
-    pub(crate) fn value<F: Field>(
-        output_cells: &[Cell<F>],
-        cell_manager: &mut CellManager<F>,
-        region: &mut KeccakRegion<F>,
-        input: F,
-        rot: usize,
-        target_part_size: usize,
-        normalize: bool,
-    ) -> Vec<PartValue<F>> {
-        let input_bits = unpack(input);
-        debug_assert_eq!(pack::<F>(&input_bits), input);
-
-        let mut input_parts = Vec::new();
-        let mut output_parts = Vec::new();
-        let word = WordParts::new(target_part_size, rot, normalize);
-
-        let word = rotate(word.parts, rot, target_part_size);
-
-        let target_sizes = target_part_sizes(target_part_size);
-        let mut word_iter = word.iter();
-        let mut counter = 0;
-        while let Some(word_part) = word_iter.next() {
-            if word_part.bits.len() == target_sizes[counter] {
-                let value = pack_part(&input_bits, word_part);
-                output_cells[counter].assign(region, 0, F::from(value));
-                input_parts.push(PartValue {
-                    num_bits: word_part.bits.len(),
-                    rot: output_cells[counter].rotation,
-                    value: F::from(value),
-                });
-                output_parts.push(PartValue {
-                    num_bits: word_part.bits.len(),
-                    rot: output_cells[counter].rotation,
-                    value: F::from(value),
-                });
-                counter += 1;
-            } else if let Some(extra_part) = word_iter.next() {
-                debug_assert_eq!(
-                    word_part.bits.len() + extra_part.bits.len(),
-                    target_sizes[counter]
-                );
-
-                let part_a = cell_manager.query_cell_value();
-                let part_b = cell_manager.query_cell_value();
-
-                let value_a = pack_part(&input_bits, word_part);
-                let value_b = pack_part(&input_bits, extra_part);
-
-                part_a.assign(region, 0, F::from(value_a));
-                part_b.assign(region, 0, F::from(value_b));
-
-                let value = value_a + value_b * (BIT_SIZE as u64).pow(word_part.bits.len() as u32);
-
-                output_cells[counter].assign(region, 0, F::from(value));
-
-                input_parts.push(PartValue {
-                    num_bits: word_part.bits.len(),
-                    value: F::from(value_a),
-                    rot: part_a.rotation,
-                });
-                input_parts.push(PartValue {
-                    num_bits: extra_part.bits.len(),
-                    value: F::from(value_b),
-                    rot: part_b.rotation,
-                });
-                output_parts.push(PartValue {
-                    num_bits: target_sizes[counter],
-                    value: F::from(value),
-                    rot: output_cells[counter].rotation,
-                });
-                counter += 1;
-            } else {
-                unreachable!();
-            }
-        }
-        let input_parts = rotate_rev(input_parts, rot, target_part_size);
-        debug_assert_eq!(decode::value(input_parts), input);
-        output_parts
-    }
-}
-
-// Lookup-table transformation of parts into output cells queried from the cell manager
-mod transform {
-    use super::{transform_to, CellManager, Field, FieldExt, KeccakRegion, Part, PartValue};
-    use crate::halo2_proofs::plonk::{ConstraintSystem, TableColumn};
-
-    /// Queries one output cell per input part, then delegates to `transform_to::expr`.
-    ///
-    /// When `uniform_lookup` is set, the output cell for a part is queried at the
-    /// rotation of that part's own cell; otherwise the next cell handed out by the
-    /// cell manager is used. Cells are queried in input order.
-    #[allow(clippy::too_many_arguments)]
-    pub(crate) fn expr<F: FieldExt>(
-        name: &'static str,
-        meta: &mut ConstraintSystem<F>,
-        cell_manager: &mut CellManager<F>,
-        lookup_counter: &mut usize,
-        input: Vec<Part<F>>,
-        transform_table: [TableColumn; 2],
-        uniform_lookup: bool,
-    ) -> Vec<Part<F>> {
-        let mut targets = Vec::with_capacity(input.len());
-        for part in &input {
-            let target = if uniform_lookup {
-                cell_manager.query_cell_at_row(meta, part.cell.rotation)
-            } else {
-                cell_manager.query_cell(meta)
-            };
-            targets.push(target);
-        }
-        transform_to::expr(
-            name,
-            meta,
-            &targets,
-            lookup_counter,
-            input,
-            transform_table,
-            uniform_lookup,
-        )
-    }
-
-    /// Witness-side counterpart of `expr`: queries one value cell per input part
-    /// (at the part's rotation when `uniform_lookup` is set) and delegates to
-    /// `transform_to::value` to compute and assign the transformed values.
-    pub(crate) fn value<F: Field>(
-        cell_manager: &mut CellManager<F>,
-        region: &mut KeccakRegion<F>,
-        input: Vec<PartValue<F>>,
-        do_packing: bool,
-        f: fn(&u8) -> u8,
-        uniform_lookup: bool,
-    ) -> Vec<PartValue<F>> {
-        let mut targets = Vec::with_capacity(input.len());
-        for part in &input {
-            let target = if uniform_lookup {
-                cell_manager.query_cell_value_at_row(part.rot)
-            } else {
-                cell_manager.query_cell_value()
-            };
-            targets.push(target);
-        }
-        transform_to::value(&targets, region, input, do_packing, f)
-    }
-}
-
-// Transforms parts through a lookup table into caller-provided cells
-mod transform_to {
-    use super::{Cell, Expr, Field, FieldExt, KeccakRegion, Part, PartValue};
-    use crate::halo2_proofs::plonk::{ConstraintSystem, TableColumn};
-    use crate::util::{pack, to_bytes, unpack};
-
-    /// Pairs every input part with the cell at the same index in `cells` and
-    /// returns the resulting output parts (same `num_bits`, new cell/expression).
-    ///
-    /// A `(input, output)` lookup into `transform_table` is registered for a part
-    /// unless `uniform_lookup` is set and the part's cell has a non-zero rotation.
-    /// `lookup_counter` is incremented once per registered lookup.
-    #[allow(clippy::too_many_arguments)]
-    pub(crate) fn expr<F: FieldExt>(
-        name: &'static str,
-        meta: &mut ConstraintSystem<F>,
-        cells: &[Cell<F>],
-        lookup_counter: &mut usize,
-        input: Vec<Part<F>>,
-        transform_table: [TableColumn; 2],
-        uniform_lookup: bool,
-    ) -> Vec<Part<F>> {
-        let [table_in, table_out] = transform_table;
-        input
-            .iter()
-            .enumerate()
-            .map(|(idx, part)| {
-                let target = cells[idx].clone();
-                let skip_lookup = uniform_lookup && part.cell.rotation != 0;
-                if !skip_lookup {
-                    meta.lookup(name, |_| {
-                        vec![(part.expr.clone(), table_in), (target.expr(), table_out)]
-                    });
-                    *lookup_counter += 1;
-                }
-                // `expr` is evaluated before `target` is moved into `cell`.
-                Part { num_bits: part.num_bits, expr: target.expr(), cell: target }
-            })
-            .collect()
-    }
-
-    /// Witness-side counterpart of `expr`: applies `f` to the first `num_bits`
-    /// unpacked bits of every input part and assigns the result (at offset 0) to
-    /// the cell at the same index in `cells`.
-    ///
-    /// With `do_packing` the transformed bits are packed again; otherwise the
-    /// first byte produced by `to_bytes::value` is used as the value.
-    pub(crate) fn value<F: Field>(
-        cells: &[Cell<F>],
-        region: &mut KeccakRegion<F>,
-        input: Vec<PartValue<F>>,
-        do_packing: bool,
-        f: fn(&u8) -> u8,
-    ) -> Vec<PartValue<F>> {
-        input
-            .iter()
-            .enumerate()
-            .map(|(idx, part)| {
-                let unpacked = unpack(part.value);
-                let transformed: Vec<_> = unpacked[0..part.num_bits].iter().map(f).collect();
-                let value = match do_packing {
-                    true => pack(&transformed),
-                    false => F::from(to_bytes::value(&transformed)[0] as u64),
-                };
-                let target = cells[idx].clone();
-                target.assign(region, 0, value);
-                PartValue { num_bits: part.num_bits, rot: target.rotation, value }
-            })
-            .collect()
-    }
-}
-
-/// KeccakConfig
-///
-/// Holds the columns, lookup tables and cell manager of the Keccak circuit.
-///
-/// NOTE(review): the per-field notes below describe how the identically named
-/// locals are used in `new`; the construction of `Self` is not visible from
-/// here, so confirm that each local is stored in the field of the same name.
-#[derive(Clone, Debug)]
-pub struct KeccakCircuitConfig<F> {
-    // Challenge returned by `challenge()`; `new` queries its `challenge`
-    // argument for the hash rlc and the data rlc.
-    challenge: Challenge,
-    // Gates the "input checks" gate and the `is_padding` boolean checks.
-    q_enable: Column<Fixed>,
-    // q_enable_row: Column<Fixed>,
-    // Gates the "first row" gate and is part of `start_new_hash`.
-    q_first: Column<Fixed>,
-    // Gates the "round" gate.
-    q_round: Column<Fixed>,
-    // Gates the "absorb" gate.
-    q_absorb: Column<Fixed>,
-    // Gates the "squeeze" gate.
-    q_round_last: Column<Fixed>,
-    // Conditions the padding byte checks and the data rlc update.
-    q_padding: Column<Fixed>,
-    // Distinguishes the last padding row inside the "padding" gate.
-    q_padding_last: Column<Fixed>,
-
-    // Table built with `KeccakTable::construct`; `new` reads its `is_enabled`,
-    // `input_rlc` and `output_rlc` columns.
-    pub keccak_table: KeccakTable,
-
-    cell_manager: CellManager<F>,
-    // Fixed column queried for the round constant (iota step).
-    round_cst: Column<Fixed>,
-    // Lookup table used by the "absorb" and "iota" transforms.
-    normalize_3: [TableColumn; 2],
-    // Lookup table used by the "rho/pi" transform and the pi part range checks.
-    normalize_4: [TableColumn; 2],
-    // Lookup table used by the "theta c" transform.
-    normalize_6: [TableColumn; 2],
-    // Lookup table used by the "chi base" lookups.
-    chi_base_table: [TableColumn; 2],
-    // Lookup table used, with its two columns reversed, by the
-    // "squeeze unpack" transforms.
-    pack_table: [TableColumn; 2],
-    _marker: PhantomData<F>,
-}
-
-impl<F: Field> KeccakCircuitConfig<F> {
-    /// Returns the `Challenge` stored in this config's `challenge` field.
-    pub fn challenge(&self) -> Challenge {
-        self.challenge
-    }
-    /// Return a new KeccakCircuitConfig
-    pub fn new(meta: &mut ConstraintSystem<F>, challenge: Challenge) -> Self {
-        let q_enable = meta.fixed_column();
-        // let q_enable_row = meta.fixed_column();
-        let q_first = meta.fixed_column();
-        let q_round = meta.fixed_column();
-        let q_absorb = meta.fixed_column();
-        let q_round_last = meta.fixed_column();
-        let q_padding = meta.fixed_column();
-        let q_padding_last = meta.fixed_column();
-        let round_cst = meta.fixed_column();
-        let keccak_table = KeccakTable::construct(meta);
-
-        let is_final = keccak_table.is_enabled;
-        // let length = keccak_table.input_len;
-        let data_rlc = keccak_table.input_rlc;
-        let hash_rlc = keccak_table.output_rlc;
-
-        let normalize_3 = array_init::array_init(|_| meta.lookup_table_column());
-        let normalize_4 = array_init::array_init(|_| meta.lookup_table_column());
-        let normalize_6 = array_init::array_init(|_| meta.lookup_table_column());
-        let chi_base_table = array_init::array_init(|_| meta.lookup_table_column());
-        let pack_table = array_init::array_init(|_| meta.lookup_table_column());
-
-        let num_rows_per_round = get_num_rows_per_round();
-        let mut cell_manager = CellManager::new(get_num_rows_per_round());
-        let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
-        let mut total_lookup_counter = 0;
-
-        let start_new_hash = |meta: &mut VirtualCells<F>, rot| {
-            // A new hash is started when the previous hash is done or on the first row
-            meta.query_fixed(q_first, rot) + meta.query_advice(is_final, rot)
-        };
-
-        // Round constant
-        let mut round_cst_expr = 0.expr();
-        meta.create_gate("Query round cst", |meta| {
-            round_cst_expr = meta.query_fixed(round_cst, Rotation::cur());
-            vec![0u64.expr()]
-        });
-        // State data
-        let mut s = vec![vec![0u64.expr(); 5]; 5];
-        let mut s_next = vec![vec![0u64.expr(); 5]; 5];
-        for i in 0..5 {
-            for j in 0..5 {
-                let cell = cell_manager.query_cell(meta);
-                s[i][j] = cell.expr();
-                s_next[i][j] = cell.at_offset(meta, num_rows_per_round as i32).expr();
-            }
-        }
-        // Absorb data
-        let absorb_from = cell_manager.query_cell(meta);
-        let absorb_data = cell_manager.query_cell(meta);
-        let absorb_result = cell_manager.query_cell(meta);
-        let mut absorb_from_next = vec![0u64.expr(); NUM_WORDS_TO_ABSORB];
-        let mut absorb_data_next = vec![0u64.expr(); NUM_WORDS_TO_ABSORB];
-        let mut absorb_result_next = vec![0u64.expr(); NUM_WORDS_TO_ABSORB];
-        for i in 0..NUM_WORDS_TO_ABSORB {
-            let rot = ((i + 1) * num_rows_per_round) as i32;
-            absorb_from_next[i] = absorb_from.at_offset(meta, rot).expr();
-            absorb_data_next[i] = absorb_data.at_offset(meta, rot).expr();
-            absorb_result_next[i] = absorb_result.at_offset(meta, rot).expr();
-        }
-
-        // Store the pre-state
-        let pre_s = s.clone();
-
-        // Absorb
-        // The absorption happening at the start of the 24 rounds is done spread out
-        // over those 24 rounds. In a single round (in 17 of the 24 rounds) a
-        // single word is absorbed so the work is spread out. The absorption is
-        // done simply by doing state + data and then normalizing the result to [0,1].
-        // We also need to convert the input data into bytes to calculate the input data
-        // rlc.
-        cell_manager.start_region();
-        let mut lookup_counter = 0;
-        let part_size = get_num_bits_per_absorb_lookup();
-        let input = absorb_from.expr() + absorb_data.expr();
-        let absorb_fat =
-            split::expr(meta, &mut cell_manager, &mut cb, input, 0, part_size, false, None);
-        cell_manager.start_region();
-        let absorb_res = transform::expr(
-            "absorb",
-            meta,
-            &mut cell_manager,
-            &mut lookup_counter,
-            absorb_fat,
-            normalize_3,
-            true,
-        );
-        cb.require_equal("absorb result", decode::expr(absorb_res), absorb_result.expr());
-        info!("- Post absorb:");
-        info!("Lookups: {}", lookup_counter);
-        info!("Columns: {}", cell_manager.get_width());
-        total_lookup_counter += lookup_counter;
-
-        // Input bytes
-        // The input word of a round (`absorb_data`) is split into 8-bit parts and
-        // unpacked into bytes. These bytes are used further down for the padding
-        // checks and for the input data rlc.
-        cell_manager.start_region();
-        let mut lookup_counter = 0;
-        // Potential optimization: could do multiple bytes per lookup
-        let packed_parts =
-            split::expr(meta, &mut cell_manager, &mut cb, absorb_data.expr(), 0, 8, false, None);
-        cell_manager.start_region();
-        // input_bytes.len() = packed_parts.len() = 64 / 8 = 8 = NUM_BYTES_PER_WORD
-        let input_bytes = transform::expr(
-            "squeeze unpack",
-            meta,
-            &mut cell_manager,
-            &mut lookup_counter,
-            packed_parts,
-            pack_table.into_iter().rev().collect::<Vec<_>>().try_into().unwrap(),
-            true,
-        );
-        debug_assert_eq!(input_bytes.len(), NUM_BYTES_PER_WORD);
-
-        // Padding data
-        cell_manager.start_region();
-        let is_paddings = input_bytes.iter().map(|_| cell_manager.query_cell(meta)).collect_vec();
-        info!("- Post padding:");
-        info!("Lookups: {}", lookup_counter);
-        info!("Columns: {}", cell_manager.get_width());
-        total_lookup_counter += lookup_counter;
-
-        // Theta
-        // Calculate
-        // - `c[i] = s[i][0] + s[i][1] + s[i][2] + s[i][3] + s[i][4]`
-        // - `bc[i] = normalize(c)`.
-        // - `t[i] = bc[(i + 4) % 5] + rot(bc[(i + 1)% 5], 1)`
-        // This is done by splitting the bc values in parts in a way
-        // that allows us to also calculate the rotated value "for free".
-        cell_manager.start_region();
-        let mut lookup_counter = 0;
-        let part_size_c = get_num_bits_per_theta_c_lookup();
-        let mut c_parts = Vec::new();
-        for s in s.iter() {
-            // Calculate c and split into parts
-            let c = s[0].clone() + s[1].clone() + s[2].clone() + s[3].clone() + s[4].clone();
-            c_parts.push(split::expr(
-                meta,
-                &mut cell_manager,
-                &mut cb,
-                c,
-                1,
-                part_size_c,
-                false,
-                None,
-            ));
-        }
-        // Now calculate `bc` by normalizing `c`
-        cell_manager.start_region();
-        let mut bc = Vec::new();
-        for c in c_parts {
-            // Normalize c
-            bc.push(transform::expr(
-                "theta c",
-                meta,
-                &mut cell_manager,
-                &mut lookup_counter,
-                c,
-                normalize_6,
-                true,
-            ));
-        }
-        // Now do `bc[(i + 4) % 5] + rot(bc[(i + 1) % 5], 1)` using just expressions.
-        // We don't normalize the result here. We do it as part of the rho/pi step, even
-        // though we would only have to normalize 5 values instead of 25, because of the
-        // way the rho/pi and chi steps can be combined it's more efficient to
-        // do it there (the max value for chi is 4 already so that's the
-        // limiting factor).
-        let mut os = vec![vec![0u64.expr(); 5]; 5];
-        for i in 0..5 {
-            let t = decode::expr(bc[(i + 4) % 5].clone())
-                + decode::expr(rotate(bc[(i + 1) % 5].clone(), 1, part_size_c));
-            for j in 0..5 {
-                os[i][j] = s[i][j].clone() + t.clone();
-            }
-        }
-        s = os.clone();
-        info!("- Post theta:");
-        info!("Lookups: {}", lookup_counter);
-        info!("Columns: {}", cell_manager.get_width());
-        total_lookup_counter += lookup_counter;
-
-        // Rho/Pi
-        // For the rotation of rho/pi we split up the words like expected, but in a way
-        // that allows reusing the same parts in an optimal way for the chi step.
-        // We can save quite a few columns by not recombining the parts after rho/pi and
-        // re-splitting the words again before chi. Instead we do chi directly
-        // on the output parts of rho/pi. For rho/pi specifically we do
-        // `s[j][(2 * i + 3 * j) % 5] = normalize(rot(s[i][j], RHO_MATRIX[i][j]))`.
-        cell_manager.start_region();
-        let mut lookup_counter = 0;
-        let part_size = get_num_bits_per_base_chi_lookup();
-        // To combine the rho/pi/chi steps we have to ensure a specific layout so
-        // query those cells here first.
-        // For chi we have to do `s[i][j] ^ ((~s[(i+1)%5][j]) & s[(i+2)%5][j])`. `j`
-        // remains static but `i` is accessed in a wrap around manner. To do this using
-        // multiple rows with lookups in a way that doesn't require any
-        // extra additional cells or selectors we have to put all `s[i]`'s on the same
-        // row. This isn't that strong of a requirement actually because the
-        // words are split into multiple parts, and so only the parts at the same
-        // position of those words need to be on the same row.
-        let target_word_sizes = target_part_sizes(part_size);
-        let num_word_parts = target_word_sizes.len();
-        let mut rho_pi_chi_cells: [[[Vec<Cell<F>>; 5]; 5]; 3] = array_init::array_init(|_| {
-            array_init::array_init(|_| array_init::array_init(|_| Vec::new()))
-        });
-        let mut num_columns = 0;
-        let mut column_starts = [0usize; 3];
-        for p in 0..3 {
-            column_starts[p] = cell_manager.start_region();
-            let mut row_idx = 0;
-            num_columns = 0;
-            for j in 0..5 {
-                for _ in 0..num_word_parts {
-                    for i in 0..5 {
-                        rho_pi_chi_cells[p][i][j]
-                            .push(cell_manager.query_cell_at_row(meta, row_idx));
-                    }
-                    if row_idx == 0 {
-                        num_columns += 1;
-                    }
-                    row_idx = (((row_idx as usize) + 1) % num_rows_per_round) as i32;
-                }
-            }
-        }
-        // Do the transformation, resulting in the word parts also being normalized.
-        let pi_region_start = cell_manager.start_region();
-        let mut os_parts = vec![vec![Vec::new(); 5]; 5];
-        for (j, os_part) in os_parts.iter_mut().enumerate() {
-            for i in 0..5 {
-                // Split s into parts
-                let s_parts = split_uniform::expr(
-                    meta,
-                    &rho_pi_chi_cells[0][j][(2 * i + 3 * j) % 5],
-                    &mut cell_manager,
-                    &mut cb,
-                    s[i][j].clone(),
-                    RHO_MATRIX[i][j],
-                    part_size,
-                    true,
-                );
-                // Normalize the data to the target cells
-                let s_parts = transform_to::expr(
-                    "rho/pi",
-                    meta,
-                    &rho_pi_chi_cells[1][j][(2 * i + 3 * j) % 5],
-                    &mut lookup_counter,
-                    s_parts.clone(),
-                    normalize_4,
-                    true,
-                );
-                os_part[(2 * i + 3 * j) % 5] = s_parts.clone();
-            }
-        }
-        let pi_region_end = cell_manager.start_region();
-        // Pi parts range checks
-        // To make the uniform stuff work we had to combine some parts together
-        // in new cells (see split_uniform). Here we make sure those parts are range
-        // checked. Potential improvement: Could combine multiple smaller parts
-        // in a single lookup but doesn't save that much.
-        for c in pi_region_start..pi_region_end {
-            meta.lookup("pi part range check", |_| {
-                vec![(cell_manager.columns()[c].expr.clone(), normalize_4[0])]
-            });
-            lookup_counter += 1;
-        }
-        info!("- Post rho/pi:");
-        info!("Lookups: {}", lookup_counter);
-        info!("Columns: {}", cell_manager.get_width());
-        total_lookup_counter += lookup_counter;
-
-        // Chi
-        // In groups of 5 columns, we have to do `s[i][j] ^ ((~s[(i+1)%5][j]) &
-        // s[(i+2)%5][j])` five times, on each row (no selector needed).
-        // This is calculated by making use of `CHI_BASE_LOOKUP_TABLE`.
-        let mut lookup_counter = 0;
-        let part_size_base = get_num_bits_per_base_chi_lookup();
-        for idx in 0..num_columns {
-            // First fetch the cells we want to use
-            let mut input: [Expression<F>; 5] = array_init::array_init(|_| 0.expr());
-            let mut output: [Expression<F>; 5] = array_init::array_init(|_| 0.expr());
-            for c in 0..5 {
-                input[c] = cell_manager.columns()[column_starts[1] + idx * 5 + c].expr.clone();
-                output[c] = cell_manager.columns()[column_starts[2] + idx * 5 + c].expr.clone();
-            }
-            // Now calculate `a ^ ((~b) & c)` by doing `lookup[3 - 2*a + b - c]`
-            for i in 0..5 {
-                let input = scatter::expr(3, part_size_base) - 2.expr() * input[i].clone()
-                    + input[(i + 1) % 5].clone()
-                    - input[(i + 2) % 5].clone().clone();
-                let output = output[i].clone();
-                meta.lookup("chi base", |_| {
-                    vec![(input.clone(), chi_base_table[0]), (output.clone(), chi_base_table[1])]
-                });
-                lookup_counter += 1;
-            }
-        }
-        // Now just decode the parts after the chi transformation done with the lookups
-        // above.
-        let mut os = vec![vec![0u64.expr(); 5]; 5];
-        for (i, os) in os.iter_mut().enumerate() {
-            for (j, os) in os.iter_mut().enumerate() {
-                let mut parts = Vec::new();
-                for idx in 0..num_word_parts {
-                    parts.push(Part {
-                        num_bits: part_size_base,
-                        cell: rho_pi_chi_cells[2][i][j][idx].clone(),
-                        expr: rho_pi_chi_cells[2][i][j][idx].expr(),
-                    });
-                }
-                *os = decode::expr(parts);
-            }
-        }
-        s = os.clone();
-
-        // iota
-        // Simply do the single xor on state [0][0].
-        cell_manager.start_region();
-        let part_size = get_num_bits_per_absorb_lookup();
-        let input = s[0][0].clone() + round_cst_expr.clone();
-        let iota_parts =
-            split::expr(meta, &mut cell_manager, &mut cb, input, 0, part_size, false, None);
-        cell_manager.start_region();
-        // Could share columns with absorb which may end up using 1 lookup/column
-        // fewer...
-        s[0][0] = decode::expr(transform::expr(
-            "iota",
-            meta,
-            &mut cell_manager,
-            &mut lookup_counter,
-            iota_parts,
-            normalize_3,
-            true,
-        ));
-        // Final results stored in the next row
-        for i in 0..5 {
-            for j in 0..5 {
-                cb.require_equal("next row check", s[i][j].clone(), s_next[i][j].clone());
-            }
-        }
-        info!("- Post chi:");
-        info!("Lookups: {}", lookup_counter);
-        info!("Columns: {}", cell_manager.get_width());
-        total_lookup_counter += lookup_counter;
-
-        let mut lookup_counter = 0;
-        cell_manager.start_region();
-
-        // Squeeze data
-        let squeeze_from = cell_manager.query_cell(meta);
-        let mut squeeze_from_prev = vec![0u64.expr(); NUM_WORDS_TO_SQUEEZE];
-        for (idx, squeeze_from_prev) in squeeze_from_prev.iter_mut().enumerate() {
-            let rot = (-(idx as i32) - 1) * num_rows_per_round as i32;
-            *squeeze_from_prev = squeeze_from.at_offset(meta, rot).expr();
-        }
-        // Squeeze
-        // The squeeze happening at the end of the 24 rounds is done spread out
-        // over those 24 rounds. In a single round (in 4 of the 24 rounds) a
-        // single word is converted to bytes.
-        // Potential optimization: could do multiple bytes per lookup
-        cell_manager.start_region();
-        // Unpack a single word into bytes (for the squeeze)
-        // Potential optimization: could do multiple bytes per lookup
-        let squeeze_from_parts =
-            split::expr(meta, &mut cell_manager, &mut cb, squeeze_from.expr(), 0, 8, false, None);
-        cell_manager.start_region();
-        let squeeze_bytes = transform::expr(
-            "squeeze unpack",
-            meta,
-            &mut cell_manager,
-            &mut lookup_counter,
-            squeeze_from_parts,
-            pack_table.into_iter().rev().collect::<Vec<_>>().try_into().unwrap(),
-            true,
-        );
-        info!("- Post squeeze:");
-        info!("Lookups: {}", lookup_counter);
-        info!("Columns: {}", cell_manager.get_width());
-        total_lookup_counter += lookup_counter;
-
-        // The round constraints that we've been building up till now
-        meta.create_gate("round", |meta| cb.gate(meta.query_fixed(q_round, Rotation::cur())));
-
-        // Absorb
-        meta.create_gate("absorb", |meta| {
-            let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
-            let continue_hash = not::expr(start_new_hash(meta, Rotation::cur()));
-            let absorb_positions = get_absorb_positions();
-            let mut a_slice = 0;
-            for j in 0..5 {
-                for i in 0..5 {
-                    if absorb_positions.contains(&(i, j)) {
-                        cb.condition(continue_hash.clone(), |cb| {
-                            cb.require_equal(
-                                "absorb verify input",
-                                absorb_from_next[a_slice].clone(),
-                                pre_s[i][j].clone(),
-                            );
-                        });
-                        cb.require_equal(
-                            "absorb result copy",
-                            select::expr(
-                                continue_hash.clone(),
-                                absorb_result_next[a_slice].clone(),
-                                absorb_data_next[a_slice].clone(),
-                            ),
-                            s_next[i][j].clone(),
-                        );
-                        a_slice += 1;
-                    } else {
-                        cb.require_equal(
-                            "absorb state copy",
-                            pre_s[i][j].clone() * continue_hash.clone(),
-                            s_next[i][j].clone(),
-                        );
-                    }
-                }
-            }
-            cb.gate(meta.query_fixed(q_absorb, Rotation::cur()))
-        });
-
-        // Collect the bytes that are spread out over previous rows
-        let mut hash_bytes = Vec::new();
-        for i in 0..NUM_WORDS_TO_SQUEEZE {
-            for byte in squeeze_bytes.iter() {
-                let rot = (-(i as i32) - 1) * num_rows_per_round as i32;
-                hash_bytes.push(byte.cell.at_offset(meta, rot).expr());
-            }
-        }
-
-        // Squeeze
-        meta.create_gate("squeeze", |meta| {
-            let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
-            let start_new_hash = start_new_hash(meta, Rotation::cur());
-            // The words to squeeze
-            let hash_words: Vec<_> =
-                pre_s.into_iter().take(4).map(|a| a[0].clone()).take(4).collect();
-            // Verify if we converted the correct words to bytes on previous rows
-            for (idx, word) in hash_words.iter().enumerate() {
-                cb.condition(start_new_hash.clone(), |cb| {
-                    cb.require_equal(
-                        "squeeze verify packed",
-                        word.clone(),
-                        squeeze_from_prev[idx].clone(),
-                    );
-                });
-            }
-
-            let challenge_expr = meta.query_challenge(challenge);
-            let rlc =
-                hash_bytes.into_iter().reduce(|rlc, x| rlc * challenge_expr.clone() + x).unwrap();
-            cb.require_equal("hash rlc check", rlc, meta.query_advice(hash_rlc, Rotation::cur()));
-            cb.gate(meta.query_fixed(q_round_last, Rotation::cur()))
-        });
-
-        // Some general input checks
-        meta.create_gate("input checks", |meta| {
-            let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
-            cb.require_boolean("boolean is_final", meta.query_advice(is_final, Rotation::cur()));
-            cb.gate(meta.query_fixed(q_enable, Rotation::cur()))
-        });
-
-        // Enforce fixed values on the first row
-        meta.create_gate("first row", |meta| {
-            let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
-            cb.require_zero(
-                "is_final needs to be disabled on the first row",
-                meta.query_advice(is_final, Rotation::cur()),
-            );
-            cb.gate(meta.query_fixed(q_first, Rotation::cur()))
-        });
-
-        // Enforce logic for when this block is the last block for a hash
-        let last_is_padding_in_block = is_paddings.last().unwrap().at_offset(
-            meta,
-            -(((NUM_ROUNDS + 1 - NUM_WORDS_TO_ABSORB) * num_rows_per_round) as i32),
-        );
-        meta.create_gate("is final", |meta| {
-            let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
-            // All absorb rows except the first row
-            cb.condition(
-                meta.query_fixed(q_absorb, Rotation::cur())
-                    - meta.query_fixed(q_first, Rotation::cur()),
-                |cb| {
-                    cb.require_equal(
-                        "is_final needs to be the same as the last is_padding in the block",
-                        meta.query_advice(is_final, Rotation::cur()),
-                        last_is_padding_in_block.expr(),
-                    );
-                },
-            );
-            // For all the rows of a round, only the first row can have `is_final == 1`.
-            cb.condition(
-                (1..num_rows_per_round as i32)
-                    .map(|i| meta.query_fixed(q_enable, Rotation(-i)))
-                    .fold(0.expr(), |acc, elem| acc + elem),
-                |cb| {
-                    cb.require_zero(
-                        "is_final only when q_enable",
-                        meta.query_advice(is_final, Rotation::cur()),
-                    );
-                },
-            );
-            cb.gate(1.expr())
-        });
-
-        // Padding
-        // May be cleaner to do this padding logic in the byte conversion lookup but
-        // currently easier to do it like this.
-        let prev_is_padding =
-            is_paddings.last().unwrap().at_offset(meta, -(num_rows_per_round as i32));
-        meta.create_gate("padding", |meta| {
-            let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
-            let q_padding = meta.query_fixed(q_padding, Rotation::cur());
-            let q_padding_last = meta.query_fixed(q_padding_last, Rotation::cur());
-
-            // All padding selectors need to be boolean
-            for is_padding in is_paddings.iter() {
-                cb.condition(meta.query_fixed(q_enable, Rotation::cur()), |cb| {
-                    cb.require_boolean("is_padding boolean", is_padding.expr());
-                });
-            }
-            // This last padding selector will be used on the first round row so needs to be
-            // zero
-            cb.condition(meta.query_fixed(q_absorb, Rotation::cur()), |cb| {
-                cb.require_zero(
-                    "last is_padding should be zero on absorb rows",
-                    is_paddings.last().unwrap().expr(),
-                );
-            });
-            // Now for each padding selector
-            for idx in 0..is_paddings.len() {
-                // Previous padding selector can be on the previous row
-                let is_padding_prev =
-                    if idx == 0 { prev_is_padding.expr() } else { is_paddings[idx - 1].expr() };
-                let is_first_padding = is_paddings[idx].expr() - is_padding_prev.clone();
-
-                // Check padding transition 0 -> 1 done only once
-                cb.condition(q_padding.expr(), |cb| {
-                    cb.require_boolean("padding step boolean", is_first_padding.clone());
-                });
-
-                // Padding start/intermediate/end byte checks
-                if idx == is_paddings.len() - 1 {
-                    // These can be combined in the future, but currently this would increase the
-                    // degree by one.
-                    // Padding start/intermediate byte, all padding rows except the last one
-                    cb.condition(
-                        and::expr([
-                            q_padding.expr() - q_padding_last.expr(),
-                            is_paddings[idx].expr(),
-                        ]),
-                        |cb| {
-                            // Input bytes need to be zero, or one if this is the first padding byte
-                            cb.require_equal(
-                                "padding start/intermediate byte last byte",
-                                input_bytes[idx].expr.clone(),
-                                is_first_padding.expr(),
-                            );
-                        },
-                    );
-                    // Padding start/end byte, only on the last padding row
-                    cb.condition(
-                        and::expr([q_padding_last.expr(), is_paddings[idx].expr()]),
-                        |cb| {
-                            // The input byte needs to be 128, unless it's also the first padding
-                            // byte then it's 129
-                            cb.require_equal(
-                                "padding start/end byte",
-                                input_bytes[idx].expr.clone(),
-                                is_first_padding.expr() + 128.expr(),
-                            );
-                        },
-                    );
-                } else {
-                    // Padding start/intermediate byte
-                    cb.condition(and::expr([q_padding.expr(), is_paddings[idx].expr()]), |cb| {
-                        // Input bytes need to be zero, or one if this is the first padding byte
-                        cb.require_equal(
-                            "padding start/intermediate byte",
-                            input_bytes[idx].expr.clone(),
-                            is_first_padding.expr(),
-                        );
-                    });
-                }
-            }
-            cb.gate(1.expr())
-        });
-
-        assert!(num_rows_per_round > NUM_BYTES_PER_WORD, "We require enough rows per round to hold the running RLC of the bytes from the one keccak word absorbed per round");
-        // TODO: there is probably a way to only require NUM_BYTES_PER_WORD instead of
-        // NUM_BYTES_PER_WORD + 1 rows per round, but for simplicity and to keep the
-        // gate degree at 3, we just do the obvious thing for now. Gate: input data rlc.
-        meta.create_gate("data rlc", |meta| {
-            let mut cb = BaseConstraintBuilder::new(MAX_DEGREE);
-
-            let q_padding = meta.query_fixed(q_padding, Rotation::cur());
-            let start_new_hash_prev = start_new_hash(meta, Rotation(-(num_rows_per_round as i32)));
-            let data_rlc_prev = meta.query_advice(data_rlc, Rotation(-(num_rows_per_round as i32)));
-
-            // Update the length/data_rlc on rows where we absorb data
-            cb.condition(q_padding.expr(), |cb| {
-                let challenge_expr = meta.query_challenge(challenge);
-                // Use intermediate cells to keep the degree low
-                let mut new_data_rlc =
-                    data_rlc_prev.clone() * not::expr(start_new_hash_prev.expr());
-                let mut data_rlcs = (0..NUM_BYTES_PER_WORD)
-                    .map(|i| meta.query_advice(data_rlc, Rotation(i as i32 + 1)));
-                let intermed_rlc = data_rlcs.next().unwrap();
-                cb.require_equal("initial data rlc", intermed_rlc.clone(), new_data_rlc);
-                new_data_rlc = intermed_rlc;
-                for (byte, is_padding) in input_bytes.iter().zip(is_paddings.iter()) {
-                    new_data_rlc = select::expr(
-                        is_padding.expr(),
-                        new_data_rlc.clone(),
-                        new_data_rlc * challenge_expr.clone() + byte.expr.clone(),
-                    );
-                    if let Some(intermed_rlc) = data_rlcs.next() {
-                        cb.require_equal(
-                            "intermediate data rlc",
-                            intermed_rlc.clone(),
-                            new_data_rlc,
-                        );
-                        new_data_rlc = intermed_rlc;
-                    }
-                }
-                cb.require_equal(
-                    "update data rlc",
-                    meta.query_advice(data_rlc, Rotation::cur()),
-                    new_data_rlc,
-                );
-            });
-            // Keep length/data_rlc the same on rows where we don't absorb data
-            cb.condition(
-                and::expr([
-                    meta.query_fixed(q_enable, Rotation::cur())
-                        - meta.query_fixed(q_first, Rotation::cur()),
-                    not::expr(q_padding),
-                ]),
-                |cb| {
-                    cb.require_equal(
-                        "data_rlc equality check",
-                        meta.query_advice(data_rlc, Rotation::cur()),
-                        data_rlc_prev.clone(),
-                    );
-                },
-            );
-            cb.gate(1.expr())
-        });
-
-        info!("Degree: {}", meta.degree());
-        info!("Minimum rows: {}", meta.minimum_rows());
-        info!("Total Lookups: {}", total_lookup_counter);
-        #[cfg(feature = "display")]
-        {
-            println!("Total Keccak Columns: {}", cell_manager.get_width());
-            std::env::set_var("KECCAK_ADVICE_COLUMNS", cell_manager.get_width().to_string());
-        }
-        #[cfg(not(feature = "display"))]
-        info!("Total Keccak Columns: {}", cell_manager.get_width());
-        info!("num unused cells: {}", cell_manager.get_num_unused_cells());
-        info!("part_size absorb: {}", get_num_bits_per_absorb_lookup());
-        info!("part_size theta: {}", get_num_bits_per_theta_c_lookup());
-        info!("part_size theta c: {}", get_num_bits_per_lookup(THETA_C_LOOKUP_RANGE));
-        info!("part_size theta t: {}", get_num_bits_per_lookup(4));
-        info!("part_size rho/pi: {}", get_num_bits_per_rho_pi_lookup());
-        info!("part_size chi base: {}", get_num_bits_per_base_chi_lookup());
-        info!("uniform part sizes: {:?}", target_part_sizes(get_num_bits_per_theta_c_lookup()));
-
-        KeccakCircuitConfig {
-            challenge,
-            q_enable,
-            // q_enable_row,
-            q_first,
-            q_round,
-            q_absorb,
-            q_round_last,
-            q_padding,
-            q_padding_last,
-            keccak_table,
-            cell_manager,
-            round_cst,
-            normalize_3,
-            normalize_4,
-            normalize_6,
-            chi_base_table,
-            pack_table,
-            _marker: PhantomData,
-        }
-    }
-}
-
-impl<F: Field> KeccakCircuitConfig<F> {
-    /// Writes every row of `witness` into `region`; a row's index in the slice is its offset.
-    pub fn assign(&self, region: &mut Region<'_, F>, witness: &[KeccakRow<F>]) {
-        witness.iter().enumerate().for_each(|(row_offset, witness_row)| {
-            self.set_row(region, row_offset, witness_row);
-        });
-    }
-
-    /// Assigns a single witness row at `offset`: the fixed selectors, the keccak table's
-    /// `is_enabled` advice cell, the cell-manager advice columns and the round constant.
-    pub fn set_row(&self, region: &mut Region<'_, F>, offset: usize, row: &KeccakRow<F>) {
-        // Fixed selectors: `q_first` depends only on the offset, the others come from the row.
-        let selectors = [
-            ("q_enable", self.q_enable, row.q_enable),
-            ("q_first", self.q_first, offset == 0),
-            ("q_round", self.q_round, row.q_round),
-            ("q_round_last", self.q_round_last, row.q_round_last),
-            ("q_absorb", self.q_absorb, row.q_absorb),
-            ("q_padding", self.q_padding, row.q_padding),
-            ("q_padding_last", self.q_padding_last, row.q_padding_last),
-        ];
-        for (_label, selector, flag) in selectors {
-            assign_fixed_custom(region, selector, offset, F::from(flag));
-        }
-
-        // The table's `is_enabled` column receives the row's `is_final` flag.
-        let is_final = Value::known(F::from(row.is_final));
-        assign_advice_custom(region, self.keccak_table.is_enabled, offset, is_final);
-
-        // Cell values, paired positionally with the cell manager's columns.
-        for (cell_value, column) in row.cell_values.iter().zip(self.cell_manager.columns()) {
-            assign_advice_custom(region, column.advice, offset, Value::known(*cell_value));
-        }
-
-        // Round constant
-        assign_fixed_custom(region, self.round_cst, offset, row.round_cst);
-    }
-
-    /// Loads the three normalize tables, the chi base table and the pack table, stopping at
-    /// the first error.
-    pub fn load_aux_tables(&self, layouter: &mut impl Layouter<F>) -> Result<(), Error> {
-        load_normalize_table(layouter, "normalize_6", &self.normalize_6, 6u64)?;
-        load_normalize_table(layouter, "normalize_4", &self.normalize_4, 4u64)?;
-        load_normalize_table(layouter, "normalize_3", &self.normalize_3, 3u64)?;
-
-        let chi_part_size = get_num_bits_per_base_chi_lookup();
-        load_lookup_table(
-            layouter,
-            "chi base",
-            &self.chi_base_table,
-            chi_part_size,
-            &CHI_BASE_LOOKUP_TABLE,
-        )?;
-
-        load_pack_table(layouter, &self.pack_table)
-    }
-}
-
-/// Assigns the running input RLC of `bytes` into the keccak table's `input_rlc` column
-/// (output RLCs are not handled here: see `multi_keccak_phase1`).
-///
-/// In every absorbing round the RLC before each byte is written to the rows following the
-/// round's first row, and the RLC after the round is written to the first row itself. The
-/// cell assigned in the last round of each chunk is pushed onto `input_rlcs`, and `offset`
-/// is advanced by one round's worth of rows per round.
-pub fn keccak_phase1<'v, F: Field>(
-    region: &mut Region<F>,
-    keccak_table: &KeccakTable,
-    bytes: &[u8],
-    challenge: Value<F>,
-    input_rlcs: &mut Vec<KeccakAssignedValue<'v, F>>,
-    offset: &mut usize,
-) {
-    let chunk_count = get_num_keccak_f(bytes.len());
-    let rows_per_round = get_num_rows_per_round();
-
-    // Once the input is exhausted the remaining slots leave the RLC untouched.
-    let mut unread_bytes = bytes.iter();
-    let mut running_rlc = Value::known(F::zero());
-
-    for _ in 0..chunk_count {
-        for round in 0..=NUM_ROUNDS {
-            if round < NUM_WORDS_TO_ABSORB {
-                for step in 1..=NUM_BYTES_PER_WORD {
-                    assign_advice_custom(
-                        region,
-                        keccak_table.input_rlc,
-                        *offset + step,
-                        running_rlc,
-                    );
-                    if let Some(&byte) = unread_bytes.next() {
-                        running_rlc = running_rlc * challenge + Value::known(F::from(byte as u64));
-                    }
-                }
-            }
-
-            let round_rlc =
-                assign_advice_custom(region, keccak_table.input_rlc, *offset, running_rlc);
-            if round == NUM_ROUNDS {
-                input_rlcs.push(round_rlc);
-            }
-
-            *offset += rows_per_round;
-        }
-    }
-}
-
-/// Witness generation in `FirstPhase` for the keccak hash of `bytes`: appends the witness rows to `rows` and one entry per absorbed block to `squeeze_digests`, without
-/// computing RLCs, which are deferred to `SecondPhase`.
-pub fn keccak_phase0<F: Field>(
-    rows: &mut Vec<KeccakRow<F>>,
-    squeeze_digests: &mut Vec<[F; NUM_WORDS_TO_SQUEEZE]>,
-    bytes: &[u8],
-) {
-    let mut bits = into_bits(bytes);
-    let mut s = [[F::zero(); 5]; 5];
-    let absorb_positions = get_absorb_positions();
-    let num_bytes_in_last_block = bytes.len() % RATE; // input bytes (before padding) in the final block
-    let num_rows_per_round = get_num_rows_per_round();
-    let two = F::from(2u64);
-
-    // Padding: a 1 bit, zero bits until one short of a multiple of RATE_IN_BITS, then a closing 1 bit
-    bits.push(1);
-    while (bits.len() + 1) % RATE_IN_BITS != 0 {
-        bits.push(0);
-    }
-    bits.push(1);
-
-    let chunks = bits.chunks(RATE_IN_BITS);
-    let num_chunks = chunks.len();
-
-    let mut cell_managers = Vec::with_capacity(NUM_ROUNDS + 1);
-    let mut regions = Vec::with_capacity(NUM_ROUNDS + 1);
-    let mut hash_words = [F::zero(); NUM_WORDS_TO_SQUEEZE];
-
-    for (idx, chunk) in chunks.enumerate() {
-        let is_final_block = idx == num_chunks - 1;
-
-        let mut absorb_rows = Vec::new();
-        // Absorb: XOR each packed 64-bit word of the chunk into the state, recording the word before, the absorbed word and the result
-        for (idx, &(i, j)) in absorb_positions.iter().enumerate() {
-            let absorb = pack(&chunk[idx * 64..(idx + 1) * 64]);
-            let from = s[i][j];
-            s[i][j] = field_xor(s[i][j], absorb);
-            absorb_rows.push(AbsorbData { from, absorb, result: s[i][j] });
-        }
-
-        // Clear and reuse the Vecs allocated for the previous chunk instead of allocating new ones
-        cell_managers.clear();
-        regions.clear();
-
-        for round in 0..NUM_ROUNDS + 1 {
-            let mut cell_manager = CellManager::new(num_rows_per_round);
-            let mut region = KeccakRegion::new();
-
-            let mut absorb_row = AbsorbData::default();
-            if round < NUM_WORDS_TO_ABSORB {
-                absorb_row = absorb_rows[round].clone();
-            }
-
-            // State data: one cell per word of the 5x5 state
-            for s in &s {
-                for s in s {
-                    let cell = cell_manager.query_cell_value();
-                    cell.assign(&mut region, 0, *s);
-                }
-            }
-
-            // Absorb data: cells for the word before absorbing, the absorbed word and the result (default values in rounds that absorb nothing)
-            let absorb_from = cell_manager.query_cell_value();
-            let absorb_data = cell_manager.query_cell_value();
-            let absorb_result = cell_manager.query_cell_value();
-            absorb_from.assign(&mut region, 0, absorb_row.from);
-            absorb_data.assign(&mut region, 0, absorb_row.absorb);
-            absorb_result.assign(&mut region, 0, absorb_row.result);
-
-            // Absorb: split `from + absorb` into parts, then map every part through `v & 1`
-            cell_manager.start_region();
-            let part_size = get_num_bits_per_absorb_lookup();
-            let input = absorb_row.from + absorb_row.absorb;
-            let absorb_fat =
-                split::value(&mut cell_manager, &mut region, input, 0, part_size, false, None);
-            cell_manager.start_region();
-            let _absorb_result = transform::value(
-                &mut cell_manager,
-                &mut region,
-                absorb_fat.clone(),
-                true,
-                |v| v & 1,
-                true,
-            );
-
-            // Padding: unpack the absorbed word into bytes and flag which of them are padding
-            cell_manager.start_region();
-            // Unpack a single word into bytes (for the absorption)
-            // Potential optimization: could do multiple bytes per lookup
-            let packed =
-                split::value(&mut cell_manager, &mut region, absorb_row.absorb, 0, 8, false, None);
-            cell_manager.start_region();
-            let input_bytes =
-                transform::value(&mut cell_manager, &mut region, packed, false, |v| *v, true);
-            cell_manager.start_region();
-            let is_paddings =
-                input_bytes.iter().map(|_| cell_manager.query_cell_value()).collect::<Vec<_>>();
-            debug_assert_eq!(is_paddings.len(), NUM_BYTES_PER_WORD);
-            if round < NUM_WORDS_TO_ABSORB {
-                for (padding_idx, is_padding) in is_paddings.iter().enumerate() {
-                    let byte_idx = round * NUM_BYTES_PER_WORD + padding_idx;
-                    let padding = is_final_block && byte_idx >= num_bytes_in_last_block; // only the final block can hold padding bytes
-                    is_padding.assign(&mut region, 0, F::from(padding));
-                }
-            }
-            cell_manager.start_region();
-
-            if round != NUM_ROUNDS {
-                // Theta: sum the five words of each `s[i]`, split and map the sums through `v & 1`, then add the combined `t` to every word of `s[i]`
-                let part_size = get_num_bits_per_theta_c_lookup();
-                let mut bcf = Vec::new();
-                for s in &s {
-                    let c = s[0] + s[1] + s[2] + s[3] + s[4];
-                    let bc_fat =
-                        split::value(&mut cell_manager, &mut region, c, 1, part_size, false, None);
-                    bcf.push(bc_fat);
-                }
-                cell_manager.start_region();
-                let mut bc = Vec::new();
-                for bc_fat in bcf {
-                    let bc_norm = transform::value(
-                        &mut cell_manager,
-                        &mut region,
-                        bc_fat.clone(),
-                        true,
-                        |v| v & 1,
-                        true,
-                    );
-                    bc.push(bc_norm);
-                }
-                cell_manager.start_region();
-                let mut os = [[F::zero(); 5]; 5];
-                for i in 0..5 {
-                    let t = decode::value(bc[(i + 4) % 5].clone())
-                        + decode::value(rotate(bc[(i + 1) % 5].clone(), 1, part_size));
-                    for j in 0..5 {
-                        os[i][j] = s[i][j] + t;
-                    }
-                }
-                s = os;
-                cell_manager.start_region();
-
-                // Rho/Pi: split each word using its `RHO_MATRIX[i][j]` entry and store the `v & 1`-mapped parts at `[j][(2 * i + 3 * j) % 5]`
-                let part_size = get_num_bits_per_base_chi_lookup();
-                let target_word_sizes = target_part_sizes(part_size);
-                let num_word_parts = target_word_sizes.len();
-                let mut rho_pi_chi_cells: [[[Vec<Cell<F>>; 5]; 5]; 3] =
-                    array_init::array_init(|_| {
-                        array_init::array_init(|_| array_init::array_init(|_| Vec::new()))
-                    });
-                let mut column_starts = [0usize; 3];
-                for p in 0..3 {
-                    column_starts[p] = cell_manager.start_region();
-                    let mut row_idx = 0;
-                    for j in 0..5 {
-                        for _ in 0..num_word_parts {
-                            for i in 0..5 {
-                                rho_pi_chi_cells[p][i][j]
-                                    .push(cell_manager.query_cell_value_at_row(row_idx as i32));
-                            }
-                            row_idx = (row_idx + 1) % num_rows_per_round;
-                        }
-                    }
-                }
-                cell_manager.start_region();
-                let mut os_parts: [[Vec<PartValue<F>>; 5]; 5] =
-                    array_init::array_init(|_| array_init::array_init(|_| Vec::new()));
-                for (j, os_part) in os_parts.iter_mut().enumerate() {
-                    for i in 0..5 {
-                        let s_parts = split_uniform::value(
-                            &rho_pi_chi_cells[0][j][(2 * i + 3 * j) % 5],
-                            &mut cell_manager,
-                            &mut region,
-                            s[i][j],
-                            RHO_MATRIX[i][j],
-                            part_size,
-                            true,
-                        );
-
-                        let s_parts = transform_to::value(
-                            &rho_pi_chi_cells[1][j][(2 * i + 3 * j) % 5],
-                            &mut region,
-                            s_parts.clone(),
-                            true,
-                            |v| v & 1,
-                        );
-                        os_part[(2 * i + 3 * j) % 5] = s_parts.clone();
-                    }
-                }
-                cell_manager.start_region();
-
-                // Chi: per part, combine three words as `three_packed - 2 * a + b - c` and map the result through `CHI_BASE_LOOKUP_TABLE`
-                let part_size_base = get_num_bits_per_base_chi_lookup();
-                let three_packed = pack::<F>(&vec![3u8; part_size_base]);
-                let mut os = [[F::zero(); 5]; 5];
-                for j in 0..5 {
-                    for i in 0..5 {
-                        let mut s_parts = Vec::new();
-                        for ((part_a, part_b), part_c) in os_parts[i][j]
-                            .iter()
-                            .zip(os_parts[(i + 1) % 5][j].iter())
-                            .zip(os_parts[(i + 2) % 5][j].iter())
-                        {
-                            let value =
-                                three_packed - two * part_a.value + part_b.value - part_c.value;
-                            s_parts.push(PartValue {
-                                num_bits: part_size_base,
-                                rot: j as i32,
-                                value,
-                            });
-                        }
-                        os[i][j] = decode::value(transform_to::value(
-                            &rho_pi_chi_cells[2][i][j],
-                            &mut region,
-                            s_parts.clone(),
-                            true,
-                            |v| CHI_BASE_LOOKUP_TABLE[*v as usize],
-                        ));
-                    }
-                }
-                s = os;
-                cell_manager.start_region();
-
-                // iota: add the packed round constant to `s[0][0]`, then split the sum and map its parts through `v & 1`
-                let part_size = get_num_bits_per_absorb_lookup();
-                let input = s[0][0] + pack_u64::<F>(ROUND_CST[round]);
-                let iota_parts = split::value::<F>(
-                    &mut cell_manager,
-                    &mut region,
-                    input,
-                    0,
-                    part_size,
-                    false,
-                    None,
-                );
-                cell_manager.start_region();
-                s[0][0] = decode::value(transform::value(
-                    &mut cell_manager,
-                    &mut region,
-                    iota_parts.clone(),
-                    true,
-                    |v| v & 1,
-                    true,
-                ));
-            }
-
-            // The words to squeeze out: this is the hash digest as words with
-            // NUM_BYTES_PER_WORD (=8) bytes each
-            for (hash_word, a) in hash_words.iter_mut().zip(s.iter()) {
-                *hash_word = a[0];
-            }
-
-            cell_managers.push(cell_manager);
-            regions.push(region);
-        }
-
-        // Now that we know the state at the end of the rounds, set the squeeze data
-        let num_rounds = cell_managers.len();
-        for (idx, word) in hash_words.iter().enumerate() {
-            let cell_manager = &mut cell_managers[num_rounds - 2 - idx]; // squeeze words go into earlier rounds, counting back from the second-to-last
-            let region = &mut regions[num_rounds - 2 - idx];
-
-            cell_manager.start_region();
-            let squeeze_packed = cell_manager.query_cell_value();
-            squeeze_packed.assign(region, 0, *word);
-
-            cell_manager.start_region();
-            let packed = split::value(cell_manager, region, *word, 0, 8, false, None);
-            cell_manager.start_region();
-            transform::value(cell_manager, region, packed, false, |v| *v, true);
-        }
-        squeeze_digests.push(hash_words);
-
-        for round in 0..NUM_ROUNDS + 1 {
-            let round_cst = pack_u64(ROUND_CST[round]);
-
-            for row_idx in 0..num_rows_per_round {
-                rows.push(KeccakRow {
-                    q_enable: row_idx == 0,
-                    // q_enable_row: true,
-                    q_round: row_idx == 0 && round < NUM_ROUNDS,
-                    q_absorb: row_idx == 0 && round == NUM_ROUNDS,
-                    q_round_last: row_idx == 0 && round == NUM_ROUNDS,
-                    q_padding: row_idx == 0 && round < NUM_WORDS_TO_ABSORB,
-                    q_padding_last: row_idx == 0 && round == NUM_WORDS_TO_ABSORB - 1,
-                    round_cst,
-                    is_final: is_final_block && round == NUM_ROUNDS && row_idx == 0,
-                    cell_values: regions[round].rows.get(row_idx).unwrap_or(&vec![]).clone(), // a row missing from the region gets no cell values
-                });
-                #[cfg(debug_assertions)]
-                {
-                    let mut r = rows.last().unwrap().clone();
-                    r.cell_values.clear();
-                    log::trace!("offset {:?} row idx {} row {:?}", rows.len() - 1, row_idx, r);
-                }
-            }
-            log::trace!(" = = = = = = round {} end", round);
-        }
-        log::trace!(" ====================== chunk {} end", idx);
-    }
-
-    #[cfg(debug_assertions)]
-    {
-        let hash_bytes = s
-            .into_iter()
-            .take(4)
-            .map(|a| {
-                pack_with_base::<F>(&unpack(a[0]), 2)
-                    .to_repr()
-                    .into_iter()
-                    .take(8)
-                    .collect::<Vec<_>>()
-                    .to_vec()
-            })
-            .collect::<Vec<_>>();
-        debug!("hash: {:x?}", &(hash_bytes[0..4].concat()));
-        // debug!("data rlc: {:x?}", data_rlc);
-    }
-}
-
-/// Computes and assigns both the input and the output RLC values for a batch of inputs.
-///
-/// The first `get_num_rows_per_round()` rows of both RLC columns are zeroed. Input RLCs are
-/// then assigned per input through `keccak_phase1`, padding with empty inputs until there is
-/// one input RLC per entry of `squeeze_digests`. Finally one output RLC is assigned per
-/// digest. Returns `(input_rlcs, output_rlcs)`.
-pub fn multi_keccak_phase1<'a, 'v, F: Field>(
-    region: &mut Region<F>,
-    keccak_table: &KeccakTable,
-    bytes: impl IntoIterator<Item = &'a [u8]>,
-    challenge: Value<F>,
-    squeeze_digests: Vec<[F; NUM_WORDS_TO_SQUEEZE]>,
-) -> (Vec<KeccakAssignedValue<'v, F>>, Vec<KeccakAssignedValue<'v, F>>) {
-    let digest_count = squeeze_digests.len();
-    let mut input_rlcs = Vec::with_capacity(digest_count);
-    let mut output_rlcs = Vec::with_capacity(digest_count);
-
-    let rows_per_round = get_num_rows_per_round();
-
-    // Zero both RLC columns over the first round's worth of rows.
-    for row in 0..rows_per_round {
-        for column in [keccak_table.input_rlc, keccak_table.output_rlc] {
-            assign_advice_custom(region, column, row, Value::known(F::zero()));
-        }
-    }
-
-    // Input RLCs: real inputs first, then empty inputs until every digest has one.
-    let mut input_offset = rows_per_round;
-    for input in bytes {
-        keccak_phase1(region, keccak_table, input, challenge, &mut input_rlcs, &mut input_offset);
-    }
-    debug_assert!(input_rlcs.len() <= squeeze_digests.len());
-    while input_rlcs.len() < digest_count {
-        keccak_phase1(region, keccak_table, &[], challenge, &mut input_rlcs, &mut input_offset);
-    }
-
-    // Output RLCs: one per digest, on the first row of the last round of its permutation.
-    let rows_per_keccak_f = rows_per_round * (NUM_ROUNDS + 1);
-    let mut output_offset = rows_per_round + rows_per_round * NUM_ROUNDS;
-    for digest_words in squeeze_digests {
-        let digest_bytes =
-            digest_words.into_iter().flat_map(|word| to_bytes::value(&unpack(word)));
-        let digest_rlc = digest_bytes
-            .map(|byte| Value::known(F::from(byte as u64)))
-            .reduce(|acc, byte| acc * challenge + byte)
-            .unwrap();
-        output_rlcs.push(assign_advice_custom(
-            region,
-            keccak_table.output_rlc,
-            output_offset,
-            digest_rlc,
-        ));
-        output_offset += rows_per_keccak_f;
-    }
-
-    (input_rlcs, output_rlcs)
-}
-
-/// Returns the witness rows (`KeccakRow`s) and the hash digest outputs for a batch of inputs.
-///
-/// The rows start with one round of dummy rows, followed by the rows of every input in
-/// order; the inputs themselves are processed in parallel. When `capacity` is given, hashes
-/// of the empty input are appended until the row count for `capacity` permutations is reached.
-///
-/// # Panics
-/// Panics if `capacity` is given and the generated rows exceed the row count it allows.
-pub fn multi_keccak_phase0<F: Field>(
-    bytes: &[Vec<u8>],
-    capacity: Option<usize>,
-) -> (Vec<KeccakRow<F>>, Vec<[F; NUM_WORDS_TO_SQUEEZE]>) {
-    let num_rows_per_round = get_num_rows_per_round();
-    // Row count for the leading dummy round plus `num_keccak_f` permutations.
-    let rows_for =
-        |num_keccak_f: usize| (1 + num_keccak_f * (NUM_ROUNDS + 1)) * num_rows_per_round;
-
-    let mut rows = Vec::with_capacity(rows_for(capacity.unwrap_or(0)));
-    // Dummy first round so that the initial data is absorbed; its contents don't really
-    // matter, `is_final` just needs to be disabled.
-    rows.append(&mut KeccakRow::dummy_rows(num_rows_per_round));
-
-    // Actual keccaks: each input builds its own rows and digests independently.
-    let per_input = bytes
-        .par_iter()
-        .map(|input| {
-            let num_keccak_f = get_num_keccak_f(input.len());
-            let mut input_digests = Vec::with_capacity(num_keccak_f);
-            let mut input_rows =
-                Vec::with_capacity(num_keccak_f * (NUM_ROUNDS + 1) * num_rows_per_round);
-            keccak_phase0(&mut input_rows, &mut input_digests, input);
-            (input_rows, input_digests)
-        })
-        .collect::<Vec<_>>();
-
-    // Stitch the per-input results back together in input order.
-    let mut squeeze_digests = Vec::with_capacity(capacity.unwrap_or(0));
-    for (input_rows, input_digests) in per_input {
-        rows.extend(input_rows);
-        squeeze_digests.extend(input_digests);
-    }
-
-    if let Some(capacity) = capacity {
-        let row_limit = rows_for(capacity);
-        // Pad with no data hashes to the expected capacity
-        while rows.len() < row_limit {
-            keccak_phase0(&mut rows, &mut squeeze_digests, &[]);
-        }
-        // Check that we are not over capacity
-        if rows.len() > row_limit {
-            panic!("{:?}", Error::BoundsFailure);
-        }
-    }
-
-    (rows, squeeze_digests)
-}
diff --git a/hashes/zkevm-keccak/src/lib.rs b/hashes/zkevm-keccak/src/lib.rs
index e51bd006..9fe4896b 100644
--- a/hashes/zkevm-keccak/src/lib.rs
+++ b/hashes/zkevm-keccak/src/lib.rs
@@ -3,9 +3,11 @@
 
 use halo2_base::halo2_proofs;
 
+mod keccak_circuit;
+
 /// Keccak packed multi
-pub mod keccak_packed_multi;
+pub use keccak_circuit::keccak_packed_multi;
 /// Util
-pub mod util;
+pub use keccak_circuit::util;
 
-pub use keccak_packed_multi::KeccakCircuitConfig as KeccakConfig;
+pub use keccak_circuit::KeccakCircuitConfig as KeccakConfig;