From 3c88e329f48e4b62542da5ddb6baf48bcae881bd Mon Sep 17 00:00:00 2001 From: Elad Ashkenazi <18193363+elad335@users.noreply.github.com> Date: Sat, 31 Aug 2024 10:53:54 +0300 Subject: [PATCH 1/4] Logs: Reduce logging IO --- rpcs3/Input/hid_pad_handler.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rpcs3/Input/hid_pad_handler.cpp b/rpcs3/Input/hid_pad_handler.cpp index 40dcbdef2484..4e8022bd4619 100644 --- a/rpcs3/Input/hid_pad_handler.cpp +++ b/rpcs3/Input/hid_pad_handler.cpp @@ -209,7 +209,8 @@ void hid_pad_handler::enumerate_devices() } hid_free_enumeration(head); } - hid_log.notice("%s enumeration found %d devices (%f ms)", m_type, device_paths.size(), timer.GetElapsedTimeInMilliSec()); + + (device_paths.empty() ? hid_log.trace : hid_log.notice)("%s enumeration found %d devices (%f ms)", m_type, device_paths.size(), timer.GetElapsedTimeInMilliSec()); std::lock_guard lock(m_enumeration_mutex); m_new_enumerated_devices = device_paths; From 4ac83c2a31c5871a2e1965557194a44638719ec5 Mon Sep 17 00:00:00 2001 From: Elad Ashkenazi <18193363+elad335@users.noreply.github.com> Date: Sat, 31 Aug 2024 15:33:20 +0300 Subject: [PATCH 2/4] SPU LLVM: Upgrade constants propagation --- rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 101 +++++-------------------- 1 file changed, 17 insertions(+), 84 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 698e5bd178fd..6c0f43c9299b 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -4316,13 +4316,6 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { const u32 orig = bb.reg_origin_abs[i]; - if (orig < 0x40000) - { - auto& src = ::at32(m_bbs, orig); - bb.reg_const[i] = src.reg_const[i]; - bb.reg_val32[i] = src.reg_val32[i]; - } - if (!bb.reg_save_dom[i] && bb.reg_use[i] && (orig == 0x40000 || orig + 2 == 0)) { // Destroy offset if external reg value is used @@ -4356,71 +4349,6 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Propagate some constants switch (last_inst) { - case spu_itype::IL: - { - bb.reg_const[op.rt] = true; - bb.reg_val32[op.rt] = op.si16; - break; - } - case spu_itype::ILA: - { - bb.reg_const[op.rt] = true; - bb.reg_val32[op.rt] = op.i18; - break; - } - case spu_itype::ILHU: - { - bb.reg_const[op.rt] = true; - bb.reg_val32[op.rt] = op.i16 << 16; - break; - } - case spu_itype::ILH: - { - bb.reg_const[op.rt] = true; - bb.reg_val32[op.rt] = op.i16 << 16 | op.i16; - break; - } - case spu_itype::IOHL: - { - bb.reg_val32[op.rt] = bb.reg_val32[op.rt] | op.i16; - break; - } - case spu_itype::ORI: - { - bb.reg_const[op.rt] = bb.reg_const[op.ra]; - bb.reg_val32[op.rt] = bb.reg_val32[op.ra] | op.si10; - break; - } - case spu_itype::OR: - { - bb.reg_const[op.rt] = bb.reg_const[op.ra] && bb.reg_const[op.rb]; - bb.reg_val32[op.rt] = bb.reg_val32[op.ra] | bb.reg_val32[op.rb]; - break; - } - case spu_itype::AI: - { - bb.reg_const[op.rt] = bb.reg_const[op.ra]; - bb.reg_val32[op.rt] = bb.reg_val32[op.ra] + op.si10; - break; - } - case spu_itype::A: - { - bb.reg_const[op.rt] = bb.reg_const[op.ra] && bb.reg_const[op.rb]; - bb.reg_val32[op.rt] = bb.reg_val32[op.ra] + bb.reg_val32[op.rb]; - break; - } - case spu_itype::SFI: - { - bb.reg_const[op.rt] = bb.reg_const[op.ra]; - bb.reg_val32[op.rt] = op.si10 - bb.reg_val32[op.ra]; - break; - } - case spu_itype::SF: - { - bb.reg_const[op.rt] = bb.reg_const[op.ra] && bb.reg_const[op.rb]; - bb.reg_val32[op.rt] = bb.reg_val32[op.rb] - bb.reg_val32[op.ra]; - break; - } case spu_itype::STQD: { if (op.ra == s_reg_sp && bb.stack_sub != 0x80000000 && bb.reg_save_dom[op.rt]) @@ -4449,15 +4377,10 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s bb.reg_load_mod[op.rt] = 0x80000000 + op.si10 * 16 - bb.stack_sub; } - // Clear const - bb.reg_const[op.rt] = false; break; } default: { - // Clear const if reg is modified here - if (u8 reg = m_regmod[ia / 4]; reg < s_reg_max) - bb.reg_const[reg] = false; break; } } @@ -4465,12 +4388,6 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // $SP is modified if (m_regmod[ia / 4] == s_reg_sp) { - if (bb.reg_const[s_reg_sp]) - { - // Making $SP a constant is a funny thing too. - bb.stack_sub = 0x80000000; - } - if (bb.stack_sub != 0x80000000) { switch (last_inst) @@ -6266,7 +6183,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s atomic16->get_rdatomic = true; // Go above and beyond and also set the constant for it - set_const_value(op.rt, MFC_GETLLAR_SUCCESS); + //set_const_value(op.rt, MFC_GETLLAR_SUCCESS); invalidate = false; } } @@ -7105,6 +7022,22 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s fmt::append(func_hash, "%s", fmt::base57(output)); } + for (auto& [addr, block] : infos) + { + auto& bb = ::at32(m_bbs, addr); + + for (u32 i = 0; i < s_reg_max; i++) + { + const auto& reg = block->start_reg_state[i]; + + if (reg.is_const()) + { + bb.reg_const[i] = true; + bb.reg_val32[i] = reg.value; + } + } + } + for (const auto& [pc_commited, pattern] : atomic16_all) { if (!pattern.active) From fa3eb2454ffa2816ae765a3018d8fc230326b38e Mon Sep 17 00:00:00 2001 From: Elad Ashkenazi <18193363+elad335@users.noreply.github.com> Date: Sat, 31 Aug 2024 16:06:34 +0300 Subject: [PATCH 3/4] SPU LLVM: Simplify register origin discovery --- rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 70 ++++++++++++-------------- rpcs3/Emu/Cell/SPURecompiler.h | 6 ++- 2 files changed, 35 insertions(+), 41 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 6c0f43c9299b..675548beaa64 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -2719,12 +2719,13 @@ reg_state_t reg_state_t::merge(const reg_state_t& rhs, u32 current_pc) const res.tag = reg_state_t::alloc_tag(); res.origin = current_pc; res.is_instruction = false; + res.is_phi = true; return res; } } } - return make_unknown(current_pc); + return make_unknown(current_pc, current_pc, true); } reg_state_t reg_state_t::build_on_top_of(const reg_state_t& rhs) const @@ -4229,23 +4230,6 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s for (u32 i = 0; i < s_reg_max; i++) { - if (tb.chunk == block.chunk && tb.reg_origin[i] + 1) - { - const u32 expected = block.reg_mod[i] ? addr : block.reg_origin[i]; - - if (tb.reg_origin[i] == 0x80000000) - { - tb.reg_origin[i] = expected; - } - else if (tb.reg_origin[i] != expected) - { - // Set -1 if multiple origins merged (requires PHI node) - tb.reg_origin[i] = -1; - - must_repeat |= !tb.targets.empty(); - } - } - if (g_cfg.core.spu_block_size == spu_block_size_type::giga && tb.func == block.func && tb.reg_origin_abs[i] + 2) { const u32 expected = block.reg_mod[i] ? addr : block.reg_origin_abs[i]; @@ -5832,7 +5816,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } case MFC_Cmd: { - const auto [af, av, atagg, _3, _5, apc, ainst] = get_reg(op.rt); + const auto [af, av, atagg, _3, _5, apc, ainst, aphi] = get_reg(op.rt); if (!is_pattern_match) { @@ -6630,7 +6614,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s case spu_itype::HBR: { hbr_loc = spu_branch_target(pos, op.roh << 7 | op.rt); - const auto [af, av, at, ao, az, apc, ainst] = get_reg(op.ra); + const auto [af, av, at, ao, az, apc, ainst, aphi] = get_reg(op.ra); hbr_tg = af & vf::is_const && !op.c ? av & 0x3fffc : -1; break; } @@ -6698,8 +6682,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const auto ra = get_reg(op.ra); const auto rb = get_reg(op.rb); - const auto [af, av, at, ao, az, apc, ainst] = ra; - const auto [bf, bv, bt, bo, bz, bpc, binst] = rb; + const auto [af, av, at, ao, az, apc, ainst, aphi] = ra; + const auto [bf, bv, bt, bo, bz, bpc, binst, bphi] = rb; inherit_const_value(op.rt, ra, rb, av | bv, pos); break; @@ -6714,7 +6698,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const auto ra = get_reg(op.ra); - const auto [af, av, at, ao, az, apc, ainst] = ra; + const auto [af, av, at, ao, az, apc, ainst, aphi] = ra; inherit_const_value(op.rt, ra, ra, av ^ op.si10, pos); break; @@ -6730,8 +6714,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const auto ra = get_reg(op.ra); const auto rb = get_reg(op.rb); - const auto [af, av, at, ao, az, apc, ainst] = ra; - const auto [bf, bv, bt, bo, bz, bpc, binst] = rb; + const auto [af, av, at, ao, az, apc, ainst, aphi] = ra; + const auto [bf, bv, bt, bo, bz, bpc, binst, bphi] = rb; inherit_const_value(op.rt, ra, rb, bv ^ av, pos); break; @@ -6741,8 +6725,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const auto ra = get_reg(op.ra); const auto rb = get_reg(op.rb); - const auto [af, av, at, ao, az, apc, ainst] = ra; - const auto [bf, bv, bt, bo, bz, bpc, binst] = rb; + const auto [af, av, at, ao, az, apc, ainst, aphi] = ra; + const auto [bf, bv, bt, bo, bz, bpc, binst, bphi] = rb; inherit_const_value(op.rt, ra, rb, ~(bv | av), pos); break; @@ -6764,8 +6748,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const auto ra = get_reg(op.ra); const auto rb = get_reg(op.rb); - const auto [af, av, at, ao, az, apc, ainst] = ra; - const auto [bf, bv, bt, bo, bz, bpc, binst] = rb; + const auto [af, av, at, ao, az, apc, ainst, aphi] = ra; + const auto [bf, bv, bt, bo, bz, bpc, binst, bphi] = rb; inherit_const_value(op.rt, ra, rb, bv & av, pos); break; @@ -6779,7 +6763,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } const auto ra = get_reg(op.ra); - const auto [af, av, at, ao, az, apc, ainst] = ra; + const auto [af, av, at, ao, az, apc, ainst, aphi] = ra; inherit_const_value(op.rt, ra, ra, av + op.si10, pos); @@ -6796,8 +6780,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const auto ra = get_reg(op.ra); const auto rb = get_reg(op.rb); - const auto [af, av, at, ao, az, apc, ainst] = ra; - const auto [bf, bv, bt, bo, bz, bpc, binst] = rb; + const auto [af, av, at, ao, az, apc, ainst, aphi] = ra; + const auto [bf, bv, bt, bo, bz, bpc, binst, bphi] = rb; inherit_const_value(op.rt, ra, rb, bv + av, pos); @@ -6812,7 +6796,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s case spu_itype::SFI: { const auto ra = get_reg(op.ra); - const auto [af, av, at, ao, az, apc, ainst] = get_reg(op.ra); + const auto [af, av, at, ao, az, apc, ainst, aphi] = get_reg(op.ra); inherit_const_value(op.rt, ra, ra, op.si10 - av, pos); break; @@ -6822,8 +6806,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const auto ra = get_reg(op.ra); const auto rb = get_reg(op.rb); - const auto [af, av, at, ao, az, apc, ainst] = ra; - const auto [bf, bv, bt, bo, bz, bpc, binst] = rb; + const auto [af, av, at, ao, az, apc, ainst, aphi] = ra; + const auto [bf, bv, bt, bo, bz, bpc, binst, bphi] = rb; inherit_const_value(op.rt, ra, rb, bv - av, pos); @@ -6862,7 +6846,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } const auto ra = get_reg(op.ra); - const auto [af, av, at, ao, az, apc, ainst] = get_reg(op.ra); + const auto [af, av, at, ao, az, apc, ainst, aphi] = get_reg(op.ra); inherit_const_value(op.rt, ra, ra, av >> ((0 - op.i7) & 0x1f), pos); break; @@ -6882,7 +6866,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } const auto ra = get_reg(op.ra); - const auto [af, av, at, ao, az, apc, ainst] = ra; + const auto [af, av, at, ao, az, apc, ainst, aphi] = ra; inherit_const_value(op.rt, ra, ra, av << (op.i7 & 0x1f), pos); break; @@ -6899,7 +6883,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s case spu_itype::CEQI: { const auto ra = get_reg(op.ra); - const auto [af, av, at, ao, az, apc, ainst] = ra; + const auto [af, av, at, ao, az, apc, ainst, aphi] = ra; inherit_const_value(op.rt, ra, ra, av == op.si10 + 0u, pos); @@ -7034,7 +7018,15 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { bb.reg_const[i] = true; bb.reg_val32[i] = reg.value; - } + } + else if (reg.is_instruction) + { + bb.reg_origin[i] = reg.origin; + } + else if (reg.is_phi) + { + bb.reg_origin[i] = -1; + } } } diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 03b69583d847..c5aeb0e8c105 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -210,6 +210,7 @@ class spu_recompiler_base u32 known_zeroes{}; u32 origin = SPU_LS_SIZE; bool is_instruction = false; + bool is_phi = false; bool is_const() const; @@ -243,7 +244,7 @@ class spu_recompiler_base void invalidate_if_created(u32 current_pc); template - static std::conditional_t> make_unknown(u32 pc, u32 current_pc = SPU_LS_SIZE) noexcept + static std::conditional_t> make_unknown(u32 pc, u32 current_pc = SPU_LS_SIZE, bool is_phi = false) noexcept { if constexpr (Count == 1) { @@ -252,6 +253,7 @@ class spu_recompiler_base v.flag = {}; v.origin = pc; v.is_instruction = pc == current_pc; + v.is_phi = is_phi; return v; } else @@ -260,7 +262,7 @@ class spu_recompiler_base for (reg_state_t& state : result) { - state = make_unknown<1>(pc, current_pc); + state = make_unknown<1>(pc, current_pc, is_phi); } return result; From b164a5830f89b67b504c2761781273c5ecc1b2b4 Mon Sep 17 00:00:00 2001 From: Elad Ashkenazi <18193363+elad335@users.noreply.github.com> Date: Mon, 2 Sep 2024 12:48:14 +0300 Subject: [PATCH 4/4] Update SPULLVMRecompiler.cpp --- rpcs3/Emu/Cell/SPULLVMRecompiler.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 18f9fa27b3c1..91e6f2daaf81 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -1890,6 +1890,12 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Initialize registers and build PHI nodes if necessary for (u32 i = 0; i < s_reg_max; i++) { + if (bb.reg_const[i]) + { + m_block->reg[i] = make_const_vector(v128::from32p(bb.reg_val32[i]), get_type()); + continue; + } + const u32 src = m_finfo->fn ? bb.reg_origin_abs[i] : bb.reg_origin[i]; if (src > 0x40000)