diff options
-rw-r--r-- | src/core/arm/dyncom/arm_dyncom_interpreter.cpp | 75 | ||||
-rw-r--r-- | src/core/arm/skyeye_common/vfp/vfp_helper.h | 117 | ||||
-rw-r--r-- | src/core/arm/skyeye_common/vfp/vfpdouble.cpp | 35 | ||||
-rw-r--r-- | src/core/arm/skyeye_common/vfp/vfpsingle.cpp | 30 | ||||
-rw-r--r-- | src/core/core.cpp | 4 | ||||
-rw-r--r-- | src/core/hle/kernel/thread.cpp | 48 | ||||
-rw-r--r-- | src/core/hle/kernel/thread.h | 17 |
7 files changed, 189 insertions, 137 deletions
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index c2973fb395..315b4cc915 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -992,6 +992,14 @@ typedef struct _mcr_inst { unsigned int inst; } mcr_inst; +typedef struct mcrr_inst { + unsigned int opcode_1; + unsigned int cp_num; + unsigned int crm; + unsigned int rt; + unsigned int rt2; +} mcrr_inst; + typedef struct _mrs_inst { unsigned int R; unsigned int Rd; @@ -1261,11 +1269,6 @@ static get_addr_fp_t get_calc_addr_op(unsigned int inst) { #define CHECK_RM (inst_cream->Rm == 15) #define CHECK_RS (inst_cream->Rs == 15) -#define UNIMPLEMENTED_INSTRUCTION(mnemonic) \ - LOG_ERROR(Core_ARM11, "unimplemented instruction: %s", mnemonic); \ - CITRA_IGNORE_EXIT(-1); \ - return nullptr; - static ARM_INST_PTR INTERPRETER_TRANSLATE(adc)(unsigned int inst, int index) { arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(adc_inst)); @@ -1871,7 +1874,26 @@ static ARM_INST_PTR INTERPRETER_TRANSLATE(mcr)(unsigned int inst, int index) inst_cream->inst = inst; return inst_base; } -static ARM_INST_PTR INTERPRETER_TRANSLATE(mcrr)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("MCRR"); } + +static ARM_INST_PTR INTERPRETER_TRANSLATE(mcrr)(unsigned int inst, int index) +{ + arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(mcrr_inst)); + mcrr_inst* const inst_cream = (mcrr_inst*)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->crm = BITS(inst, 0, 3); + inst_cream->opcode_1 = BITS(inst, 4, 7); + inst_cream->cp_num = BITS(inst, 8, 11); + inst_cream->rt = BITS(inst, 12, 15); + inst_cream->rt2 = BITS(inst, 16, 19); + + return inst_base; +} + static ARM_INST_PTR INTERPRETER_TRANSLATE(mla)(unsigned int inst, int index) { arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(mla_inst)); @@ -1930,7 +1952,12 @@ static ARM_INST_PTR INTERPRETER_TRANSLATE(mrc)(unsigned int inst, int index) inst_cream->inst = inst; return inst_base; } -static ARM_INST_PTR INTERPRETER_TRANSLATE(mrrc)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("MRRC"); } + +static ARM_INST_PTR INTERPRETER_TRANSLATE(mrrc)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(mcrr)(inst, index); +} + static ARM_INST_PTR INTERPRETER_TRANSLATE(mrs)(unsigned int inst, int index) { arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(mrs_inst)); @@ -4754,7 +4781,24 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) { FETCH_INST; GOTO_NEXT_INST; } + MCRR_INST: + { + // Stubbed, as the MPCore doesn't have any registers that are accessible + // through this instruction. + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + mcrr_inst* const inst_cream = (mcrr_inst*)inst_base->component; + + LOG_ERROR(Core_ARM11, "MCRR executed | Coprocessor: %u, CRm %u, opc1: %u, Rt: %u, Rt2: %u", + inst_cream->cp_num, inst_cream->crm, inst_cream->opcode_1, inst_cream->rt, inst_cream->rt2); + } + + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(mcrr_inst)); + FETCH_INST; + GOTO_NEXT_INST; + } + MLA_INST: { if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { @@ -4830,7 +4874,24 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) { FETCH_INST; GOTO_NEXT_INST; } + MRRC_INST: + { + // Stubbed, as the MPCore doesn't have any registers that are accessible + // through this instruction. + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + mcrr_inst* const inst_cream = (mcrr_inst*)inst_base->component; + + LOG_ERROR(Core_ARM11, "MRRC executed | Coprocessor: %u, CRm %u, opc1: %u, Rt: %u, Rt2: %u", + inst_cream->cp_num, inst_cream->crm, inst_cream->opcode_1, inst_cream->rt, inst_cream->rt2); + } + + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(mcrr_inst)); + FETCH_INST; + GOTO_NEXT_INST; + } + MRS_INST: { if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { diff --git a/src/core/arm/skyeye_common/vfp/vfp_helper.h b/src/core/arm/skyeye_common/vfp/vfp_helper.h index 6b3dae2804..ccc0212abe 100644 --- a/src/core/arm/skyeye_common/vfp/vfp_helper.h +++ b/src/core/arm/skyeye_common/vfp/vfp_helper.h @@ -35,6 +35,7 @@ #include <cstdio> #include "common/common_types.h" #include "core/arm/skyeye_common/armdefs.h" +#include "core/arm/skyeye_common/vfp/asm_vfp.h" #define do_div(n, base) {n/=base;} @@ -236,33 +237,6 @@ struct vfp_single { #define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1)) #define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1)) -// Unpack a single-precision float. Note that this returns the magnitude -// of the single-precision float mantissa with the 1. if necessary, -// aligned to bit 30. -static inline void vfp_single_unpack(vfp_single* s, s32 val) -{ - u32 significand; - - s->sign = vfp_single_packed_sign(val) >> 16, - s->exponent = vfp_single_packed_exponent(val); - - significand = (u32) val; - significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; - if (s->exponent && s->exponent != 255) - significand |= 0x40000000; - s->significand = significand; -} - -// Re-pack a single-precision float. This assumes that the float is -// already normalised such that the MSB is bit 30, _not_ bit 31. -static inline s32 vfp_single_pack(vfp_single* s) -{ - u32 val = (s->sign << 16) + - (s->exponent << VFP_SINGLE_MANTISSA_BITS) + - (s->significand >> VFP_SINGLE_LOW_BITS); - return (s32)val; -} - enum : u32 { VFP_NUMBER = (1 << 0), VFP_ZERO = (1 << 1), @@ -294,6 +268,39 @@ static inline int vfp_single_type(vfp_single* s) return type; } +// Unpack a single-precision float. Note that this returns the magnitude +// of the single-precision float mantissa with the 1. if necessary, +// aligned to bit 30. +static inline void vfp_single_unpack(vfp_single* s, s32 val, u32* fpscr) +{ + s->sign = vfp_single_packed_sign(val) >> 16, + s->exponent = vfp_single_packed_exponent(val); + + u32 significand = ((u32)val << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; + if (s->exponent && s->exponent != 255) + significand |= 0x40000000; + s->significand = significand; + + // If flush-to-zero mode is enabled, turn the denormal into zero. + // On a VFPv2 architecture, the sign of the zero is always positive. + if ((*fpscr & FPSCR_FLUSH_TO_ZERO) != 0 && (vfp_single_type(s) & VFP_DENORMAL) != 0) { + s->sign = 0; + s->exponent = 0; + s->significand = 0; + *fpscr |= FPSCR_IDC; + } +} + +// Re-pack a single-precision float. This assumes that the float is +// already normalised such that the MSB is bit 30, _not_ bit 31. +static inline s32 vfp_single_pack(vfp_single* s) +{ + u32 val = (s->sign << 16) + + (s->exponent << VFP_SINGLE_MANTISSA_BITS) + + (s->significand >> VFP_SINGLE_LOW_BITS); + return (s32)val; +} + u32 vfp_single_normaliseround(ARMul_State* state, int sd, vfp_single* vs, u32 fpscr, u32 exceptions, const char* func); @@ -328,24 +335,49 @@ struct vfp_double { #define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1)) #define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1)) +static inline int vfp_double_type(vfp_double* s) +{ + int type = VFP_NUMBER; + if (s->exponent == 2047) { + if (s->significand == 0) + type = VFP_INFINITY; + else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) + type = VFP_QNAN; + else + type = VFP_SNAN; + } else if (s->exponent == 0) { + if (s->significand == 0) + type |= VFP_ZERO; + else + type |= VFP_DENORMAL; + } + return type; +} + // Unpack a double-precision float. Note that this returns the magnitude // of the double-precision float mantissa with the 1. if necessary, // aligned to bit 62. -static inline void vfp_double_unpack(vfp_double* s, s64 val) +static inline void vfp_double_unpack(vfp_double* s, s64 val, u32* fpscr) { - u64 significand; - s->sign = vfp_double_packed_sign(val) >> 48; s->exponent = vfp_double_packed_exponent(val); - significand = (u64) val; - significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; + u64 significand = ((u64)val << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; if (s->exponent && s->exponent != 2047) significand |= (1ULL << 62); s->significand = significand; + + // If flush-to-zero mode is enabled, turn the denormal into zero. + // On a VFPv2 architecture, the sign of the zero is always positive. + if ((*fpscr & FPSCR_FLUSH_TO_ZERO) != 0 && (vfp_double_type(s) & VFP_DENORMAL) != 0) { + s->sign = 0; + s->exponent = 0; + s->significand = 0; + *fpscr |= FPSCR_IDC; + } } -// Re-pack a double-precision float. This assumes that the float is +// Re-pack a double-precision float. This assumes that the float is // already normalised such that the MSB is bit 30, _not_ bit 31. static inline s64 vfp_double_pack(vfp_double* s) { @@ -355,25 +387,6 @@ static inline s64 vfp_double_pack(vfp_double* s) return (s64)val; } -static inline int vfp_double_type(vfp_double* s) -{ - int type = VFP_NUMBER; - if (s->exponent == 2047) { - if (s->significand == 0) - type = VFP_INFINITY; - else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) - type = VFP_QNAN; - else - type = VFP_SNAN; - } else if (s->exponent == 0) { - if (s->significand == 0) - type |= VFP_ZERO; - else - type |= VFP_DENORMAL; - } - return type; -} - u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand); // A special flag to tell the normalisation code not to normalise. diff --git a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp index d76d37fd43..ab9fec39d7 100644 --- a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp +++ b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp @@ -291,7 +291,8 @@ static u32 vfp_double_fsqrt(ARMul_State* state, int dd, int unused, int dm, u32 vfp_double vdm, vdd, *vdp; int ret, tm; - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); + tm = vfp_double_type(&vdm); if (tm & (VFP_NAN|VFP_INFINITY)) { vdp = &vdd; @@ -473,7 +474,7 @@ static u32 vfp_double_fcvts(ARMul_State* state, int sd, int unused, int dm, u32 u32 exceptions = 0; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); tm = vfp_double_type(&vdm); @@ -543,7 +544,7 @@ static u32 vfp_double_ftoui(ARMul_State* state, int sd, int unused, int dm, u32 int tm; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); /* * Do we have a denormalised number? @@ -624,7 +625,7 @@ static u32 vfp_double_ftosi(ARMul_State* state, int sd, int unused, int dm, u32 int tm; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); vfp_double_dump("VDM", &vdm); /* @@ -896,11 +897,11 @@ vfp_double_multiply_accumulate(ARMul_State* state, int dd, int dn, int dm, u32 f struct vfp_double vdd, vdp, vdn, vdm; u32 exceptions; - vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr); if (vdn.exponent == 0 && vdn.significand) vfp_double_normalise_denormal(&vdn); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); if (vdm.exponent == 0 && vdm.significand) vfp_double_normalise_denormal(&vdm); @@ -908,7 +909,7 @@ vfp_double_multiply_accumulate(ARMul_State* state, int dd, int dn, int dm, u32 f if (negate & NEG_MULTIPLY) vdp.sign = vfp_sign_negate(vdp.sign); - vfp_double_unpack(&vdn, vfp_get_double(state, dd)); + vfp_double_unpack(&vdn, vfp_get_double(state, dd), &fpscr); if (vdn.exponent == 0 && vdn.significand != 0) vfp_double_normalise_denormal(&vdn); @@ -969,11 +970,11 @@ static u32 vfp_double_fmul(ARMul_State* state, int dd, int dn, int dm, u32 fpscr u32 exceptions; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr); if (vdn.exponent == 0 && vdn.significand) vfp_double_normalise_denormal(&vdn); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); if (vdm.exponent == 0 && vdm.significand) vfp_double_normalise_denormal(&vdm); @@ -990,11 +991,11 @@ static u32 vfp_double_fnmul(ARMul_State* state, int dd, int dn, int dm, u32 fpsc u32 exceptions; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr); if (vdn.exponent == 0 && vdn.significand) vfp_double_normalise_denormal(&vdn); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); if (vdm.exponent == 0 && vdm.significand) vfp_double_normalise_denormal(&vdm); @@ -1013,11 +1014,11 @@ static u32 vfp_double_fadd(ARMul_State* state, int dd, int dn, int dm, u32 fpscr u32 exceptions; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr); if (vdn.exponent == 0 && vdn.significand) vfp_double_normalise_denormal(&vdn); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); if (vdm.exponent == 0 && vdm.significand) vfp_double_normalise_denormal(&vdm); @@ -1035,11 +1036,11 @@ static u32 vfp_double_fsub(ARMul_State* state, int dd, int dn, int dm, u32 fpscr u32 exceptions; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdn, vfp_get_double(state, dn)); + vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr); if (vdn.exponent == 0 && vdn.significand) vfp_double_normalise_denormal(&vdn); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); if (vdm.exponent == 0 && vdm.significand) vfp_double_normalise_denormal(&vdm); @@ -1063,8 +1064,8 @@ static u32 vfp_double_fdiv(ARMul_State* state, int dd, int dn, int dm, u32 fpscr int tm, tn; LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - vfp_double_unpack(&vdn, vfp_get_double(state, dn)); - vfp_double_unpack(&vdm, vfp_get_double(state, dm)); + vfp_double_unpack(&vdn, vfp_get_double(state, dn), &fpscr); + vfp_double_unpack(&vdm, vfp_get_double(state, dm), &fpscr); vdd.sign = vdn.sign ^ vdm.sign; diff --git a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp index a78bdc430e..4dfe0254d6 100644 --- a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp +++ b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp @@ -330,7 +330,7 @@ static u32 vfp_single_fsqrt(ARMul_State* state, int sd, int unused, s32 m, u32 f struct vfp_single vsm, vsd, *vsp; int ret, tm; - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); tm = vfp_single_type(&vsm); if (tm & (VFP_NAN|VFP_INFINITY)) { vsp = &vsd; @@ -498,7 +498,7 @@ static u32 vfp_single_fcvtd(ARMul_State* state, int dd, int unused, s32 m, u32 f int tm; u32 exceptions = 0; - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); tm = vfp_single_type(&vsm); @@ -563,7 +563,7 @@ static u32 vfp_single_ftoui(ARMul_State* state, int sd, int unused, s32 m, u32 f int rmode = fpscr & FPSCR_RMODE_MASK; int tm; - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); vfp_single_dump("VSM", &vsm); /* @@ -643,7 +643,7 @@ static u32 vfp_single_ftosi(ARMul_State* state, int sd, int unused, s32 m, u32 f int rmode = fpscr & FPSCR_RMODE_MASK; int tm; - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); vfp_single_dump("VSM", &vsm); /* @@ -925,11 +925,11 @@ vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fp v = vfp_get_float(state, sn); LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, v); - vfp_single_unpack(&vsn, v); + vfp_single_unpack(&vsn, v, &fpscr); if (vsn.exponent == 0 && vsn.significand) vfp_single_normalise_denormal(&vsn); - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); if (vsm.exponent == 0 && vsm.significand) vfp_single_normalise_denormal(&vsm); @@ -940,7 +940,7 @@ vfp_single_multiply_accumulate(ARMul_State* state, int sd, int sn, s32 m, u32 fp v = vfp_get_float(state, sd); LOG_DEBUG(Core_ARM11, "s%u = %08x", sd, v); - vfp_single_unpack(&vsn, v); + vfp_single_unpack(&vsn, v, &fpscr); if (vsn.exponent == 0 && vsn.significand != 0) vfp_single_normalise_denormal(&vsn); @@ -1004,11 +1004,11 @@ static u32 vfp_single_fmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr) LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, n); - vfp_single_unpack(&vsn, n); + vfp_single_unpack(&vsn, n, &fpscr); if (vsn.exponent == 0 && vsn.significand) vfp_single_normalise_denormal(&vsn); - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); if (vsm.exponent == 0 && vsm.significand) vfp_single_normalise_denormal(&vsm); @@ -1027,11 +1027,11 @@ static u32 vfp_single_fnmul(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, n); - vfp_single_unpack(&vsn, n); + vfp_single_unpack(&vsn, n, &fpscr); if (vsn.exponent == 0 && vsn.significand) vfp_single_normalise_denormal(&vsn); - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); if (vsm.exponent == 0 && vsm.significand) vfp_single_normalise_denormal(&vsm); @@ -1054,11 +1054,11 @@ static u32 vfp_single_fadd(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr) /* * Unpack and normalise denormals. */ - vfp_single_unpack(&vsn, n); + vfp_single_unpack(&vsn, n, &fpscr); if (vsn.exponent == 0 && vsn.significand) vfp_single_normalise_denormal(&vsn); - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsm, m, &fpscr); if (vsm.exponent == 0 && vsm.significand) vfp_single_normalise_denormal(&vsm); @@ -1094,8 +1094,8 @@ static u32 vfp_single_fdiv(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr) LOG_DEBUG(Core_ARM11, "s%u = %08x", sn, n); - vfp_single_unpack(&vsn, n); - vfp_single_unpack(&vsm, m); + vfp_single_unpack(&vsn, n, &fpscr); + vfp_single_unpack(&vsm, m, &fpscr); vsd.sign = vsn.sign ^ vsm.sign; diff --git a/src/core/core.cpp b/src/core/core.cpp index b5c2582304..53aae8c2f7 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -24,9 +24,9 @@ ARM_Interface* g_sys_core = nullptr; ///< ARM11 system (OS) core /// Run the core CPU loop void RunLoop(int tight_loop) { - // If the current thread is an idle thread, then don't execute instructions, + // If we don't have a currently active thread then don't execute instructions, // instead advance to the next event and try to yield to the next thread - if (Kernel::GetCurrentThread()->IsIdle()) { + if (Kernel::GetCurrentThread() == nullptr) { LOG_TRACE(Core_ARM11, "Idling"); CoreTiming::Idle(); CoreTiming::Advance(); diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 34dc257aac..afaf0cd5d3 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -160,7 +160,7 @@ static void PriorityBoostStarvedThreads() { u64 delta = current_ticks - thread->last_running_ticks; - if (thread->status == THREADSTATUS_READY && delta > boost_timeout && !thread->idle) { + if (thread->status == THREADSTATUS_READY && delta > boost_timeout) { const s32 priority = std::max(ready_queue.get_first()->current_priority - 1, 0); thread->BoostPriority(priority); } @@ -172,8 +172,6 @@ static void PriorityBoostStarvedThreads() { * @param new_thread The thread to switch to */ static void SwitchContext(Thread* new_thread) { - DEBUG_ASSERT_MSG(new_thread->status == THREADSTATUS_READY, "Thread must be ready to become running."); - Thread* previous_thread = GetCurrentThread(); // Save context for previous thread @@ -191,6 +189,8 @@ static void SwitchContext(Thread* new_thread) { // Load context of new thread if (new_thread) { + DEBUG_ASSERT_MSG(new_thread->status == THREADSTATUS_READY, "Thread must be ready to become running."); + current_thread = new_thread; ready_queue.remove(new_thread->current_priority, new_thread); @@ -218,6 +218,10 @@ static Thread* PopNextReadyThread() { // We have to do better than the current thread. // This call returns null when that's not possible. next = ready_queue.pop_first_better(thread->current_priority); + if (!next) { + // Otherwise just keep going with the current thread + next = thread; + } } else { next = ready_queue.pop_first(); } @@ -450,6 +454,8 @@ void Thread::SetPriority(s32 priority) { // If thread was ready, adjust queues if (status == THREADSTATUS_READY) ready_queue.move(this, current_priority, priority); + else + ready_queue.prepare(priority); nominal_priority = current_priority = priority; } @@ -459,16 +465,6 @@ void Thread::BoostPriority(s32 priority) { current_priority = priority; } -SharedPtr<Thread> SetupIdleThread() { - // We need to pass a few valid values to get around parameter checking in Thread::Create. - // TODO(yuriks): Figure out a way to avoid passing the bogus VAddr parameter - auto thread = Thread::Create("idle", Memory::TLS_AREA_VADDR, THREADPRIO_LOWEST, 0, - THREADPROCESSORID_0, 0).MoveFrom(); - - thread->idle = true; - return thread; -} - SharedPtr<Thread> SetupMainThread(u32 entry_point, s32 priority) { DEBUG_ASSERT(!GetCurrentThread()); @@ -485,24 +481,25 @@ SharedPtr<Thread> SetupMainThread(u32 entry_point, s32 priority) { } void Reschedule() { - Thread* prev = GetCurrentThread(); - PriorityBoostStarvedThreads(); + Thread* cur = GetCurrentThread(); Thread* next = PopNextReadyThread(); HLE::g_reschedule = false; - if (next != nullptr) { - LOG_TRACE(Kernel, "context switch %u -> %u", prev->GetObjectId(), next->GetObjectId()); - SwitchContext(next); - } else { - LOG_TRACE(Kernel, "cannot context switch from %u, no higher priority thread!", prev->GetObjectId()); + // Don't bother switching to the same thread + if (next == cur) + return; - for (auto& thread : thread_list) { - LOG_TRACE(Kernel, "\tid=%u prio=0x%02X, status=0x%08X", thread->GetObjectId(), - thread->current_priority, thread->status); - } + if (cur && next) { + LOG_TRACE(Kernel, "context switch %u -> %u", cur->GetObjectId(), next->GetObjectId()); + } else if (cur) { + LOG_TRACE(Kernel, "context switch %u -> idle", cur->GetObjectId()); + } else { + LOG_TRACE(Kernel, "context switch idle -> %u", next->GetObjectId()); } + + SwitchContext(next); } void Thread::SetWaitSynchronizationResult(ResultCode result) { @@ -527,9 +524,6 @@ void ThreadingInit() { thread_list.clear(); ready_queue.clear(); - - // Setup the idle thread - SetupIdleThread(); } void ThreadingShutdown() { diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index cfbebab088..6b329c12a9 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -73,12 +73,6 @@ public: void Acquire() override; /** - * Checks if the thread is an idle (stub) thread - * @return True if the thread is an idle (stub) thread, false otherwise - */ - inline bool IsIdle() const { return idle; } - - /** * Gets the thread's current priority * @return The current thread's priority */ @@ -170,9 +164,6 @@ public: std::string name; - /// Whether this thread is intended to never actually be executed, i.e. always idle - bool idle = false; - private: Thread(); ~Thread() override; @@ -231,14 +222,6 @@ void WaitCurrentThread_WaitSynchronization(std::vector<SharedPtr<WaitObject>> wa void WaitCurrentThread_ArbitrateAddress(VAddr wait_address); /** - * Sets up the idle thread, this is a thread that is intended to never execute instructions, - * only to advance the timing. It is scheduled when there are no other ready threads in the thread queue - * and will try to yield on every call. - * @return The handle of the idle thread - */ -SharedPtr<Thread> SetupIdleThread(); - -/** * Initialize threading */ void ThreadingInit(); |