Cortex-A8: avoid DSCR reads

There was a lot of needless handshaking overhead in the current
Cortex-A8 DCC/ITR operations, since the status read by each step
was discarded rather than letting the next step know it.

This shrinks the handshaking by:  (a) passing status along from
previous steps, avoiding re-fetching; which enables the big win
(b) relying on a useful invariant:  that the DSCR_INSTR_COMP bit
is set after every call to a DPM method.

A "reg sp_usr" call previously took 17 flushes; now it takes just 9.
This visibly speeds common operations like entry to debug state and
stepping, as well as "arm reg" and so on.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
__archive__
David Brownell 2009-11-24 21:24:44 -08:00
parent e109bb6af2
commit b6210907ea
1 changed files with 104 additions and 49 deletions

View File

@ -89,20 +89,25 @@ static int cortex_a8_init_debug_access(struct target *target)
return retval; return retval;
} }
/* FIXME we waste a *LOT* of round-trips with needless DSCR reads, which /* To reduce needless round-trips, pass in a pointer to the current
* slows down operations considerably. One good way to start reducing * DSCR value. Initialize it to zero if you just need to know the
* them would pass current values into and out of this routine. That * value on return from this function; or (1 << DSCR_INSTR_COMP) if
* should also help synch DCC read/write. * you happen to know that no instruction is pending.
*/ */
static int cortex_a8_exec_opcode(struct target *target, uint32_t opcode) static int cortex_a8_exec_opcode(struct target *target,
uint32_t opcode, uint32_t *dscr_p)
{ {
uint32_t dscr; uint32_t dscr;
int retval; int retval;
struct armv7a_common *armv7a = target_to_armv7a(target); struct armv7a_common *armv7a = target_to_armv7a(target);
struct swjdp_common *swjdp = &armv7a->swjdp_info; struct swjdp_common *swjdp = &armv7a->swjdp_info;
dscr = dscr_p ? *dscr_p : 0;
LOG_DEBUG("exec opcode 0x%08" PRIx32, opcode); LOG_DEBUG("exec opcode 0x%08" PRIx32, opcode);
do
/* Wait for InstrCompl bit to be set */
while ((dscr & (1 << DSCR_INSTR_COMP)) == 0)
{ {
retval = mem_ap_read_atomic_u32(swjdp, retval = mem_ap_read_atomic_u32(swjdp,
armv7a->debug_base + CPUDBG_DSCR, &dscr); armv7a->debug_base + CPUDBG_DSCR, &dscr);
@ -112,7 +117,6 @@ static int cortex_a8_exec_opcode(struct target *target, uint32_t opcode)
return retval; return retval;
} }
} }
while ((dscr & (1 << DSCR_INSTR_COMP)) == 0); /* Wait for InstrCompl bit to be set */
mem_ap_write_u32(swjdp, armv7a->debug_base + CPUDBG_ITR, opcode); mem_ap_write_u32(swjdp, armv7a->debug_base + CPUDBG_ITR, opcode);
@ -128,6 +132,9 @@ static int cortex_a8_exec_opcode(struct target *target, uint32_t opcode)
} }
while ((dscr & (1 << DSCR_INSTR_COMP)) == 0); /* Wait for InstrCompl bit to be set */ while ((dscr & (1 << DSCR_INSTR_COMP)) == 0); /* Wait for InstrCompl bit to be set */
if (dscr_p)
*dscr_p = dscr;
return retval; return retval;
} }
@ -144,7 +151,7 @@ static int cortex_a8_read_regs_through_mem(struct target *target, uint32_t addre
cortex_a8_dap_read_coreregister_u32(target, regfile, 0); cortex_a8_dap_read_coreregister_u32(target, regfile, 0);
cortex_a8_dap_write_coreregister_u32(target, address, 0); cortex_a8_dap_write_coreregister_u32(target, address, 0);
cortex_a8_exec_opcode(target, ARMV4_5_STMIA(0, 0xFFFE, 0, 0)); cortex_a8_exec_opcode(target, ARMV4_5_STMIA(0, 0xFFFE, 0, 0), NULL);
dap_ap_select(swjdp, swjdp_memoryap); dap_ap_select(swjdp, swjdp_memoryap);
mem_ap_read_buf_u32(swjdp, (uint8_t *)(&regfile[1]), 4*15, address); mem_ap_read_buf_u32(swjdp, (uint8_t *)(&regfile[1]), 4*15, address);
dap_ap_select(swjdp, swjdp_debugap); dap_ap_select(swjdp, swjdp_debugap);
@ -158,10 +165,15 @@ static int cortex_a8_read_cp(struct target *target, uint32_t *value, uint8_t CP,
int retval; int retval;
struct armv7a_common *armv7a = target_to_armv7a(target); struct armv7a_common *armv7a = target_to_armv7a(target);
struct swjdp_common *swjdp = &armv7a->swjdp_info; struct swjdp_common *swjdp = &armv7a->swjdp_info;
uint32_t dscr = 0;
/* MRC(...) to read coprocessor register into r0 */
cortex_a8_exec_opcode(target, ARMV4_5_MRC(CP, op1, 0, CRn, CRm, op2),
&dscr);
cortex_a8_exec_opcode(target, ARMV4_5_MRC(CP, op1, 0, CRn, CRm, op2));
/* Move R0 to DTRTX */ /* Move R0 to DTRTX */
cortex_a8_exec_opcode(target, ARMV4_5_MCR(14, 0, 0, 0, 5, 0)); cortex_a8_exec_opcode(target, ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
&dscr);
/* Read DCCTX */ /* Read DCCTX */
retval = mem_ap_read_atomic_u32(swjdp, retval = mem_ap_read_atomic_u32(swjdp,
@ -187,16 +199,21 @@ static int cortex_a8_write_cp(struct target *target, uint32_t value,
{ {
LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr); LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
/* Clear DCCRX with MCR(p14, 0, Rd, c0, c5, 0), opcode 0xEE000E15 */ /* Clear DCCRX with MCR(p14, 0, Rd, c0, c5, 0), opcode 0xEE000E15 */
cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0)); cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
&dscr);
} }
/* Write DTRRX ... sets DSCR.DTRRXfull but exec_opcode() won't care */
retval = mem_ap_write_u32(swjdp, retval = mem_ap_write_u32(swjdp,
armv7a->debug_base + CPUDBG_DTRRX, value); armv7a->debug_base + CPUDBG_DTRRX, value);
/* Move DTRRX to r0 */
cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
cortex_a8_exec_opcode(target, ARMV4_5_MCR(CP, op1, 0, CRn, CRm, op2)); /* Move DTRRX to r0 */
return retval; cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0), &dscr);
/* MCR(...) to write r0 to coprocessor */
return cortex_a8_exec_opcode(target,
ARMV4_5_MCR(CP, op1, 0, CRn, CRm, op2),
&dscr);
} }
static int cortex_a8_read_cp15(struct target *target, uint32_t op1, uint32_t op2, static int cortex_a8_read_cp15(struct target *target, uint32_t op1, uint32_t op2,
@ -238,7 +255,7 @@ static int cortex_a8_dap_read_coreregister_u32(struct target *target,
{ {
int retval = ERROR_OK; int retval = ERROR_OK;
uint8_t reg = regnum&0xFF; uint8_t reg = regnum&0xFF;
uint32_t dscr; uint32_t dscr = 0;
struct armv7a_common *armv7a = target_to_armv7a(target); struct armv7a_common *armv7a = target_to_armv7a(target);
struct swjdp_common *swjdp = &armv7a->swjdp_info; struct swjdp_common *swjdp = &armv7a->swjdp_info;
@ -248,30 +265,35 @@ static int cortex_a8_dap_read_coreregister_u32(struct target *target,
if (reg < 15) if (reg < 15)
{ {
/* Rn to DCCTX, "MCR p14, 0, Rn, c0, c5, 0" 0xEE00nE15 */ /* Rn to DCCTX, "MCR p14, 0, Rn, c0, c5, 0" 0xEE00nE15 */
cortex_a8_exec_opcode(target, ARMV4_5_MCR(14, 0, reg, 0, 5, 0)); cortex_a8_exec_opcode(target,
ARMV4_5_MCR(14, 0, reg, 0, 5, 0),
&dscr);
} }
else if (reg == 15) else if (reg == 15)
{ {
/* "MOV r0, r15"; then move r0 to DCCTX */ /* "MOV r0, r15"; then move r0 to DCCTX */
cortex_a8_exec_opcode(target, 0xE1A0000F); cortex_a8_exec_opcode(target, 0xE1A0000F, &dscr);
cortex_a8_exec_opcode(target, ARMV4_5_MCR(14, 0, 0, 0, 5, 0)); cortex_a8_exec_opcode(target,
ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
&dscr);
} }
else else
{ {
/* "MRS r0, CPSR" or "MRS r0, SPSR" /* "MRS r0, CPSR" or "MRS r0, SPSR"
* then move r0 to DCCTX * then move r0 to DCCTX
*/ */
cortex_a8_exec_opcode(target, ARMV4_5_MRS(0, reg & 1)); cortex_a8_exec_opcode(target, ARMV4_5_MRS(0, reg & 1), &dscr);
cortex_a8_exec_opcode(target, ARMV4_5_MCR(14, 0, 0, 0, 5, 0)); cortex_a8_exec_opcode(target,
ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
&dscr);
} }
/* Read DTRRTX */ /* Wait for DTRRXfull then read DTRRTX */
do while ((dscr & (1 << DSCR_DTR_TX_FULL)) == 0)
{ {
retval = mem_ap_read_atomic_u32(swjdp, retval = mem_ap_read_atomic_u32(swjdp,
armv7a->debug_base + CPUDBG_DSCR, &dscr); armv7a->debug_base + CPUDBG_DSCR, &dscr);
} }
while ((dscr & (1 << DSCR_DTR_TX_FULL)) == 0); /* Wait for DTRRXfull */
retval = mem_ap_read_atomic_u32(swjdp, retval = mem_ap_read_atomic_u32(swjdp,
armv7a->debug_base + CPUDBG_DTRTX, value); armv7a->debug_base + CPUDBG_DTRTX, value);
@ -298,13 +320,14 @@ static int cortex_a8_dap_write_coreregister_u32(struct target *target,
{ {
LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr); LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
/* Clear DCCRX with MCR(p14, 0, Rd, c0, c5, 0), opcode 0xEE000E15 */ /* Clear DCCRX with MCR(p14, 0, Rd, c0, c5, 0), opcode 0xEE000E15 */
cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0)); cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
&dscr);
} }
if (Rd > 17) if (Rd > 17)
return retval; return retval;
/* Write to DCCRX */ /* Write DTRRX ... sets DSCR.DTRRXfull but exec_opcode() won't care */
LOG_DEBUG("write DCC 0x%08" PRIx32, value); LOG_DEBUG("write DCC 0x%08" PRIx32, value);
retval = mem_ap_write_u32(swjdp, retval = mem_ap_write_u32(swjdp,
armv7a->debug_base + CPUDBG_DTRRX, value); armv7a->debug_base + CPUDBG_DTRRX, value);
@ -312,28 +335,33 @@ static int cortex_a8_dap_write_coreregister_u32(struct target *target,
if (Rd < 15) if (Rd < 15)
{ {
/* DCCRX to Rn, "MCR p14, 0, Rn, c0, c5, 0", 0xEE00nE15 */ /* DCCRX to Rn, "MCR p14, 0, Rn, c0, c5, 0", 0xEE00nE15 */
cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, Rd, 0, 5, 0)); cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, Rd, 0, 5, 0),
&dscr);
} }
else if (Rd == 15) else if (Rd == 15)
{ {
/* DCCRX to R0, "MCR p14, 0, R0, c0, c5, 0", 0xEE000E15 /* DCCRX to R0, "MCR p14, 0, R0, c0, c5, 0", 0xEE000E15
* then "mov r15, r0" * then "mov r15, r0"
*/ */
cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0)); cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
cortex_a8_exec_opcode(target, 0xE1A0F000); &dscr);
cortex_a8_exec_opcode(target, 0xE1A0F000, &dscr);
} }
else else
{ {
/* DCCRX to R0, "MCR p14, 0, R0, c0, c5, 0", 0xEE000E15 /* DCCRX to R0, "MCR p14, 0, R0, c0, c5, 0", 0xEE000E15
* then "MSR CPSR_cxsf, r0" or "MSR SPSR_cxsf, r0" (all fields) * then "MSR CPSR_cxsf, r0" or "MSR SPSR_cxsf, r0" (all fields)
*/ */
cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0)); cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
cortex_a8_exec_opcode(target, ARMV4_5_MSR_GP(0, 0xF, Rd & 1)); &dscr);
cortex_a8_exec_opcode(target, ARMV4_5_MSR_GP(0, 0xF, Rd & 1),
&dscr);
/* "Prefetch flush" after modifying execution status in CPSR */ /* "Prefetch flush" after modifying execution status in CPSR */
if (Rd == 16) if (Rd == 16)
cortex_a8_exec_opcode(target, cortex_a8_exec_opcode(target,
ARMV4_5_MCR(15, 0, 0, 7, 5, 4)); ARMV4_5_MCR(15, 0, 0, 7, 5, 4),
&dscr);
} }
return retval; return retval;
@ -354,6 +382,9 @@ static int cortex_a8_dap_write_memap_register_u32(struct target *target, uint32_
/* /*
* Cortex-A8 implementation of Debug Programmer's Model * Cortex-A8 implementation of Debug Programmer's Model
* *
* NOTE the invariant: these routines return with DSCR_INSTR_COMP set,
* so there's no need to poll for it before executing an instruction.
*
* NOTE that in several of these cases the "stall" mode might be useful. * NOTE that in several of these cases the "stall" mode might be useful.
* It'd let us queue a few operations together... prepare/finish might * It'd let us queue a few operations together... prepare/finish might
* be the places to enable/disable that mode. * be the places to enable/disable that mode.
@ -371,23 +402,30 @@ static int cortex_a8_write_dcc(struct cortex_a8_common *a8, uint32_t data)
a8->armv7a_common.debug_base + CPUDBG_DTRRX, data); a8->armv7a_common.debug_base + CPUDBG_DTRRX, data);
} }
static int cortex_a8_read_dcc(struct cortex_a8_common *a8, uint32_t *data) static int cortex_a8_read_dcc(struct cortex_a8_common *a8, uint32_t *data,
uint32_t *dscr_p)
{ {
struct swjdp_common *swjdp = &a8->armv7a_common.swjdp_info; struct swjdp_common *swjdp = &a8->armv7a_common.swjdp_info;
uint32_t dscr; uint32_t dscr = 1 << DSCR_INSTR_COMP;
int retval; int retval;
if (dscr_p)
dscr = *dscr_p;
/* Wait for DTRRXfull */ /* Wait for DTRRXfull */
do { while ((dscr & (1 << DSCR_DTR_TX_FULL)) == 0) {
retval = mem_ap_read_atomic_u32(swjdp, retval = mem_ap_read_atomic_u32(swjdp,
a8->armv7a_common.debug_base + CPUDBG_DSCR, a8->armv7a_common.debug_base + CPUDBG_DSCR,
&dscr); &dscr);
} while ((dscr & (1 << DSCR_DTR_TX_FULL)) == 0); }
retval = mem_ap_read_atomic_u32(swjdp, retval = mem_ap_read_atomic_u32(swjdp,
a8->armv7a_common.debug_base + CPUDBG_DTRTX, data); a8->armv7a_common.debug_base + CPUDBG_DTRTX, data);
LOG_DEBUG("read DCC 0x%08" PRIx32, *data); LOG_DEBUG("read DCC 0x%08" PRIx32, *data);
if (dscr_p)
*dscr_p = dscr;
return retval; return retval;
} }
@ -398,9 +436,12 @@ static int cortex_a8_dpm_prepare(struct arm_dpm *dpm)
uint32_t dscr; uint32_t dscr;
int retval; int retval;
retval = mem_ap_read_atomic_u32(swjdp, /* set up invariant: INSTR_COMP is set after ever DPM operation */
a8->armv7a_common.debug_base + CPUDBG_DSCR, do {
&dscr); retval = mem_ap_read_atomic_u32(swjdp,
a8->armv7a_common.debug_base + CPUDBG_DSCR,
&dscr);
} while ((dscr & (1 << DSCR_INSTR_COMP)) == 0);
/* this "should never happen" ... */ /* this "should never happen" ... */
if (dscr & (1 << DSCR_DTR_RX_FULL)) { if (dscr & (1 << DSCR_DTR_RX_FULL)) {
@ -408,7 +449,8 @@ static int cortex_a8_dpm_prepare(struct arm_dpm *dpm)
/* Clear DCCRX */ /* Clear DCCRX */
retval = cortex_a8_exec_opcode( retval = cortex_a8_exec_opcode(
a8->armv7a_common.armv4_5_common.target, a8->armv7a_common.armv4_5_common.target,
ARMV4_5_MRC(14, 0, 0, 0, 5, 0)); ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
&dscr);
} }
return retval; return retval;
@ -425,18 +467,21 @@ static int cortex_a8_instr_write_data_dcc(struct arm_dpm *dpm,
{ {
struct cortex_a8_common *a8 = dpm_to_a8(dpm); struct cortex_a8_common *a8 = dpm_to_a8(dpm);
int retval; int retval;
uint32_t dscr = 1 << DSCR_INSTR_COMP;
retval = cortex_a8_write_dcc(a8, data); retval = cortex_a8_write_dcc(a8, data);
return cortex_a8_exec_opcode( return cortex_a8_exec_opcode(
a8->armv7a_common.armv4_5_common.target, a8->armv7a_common.armv4_5_common.target,
opcode); opcode,
&dscr);
} }
static int cortex_a8_instr_write_data_r0(struct arm_dpm *dpm, static int cortex_a8_instr_write_data_r0(struct arm_dpm *dpm,
uint32_t opcode, uint32_t data) uint32_t opcode, uint32_t data)
{ {
struct cortex_a8_common *a8 = dpm_to_a8(dpm); struct cortex_a8_common *a8 = dpm_to_a8(dpm);
uint32_t dscr = 1 << DSCR_INSTR_COMP;
int retval; int retval;
retval = cortex_a8_write_dcc(a8, data); retval = cortex_a8_write_dcc(a8, data);
@ -444,12 +489,14 @@ static int cortex_a8_instr_write_data_r0(struct arm_dpm *dpm,
/* DCCRX to R0, "MCR p14, 0, R0, c0, c5, 0", 0xEE000E15 */ /* DCCRX to R0, "MCR p14, 0, R0, c0, c5, 0", 0xEE000E15 */
retval = cortex_a8_exec_opcode( retval = cortex_a8_exec_opcode(
a8->armv7a_common.armv4_5_common.target, a8->armv7a_common.armv4_5_common.target,
ARMV4_5_MRC(14, 0, 0, 0, 5, 0)); ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
&dscr);
/* then the opcode, taking data from R0 */ /* then the opcode, taking data from R0 */
retval = cortex_a8_exec_opcode( retval = cortex_a8_exec_opcode(
a8->armv7a_common.armv4_5_common.target, a8->armv7a_common.armv4_5_common.target,
opcode); opcode,
&dscr);
return retval; return retval;
} }
@ -457,9 +504,12 @@ static int cortex_a8_instr_write_data_r0(struct arm_dpm *dpm,
static int cortex_a8_instr_cpsr_sync(struct arm_dpm *dpm) static int cortex_a8_instr_cpsr_sync(struct arm_dpm *dpm)
{ {
struct target *target = dpm->arm->target; struct target *target = dpm->arm->target;
uint32_t dscr = 1 << DSCR_INSTR_COMP;
/* "Prefetch flush" after modifying execution status in CPSR */ /* "Prefetch flush" after modifying execution status in CPSR */
return cortex_a8_exec_opcode(target, ARMV4_5_MCR(15, 0, 0, 7, 5, 4)); return cortex_a8_exec_opcode(target,
ARMV4_5_MCR(15, 0, 0, 7, 5, 4),
&dscr);
} }
static int cortex_a8_instr_read_data_dcc(struct arm_dpm *dpm, static int cortex_a8_instr_read_data_dcc(struct arm_dpm *dpm,
@ -467,13 +517,15 @@ static int cortex_a8_instr_read_data_dcc(struct arm_dpm *dpm,
{ {
struct cortex_a8_common *a8 = dpm_to_a8(dpm); struct cortex_a8_common *a8 = dpm_to_a8(dpm);
int retval; int retval;
uint32_t dscr = 1 << DSCR_INSTR_COMP;
/* the opcode, writing data to DCC */ /* the opcode, writing data to DCC */
retval = cortex_a8_exec_opcode( retval = cortex_a8_exec_opcode(
a8->armv7a_common.armv4_5_common.target, a8->armv7a_common.armv4_5_common.target,
opcode); opcode,
&dscr);
return cortex_a8_read_dcc(a8, data); return cortex_a8_read_dcc(a8, data, &dscr);
} }
@ -481,19 +533,22 @@ static int cortex_a8_instr_read_data_r0(struct arm_dpm *dpm,
uint32_t opcode, uint32_t *data) uint32_t opcode, uint32_t *data)
{ {
struct cortex_a8_common *a8 = dpm_to_a8(dpm); struct cortex_a8_common *a8 = dpm_to_a8(dpm);
uint32_t dscr = 1 << DSCR_INSTR_COMP;
int retval; int retval;
/* the opcode, writing data to R0 */ /* the opcode, writing data to R0 */
retval = cortex_a8_exec_opcode( retval = cortex_a8_exec_opcode(
a8->armv7a_common.armv4_5_common.target, a8->armv7a_common.armv4_5_common.target,
opcode); opcode,
&dscr);
/* write R0 to DCC */ /* write R0 to DCC */
retval = cortex_a8_exec_opcode( retval = cortex_a8_exec_opcode(
a8->armv7a_common.armv4_5_common.target, a8->armv7a_common.armv4_5_common.target,
ARMV4_5_MCR(14, 0, 0, 0, 5, 0)); ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
&dscr);
return cortex_a8_read_dcc(a8, data); return cortex_a8_read_dcc(a8, data, &dscr);
} }
static int cortex_a8_dpm_setup(struct cortex_a8_common *a8, uint32_t didr) static int cortex_a8_dpm_setup(struct cortex_a8_common *a8, uint32_t didr)