From 72381bd55e4ce2aaed8660551e8f56a2c959c11f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 9 Oct 2009 19:14:43 +0200 Subject: [PATCH 01/21] amd64_edac: clarify DRAM CTL debug reporting Make debug info formulations about the DRAM and DCT configuration of the machine more human readable. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index a38831c8264..0252a61f3d2 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1402,27 +1402,36 @@ static void f10_read_dram_ctl_register(struct amd64_pvt *pvt) err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCTL_SEL_LOW, &pvt->dram_ctl_select_low); if (err) { - debugf0("Reading F10_DCTL_SEL_LOW failed\n"); + debugf0("Reading F2x110 (DCTL Sel. Low) failed\n"); } else { - debugf0("DRAM_DCTL_SEL_LOW=0x%x DctSelBaseAddr=0x%x\n", - pvt->dram_ctl_select_low, dct_sel_baseaddr(pvt)); + debugf0("F2x110 (DCTL Sel. Low): 0x%08x, " + "High range addresses at: 0x%x\n", + pvt->dram_ctl_select_low, + dct_sel_baseaddr(pvt)); - debugf0(" DRAM DCTs are=%s DRAM Is=%s DRAM-Ctl-" - "sel-hi-range=%s\n", - (dct_ganging_enabled(pvt) ? "GANGED" : "NOT GANGED"), - (dct_dram_enabled(pvt) ? "Enabled" : "Disabled"), - (dct_high_range_enabled(pvt) ? "Enabled" : "Disabled")); + debugf0(" DCT mode: %s, All DCTs on: %s\n", + (dct_ganging_enabled(pvt) ? "ganged" : "unganged"), + (dct_dram_enabled(pvt) ? "yes" : "no")); - debugf0(" DctDatIntLv=%s MemCleared=%s DctSelIntLvAddr=0x%x\n", - (dct_data_intlv_enabled(pvt) ? "Enabled" : "Disabled"), - (dct_memory_cleared(pvt) ? "True " : "False "), + if (!dct_ganging_enabled(pvt)) + debugf0(" Address range split per DCT: %s\n", + (dct_high_range_enabled(pvt) ? "yes" : "no")); + + debugf0(" DCT data interleave for ECC: %s, " + "DRAM cleared since last warm reset: %s\n", + (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"), + (dct_memory_cleared(pvt) ? "yes" : "no")); + + debugf0(" DCT channel interleave: %s, " + "DCT interleave bits selector: 0x%x\n", + (dct_interleave_enabled(pvt) ? "enabled" : "disabled"), dct_sel_interleave_addr(pvt)); } err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCTL_SEL_HIGH, &pvt->dram_ctl_select_high); if (err) - debugf0("Reading F10_DCTL_SEL_HIGH failed\n"); + debugf0("Reading F2x114 (DCT Sel. High) failed\n"); } /* From e97f8bb8ce5611a855c5a0dba949706ec37d4155 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 12 Oct 2009 15:27:45 +0200 Subject: [PATCH 02/21] amd64_edac: make DRAM regions output more human-readable Do not shift the TOP_MEM and TOP_MEM2 values by 23 but rather save the whole 64-bit value read from the MSR. Although the TOP_MEM/TOP_MEM2 bits are only a subset of the 64bit register, the values are correct since the remaining bits are Read-As-Zero and no shifting is needed. Also, cleanup DRAM base/limit debug output. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 0252a61f3d2..3408b94b118 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2399,16 +2399,14 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt) * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since * those are Read-As-Zero */ - rdmsrl(MSR_K8_TOP_MEM1, msr_val); - pvt->top_mem = msr_val >> 23; - debugf0(" TOP_MEM=0x%08llx\n", pvt->top_mem); + rdmsrl(MSR_K8_TOP_MEM1, pvt->top_mem); + debugf0(" TOP_MEM: 0x%016llx\n", pvt->top_mem); /* check first whether TOP_MEM2 is enabled */ rdmsrl(MSR_K8_SYSCFG, msr_val); if (msr_val & (1U << 21)) { - rdmsrl(MSR_K8_TOP_MEM2, msr_val); - pvt->top_mem2 = msr_val >> 23; - debugf0(" TOP_MEM2=0x%08llx\n", pvt->top_mem2); + rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2); + debugf0(" TOP_MEM2: 0x%016llx\n", pvt->top_mem2); } else debugf0(" TOP_MEM2 disabled.\n"); @@ -2434,13 +2432,12 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt) * debug output block away. */ if (pvt->dram_rw_en[dram] != 0) { - debugf1(" DRAM_BASE[%d]: 0x%8.08x-%8.08x " - "DRAM_LIMIT: 0x%8.08x-%8.08x\n", + debugf1(" DRAM-BASE[%d]: 0x%016llx " + "DRAM-LIMIT: 0x%016llx\n", dram, - (u32)(pvt->dram_base[dram] >> 32), - (u32)(pvt->dram_base[dram] & 0xFFFFFFFF), - (u32)(pvt->dram_limit[dram] >> 32), - (u32)(pvt->dram_limit[dram] & 0xFFFFFFFF)); + pvt->dram_base[dram], + pvt->dram_limit[dram]); + debugf1(" IntlvEn=%s %s %s " "IntlvSel=%d DstNode=%d\n", pvt->dram_IntlvEn[dram] ? From ba578cb34a71fb08fff14ac0796b934a8c9991e1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 3 Nov 2009 14:56:35 +1030 Subject: [PATCH 03/21] cpumask: use modern cpumask style in drivers/edac/amd64_edac.c cpumask_t -> struct cpumask, and don't put one on the stack. (Note: this is actually on the stack unless CONFIG_CPUMASK_OFFSTACK=y). Signed-off-by: Rusty Russell Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 3408b94b118..67541e7d1cf 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2631,7 +2631,7 @@ static int amd64_init_csrows(struct mem_ctl_info *mci) static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; - const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id); + const struct cpumask *cpumask = cpumask_of_node(pvt->mc_node_id); int cpu, idx = 0, err = 0; struct msr msrs[cpumask_weight(cpumask)]; u32 value; @@ -2707,7 +2707,7 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) { - const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id); + const struct cpumask *cpumask = cpumask_of_node(pvt->mc_node_id); int cpu, idx = 0, err = 0; struct msr msrs[cpumask_weight(cpumask)]; u32 value; @@ -2740,7 +2740,7 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) } /* get all cores on this DCT */ -static void get_cpus_on_this_dct_cpumask(cpumask_t *mask, int nid) +static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, int nid) { int cpu; @@ -2752,25 +2752,30 @@ static void get_cpus_on_this_dct_cpumask(cpumask_t *mask, int nid) /* check MCG_CTL on all the cpus on this node */ static bool amd64_nb_mce_bank_enabled_on_node(int nid) { - cpumask_t mask; + cpumask_var_t mask; struct msr *msrs; int cpu, nbe, idx = 0; bool ret = false; - cpumask_clear(&mask); + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { + amd64_printk(KERN_WARNING, "%s: error allocating mask\n", + __func__); + return false; + } - get_cpus_on_this_dct_cpumask(&mask, nid); + get_cpus_on_this_dct_cpumask(mask, nid); - msrs = kzalloc(sizeof(struct msr) * cpumask_weight(&mask), GFP_KERNEL); + msrs = kzalloc(sizeof(struct msr) * cpumask_weight(mask), GFP_KERNEL); if (!msrs) { amd64_printk(KERN_WARNING, "%s: error allocating msrs\n", __func__); + free_cpumask_var(mask); return false; } - rdmsr_on_cpus(&mask, MSR_IA32_MCG_CTL, msrs); + rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs); - for_each_cpu(cpu, &mask) { + for_each_cpu(cpu, mask) { nbe = msrs[idx].l & K8_MSR_MCGCTL_NBE; debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n", @@ -2786,6 +2791,7 @@ static bool amd64_nb_mce_bank_enabled_on_node(int nid) out: kfree(msrs); + free_cpumask_var(mask); return ret; } From f6d6ae965760906d79ab29bc38507608c5971549 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 3 Nov 2009 15:29:26 +0100 Subject: [PATCH 04/21] amd64_edac: unify MCGCTL ECC switching Unify almost identical code into one function and remove NUMA-specific usage (specifically cpumask_of_node()) in favor of generic topology methods. Remove unused defines, while at it. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 254 +++++++++++++++++++++----------------- drivers/edac/amd64_edac.h | 9 +- 2 files changed, 142 insertions(+), 121 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 67541e7d1cf..70c7d5f5ba5 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2624,121 +2624,6 @@ static int amd64_init_csrows(struct mem_ctl_info *mci) return empty; } -/* - * Only if 'ecc_enable_override' is set AND BIOS had ECC disabled, do "we" - * enable it. - */ -static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) -{ - struct amd64_pvt *pvt = mci->pvt_info; - const struct cpumask *cpumask = cpumask_of_node(pvt->mc_node_id); - int cpu, idx = 0, err = 0; - struct msr msrs[cpumask_weight(cpumask)]; - u32 value; - u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn; - - if (!ecc_enable_override) - return; - - memset(msrs, 0, sizeof(msrs)); - - amd64_printk(KERN_WARNING, - "'ecc_enable_override' parameter is active, " - "Enabling AMD ECC hardware now: CAUTION\n"); - - err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value); - if (err) - debugf0("Reading K8_NBCTL failed\n"); - - /* turn on UECCn and CECCEn bits */ - pvt->old_nbctl = value & mask; - pvt->nbctl_mcgctl_saved = 1; - - value |= mask; - pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value); - - rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs); - - for_each_cpu(cpu, cpumask) { - if (msrs[idx].l & K8_MSR_MCGCTL_NBE) - set_bit(idx, &pvt->old_mcgctl); - - msrs[idx].l |= K8_MSR_MCGCTL_NBE; - idx++; - } - wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs); - - err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value); - if (err) - debugf0("Reading K8_NBCFG failed\n"); - - debugf0("NBCFG(1)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, - (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", - (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"); - - if (!(value & K8_NBCFG_ECC_ENABLE)) { - amd64_printk(KERN_WARNING, - "This node reports that DRAM ECC is " - "currently Disabled; ENABLING now\n"); - - /* Attempt to turn on DRAM ECC Enable */ - value |= K8_NBCFG_ECC_ENABLE; - pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value); - - err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value); - if (err) - debugf0("Reading K8_NBCFG failed\n"); - - if (!(value & K8_NBCFG_ECC_ENABLE)) { - amd64_printk(KERN_WARNING, - "Hardware rejects Enabling DRAM ECC checking\n" - "Check memory DIMM configuration\n"); - } else { - amd64_printk(KERN_DEBUG, - "Hardware accepted DRAM ECC Enable\n"); - } - } - debugf0("NBCFG(2)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, - (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", - (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"); - - pvt->ctl_error_info.nbcfg = value; -} - -static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) -{ - const struct cpumask *cpumask = cpumask_of_node(pvt->mc_node_id); - int cpu, idx = 0, err = 0; - struct msr msrs[cpumask_weight(cpumask)]; - u32 value; - u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn; - - if (!pvt->nbctl_mcgctl_saved) - return; - - memset(msrs, 0, sizeof(msrs)); - - err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value); - if (err) - debugf0("Reading K8_NBCTL failed\n"); - value &= ~mask; - value |= pvt->old_nbctl; - - /* restore the NB Enable MCGCTL bit */ - pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value); - - rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs); - - for_each_cpu(cpu, cpumask) { - msrs[idx].l &= ~K8_MSR_MCGCTL_NBE; - msrs[idx].l |= - test_bit(idx, &pvt->old_mcgctl) << K8_MSR_MCGCTL_NBE; - idx++; - } - - wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs); -} - /* get all cores on this DCT */ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, int nid) { @@ -2795,6 +2680,144 @@ out: return ret; } +static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on) +{ + cpumask_var_t cmask; + struct msr *msrs = NULL; + int cpu, idx = 0; + + if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) { + amd64_printk(KERN_WARNING, "%s: error allocating mask\n", + __func__); + return false; + } + + get_cpus_on_this_dct_cpumask(cmask, pvt->mc_node_id); + + msrs = kzalloc(sizeof(struct msr) * cpumask_weight(cmask), GFP_KERNEL); + if (!msrs) { + amd64_printk(KERN_WARNING, "%s: error allocating msrs\n", + __func__); + return -ENOMEM; + } + + rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs); + + for_each_cpu(cpu, cmask) { + + if (on) { + if (msrs[idx].l & K8_MSR_MCGCTL_NBE) + pvt->flags.ecc_report = 1; + + msrs[idx].l |= K8_MSR_MCGCTL_NBE; + } else { + /* + * Turn off ECC reporting only when it was off before + */ + if (!pvt->flags.ecc_report) + msrs[idx].l &= ~K8_MSR_MCGCTL_NBE; + } + idx++; + } + wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs); + + kfree(msrs); + free_cpumask_var(cmask); + + return 0; +} + +/* + * Only if 'ecc_enable_override' is set AND BIOS had ECC disabled, do "we" + * enable it. + */ +static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) +{ + struct amd64_pvt *pvt = mci->pvt_info; + int err = 0; + u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn; + + if (!ecc_enable_override) + return; + + amd64_printk(KERN_WARNING, + "'ecc_enable_override' parameter is active, " + "Enabling AMD ECC hardware now: CAUTION\n"); + + err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value); + if (err) + debugf0("Reading K8_NBCTL failed\n"); + + /* turn on UECCn and CECCEn bits */ + pvt->old_nbctl = value & mask; + pvt->nbctl_mcgctl_saved = 1; + + value |= mask; + pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value); + + if (amd64_toggle_ecc_err_reporting(pvt, ON)) + amd64_printk(KERN_WARNING, "Error enabling ECC reporting over " + "MCGCTL!\n"); + + err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value); + if (err) + debugf0("Reading K8_NBCFG failed\n"); + + debugf0("NBCFG(1)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, + (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", + (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"); + + if (!(value & K8_NBCFG_ECC_ENABLE)) { + amd64_printk(KERN_WARNING, + "This node reports that DRAM ECC is " + "currently Disabled; ENABLING now\n"); + + /* Attempt to turn on DRAM ECC Enable */ + value |= K8_NBCFG_ECC_ENABLE; + pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value); + + err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value); + if (err) + debugf0("Reading K8_NBCFG failed\n"); + + if (!(value & K8_NBCFG_ECC_ENABLE)) { + amd64_printk(KERN_WARNING, + "Hardware rejects Enabling DRAM ECC checking\n" + "Check memory DIMM configuration\n"); + } else { + amd64_printk(KERN_DEBUG, + "Hardware accepted DRAM ECC Enable\n"); + } + } + debugf0("NBCFG(2)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, + (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", + (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"); + + pvt->ctl_error_info.nbcfg = value; +} + +static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) +{ + int err = 0; + u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn; + + if (!pvt->nbctl_mcgctl_saved) + return; + + err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value); + if (err) + debugf0("Reading K8_NBCTL failed\n"); + value &= ~mask; + value |= pvt->old_nbctl; + + /* restore the NB Enable MCGCTL bit */ + pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value); + + if (amd64_toggle_ecc_err_reporting(pvt, OFF)) + amd64_printk(KERN_WARNING, "Error restoring ECC reporting over " + "MCGCTL!\n"); +} + /* * EDAC requires that the BIOS have ECC enabled before taking over the * processing of ECC errors. This is because the BIOS can properly initialize @@ -2921,7 +2944,6 @@ static int amd64_probe_one_instance(struct pci_dev *dram_f2_ctl, pvt->ext_model = boot_cpu_data.x86_model >> 4; pvt->mc_type_index = mc_type_index; pvt->ops = family_ops(mc_type_index); - pvt->old_mcgctl = 0; /* * We have the dram_f2_ctl device as an argument, now go reserve its diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index c6f359a8520..bba6c944ff1 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -147,6 +147,8 @@ #define MAX_CS_COUNT 8 #define DRAM_REG_COUNT 8 +#define ON true +#define OFF false /* * PCI-defined configuration space registers @@ -386,10 +388,7 @@ enum { #define K8_NBCAP_DUAL_NODE BIT(1) #define K8_NBCAP_DCT_DUAL BIT(0) -/* - * MSR Regs - */ -#define K8_MSR_MCGCTL 0x017b +/* MSRs */ #define K8_MSR_MCGCTL_NBE BIT(4) #define K8_MSR_MC4CTL 0x0410 @@ -487,7 +486,6 @@ struct amd64_pvt { /* Save old hw registers' values before we modified them */ u32 nbctl_mcgctl_saved; /* When true, following 2 are valid */ u32 old_nbctl; - unsigned long old_mcgctl; /* per core on this node */ /* MC Type Index value: socket F vs Family 10h */ u32 mc_type_index; @@ -495,6 +493,7 @@ struct amd64_pvt { /* misc settings */ struct flags { unsigned long cf8_extcfg:1; + unsigned long ecc_report:1; } flags; }; From 6ba5dcdc44624677bba0bef1dcb93a524f88f8c1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 13 Oct 2009 19:26:55 +0200 Subject: [PATCH 05/21] amd64_edac: wrap-up pci config read error handling Add a pci config read wrapper for signaling pci config space access errors instead of them being visible only on a debug build. This is important on amd64_edac since it uses all those pci config register values to access the DRAM/DIMM configuration of the nodes. In addition, the wrapper makes a _lot_ (look at the diffstat!) of error handling code superfluous and improves much of the overall code readability by removing error handling details out of the way. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 213 ++++++++++---------------------------- drivers/edac/amd64_edac.h | 16 +++ 2 files changed, 70 insertions(+), 159 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 70c7d5f5ba5..3e5ece6e7c9 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -164,11 +164,9 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw) { struct amd64_pvt *pvt = mci->pvt_info; u32 scrubval = 0; - int status = -1, i, ret = 0; + int status = -1, i; - ret = pci_read_config_dword(pvt->misc_f3_ctl, K8_SCRCTRL, &scrubval); - if (ret) - debugf0("Reading K8_SCRCTRL failed\n"); + amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_SCRCTRL, &scrubval); scrubval = scrubval & 0x001F; @@ -909,26 +907,10 @@ static void amd64_dump_misc_regs(struct amd64_pvt *pvt) /* Read in both of DBAM registers */ static void amd64_read_dbam_reg(struct amd64_pvt *pvt) { - int err = 0; - unsigned int reg; + amd64_read_pci_cfg(pvt->dram_f2_ctl, DBAM0, &pvt->dbam0); - reg = DBAM0; - err = pci_read_config_dword(pvt->dram_f2_ctl, reg, &pvt->dbam0); - if (err) - goto err_reg; - - if (boot_cpu_data.x86 >= 0x10) { - reg = DBAM1; - err = pci_read_config_dword(pvt->dram_f2_ctl, reg, &pvt->dbam1); - - if (err) - goto err_reg; - } - - return; - -err_reg: - debugf0("Error reading F2x%03x.\n", reg); + if (boot_cpu_data.x86 >= 0x10) + amd64_read_pci_cfg(pvt->dram_f2_ctl, DBAM1, &pvt->dbam1); } /* @@ -991,28 +973,21 @@ static void amd64_set_dct_base_and_mask(struct amd64_pvt *pvt) */ static void amd64_read_dct_base_mask(struct amd64_pvt *pvt) { - int cs, reg, err = 0; + int cs, reg; amd64_set_dct_base_and_mask(pvt); for (cs = 0; cs < pvt->cs_count; cs++) { reg = K8_DCSB0 + (cs * 4); - err = pci_read_config_dword(pvt->dram_f2_ctl, reg, - &pvt->dcsb0[cs]); - if (unlikely(err)) - debugf0("Reading K8_DCSB0[%d] failed\n", cs); - else + if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, &pvt->dcsb0[cs])) debugf0(" DCSB0[%d]=0x%08x reg: F2x%x\n", cs, pvt->dcsb0[cs], reg); /* If DCT are NOT ganged, then read in DCT1's base */ if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) { reg = F10_DCSB1 + (cs * 4); - err = pci_read_config_dword(pvt->dram_f2_ctl, reg, - &pvt->dcsb1[cs]); - if (unlikely(err)) - debugf0("Reading F10_DCSB1[%d] failed\n", cs); - else + if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, + &pvt->dcsb1[cs])) debugf0(" DCSB1[%d]=0x%08x reg: F2x%x\n", cs, pvt->dcsb1[cs], reg); } else { @@ -1022,26 +997,20 @@ static void amd64_read_dct_base_mask(struct amd64_pvt *pvt) for (cs = 0; cs < pvt->num_dcsm; cs++) { reg = K8_DCSM0 + (cs * 4); - err = pci_read_config_dword(pvt->dram_f2_ctl, reg, - &pvt->dcsm0[cs]); - if (unlikely(err)) - debugf0("Reading K8_DCSM0 failed\n"); - else + if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, &pvt->dcsm0[cs])) debugf0(" DCSM0[%d]=0x%08x reg: F2x%x\n", cs, pvt->dcsm0[cs], reg); /* If DCT are NOT ganged, then read in DCT1's mask */ if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) { reg = F10_DCSM1 + (cs * 4); - err = pci_read_config_dword(pvt->dram_f2_ctl, reg, - &pvt->dcsm1[cs]); - if (unlikely(err)) - debugf0("Reading F10_DCSM1[%d] failed\n", cs); - else + if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, + &pvt->dcsm1[cs])) debugf0(" DCSM1[%d]=0x%08x reg: F2x%x\n", cs, pvt->dcsm1[cs], reg); - } else + } else { pvt->dcsm1[cs] = 0; + } } } @@ -1078,7 +1047,7 @@ static int k8_early_channel_count(struct amd64_pvt *pvt) { int flag, err = 0; - err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); + err = amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); if (err) return err; @@ -1114,22 +1083,15 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram) { u32 low; u32 off = dram << 3; /* 8 bytes between DRAM entries */ - int err; - err = pci_read_config_dword(pvt->addr_f1_ctl, - K8_DRAM_BASE_LOW + off, &low); - if (err) - debugf0("Reading K8_DRAM_BASE_LOW failed\n"); + amd64_read_pci_cfg(pvt->addr_f1_ctl, K8_DRAM_BASE_LOW + off, &low); /* Extract parts into separate data entries */ pvt->dram_base[dram] = ((u64) low & 0xFFFF0000) << 8; pvt->dram_IntlvEn[dram] = (low >> 8) & 0x7; pvt->dram_rw_en[dram] = (low & 0x3); - err = pci_read_config_dword(pvt->addr_f1_ctl, - K8_DRAM_LIMIT_LOW + off, &low); - if (err) - debugf0("Reading K8_DRAM_LIMIT_LOW failed\n"); + amd64_read_pci_cfg(pvt->addr_f1_ctl, K8_DRAM_LIMIT_LOW + off, &low); /* * Extract parts into separate data entries. Limit is the HIGHEST memory @@ -1248,16 +1210,13 @@ static int k8_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map) static int f10_early_channel_count(struct amd64_pvt *pvt) { int dbams[] = { DBAM0, DBAM1 }; - int err = 0, channels = 0; - int i, j; + int i, j, channels = 0; u32 dbam; - err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); - if (err) + if (amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0)) goto err_reg; - err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1); - if (err) + if (amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1)) goto err_reg; /* If we are in 128 bit mode, then we are using 2 channels */ @@ -1283,8 +1242,7 @@ static int f10_early_channel_count(struct amd64_pvt *pvt) * both controllers since DIMMs can be placed in either one. */ for (i = 0; i < ARRAY_SIZE(dbams); i++) { - err = pci_read_config_dword(pvt->dram_f2_ctl, dbams[i], &dbam); - if (err) + if (amd64_read_pci_cfg(pvt->dram_f2_ctl, dbams[i], &dbam)) goto err_reg; for (j = 0; j < 4; j++) { @@ -1314,7 +1272,7 @@ static void amd64_setup(struct amd64_pvt *pvt) { u32 reg; - pci_read_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, ®); + amd64_read_pci_cfg(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, ®); pvt->flags.cf8_extcfg = !!(reg & F10_NB_CFG_LOW_ENABLE_EXT_CFG); reg |= F10_NB_CFG_LOW_ENABLE_EXT_CFG; @@ -1326,7 +1284,7 @@ static void amd64_teardown(struct amd64_pvt *pvt) { u32 reg; - pci_read_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, ®); + amd64_read_pci_cfg(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, ®); reg &= ~F10_NB_CFG_LOW_ENABLE_EXT_CFG; if (pvt->flags.cf8_extcfg) @@ -1355,10 +1313,10 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram) high_offset = F10_DRAM_BASE_HIGH + (dram << 3); /* read the 'raw' DRAM BASE Address register */ - pci_read_config_dword(pvt->addr_f1_ctl, low_offset, &low_base); + amd64_read_pci_cfg(pvt->addr_f1_ctl, low_offset, &low_base); /* Read from the ECS data register */ - pci_read_config_dword(pvt->addr_f1_ctl, high_offset, &high_base); + amd64_read_pci_cfg(pvt->addr_f1_ctl, high_offset, &high_base); /* Extract parts into separate data entries */ pvt->dram_rw_en[dram] = (low_base & 0x3); @@ -1375,10 +1333,10 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram) high_offset = F10_DRAM_LIMIT_HIGH + (dram << 3); /* read the 'raw' LIMIT registers */ - pci_read_config_dword(pvt->addr_f1_ctl, low_offset, &low_limit); + amd64_read_pci_cfg(pvt->addr_f1_ctl, low_offset, &low_limit); /* Read from the ECS data register for the HIGH portion */ - pci_read_config_dword(pvt->addr_f1_ctl, high_offset, &high_limit); + amd64_read_pci_cfg(pvt->addr_f1_ctl, high_offset, &high_limit); debugf0(" HW Regs: BASE=0x%08x-%08x LIMIT= 0x%08x-%08x\n", high_base, low_base, high_limit, low_limit); @@ -1397,13 +1355,9 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram) static void f10_read_dram_ctl_register(struct amd64_pvt *pvt) { - int err = 0; - err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCTL_SEL_LOW, - &pvt->dram_ctl_select_low); - if (err) { - debugf0("Reading F2x110 (DCTL Sel. Low) failed\n"); - } else { + if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCTL_SEL_LOW, + &pvt->dram_ctl_select_low)) { debugf0("F2x110 (DCTL Sel. Low): 0x%08x, " "High range addresses at: 0x%x\n", pvt->dram_ctl_select_low, @@ -1428,10 +1382,8 @@ static void f10_read_dram_ctl_register(struct amd64_pvt *pvt) dct_sel_interleave_addr(pvt)); } - err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCTL_SEL_HIGH, - &pvt->dram_ctl_select_high); - if (err) - debugf0("Reading F2x114 (DCT Sel. High) failed\n"); + amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCTL_SEL_HIGH, + &pvt->dram_ctl_select_high); } /* @@ -2082,40 +2034,24 @@ static int amd64_get_error_info_regs(struct mem_ctl_info *mci, { struct amd64_pvt *pvt; struct pci_dev *misc_f3_ctl; - int err = 0; pvt = mci->pvt_info; misc_f3_ctl = pvt->misc_f3_ctl; - err = pci_read_config_dword(misc_f3_ctl, K8_NBSH, ®s->nbsh); - if (err) - goto err_reg; + if (amd64_read_pci_cfg(misc_f3_ctl, K8_NBSH, ®s->nbsh)) + return 0; if (!(regs->nbsh & K8_NBSH_VALID_BIT)) return 0; /* valid error, read remaining error information registers */ - err = pci_read_config_dword(misc_f3_ctl, K8_NBSL, ®s->nbsl); - if (err) - goto err_reg; - - err = pci_read_config_dword(misc_f3_ctl, K8_NBEAL, ®s->nbeal); - if (err) - goto err_reg; - - err = pci_read_config_dword(misc_f3_ctl, K8_NBEAH, ®s->nbeah); - if (err) - goto err_reg; - - err = pci_read_config_dword(misc_f3_ctl, K8_NBCFG, ®s->nbcfg); - if (err) - goto err_reg; + if (amd64_read_pci_cfg(misc_f3_ctl, K8_NBSL, ®s->nbsl) || + amd64_read_pci_cfg(misc_f3_ctl, K8_NBEAL, ®s->nbeal) || + amd64_read_pci_cfg(misc_f3_ctl, K8_NBEAH, ®s->nbeah) || + amd64_read_pci_cfg(misc_f3_ctl, K8_NBCFG, ®s->nbcfg)) + return 0; return 1; - -err_reg: - debugf0("Reading error info register failed\n"); - return 0; } /* @@ -2393,7 +2329,7 @@ static void amd64_free_mc_sibling_devices(struct amd64_pvt *pvt) static void amd64_read_mc_registers(struct amd64_pvt *pvt) { u64 msr_val; - int dram, err = 0; + int dram; /* * Retrieve TOP_MEM and TOP_MEM2; no masking off of reserved bits since @@ -2412,9 +2348,7 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt) amd64_cpu_display_info(pvt); - err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCAP, &pvt->nbcap); - if (err) - goto err_reg; + amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCAP, &pvt->nbcap); if (pvt->ops->read_dram_ctl_register) pvt->ops->read_dram_ctl_register(pvt); @@ -2451,44 +2385,20 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt) amd64_read_dct_base_mask(pvt); - err = pci_read_config_dword(pvt->addr_f1_ctl, K8_DHAR, &pvt->dhar); - if (err) - goto err_reg; - + amd64_read_pci_cfg(pvt->addr_f1_ctl, K8_DHAR, &pvt->dhar); amd64_read_dbam_reg(pvt); - err = pci_read_config_dword(pvt->misc_f3_ctl, - F10_ONLINE_SPARE, &pvt->online_spare); - if (err) - goto err_reg; + amd64_read_pci_cfg(pvt->misc_f3_ctl, + F10_ONLINE_SPARE, &pvt->online_spare); - err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); - if (err) - goto err_reg; - - err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCHR_0, &pvt->dchr0); - if (err) - goto err_reg; + amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); + amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_0, &pvt->dchr0); if (!dct_ganging_enabled(pvt)) { - err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_1, - &pvt->dclr1); - if (err) - goto err_reg; - - err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCHR_1, - &pvt->dchr1); - if (err) - goto err_reg; + amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1); + amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_1, &pvt->dchr1); } - amd64_dump_misc_regs(pvt); - - return; - -err_reg: - debugf0("Reading an MC register failed\n"); - } /* @@ -2562,13 +2472,11 @@ static int amd64_init_csrows(struct mem_ctl_info *mci) struct csrow_info *csrow; struct amd64_pvt *pvt; u64 input_addr_min, input_addr_max, sys_addr; - int i, err = 0, empty = 1; + int i, empty = 1; pvt = mci->pvt_info; - err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &pvt->nbcfg); - if (err) - debugf0("Reading K8_NBCFG failed\n"); + amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &pvt->nbcfg); debugf0("NBCFG= 0x%x CHIPKILL= %s DRAM ECC= %s\n", pvt->nbcfg, (pvt->nbcfg & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", @@ -2734,7 +2642,6 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on) static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; - int err = 0; u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn; if (!ecc_enable_override) @@ -2744,9 +2651,7 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) "'ecc_enable_override' parameter is active, " "Enabling AMD ECC hardware now: CAUTION\n"); - err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value); - if (err) - debugf0("Reading K8_NBCTL failed\n"); + amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCTL, &value); /* turn on UECCn and CECCEn bits */ pvt->old_nbctl = value & mask; @@ -2759,9 +2664,7 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) amd64_printk(KERN_WARNING, "Error enabling ECC reporting over " "MCGCTL!\n"); - err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value); - if (err) - debugf0("Reading K8_NBCFG failed\n"); + amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value); debugf0("NBCFG(1)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", @@ -2776,9 +2679,7 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) value |= K8_NBCFG_ECC_ENABLE; pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value); - err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value); - if (err) - debugf0("Reading K8_NBCFG failed\n"); + amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value); if (!(value & K8_NBCFG_ECC_ENABLE)) { amd64_printk(KERN_WARNING, @@ -2798,15 +2699,12 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) { - int err = 0; u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn; if (!pvt->nbctl_mcgctl_saved) return; - err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value); - if (err) - debugf0("Reading K8_NBCTL failed\n"); + amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCTL, &value); value &= ~mask; value |= pvt->old_nbctl; @@ -2832,13 +2730,10 @@ static const char *ecc_warning = static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) { u32 value; - int err = 0; u8 ecc_enabled = 0; bool nb_mce_en = false; - err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value); - if (err) - debugf0("Reading K8_NBCTL failed\n"); + amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value); ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE); if (!ecc_enabled) diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index bba6c944ff1..16f2df449a0 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -565,6 +565,22 @@ static inline struct low_ops *family_ops(int index) return &amd64_family_types[index].ops; } +static inline int amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, + u32 *val, const char *func) +{ + int err = 0; + + err = pci_read_config_dword(pdev, offset, val); + if (err) + amd64_printk(KERN_WARNING, "%s: error reading F%dx%x.\n", + func, PCI_FUNC(pdev->devfn), offset); + + return err; +} + +#define amd64_read_pci_cfg(pdev, offset, val) \ + amd64_read_pci_cfg_dword(pdev, offset, val, __func__) + /* * For future CPU versions, verify the following as new 'slow' rates appear and * modify the necessary skip values for the supported CPU. From 68798e176012750fe8487bcfa0aa66fee21eae3c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 3 Nov 2009 16:18:33 +0100 Subject: [PATCH 06/21] amd64_edac: cleanup DRAM cfg low debug output Carve out the register-specific debug statements into a separate function, clarify meanings of the single bitfields in the register, remove irrelevant output and macros. There should be no functionality change resulting from this patch. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 65 +++++++++++++++++++-------------------- drivers/edac/amd64_edac.h | 2 -- 2 files changed, 32 insertions(+), 35 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 3e5ece6e7c9..cdda8d469ca 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -825,31 +825,42 @@ static enum edac_type amd64_determine_edac_cap(struct amd64_pvt *pvt) static void f10_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt, int ganged); +static void amd64_dump_dramcfg_low(u32 dclr, int chan) +{ + debugf1("F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr); + + debugf1(" DIMM type: %sbuffered; all DIMMs support ECC: %s\n", + (dclr & BIT(16)) ? "un" : "", + (dclr & BIT(19)) ? "yes" : "no"); + + debugf1(" PAR/ERR parity: %s\n", + (dclr & BIT(8)) ? "enabled" : "disabled"); + + debugf1(" DCT 128bit mode width: %s\n", + (dclr & BIT(11)) ? "128b" : "64b"); + + debugf1(" x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n", + (dclr & BIT(12)) ? "yes" : "no", + (dclr & BIT(13)) ? "yes" : "no", + (dclr & BIT(14)) ? "yes" : "no", + (dclr & BIT(15)) ? "yes" : "no"); +} + /* Display and decode various NB registers for debug purposes. */ static void amd64_dump_misc_regs(struct amd64_pvt *pvt) { int ganged; - debugf1(" nbcap:0x%8.08x DctDualCap=%s DualNode=%s 8-Node=%s\n", - pvt->nbcap, - (pvt->nbcap & K8_NBCAP_DCT_DUAL) ? "True" : "False", - (pvt->nbcap & K8_NBCAP_DUAL_NODE) ? "True" : "False", - (pvt->nbcap & K8_NBCAP_8_NODE) ? "True" : "False"); - debugf1(" ECC Capable=%s ChipKill Capable=%s\n", - (pvt->nbcap & K8_NBCAP_SECDED) ? "True" : "False", - (pvt->nbcap & K8_NBCAP_CHIPKILL) ? "True" : "False"); - debugf1(" DramCfg0-low=0x%08x DIMM-ECC=%s Parity=%s Width=%s\n", - pvt->dclr0, - (pvt->dclr0 & BIT(19)) ? "Enabled" : "Disabled", - (pvt->dclr0 & BIT(8)) ? "Enabled" : "Disabled", - (pvt->dclr0 & BIT(11)) ? "128b" : "64b"); - debugf1(" DIMM x4 Present: L0=%s L1=%s L2=%s L3=%s DIMM Type=%s\n", - (pvt->dclr0 & BIT(12)) ? "Y" : "N", - (pvt->dclr0 & BIT(13)) ? "Y" : "N", - (pvt->dclr0 & BIT(14)) ? "Y" : "N", - (pvt->dclr0 & BIT(15)) ? "Y" : "N", - (pvt->dclr0 & BIT(16)) ? "UN-Buffered" : "Buffered"); + debugf1("F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap); + debugf1(" NB two channel DRAM capable: %s\n", + (pvt->nbcap & K8_NBCAP_DCT_DUAL) ? "yes" : "no"); + + debugf1(" ECC capable: %s, ChipKill ECC capable: %s\n", + (pvt->nbcap & K8_NBCAP_SECDED) ? "yes" : "no", + (pvt->nbcap & K8_NBCAP_CHIPKILL) ? "yes" : "no"); + + amd64_dump_dramcfg_low(pvt->dclr0, 0); debugf1(" online-spare: 0x%8.08x\n", pvt->online_spare); @@ -877,20 +888,8 @@ static void amd64_dump_misc_regs(struct amd64_pvt *pvt) } /* Only if NOT ganged does dcl1 have valid info */ - if (!dct_ganging_enabled(pvt)) { - debugf1(" DramCfg1-low=0x%08x DIMM-ECC=%s Parity=%s " - "Width=%s\n", pvt->dclr1, - (pvt->dclr1 & BIT(19)) ? "Enabled" : "Disabled", - (pvt->dclr1 & BIT(8)) ? "Enabled" : "Disabled", - (pvt->dclr1 & BIT(11)) ? "128b" : "64b"); - debugf1(" DIMM x4 Present: L0=%s L1=%s L2=%s L3=%s " - "DIMM Type=%s\n", - (pvt->dclr1 & BIT(12)) ? "Y" : "N", - (pvt->dclr1 & BIT(13)) ? "Y" : "N", - (pvt->dclr1 & BIT(14)) ? "Y" : "N", - (pvt->dclr1 & BIT(15)) ? "Y" : "N", - (pvt->dclr1 & BIT(16)) ? "UN-Buffered" : "Buffered"); - } + if (!dct_ganging_enabled(pvt)) + amd64_dump_dramcfg_low(pvt->dclr1, 1); /* * Determine if ganged and then dump memory sizes for first controller, diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 16f2df449a0..24e280423de 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -384,8 +384,6 @@ enum { #define K8_NBCAP_CORES (BIT(12)|BIT(13)) #define K8_NBCAP_CHIPKILL BIT(4) #define K8_NBCAP_SECDED BIT(3) -#define K8_NBCAP_8_NODE BIT(2) -#define K8_NBCAP_DUAL_NODE BIT(1) #define K8_NBCAP_DCT_DUAL BIT(0) /* MSRs */ From 8de1d91e628c19c1154ca15b1ea1fcc0a098b793 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Oct 2009 13:39:30 +0200 Subject: [PATCH 07/21] amd64_edac: cleanup rest of amd64_dump_misc_regs Clarify bitfields description, add PCI config function/offset names to registers for easy reference, simplify code layout, remove unneeded info. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index cdda8d469ca..c65ad2d57e0 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -862,32 +862,23 @@ static void amd64_dump_misc_regs(struct amd64_pvt *pvt) amd64_dump_dramcfg_low(pvt->dclr0, 0); - debugf1(" online-spare: 0x%8.08x\n", pvt->online_spare); + debugf1("F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare); - if (boot_cpu_data.x86 == 0xf) { - debugf1(" dhar: 0x%8.08x Base=0x%08x Offset=0x%08x\n", - pvt->dhar, dhar_base(pvt->dhar), - k8_dhar_offset(pvt->dhar)); - debugf1(" DramHoleValid=%s\n", - (pvt->dhar & DHAR_VALID) ? "True" : "False"); + debugf1("F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, " + "offset: 0x%08x\n", + pvt->dhar, + dhar_base(pvt->dhar), + (boot_cpu_data.x86 == 0xf) ? k8_dhar_offset(pvt->dhar) + : f10_dhar_offset(pvt->dhar)); - debugf1(" dbam-dkt: 0x%8.08x\n", pvt->dbam0); + debugf1(" DramHoleValid: %s\n", + (pvt->dhar & DHAR_VALID) ? "yes" : "no"); - /* everything below this point is Fam10h and above */ + /* everything below this point is Fam10h and above */ + if (boot_cpu_data.x86 == 0xf) return; - } else { - debugf1(" dhar: 0x%8.08x Base=0x%08x Offset=0x%08x\n", - pvt->dhar, dhar_base(pvt->dhar), - f10_dhar_offset(pvt->dhar)); - debugf1(" DramMemHoistValid=%s DramHoleValid=%s\n", - (pvt->dhar & F10_DRAM_MEM_HOIST_VALID) ? - "True" : "False", - (pvt->dhar & DHAR_VALID) ? - "True" : "False"); - } - - /* Only if NOT ganged does dcl1 have valid info */ + /* Only if NOT ganged does dclr1 have valid info */ if (!dct_ganging_enabled(pvt)) amd64_dump_dramcfg_low(pvt->dclr1, 1); From 8566c4df1690f3862ae338a4c533f4bb5a863f9a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Oct 2009 13:48:28 +0200 Subject: [PATCH 08/21] amd64_edac: dump DIMM sizes on K8 too Extend f10_debug_display_dimm_sizes to dump the logical DIMMs configuration on K8 revF too. Remove the ganged arg since we print the DCT operating mode (ganged vs unganged) earlier. Also, DCT csrow configuration is relevant therefore dump it as KERN_DEBUG instead of only on debug builds. Remove misleading DIMM output since there's no reliable way of mapping of chip selects to actual physical DIMMs. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 42 ++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index c65ad2d57e0..c6d1aed2943 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -822,8 +822,7 @@ static enum edac_type amd64_determine_edac_cap(struct amd64_pvt *pvt) } -static void f10_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt, - int ganged); +static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt); static void amd64_dump_dramcfg_low(u32 dclr, int chan) { @@ -875,8 +874,10 @@ static void amd64_dump_misc_regs(struct amd64_pvt *pvt) (pvt->dhar & DHAR_VALID) ? "yes" : "no"); /* everything below this point is Fam10h and above */ - if (boot_cpu_data.x86 == 0xf) + if (boot_cpu_data.x86 == 0xf) { + amd64_debug_display_dimm_sizes(0, pvt); return; + } /* Only if NOT ganged does dclr1 have valid info */ if (!dct_ganging_enabled(pvt)) @@ -888,10 +889,10 @@ static void amd64_dump_misc_regs(struct amd64_pvt *pvt) */ ganged = dct_ganging_enabled(pvt); - f10_debug_display_dimm_sizes(0, pvt, ganged); + amd64_debug_display_dimm_sizes(0, pvt); if (!ganged) - f10_debug_display_dimm_sizes(1, pvt, ganged); + amd64_debug_display_dimm_sizes(1, pvt); } /* Read in both of DBAM registers */ @@ -1726,23 +1727,31 @@ static int map_dbam_to_csrow_size(int index) } /* - * debug routine to display the memory sizes of a DIMM (ganged or not) and it + * debug routine to display the memory sizes of all logical DIMMs and its * CSROWs as well */ -static void f10_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt, - int ganged) +static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt) { int dimm, size0, size1; u32 dbam; u32 *dcsb; - debugf1(" dbam%d: 0x%8.08x CSROW is %s\n", ctrl, - ctrl ? pvt->dbam1 : pvt->dbam0, - ganged ? "GANGED - dbam1 not used" : "NON-GANGED"); + if (boot_cpu_data.x86 == 0xf) { + /* K8 families < revF not supported yet */ + if (pvt->ext_model < OPTERON_CPU_REV_F) + return; + else + WARN_ON(ctrl != 0); + } + + debugf1("F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n", + ctrl, ctrl ? pvt->dbam1 : pvt->dbam0); dbam = ctrl ? pvt->dbam1 : pvt->dbam0; dcsb = ctrl ? pvt->dcsb1 : pvt->dcsb0; + edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl); + /* Dump memory sizes for DIMM and its CSROWs */ for (dimm = 0; dimm < 4; dimm++) { @@ -1754,15 +1763,8 @@ static void f10_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt, if (dcsb[dimm*2 + 1] & K8_DCSB_CS_ENABLE) size1 = map_dbam_to_csrow_size(DBAM_DIMM(dimm, dbam)); - debugf1(" CTRL-%d DIMM-%d=%5dMB CSROW-%d=%5dMB " - "CSROW-%d=%5dMB\n", - ctrl, - dimm, - size0 + size1, - dimm * 2, - size0, - dimm * 2 + 1, - size1); + edac_printk(KERN_DEBUG, EDAC_MC, " %d: %5dMB %d: %5dMB\n", + dimm * 2, size0, dimm * 2 + 1, size1); } } From d16149e8c378ab7011e600980af51d2477aa5307 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Oct 2009 19:55:49 +0200 Subject: [PATCH 09/21] amd64_edac: cleanup f10_early_channel_count Do not read DCLR[01] again since this is done in amd64_read_mc_registers() earlier. There can be more than two physical DIMMs present so clamp the channels value to max 2. Also, do not report DCT data width - it is also done earlier. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index c6d1aed2943..ed9b07a4cf8 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1204,28 +1204,21 @@ static int f10_early_channel_count(struct amd64_pvt *pvt) int i, j, channels = 0; u32 dbam; - if (amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0)) - goto err_reg; - - if (amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1)) - goto err_reg; - /* If we are in 128 bit mode, then we are using 2 channels */ if (pvt->dclr0 & F10_WIDTH_128) { - debugf0("Data WIDTH is 128 bits - 2 channels\n"); channels = 2; return channels; } /* - * Need to check if in UN-ganged mode: In such, there are 2 channels, - * but they are NOT in 128 bit mode and thus the above 'dcl0' status bit - * will be OFF. + * Need to check if in unganged mode: In such, there are 2 channels, + * but they are not in 128 bit mode and thus the above 'dclr0' status + * bit will be OFF. * * Need to check DCT0[0] and DCT1[0] to see if only one of them has * their CSEnable bit on. If so, then SINGLE DIMM case. */ - debugf0("Data WIDTH is NOT 128 bits - need more decoding\n"); + debugf0("Data width is not 128 bits - need more decoding\n"); /* * Check DRAM Bank Address Mapping values for each DIMM to see if there @@ -1244,6 +1237,9 @@ static int f10_early_channel_count(struct amd64_pvt *pvt) } } + if (channels > 2) + channels = 2; + debugf0("MCT channel count: %d\n", channels); return channels; From 1433eb9903408d1801d19a9c49893120874abc12 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 21 Oct 2009 13:44:36 +0200 Subject: [PATCH 10/21] amd64_edac: enhance address to DRAM bank mapping Add cs mode to cs size mapping tables for DDR2 and DDR3 and F10 and all K8 flavors and remove klugdy table of pseudo values. Add a low_ops->dbam_to_cs member which is family-specific and replaces low_ops->dbam_map_to_pages since the pages calculation is a one liner now. Further cleanups, while at it: - shorten family name defines - align amd64_family_types struct members Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 194 ++++++++++++++++++-------------------- drivers/edac/amd64_edac.h | 32 +++---- 2 files changed, 107 insertions(+), 119 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index ed9b07a4cf8..7e6705449dc 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -19,26 +19,48 @@ static struct mem_ctl_info *mci_lookup[EDAC_MAX_NUMNODES]; static struct amd64_pvt *pvt_lookup[EDAC_MAX_NUMNODES]; /* - * See F2x80 for K8 and F2x[1,0]80 for Fam10 and later. The table below is only - * for DDR2 DRAM mapping. + * Address to DRAM bank mapping: see F2x80 for K8 and F2x[1,0]80 for Fam10 and + * later. */ -u32 revf_quad_ddr2_shift[] = { - 0, /* 0000b NULL DIMM (128mb) */ - 28, /* 0001b 256mb */ - 29, /* 0010b 512mb */ - 29, /* 0011b 512mb */ - 29, /* 0100b 512mb */ - 30, /* 0101b 1gb */ - 30, /* 0110b 1gb */ - 31, /* 0111b 2gb */ - 31, /* 1000b 2gb */ - 32, /* 1001b 4gb */ - 32, /* 1010b 4gb */ - 33, /* 1011b 8gb */ - 0, /* 1100b future */ - 0, /* 1101b future */ - 0, /* 1110b future */ - 0 /* 1111b future */ +static int ddr2_dbam_revCG[] = { + [0] = 32, + [1] = 64, + [2] = 128, + [3] = 256, + [4] = 512, + [5] = 1024, + [6] = 2048, +}; + +static int ddr2_dbam_revD[] = { + [0] = 32, + [1] = 64, + [2 ... 3] = 128, + [4] = 256, + [5] = 512, + [6] = 256, + [7] = 512, + [8 ... 9] = 1024, + [10] = 2048, +}; + +static int ddr2_dbam[] = { [0] = 128, + [1] = 256, + [2 ... 4] = 512, + [5 ... 6] = 1024, + [7 ... 8] = 2048, + [9 ... 10] = 4096, + [11] = 8192, +}; + +static int ddr3_dbam[] = { [0] = -1, + [1] = 256, + [2] = 512, + [3 ... 4] = -1, + [5 ... 6] = 1024, + [7 ... 8] = 2048, + [9 ... 10] = 4096, + [11] = 8192, }; /* @@ -187,7 +209,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw) /* Map from a CSROW entry to the mask entry that operates on it */ static inline u32 amd64_map_to_dcs_mask(struct amd64_pvt *pvt, int csrow) { - if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_F) + if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) return csrow; else return csrow >> 1; @@ -435,7 +457,7 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, u64 base; /* only revE and later have the DRAM Hole Address Register */ - if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_E) { + if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) { debugf1(" revision %d for node %d does not support DHAR\n", pvt->ext_model, pvt->mc_node_id); return 1; @@ -795,7 +817,7 @@ static void amd64_cpu_display_info(struct amd64_pvt *pvt) edac_printk(KERN_DEBUG, EDAC_MC, "F10h CPU detected\n"); else if (boot_cpu_data.x86 == 0xf) edac_printk(KERN_DEBUG, EDAC_MC, "%s detected\n", - (pvt->ext_model >= OPTERON_CPU_REV_F) ? + (pvt->ext_model >= K8_REV_F) ? "Rev F or later" : "Rev E or earlier"); else /* we'll hardly ever ever get here */ @@ -811,7 +833,7 @@ static enum edac_type amd64_determine_edac_cap(struct amd64_pvt *pvt) int bit; enum dev_type edac_cap = EDAC_FLAG_NONE; - bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= OPTERON_CPU_REV_F) + bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= K8_REV_F) ? 19 : 17; @@ -936,7 +958,7 @@ static void amd64_read_dbam_reg(struct amd64_pvt *pvt) static void amd64_set_dct_base_and_mask(struct amd64_pvt *pvt) { - if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_F) { + if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) { pvt->dcsb_base = REV_E_DCSB_BASE_BITS; pvt->dcsm_mask = REV_E_DCSM_MASK_BITS; pvt->dcs_mask_notused = REV_E_DCS_NOTUSED_BITS; @@ -1009,7 +1031,7 @@ static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt) { enum mem_type type; - if (boot_cpu_data.x86 >= 0x10 || pvt->ext_model >= OPTERON_CPU_REV_F) { + if (boot_cpu_data.x86 >= 0x10 || pvt->ext_model >= K8_REV_F) { /* Rev F and later */ type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2; } else { @@ -1042,7 +1064,7 @@ static int k8_early_channel_count(struct amd64_pvt *pvt) if (err) return err; - if ((boot_cpu_data.x86_model >> 4) >= OPTERON_CPU_REV_F) { + if ((boot_cpu_data.x86_model >> 4) >= K8_REV_F) { /* RevF (NPT) and later */ flag = pvt->dclr0 & F10_WIDTH_128; } else { @@ -1158,36 +1180,18 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, } } -/* - * determrine the number of PAGES in for this DIMM's size based on its DRAM - * Address Mapping. - * - * First step is to calc the number of bits to shift a value of 1 left to - * indicate show many pages. Start with the DBAM value as the starting bits, - * then proceed to adjust those shift bits, based on CPU rev and the table. - * See BKDG on the DBAM - */ -static int k8_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map) +static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, int cs_mode) { - int nr_pages; + int *dbam_map; - if (pvt->ext_model >= OPTERON_CPU_REV_F) { - nr_pages = 1 << (revf_quad_ddr2_shift[dram_map] - PAGE_SHIFT); - } else { - /* - * RevE and less section; this line is tricky. It collapses the - * table used by RevD and later to one that matches revisions CG - * and earlier. - */ - dram_map -= (pvt->ext_model >= OPTERON_CPU_REV_D) ? - (dram_map > 8 ? 4 : (dram_map > 5 ? - 3 : (dram_map > 2 ? 1 : 0))) : 0; + if (pvt->ext_model >= K8_REV_F) + dbam_map = ddr2_dbam; + else if (pvt->ext_model >= K8_REV_D) + dbam_map = ddr2_dbam_revD; + else + dbam_map = ddr2_dbam_revCG; - /* 25 shift is 32MiB minimum DIMM size in RevE and prior */ - nr_pages = 1 << (dram_map + 25 - PAGE_SHIFT); - } - - return nr_pages; + return dbam_map[cs_mode]; } /* @@ -1249,9 +1253,16 @@ err_reg: } -static int f10_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map) +static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, int cs_mode) { - return 1 << (revf_quad_ddr2_shift[dram_map] - PAGE_SHIFT); + int *dbam_map; + + if (pvt->dchr0 & DDR3_MODE || pvt->dchr1 & DDR3_MODE) + dbam_map = ddr3_dbam; + else + dbam_map = ddr2_dbam; + + return dbam_map[cs_mode]; } /* Enable extended configuration access via 0xCF8 feature */ @@ -1705,23 +1716,6 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, } } -/* - * Input (@index) is the DBAM DIMM value (1 of 4) used as an index into a shift - * table (revf_quad_ddr2_shift) which starts at 128MB DIMM size. Index of 0 - * indicates an empty DIMM slot, as reported by Hardware on empty slots. - * - * Normalize to 128MB by subracting 27 bit shift. - */ -static int map_dbam_to_csrow_size(int index) -{ - int mega_bytes = 0; - - if (index > 0 && index <= DBAM_MAX_VALUE) - mega_bytes = ((128 << (revf_quad_ddr2_shift[index]-27))); - - return mega_bytes; -} - /* * debug routine to display the memory sizes of all logical DIMMs and its * CSROWs as well @@ -1734,7 +1728,7 @@ static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt) if (boot_cpu_data.x86 == 0xf) { /* K8 families < revF not supported yet */ - if (pvt->ext_model < OPTERON_CPU_REV_F) + if (pvt->ext_model < K8_REV_F) return; else WARN_ON(ctrl != 0); @@ -1753,11 +1747,11 @@ static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt) size0 = 0; if (dcsb[dimm*2] & K8_DCSB_CS_ENABLE) - size0 = map_dbam_to_csrow_size(DBAM_DIMM(dimm, dbam)); + size0 = pvt->ops->dbam_to_cs(pvt, DBAM_DIMM(dimm, dbam)); size1 = 0; if (dcsb[dimm*2 + 1] & K8_DCSB_CS_ENABLE) - size1 = map_dbam_to_csrow_size(DBAM_DIMM(dimm, dbam)); + size1 = pvt->ops->dbam_to_cs(pvt, DBAM_DIMM(dimm, dbam)); edac_printk(KERN_DEBUG, EDAC_MC, " %d: %5dMB %d: %5dMB\n", dimm * 2, size0, dimm * 2 + 1, size1); @@ -1780,8 +1774,8 @@ static int f10_probe_valid_hardware(struct amd64_pvt *pvt) * If we are on a DDR3 machine, we don't know yet if * we support that properly at this time */ - if ((pvt->dchr0 & F10_DCHR_Ddr3Mode) || - (pvt->dchr1 & F10_DCHR_Ddr3Mode)) { + if ((pvt->dchr0 & DDR3_MODE) || + (pvt->dchr1 & DDR3_MODE)) { amd64_printk(KERN_WARNING, "%s() This machine is running with DDR3 memory. " @@ -1817,11 +1811,11 @@ static struct amd64_family_type amd64_family_types[] = { .addr_f1_ctl = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, .misc_f3_ctl = PCI_DEVICE_ID_AMD_K8_NB_MISC, .ops = { - .early_channel_count = k8_early_channel_count, - .get_error_address = k8_get_error_address, - .read_dram_base_limit = k8_read_dram_base_limit, - .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow, - .dbam_map_to_pages = k8_dbam_map_to_pages, + .early_channel_count = k8_early_channel_count, + .get_error_address = k8_get_error_address, + .read_dram_base_limit = k8_read_dram_base_limit, + .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow, + .dbam_to_cs = k8_dbam_to_chip_select, } }, [F10_CPUS] = { @@ -1829,13 +1823,13 @@ static struct amd64_family_type amd64_family_types[] = { .addr_f1_ctl = PCI_DEVICE_ID_AMD_10H_NB_MAP, .misc_f3_ctl = PCI_DEVICE_ID_AMD_10H_NB_MISC, .ops = { - .probe_valid_hardware = f10_probe_valid_hardware, - .early_channel_count = f10_early_channel_count, - .get_error_address = f10_get_error_address, - .read_dram_base_limit = f10_read_dram_base_limit, - .read_dram_ctl_register = f10_read_dram_ctl_register, - .map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow, - .dbam_map_to_pages = f10_dbam_map_to_pages, + .probe_valid_hardware = f10_probe_valid_hardware, + .early_channel_count = f10_early_channel_count, + .get_error_address = f10_get_error_address, + .read_dram_base_limit = f10_read_dram_base_limit, + .read_dram_ctl_register = f10_read_dram_ctl_register, + .map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow, + .dbam_to_cs = f10_dbam_to_chip_select, } }, [F11_CPUS] = { @@ -1843,13 +1837,13 @@ static struct amd64_family_type amd64_family_types[] = { .addr_f1_ctl = PCI_DEVICE_ID_AMD_11H_NB_MAP, .misc_f3_ctl = PCI_DEVICE_ID_AMD_11H_NB_MISC, .ops = { - .probe_valid_hardware = f10_probe_valid_hardware, - .early_channel_count = f10_early_channel_count, - .get_error_address = f10_get_error_address, - .read_dram_base_limit = f10_read_dram_base_limit, - .read_dram_ctl_register = f10_read_dram_ctl_register, - .map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow, - .dbam_map_to_pages = f10_dbam_map_to_pages, + .probe_valid_hardware = f10_probe_valid_hardware, + .early_channel_count = f10_early_channel_count, + .get_error_address = f10_get_error_address, + .read_dram_base_limit = f10_read_dram_base_limit, + .read_dram_ctl_register = f10_read_dram_ctl_register, + .map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow, + .dbam_to_cs = f10_dbam_to_chip_select, } }, }; @@ -2425,7 +2419,7 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt) */ static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt) { - u32 dram_map, nr_pages; + u32 cs_mode, nr_pages; /* * The math on this doesn't look right on the surface because x/2*4 can @@ -2434,9 +2428,9 @@ static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt) * number of bits to shift the DBAM register to extract the proper CSROW * field. */ - dram_map = (pvt->dbam0 >> ((csrow_nr / 2) * 4)) & 0xF; + cs_mode = (pvt->dbam0 >> ((csrow_nr / 2) * 4)) & 0xF; - nr_pages = pvt->ops->dbam_map_to_pages(pvt, dram_map); + nr_pages = pvt->ops->dbam_to_cs(pvt, cs_mode) << (20 - PAGE_SHIFT); /* * If dual channel then double the memory size of single channel. @@ -2444,7 +2438,7 @@ static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt) */ nr_pages <<= (pvt->channel_count - 1); - debugf0(" (csrow=%d) DBAM map index= %d\n", csrow_nr, dram_map); + debugf0(" (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode); debugf0(" nr_pages= %u channel-count = %d\n", nr_pages, pvt->channel_count); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 24e280423de..f8c187ea6e3 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -135,13 +135,9 @@ #define EDAC_MAX_NUMNODES 8 /* Extended Model from CPUID, for CPU Revision numbers */ -#define OPTERON_CPU_LE_REV_C 0 -#define OPTERON_CPU_REV_D 1 -#define OPTERON_CPU_REV_E 2 - -/* NPT processors have the following Extended Models */ -#define OPTERON_CPU_REV_F 4 -#define OPTERON_CPU_REV_FA 5 +#define K8_REV_D 1 +#define K8_REV_E 2 +#define K8_REV_F 4 /* Hardware limit on ChipSelect rows per MC and processors per system */ #define MAX_CS_COUNT 8 @@ -243,7 +239,7 @@ #define F10_DCHR_1 0x194 #define F10_DCHR_FOUR_RANK_DIMM BIT(18) -#define F10_DCHR_Ddr3Mode BIT(8) +#define DDR3_MODE BIT(8) #define F10_DCHR_MblMode BIT(6) @@ -501,7 +497,6 @@ struct scrubrate { }; extern struct scrubrate scrubrates[23]; -extern u32 revf_quad_ddr2_shift[16]; extern const char *tt_msgs[4]; extern const char *ll_msgs[4]; extern const char *rrrr_msgs[16]; @@ -531,17 +526,16 @@ extern struct mcidev_sysfs_attribute amd64_dbg_attrs[NUM_DBG_ATTRS], * functions and per device encoding/decoding logic. */ struct low_ops { - int (*probe_valid_hardware)(struct amd64_pvt *pvt); - int (*early_channel_count)(struct amd64_pvt *pvt); + int (*probe_valid_hardware) (struct amd64_pvt *pvt); + int (*early_channel_count) (struct amd64_pvt *pvt); - u64 (*get_error_address)(struct mem_ctl_info *mci, - struct err_regs *info); - void (*read_dram_base_limit)(struct amd64_pvt *pvt, int dram); - void (*read_dram_ctl_register)(struct amd64_pvt *pvt); - void (*map_sysaddr_to_csrow)(struct mem_ctl_info *mci, - struct err_regs *info, - u64 SystemAddr); - int (*dbam_map_to_pages)(struct amd64_pvt *pvt, int dram_map); + u64 (*get_error_address) (struct mem_ctl_info *mci, + struct err_regs *info); + void (*read_dram_base_limit) (struct amd64_pvt *pvt, int dram); + void (*read_dram_ctl_register) (struct amd64_pvt *pvt); + void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, + struct err_regs *info, u64 SystemAddr); + int (*dbam_to_cs) (struct amd64_pvt *pvt, int cs_mode); }; struct amd64_family_type { From ad858bfa14277a22a343bb89e99c9af0a97fc456 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 26 Oct 2009 14:55:56 +0100 Subject: [PATCH 11/21] amd64_edac: remove superfluous dbg printk Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 7e6705449dc..86742aaa5e5 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1336,9 +1336,6 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram) /* Read from the ECS data register for the HIGH portion */ amd64_read_pci_cfg(pvt->addr_f1_ctl, high_offset, &high_limit); - debugf0(" HW Regs: BASE=0x%08x-%08x LIMIT= 0x%08x-%08x\n", - high_base, low_base, high_limit, low_limit); - pvt->dram_DstNode[dram] = (low_limit & 0x7); pvt->dram_IntlvSel[dram] = (low_limit >> 8) & 0x7; From 44e9e2ee2196fdec9893371d36c33e703965f804 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 26 Oct 2009 15:00:19 +0100 Subject: [PATCH 12/21] amd64_edac: rename StinkyIdentifier SystemAddress -> sys_addr Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 86742aaa5e5..202f08e543e 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1117,7 +1117,7 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram) static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, struct err_regs *info, - u64 SystemAddress) + u64 sys_addr) { struct mem_ctl_info *src_mci; unsigned short syndrome; @@ -1152,28 +1152,28 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, * was obtained from email communication with someone at AMD. * (Wish the email was placed in this comment - norsk) */ - channel = ((SystemAddress & BIT(3)) != 0); + channel = ((sys_addr & BIT(3)) != 0); } /* * Find out which node the error address belongs to. This may be * different from the node that detected the error. */ - src_mci = find_mc_by_sys_addr(mci, SystemAddress); + src_mci = find_mc_by_sys_addr(mci, sys_addr); if (!src_mci) { amd64_mc_printk(mci, KERN_ERR, "failed to map error address 0x%lx to a node\n", - (unsigned long)SystemAddress); + (unsigned long)sys_addr); edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); return; } - /* Now map the SystemAddress to a CSROW */ - csrow = sys_addr_to_csrow(src_mci, SystemAddress); + /* Now map the sys_addr to a CSROW */ + csrow = sys_addr_to_csrow(src_mci, sys_addr); if (csrow < 0) { edac_mc_handle_ce_no_info(src_mci, EDAC_MOD_STR); } else { - error_address_to_page_and_offset(SystemAddress, &page, &offset); + error_address_to_page_and_offset(sys_addr, &page, &offset); edac_mc_handle_ce(src_mci, page, offset, syndrome, csrow, channel, EDAC_MOD_STR); @@ -2108,7 +2108,7 @@ static void amd64_handle_ce(struct mem_ctl_info *mci, struct err_regs *info) { struct amd64_pvt *pvt = mci->pvt_info; - u64 SystemAddress; + u64 sys_addr; /* Ensure that the Error Address is VALID */ if ((info->nbsh & K8_NBSH_VALID_ERROR_ADDR) == 0) { @@ -2118,12 +2118,12 @@ static void amd64_handle_ce(struct mem_ctl_info *mci, return; } - SystemAddress = extract_error_address(mci, info); + sys_addr = extract_error_address(mci, info); amd64_mc_printk(mci, KERN_ERR, - "CE ERROR_ADDRESS= 0x%llx\n", SystemAddress); + "CE ERROR_ADDRESS= 0x%llx\n", sys_addr); - pvt->ops->map_sysaddr_to_csrow(mci, info, SystemAddress); + pvt->ops->map_sysaddr_to_csrow(mci, info, sys_addr); } /* Handle any Un-correctable Errors (UEs) */ @@ -2131,7 +2131,7 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct err_regs *info) { int csrow; - u64 SystemAddress; + u64 sys_addr; u32 page, offset; struct mem_ctl_info *log_mci, *src_mci = NULL; @@ -2144,31 +2144,31 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, return; } - SystemAddress = extract_error_address(mci, info); + sys_addr = extract_error_address(mci, info); /* * Find out which node the error address belongs to. This may be * different from the node that detected the error. */ - src_mci = find_mc_by_sys_addr(mci, SystemAddress); + src_mci = find_mc_by_sys_addr(mci, sys_addr); if (!src_mci) { amd64_mc_printk(mci, KERN_CRIT, "ERROR ADDRESS (0x%lx) value NOT mapped to a MC\n", - (unsigned long)SystemAddress); + (unsigned long)sys_addr); edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR); return; } log_mci = src_mci; - csrow = sys_addr_to_csrow(log_mci, SystemAddress); + csrow = sys_addr_to_csrow(log_mci, sys_addr); if (csrow < 0) { amd64_mc_printk(mci, KERN_CRIT, "ERROR_ADDRESS (0x%lx) value NOT mapped to 'csrow'\n", - (unsigned long)SystemAddress); + (unsigned long)sys_addr); edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR); } else { - error_address_to_page_and_offset(SystemAddress, &page, &offset); + error_address_to_page_and_offset(sys_addr, &page, &offset); edac_mc_handle_ue(log_mci, page, offset, csrow, EDAC_MOD_STR); } } From 1f6bcee75e83bc5b580bfa5b909b1b5ce106b800 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 13 Nov 2009 14:02:57 +0100 Subject: [PATCH 13/21] amd64_edac: remove unneeded extract_error_address wrapper Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 202f08e543e..c403af2e008 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -763,21 +763,6 @@ static void find_csrow_limits(struct mem_ctl_info *mci, int csrow, *input_addr_max = base | mask | pvt->dcs_mask_notused; } -/* - * Extract error address from MCA NB Address Low (section 3.6.4.5) and MCA NB - * Address High (section 3.6.4.6) register values and return the result. Address - * is located in the info structure (nbeah and nbeal), the encoding is device - * specific. - */ -static u64 extract_error_address(struct mem_ctl_info *mci, - struct err_regs *info) -{ - struct amd64_pvt *pvt = mci->pvt_info; - - return pvt->ops->get_error_address(mci, info); -} - - /* Map the Error address to a PAGE and PAGE OFFSET. */ static inline void error_address_to_page_and_offset(u64 error_address, u32 *page, u32 *offset) @@ -2118,7 +2103,7 @@ static void amd64_handle_ce(struct mem_ctl_info *mci, return; } - sys_addr = extract_error_address(mci, info); + sys_addr = pvt->ops->get_error_address(mci, info); amd64_mc_printk(mci, KERN_ERR, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr); @@ -2130,10 +2115,11 @@ static void amd64_handle_ce(struct mem_ctl_info *mci, static void amd64_handle_ue(struct mem_ctl_info *mci, struct err_regs *info) { + struct amd64_pvt *pvt = mci->pvt_info; + struct mem_ctl_info *log_mci, *src_mci = NULL; int csrow; u64 sys_addr; u32 page, offset; - struct mem_ctl_info *log_mci, *src_mci = NULL; log_mci = mci; @@ -2144,7 +2130,7 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, return; } - sys_addr = extract_error_address(mci, info); + sys_addr = pvt->ops->get_error_address(mci, info); /* * Find out which node the error address belongs to. This may be From cec7924f568eddcccdbfd814b136554b1b8dc624 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 27 Oct 2009 19:12:02 +0100 Subject: [PATCH 14/21] edac, mce: update AMD F10h revD check F10h revD start with model number 8. Signed-off-by: Borislav Petkov --- drivers/edac/edac_mce_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c index 689cc6a6214..c693fcc2213 100644 --- a/drivers/edac/edac_mce_amd.c +++ b/drivers/edac/edac_mce_amd.c @@ -306,7 +306,7 @@ void amd_decode_nb_mce(int node_id, struct err_regs *regs, int handle_errors) * value encoding has changed so interpret those differently */ if ((boot_cpu_data.x86 == 0x10) && - (boot_cpu_data.x86_model > 8)) { + (boot_cpu_data.x86_model > 7)) { if (regs->nbsh & K8_NBSH_ERR_CPU_VAL) pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf)); } else { From 239642fe19adc19ba0a69e96f3b1904dfd6a3b9f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 12 Nov 2009 15:33:16 +0100 Subject: [PATCH 15/21] edac: add memory types strings for debugging Instead of using deeply-nested conditionals for dumping the DIMM type in debug mode, add a strings array of the supported DIMM types. This is useful in cases where an edac driver supports multiple DRAM types and is only defined in debug builds. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 5 +---- drivers/edac/edac_core.h | 1 + drivers/edac/edac_mc.c | 24 ++++++++++++++++++++++++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index c403af2e008..708d065efc9 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1024,10 +1024,7 @@ static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt) type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR; } - debugf1(" Memory type is: %s\n", - (type == MEM_DDR2) ? "MEM_DDR2" : - (type == MEM_RDDR2) ? "MEM_RDDR2" : - (type == MEM_DDR) ? "MEM_DDR" : "MEM_RDDR"); + debugf1(" Memory type is: %s\n", edac_mem_types[type]); return type; } diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index 12f355cafdb..001b2e797fb 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -74,6 +74,7 @@ #ifdef CONFIG_EDAC_DEBUG extern int edac_debug_level; +extern const char *edac_mem_types[]; #ifndef CONFIG_EDAC_DEBUG_VERBOSE #define edac_debug_printk(level, fmt, arg...) \ diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index b629c41756f..3630308e7b8 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -76,6 +76,30 @@ static void edac_mc_dump_mci(struct mem_ctl_info *mci) debugf3("\tpvt_info = %p\n\n", mci->pvt_info); } +/* + * keep those in sync with the enum mem_type + */ +const char *edac_mem_types[] = { + "Empty csrow", + "Reserved csrow type", + "Unknown csrow type", + "Fast page mode RAM", + "Extended data out RAM", + "Burst Extended data out RAM", + "Single data rate SDRAM", + "Registered single data rate SDRAM", + "Double data rate SDRAM", + "Registered Double data rate SDRAM", + "Rambus DRAM", + "Unbuffered DDR2 RAM", + "Fully buffered DDR2", + "Registered DDR2 RAM", + "Rambus XDR", + "Unbuffered DDR3 RAM", + "Registered DDR3 RAM", +}; +EXPORT_SYMBOL_GPL(edac_mem_types); + #endif /* CONFIG_EDAC_DEBUG */ /* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'. From 6b4c0bdeb00f35cad2d3e0dc0d97bb4950a8f86e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 12 Nov 2009 15:37:57 +0100 Subject: [PATCH 16/21] amd64_edac: detect DDR3 memory type Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 708d065efc9..d9cde7132e8 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1017,10 +1017,11 @@ static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt) enum mem_type type; if (boot_cpu_data.x86 >= 0x10 || pvt->ext_model >= K8_REV_F) { - /* Rev F and later */ - type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2; + if (pvt->dchr0 & DDR3_MODE) + type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; + else + type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2; } else { - /* Rev E and earlier */ type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR; } From 986a42a25059143d153e30a0cc36630bd0e623c6 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 11 Nov 2009 20:42:46 +0100 Subject: [PATCH 17/21] amd64_edac: remove early hw support check The .probe_valid_hardware low_ops member checked whether the DCTs are in DDR3 mode and bailed out if so. Now that all the needed changes for DDR3 support is in place, remove it. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 47 +-------------------------------------- drivers/edac/amd64_edac.h | 1 - 2 files changed, 1 insertion(+), 47 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index d9cde7132e8..351334ead69 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1738,42 +1738,6 @@ static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt) } } -/* - * Very early hardware probe on pci_probe thread to determine if this module - * supports the hardware. - * - * Return: - * 0 for OK - * 1 for error - */ -static int f10_probe_valid_hardware(struct amd64_pvt *pvt) -{ - int ret = 0; - - /* - * If we are on a DDR3 machine, we don't know yet if - * we support that properly at this time - */ - if ((pvt->dchr0 & DDR3_MODE) || - (pvt->dchr1 & DDR3_MODE)) { - - amd64_printk(KERN_WARNING, - "%s() This machine is running with DDR3 memory. " - "This is not currently supported. " - "DCHR0=0x%x DCHR1=0x%x\n", - __func__, pvt->dchr0, pvt->dchr1); - - amd64_printk(KERN_WARNING, - " Contact '%s' module MAINTAINER to help add" - " support.\n", - EDAC_MOD_STR); - - ret = 1; - - } - return ret; -} - /* * There currently are 3 types type of MC devices for AMD Athlon/Opterons * (as per PCI DEVICE_IDs): @@ -1803,7 +1767,6 @@ static struct amd64_family_type amd64_family_types[] = { .addr_f1_ctl = PCI_DEVICE_ID_AMD_10H_NB_MAP, .misc_f3_ctl = PCI_DEVICE_ID_AMD_10H_NB_MISC, .ops = { - .probe_valid_hardware = f10_probe_valid_hardware, .early_channel_count = f10_early_channel_count, .get_error_address = f10_get_error_address, .read_dram_base_limit = f10_read_dram_base_limit, @@ -1817,7 +1780,6 @@ static struct amd64_family_type amd64_family_types[] = { .addr_f1_ctl = PCI_DEVICE_ID_AMD_11H_NB_MAP, .misc_f3_ctl = PCI_DEVICE_ID_AMD_11H_NB_MISC, .ops = { - .probe_valid_hardware = f10_probe_valid_hardware, .early_channel_count = f10_early_channel_count, .get_error_address = f10_get_error_address, .read_dram_base_limit = f10_read_dram_base_limit, @@ -2851,17 +2813,10 @@ static int amd64_init_2nd_stage(struct amd64_pvt *pvt) { int node_id = pvt->mc_node_id; struct mem_ctl_info *mci; - int ret, err = 0; + int ret; amd64_read_mc_registers(pvt); - ret = -ENODEV; - if (pvt->ops->probe_valid_hardware) { - err = pvt->ops->probe_valid_hardware(pvt); - if (err) - goto err_exit; - } - /* * We need to determine how many memory channels there are. Then use * that information for calculating the size of the dynamic instance diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index f8c187ea6e3..e84f164034d 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -526,7 +526,6 @@ extern struct mcidev_sysfs_attribute amd64_dbg_attrs[NUM_DBG_ATTRS], * functions and per device encoding/decoding logic. */ struct low_ops { - int (*probe_valid_hardware) (struct amd64_pvt *pvt); int (*early_channel_count) (struct amd64_pvt *pvt); u64 (*get_error_address) (struct mem_ctl_info *mci, From bfc04aec7d687282b5e7adb26799d3eb50d05f01 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 12 Nov 2009 19:05:07 +0100 Subject: [PATCH 18/21] amd64_edac: add a leaner syndrome decoding algorithm Instead of using the whole syndrome tables for channel decoding, use a set of eigenvectors with which the tables can be generated to search for the syndrome in error. The algorithm operates independently of symbol size and can be used for both x4 and x8 syndromes. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 280 +++++++++++++++++++++----------------- 1 file changed, 154 insertions(+), 126 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 351334ead69..0969a404f84 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -792,7 +792,7 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr) return csrow; } -static int get_channel_from_ecc_syndrome(unsigned short syndrome); +static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); static void amd64_cpu_display_info(struct amd64_pvt *pvt) { @@ -1113,7 +1113,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, /* CHIPKILL enabled */ if (info->nbcfg & K8_NBCFG_CHIPKILL) { - channel = get_channel_from_ecc_syndrome(syndrome); + channel = get_channel_from_ecc_syndrome(mci, syndrome); if (channel < 0) { /* * Syndrome didn't map, so we don't know which of the @@ -1672,7 +1672,7 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, * syndrome to isolate which channel the error was on. */ if (pvt->nbcfg & K8_NBCFG_CHIPKILL) - chan = get_channel_from_ecc_syndrome(syndrome); + chan = get_channel_from_ecc_syndrome(mci, syndrome); if (chan >= 0) { edac_mc_handle_ce(mci, page, offset, syndrome, @@ -1808,142 +1808,170 @@ static struct pci_dev *pci_get_related_function(unsigned int vendor, } /* - * syndrome mapping table for ECC ChipKill devices + * These are tables of eigenvectors (one per line) which can be used for the + * construction of the syndrome tables. The modified syndrome search algorithm + * uses those to find the symbol in error and thus the DIMM. * - * The comment in each row is the token (nibble) number that is in error. - * The least significant nibble of the syndrome is the mask for the bits - * that are in error (need to be toggled) for the particular nibble. - * - * Each row contains 16 entries. - * The first entry (0th) is the channel number for that row of syndromes. - * The remaining 15 entries are the syndromes for the respective Error - * bit mask index. - * - * 1st index entry is 0x0001 mask, indicating that the rightmost bit is the - * bit in error. - * The 2nd index entry is 0x0010 that the second bit is damaged. - * The 3rd index entry is 0x0011 indicating that the rightmost 2 bits - * are damaged. - * Thus so on until index 15, 0x1111, whose entry has the syndrome - * indicating that all 4 bits are damaged. - * - * A search is performed on this table looking for a given syndrome. - * - * See the AMD documentation for ECC syndromes. This ECC table is valid - * across all the versions of the AMD64 processors. - * - * A fast lookup is to use the LAST four bits of the 16-bit syndrome as a - * COLUMN index, then search all ROWS of that column, looking for a match - * with the input syndrome. The ROW value will be the token number. - * - * The 0'th entry on that row, can be returned as the CHANNEL (0 or 1) of this - * error. + * Algorithm courtesy of Ross LaFetra from AMD. */ -#define NUMBER_ECC_ROWS 36 -static const unsigned short ecc_chipkill_syndromes[NUMBER_ECC_ROWS][16] = { - /* Channel 0 syndromes */ - {/*0*/ 0, 0xe821, 0x7c32, 0x9413, 0xbb44, 0x5365, 0xc776, 0x2f57, - 0xdd88, 0x35a9, 0xa1ba, 0x499b, 0x66cc, 0x8eed, 0x1afe, 0xf2df }, - {/*1*/ 0, 0x5d31, 0xa612, 0xfb23, 0x9584, 0xc8b5, 0x3396, 0x6ea7, - 0xeac8, 0xb7f9, 0x4cda, 0x11eb, 0x7f4c, 0x227d, 0xd95e, 0x846f }, - {/*2*/ 0, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, - 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f }, - {/*3*/ 0, 0x2021, 0x3032, 0x1013, 0x4044, 0x6065, 0x7076, 0x5057, - 0x8088, 0xa0a9, 0xb0ba, 0x909b, 0xc0cc, 0xe0ed, 0xf0fe, 0xd0df }, - {/*4*/ 0, 0x5041, 0xa082, 0xf0c3, 0x9054, 0xc015, 0x30d6, 0x6097, - 0xe0a8, 0xb0e9, 0x402a, 0x106b, 0x70fc, 0x20bd, 0xd07e, 0x803f }, - {/*5*/ 0, 0xbe21, 0xd732, 0x6913, 0x2144, 0x9f65, 0xf676, 0x4857, - 0x3288, 0x8ca9, 0xe5ba, 0x5b9b, 0x13cc, 0xaded, 0xc4fe, 0x7adf }, - {/*6*/ 0, 0x4951, 0x8ea2, 0xc7f3, 0x5394, 0x1ac5, 0xdd36, 0x9467, - 0xa1e8, 0xe8b9, 0x2f4a, 0x661b, 0xf27c, 0xbb2d, 0x7cde, 0x358f }, - {/*7*/ 0, 0x74e1, 0x9872, 0xec93, 0xd6b4, 0xa255, 0x4ec6, 0x3a27, - 0x6bd8, 0x1f39, 0xf3aa, 0x874b, 0xbd6c, 0xc98d, 0x251e, 0x51ff }, - {/*8*/ 0, 0x15c1, 0x2a42, 0x3f83, 0xcef4, 0xdb35, 0xe4b6, 0xf177, - 0x4758, 0x5299, 0x6d1a, 0x78db, 0x89ac, 0x9c6d, 0xa3ee, 0xb62f }, - {/*9*/ 0, 0x3d01, 0x1602, 0x2b03, 0x8504, 0xb805, 0x9306, 0xae07, - 0xca08, 0xf709, 0xdc0a, 0xe10b, 0x4f0c, 0x720d, 0x590e, 0x640f }, - {/*a*/ 0, 0x9801, 0xec02, 0x7403, 0x6b04, 0xf305, 0x8706, 0x1f07, - 0xbd08, 0x2509, 0x510a, 0xc90b, 0xd60c, 0x4e0d, 0x3a0e, 0xa20f }, - {/*b*/ 0, 0xd131, 0x6212, 0xb323, 0x3884, 0xe9b5, 0x5a96, 0x8ba7, - 0x1cc8, 0xcdf9, 0x7eda, 0xafeb, 0x244c, 0xf57d, 0x465e, 0x976f }, - {/*c*/ 0, 0xe1d1, 0x7262, 0x93b3, 0xb834, 0x59e5, 0xca56, 0x2b87, - 0xdc18, 0x3dc9, 0xae7a, 0x4fab, 0x542c, 0x85fd, 0x164e, 0xf79f }, - {/*d*/ 0, 0x6051, 0xb0a2, 0xd0f3, 0x1094, 0x70c5, 0xa036, 0xc067, - 0x20e8, 0x40b9, 0x904a, 0x601b, 0x307c, 0x502d, 0x80de, 0xe08f }, - {/*e*/ 0, 0xa4c1, 0xf842, 0x5c83, 0xe6f4, 0x4235, 0x1eb6, 0xba77, - 0x7b58, 0xdf99, 0x831a, 0x27db, 0x9dac, 0x396d, 0x65ee, 0xc12f }, - {/*f*/ 0, 0x11c1, 0x2242, 0x3383, 0xc8f4, 0xd935, 0xeab6, 0xfb77, - 0x4c58, 0x5d99, 0x6e1a, 0x7fdb, 0x84ac, 0x956d, 0xa6ee, 0xb72f }, - - /* Channel 1 syndromes */ - {/*10*/ 1, 0x45d1, 0x8a62, 0xcfb3, 0x5e34, 0x1be5, 0xd456, 0x9187, - 0xa718, 0xe2c9, 0x2d7a, 0x68ab, 0xf92c, 0xbcfd, 0x734e, 0x369f }, - {/*11*/ 1, 0x63e1, 0xb172, 0xd293, 0x14b4, 0x7755, 0xa5c6, 0xc627, - 0x28d8, 0x4b39, 0x99aa, 0xfa4b, 0x3c6c, 0x5f8d, 0x8d1e, 0xeeff }, - {/*12*/ 1, 0xb741, 0xd982, 0x6ec3, 0x2254, 0x9515, 0xfbd6, 0x4c97, - 0x33a8, 0x84e9, 0xea2a, 0x5d6b, 0x11fc, 0xa6bd, 0xc87e, 0x7f3f }, - {/*13*/ 1, 0xdd41, 0x6682, 0xbbc3, 0x3554, 0xe815, 0x53d6, 0xce97, - 0x1aa8, 0xc7e9, 0x7c2a, 0xa1fb, 0x2ffc, 0xf2bd, 0x497e, 0x943f }, - {/*14*/ 1, 0x2bd1, 0x3d62, 0x16b3, 0x4f34, 0x64e5, 0x7256, 0x5987, - 0x8518, 0xaec9, 0xb87a, 0x93ab, 0xca2c, 0xe1fd, 0xf74e, 0xdc9f }, - {/*15*/ 1, 0x83c1, 0xc142, 0x4283, 0xa4f4, 0x2735, 0x65b6, 0xe677, - 0xf858, 0x7b99, 0x391a, 0xbadb, 0x5cac, 0xdf6d, 0x9dee, 0x1e2f }, - {/*16*/ 1, 0x8fd1, 0xc562, 0x4ab3, 0xa934, 0x26e5, 0x6c56, 0xe387, - 0xfe18, 0x71c9, 0x3b7a, 0xb4ab, 0x572c, 0xd8fd, 0x924e, 0x1d9f }, - {/*17*/ 1, 0x4791, 0x89e2, 0xce73, 0x5264, 0x15f5, 0xdb86, 0x9c17, - 0xa3b8, 0xe429, 0x2a5a, 0x6dcb, 0xf1dc, 0xb64d, 0x783e, 0x3faf }, - {/*18*/ 1, 0x5781, 0xa9c2, 0xfe43, 0x92a4, 0xc525, 0x3b66, 0x6ce7, - 0xe3f8, 0xb479, 0x4a3a, 0x1dbb, 0x715c, 0x26dd, 0xd89e, 0x8f1f }, - {/*19*/ 1, 0xbf41, 0xd582, 0x6ac3, 0x2954, 0x9615, 0xfcd6, 0x4397, - 0x3ea8, 0x81e9, 0xeb2a, 0x546b, 0x17fc, 0xa8bd, 0xc27e, 0x7d3f }, - {/*1a*/ 1, 0x9891, 0xe1e2, 0x7273, 0x6464, 0xf7f5, 0x8586, 0x1617, - 0xb8b8, 0x2b29, 0x595a, 0xcacb, 0xdcdc, 0x4f4d, 0x3d3e, 0xaeaf }, - {/*1b*/ 1, 0xcce1, 0x4472, 0x8893, 0xfdb4, 0x3f55, 0xb9c6, 0x7527, - 0x56d8, 0x9a39, 0x12aa, 0xde4b, 0xab6c, 0x678d, 0xef1e, 0x23ff }, - {/*1c*/ 1, 0xa761, 0xf9b2, 0x5ed3, 0xe214, 0x4575, 0x1ba6, 0xbcc7, - 0x7328, 0xd449, 0x8a9a, 0x2dfb, 0x913c, 0x365d, 0x688e, 0xcfef }, - {/*1d*/ 1, 0xff61, 0x55b2, 0xaad3, 0x7914, 0x8675, 0x2ca6, 0xd3c7, - 0x9e28, 0x6149, 0xcb9a, 0x34fb, 0xe73c, 0x185d, 0xb28e, 0x4def }, - {/*1e*/ 1, 0x5451, 0xa8a2, 0xfcf3, 0x9694, 0xc2c5, 0x3e36, 0x6a67, - 0xebe8, 0xbfb9, 0x434a, 0x171b, 0x7d7c, 0x292d, 0xd5de, 0x818f }, - {/*1f*/ 1, 0x6fc1, 0xb542, 0xda83, 0x19f4, 0x7635, 0xacb6, 0xc377, - 0x2e58, 0x4199, 0x9b1a, 0xf4db, 0x37ac, 0x586d, 0x82ee, 0xed2f }, - - /* ECC bits are also in the set of tokens and they too can go bad - * first 2 cover channel 0, while the second 2 cover channel 1 - */ - {/*20*/ 0, 0xbe01, 0xd702, 0x6903, 0x2104, 0x9f05, 0xf606, 0x4807, - 0x3208, 0x8c09, 0xe50a, 0x5b0b, 0x130c, 0xad0d, 0xc40e, 0x7a0f }, - {/*21*/ 0, 0x4101, 0x8202, 0xc303, 0x5804, 0x1905, 0xda06, 0x9b07, - 0xac08, 0xed09, 0x2e0a, 0x6f0b, 0x640c, 0xb50d, 0x760e, 0x370f }, - {/*22*/ 1, 0xc441, 0x4882, 0x8cc3, 0xf654, 0x3215, 0xbed6, 0x7a97, - 0x5ba8, 0x9fe9, 0x132a, 0xd76b, 0xadfc, 0x69bd, 0xe57e, 0x213f }, - {/*23*/ 1, 0x7621, 0x9b32, 0xed13, 0xda44, 0xac65, 0x4176, 0x3757, - 0x6f88, 0x19a9, 0xf4ba, 0x829b, 0xb5cc, 0xc3ed, 0x2efe, 0x58df } +static u16 x4_vectors[] = { + 0x2f57, 0x1afe, 0x66cc, 0xdd88, + 0x11eb, 0x3396, 0x7f4c, 0xeac8, + 0x0001, 0x0002, 0x0004, 0x0008, + 0x1013, 0x3032, 0x4044, 0x8088, + 0x106b, 0x30d6, 0x70fc, 0xe0a8, + 0x4857, 0xc4fe, 0x13cc, 0x3288, + 0x1ac5, 0x2f4a, 0x5394, 0xa1e8, + 0x1f39, 0x251e, 0xbd6c, 0x6bd8, + 0x15c1, 0x2a42, 0x89ac, 0x4758, + 0x2b03, 0x1602, 0x4f0c, 0xca08, + 0x1f07, 0x3a0e, 0x6b04, 0xbd08, + 0x8ba7, 0x465e, 0x244c, 0x1cc8, + 0x2b87, 0x164e, 0x642c, 0xdc18, + 0x40b9, 0x80de, 0x1094, 0x20e8, + 0x27db, 0x1eb6, 0x9dac, 0x7b58, + 0x11c1, 0x2242, 0x84ac, 0x4c58, + 0x1be5, 0x2d7a, 0x5e34, 0xa718, + 0x4b39, 0x8d1e, 0x14b4, 0x28d8, + 0x4c97, 0xc87e, 0x11fc, 0x33a8, + 0x8e97, 0x497e, 0x2ffc, 0x1aa8, + 0x16b3, 0x3d62, 0x4f34, 0x8518, + 0x1e2f, 0x391a, 0x5cac, 0xf858, + 0x1d9f, 0x3b7a, 0x572c, 0xfe18, + 0x15f5, 0x2a5a, 0x5264, 0xa3b8, + 0x1dbb, 0x3b66, 0x715c, 0xe3f8, + 0x4397, 0xc27e, 0x17fc, 0x3ea8, + 0x1617, 0x3d3e, 0x6464, 0xb8b8, + 0x23ff, 0x12aa, 0xab6c, 0x56d8, + 0x2dfb, 0x1ba6, 0x913c, 0x7328, + 0x185d, 0x2ca6, 0x7914, 0x9e28, + 0x171b, 0x3e36, 0x7d7c, 0xebe8, + 0x4199, 0x82ee, 0x19f4, 0x2e58, + 0x4807, 0xc40e, 0x130c, 0x3208, + 0x1905, 0x2e0a, 0x5804, 0xac08, + 0x213f, 0x132a, 0xadfc, 0x5ba8, + 0x19a9, 0x2efe, 0xb5cc, 0x6f88, }; -/* - * Given the syndrome argument, scan each of the channel tables for a syndrome - * match. Depending on which table it is found, return the channel number. - */ -static int get_channel_from_ecc_syndrome(unsigned short syndrome) +static u16 x8_vectors[] = { + 0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480, + 0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80, + 0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80, + 0x0411, 0x0822, 0x1044, 0x0158, 0x02b0, 0x2360, 0x46c0, 0xab80, + 0x0811, 0x1022, 0x012c, 0x0258, 0x04b0, 0x4660, 0x8cc0, 0x2780, + 0x2071, 0x40e2, 0xa0c4, 0x0108, 0x0210, 0x0420, 0x0840, 0x1080, + 0x4071, 0x80e2, 0x0104, 0x0208, 0x0410, 0x0820, 0x1040, 0x2080, + 0x8071, 0x0102, 0x0204, 0x0408, 0x0810, 0x1020, 0x2040, 0x4080, + 0x019d, 0x03d6, 0x136c, 0x2198, 0x50b0, 0xb2e0, 0x0740, 0x0e80, + 0x0189, 0x03ea, 0x072c, 0x0e58, 0x1cb0, 0x56e0, 0x37c0, 0xf580, + 0x01fd, 0x0376, 0x06ec, 0x0bb8, 0x1110, 0x2220, 0x4440, 0x8880, + 0x0163, 0x02c6, 0x1104, 0x0758, 0x0eb0, 0x2be0, 0x6140, 0xc280, + 0x02fd, 0x01c6, 0x0b5c, 0x1108, 0x07b0, 0x25a0, 0x8840, 0x6180, + 0x0801, 0x012e, 0x025c, 0x04b8, 0x1370, 0x26e0, 0x57c0, 0xb580, + 0x0401, 0x0802, 0x015c, 0x02b8, 0x22b0, 0x13e0, 0x7140, 0xe280, + 0x0201, 0x0402, 0x0804, 0x01b8, 0x11b0, 0x31a0, 0x8040, 0x7180, + 0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080, + 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, + 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000, +}; + +static int decode_syndrome(u16 syndrome, u16 *vectors, int num_vecs, + int v_dim) { - int row; - int column; + unsigned int i, err_sym; - /* Determine column to scan */ - column = syndrome & 0xF; + for (err_sym = 0; err_sym < num_vecs / v_dim; err_sym++) { + u16 s = syndrome; + int v_idx = err_sym * v_dim; + int v_end = (err_sym + 1) * v_dim; - /* Scan all rows, looking for syndrome, or end of table */ - for (row = 0; row < NUMBER_ECC_ROWS; row++) { - if (ecc_chipkill_syndromes[row][column] == syndrome) - return ecc_chipkill_syndromes[row][0]; + /* walk over all 16 bits of the syndrome */ + for (i = 1; i < (1U << 16); i <<= 1) { + + /* if bit is set in that eigenvector... */ + if (v_idx < v_end && vectors[v_idx] & i) { + u16 ev_comp = vectors[v_idx++]; + + /* ... and bit set in the modified syndrome, */ + if (s & i) { + /* remove it. */ + s ^= ev_comp; + + if (!s) + return err_sym; + } + + } else if (s & i) + /* can't get to zero, move to next symbol */ + break; + } } debugf0("syndrome(%x) not found\n", syndrome); return -1; } +static int map_err_sym_to_channel(int err_sym, int sym_size) +{ + if (sym_size == 4) + switch (err_sym) { + case 0x20: + case 0x21: + return 0; + break; + case 0x22: + case 0x23: + return 1; + break; + default: + return err_sym >> 4; + break; + } + /* x8 symbols */ + else + switch (err_sym) { + /* imaginary bits not in a DIMM */ + case 0x10: + WARN(1, KERN_ERR "Invalid error symbol: 0x%x\n", + err_sym); + return -1; + break; + + case 0x11: + return 0; + break; + case 0x12: + return 1; + break; + default: + return err_sym >> 3; + break; + } + return -1; +} + +static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome) +{ + struct amd64_pvt *pvt = mci->pvt_info; + u32 value = 0; + int err_sym = 0; + + amd64_read_pci_cfg(pvt->misc_f3_ctl, 0x180, &value); + + /* F3x180[EccSymbolSize]=1, x8 symbols */ + if (boot_cpu_data.x86 == 0x10 && + boot_cpu_data.x86_model > 7 && + value & BIT(25)) { + err_sym = decode_syndrome(syndrome, x8_vectors, + ARRAY_SIZE(x8_vectors), 8); + return map_err_sym_to_channel(err_sym, 8); + } else { + err_sym = decode_syndrome(syndrome, x4_vectors, + ARRAY_SIZE(x4_vectors), 4); + return map_err_sym_to_channel(err_sym, 4); + } +} + /* * Check for valid error in the NB Status High register. If so, proceed to read * NB Status Low, NB Address Low and NB Address High registers and store data From bdc30a0c8c7427a1c1d2e4d149d372d4d77781ee Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 13 Nov 2009 15:10:43 +0100 Subject: [PATCH 19/21] amd64_edac: correct sys address to chip select mapping The routine does the reverse mapping of the error address of a CECC back to the node id, DRAM controller and chip select of the DIMM which caused the error. We should lookup the channel using the syndromes _only_ when the DCTs are ganged so fix that. Also, add an early exit when there's an error while scanning for the csrow thus decreasing indentation levels for better readability. Finally, fixup comments. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 64 ++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 0969a404f84..533f5ff2ec3 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1645,10 +1645,11 @@ static int f10_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, } /* - * This the F10h reference code from AMD to map a @sys_addr to NodeID, - * CSROW, Channel. + * For reference see "2.8.5 Routing DRAM Requests" in F10 BKDG. This code maps + * a @sys_addr to NodeID, DCT (channel) and chip select (CSROW). * - * The @sys_addr is usually an error address received from the hardware. + * The @sys_addr is usually an error address received from the hardware + * (MCX_ADDR). */ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, struct err_regs *info, @@ -1661,39 +1662,34 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan); - if (csrow >= 0) { - error_address_to_page_and_offset(sys_addr, &page, &offset); - - syndrome = HIGH_SYNDROME(info->nbsl) << 8; - syndrome |= LOW_SYNDROME(info->nbsh); - - /* - * Is CHIPKILL on? If so, then we can attempt to use the - * syndrome to isolate which channel the error was on. - */ - if (pvt->nbcfg & K8_NBCFG_CHIPKILL) - chan = get_channel_from_ecc_syndrome(mci, syndrome); - - if (chan >= 0) { - edac_mc_handle_ce(mci, page, offset, syndrome, - csrow, chan, EDAC_MOD_STR); - } else { - /* - * Channel unknown, report all channels on this - * CSROW as failed. - */ - for (chan = 0; chan < mci->csrows[csrow].nr_channels; - chan++) { - edac_mc_handle_ce(mci, page, offset, - syndrome, - csrow, chan, - EDAC_MOD_STR); - } - } - - } else { + if (csrow < 0) { edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); + return; } + + error_address_to_page_and_offset(sys_addr, &page, &offset); + + syndrome = HIGH_SYNDROME(info->nbsl) << 8; + syndrome |= LOW_SYNDROME(info->nbsh); + + /* + * We need the syndromes for channel detection only when we're + * ganged. Otherwise @chan should already contain the channel at + * this point. + */ + if (dct_ganging_enabled(pvt) && pvt->nbcfg & K8_NBCFG_CHIPKILL) + chan = get_channel_from_ecc_syndrome(mci, syndrome); + + if (chan >= 0) + edac_mc_handle_ce(mci, page, offset, syndrome, csrow, chan, + EDAC_MOD_STR); + else + /* + * Channel unknown, report all channels on this CSROW as failed. + */ + for (chan = 0; chan < mci->csrows[csrow].nr_channels; chan++) + edac_mc_handle_ce(mci, page, offset, syndrome, + csrow, chan, EDAC_MOD_STR); } /* From 18ba54ac1286b4fdb0e61d49fa3ad9363e7cd032 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 7 Dec 2009 19:04:23 +0100 Subject: [PATCH 20/21] amd64_edac: fix use-uninitialised bug drivers/edac/amd64_edac.c: In function 'amd64_edac_init': drivers/edac/amd64_edac.c:2840: warning: 'ret' may be used uninitialized in this function Cc: Doug Thompson Cc: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 533f5ff2ec3..5fdd6daa40e 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2837,7 +2837,7 @@ static int amd64_init_2nd_stage(struct amd64_pvt *pvt) { int node_id = pvt->mc_node_id; struct mem_ctl_info *mci; - int ret; + int ret = -ENODEV; amd64_read_mc_registers(pvt); From df5b1606bd077401831759171c355dc38cfaa59a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 16 Nov 2009 11:42:47 +0100 Subject: [PATCH 21/21] amd64_edac: bump driver version This was long overdue ... Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index e84f164034d..41bc561e598 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -129,7 +129,7 @@ * sections 3.5.4 and 3.5.5 for more information. */ -#define EDAC_AMD64_VERSION " Ver: 3.2.0 " __DATE__ +#define EDAC_AMD64_VERSION " Ver: 3.3.0 " __DATE__ #define EDAC_MOD_STR "amd64_edac" #define EDAC_MAX_NUMNODES 8