From 87e24f4b67e68d9fd8df16e0bf9c66d1ad2a2533 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 5 Mar 2012 23:59:25 +0100 Subject: [PATCH 1/2] perf/x86: Fix local vs remote memory events for NHM/WSM Verified using the below proglet.. before: [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0 remote write Performance counter stats for './numa 0': 2,101,554 node-stores 2,096,931 node-store-misses 5.021546079 seconds time elapsed [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1 local write Performance counter stats for './numa 1': 501,137 node-stores 199 node-store-misses 5.124451068 seconds time elapsed After: [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0 remote write Performance counter stats for './numa 0': 2,107,516 node-stores 2,097,187 node-store-misses 5.012755149 seconds time elapsed [root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1 local write Performance counter stats for './numa 1': 2,063,355 node-stores 165 node-store-misses 5.082091494 seconds time elapsed #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #define SIZE (32*1024*1024) volatile int done; void sig_done(int sig) { done = 1; } int main(int argc, char **argv) { cpu_set_t *mask, *mask2; size_t size; int i, err, t; int nrcpus = 1024; char *mem; unsigned long nodemask = 0x01; /* node 0 */ DIR *node; struct dirent *de; int read = 0; int local = 0; if (argc < 2) { printf("usage: %s [0-3]\n", argv[0]); printf(" bit0 - local/remote\n"); printf(" bit1 - read/write\n"); exit(0); } switch (atoi(argv[1])) { case 0: printf("remote write\n"); break; case 1: printf("local write\n"); local = 1; break; case 2: printf("remote read\n"); read = 1; break; case 3: printf("local read\n"); local = 1; read = 1; break; } mask = CPU_ALLOC(nrcpus); size = CPU_ALLOC_SIZE(nrcpus); CPU_ZERO_S(size, mask); node = opendir("/sys/devices/system/node/node0/"); if (!node) perror("opendir"); while ((de = readdir(node))) { int cpu; if (sscanf(de->d_name, "cpu%d", &cpu) == 1) CPU_SET_S(cpu, size, mask); } closedir(node); mask2 = CPU_ALLOC(nrcpus); CPU_ZERO_S(size, mask2); for (i = 0; i < size; i++) CPU_SET_S(i, size, mask2); CPU_XOR_S(size, mask2, mask2, mask); // invert if (!local) mask = mask2; err = sched_setaffinity(0, size, mask); if (err) perror("sched_setaffinity"); mem = mmap(0, SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); err = mbind(mem, SIZE, MPOL_BIND, &nodemask, 8*sizeof(nodemask), MPOL_MF_MOVE); if (err) perror("mbind"); signal(SIGALRM, sig_done); alarm(5); if (!read) { while (!done) { for (i = 0; i < SIZE; i++) mem[i] = 0x01; } } else { while (!done) { for (i = 0; i < SIZE; i++) t += *(volatile char *)(mem + i); } } return 0; } Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Cc: Link: http://lkml.kernel.org/n/tip-tq73sxus35xmqpojf7ootxgs@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 3bd37bdf1b8..61d4f79a550 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -385,14 +385,15 @@ static __initconst const u64 westmere_hw_cache_event_ids #define NHM_LOCAL_DRAM (1 << 14) #define NHM_NON_DRAM (1 << 15) -#define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM) +#define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD) +#define NHM_REMOTE (NHM_REMOTE_DRAM) #define NHM_DMND_READ (NHM_DMND_DATA_RD) #define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB) #define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) #define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) -#define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD) +#define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD) #define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) static __initconst const u64 nehalem_hw_cache_extra_regs @@ -416,16 +417,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs }, [ C(NODE) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM, - [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE_DRAM, + [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE, + [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE, }, [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM, - [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM, + [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE, + [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM, - [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM, + [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE, + [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE, }, }, }; From 8aa8a7c80ccdfac2df5ee48a51a4a7bee2143d4f Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 13 Mar 2012 16:51:02 +0100 Subject: [PATCH 2/2] perf record: Fix buffer overrun bug in tracepoint_id_to_path() This patch fixes a buffer overrun bug in tracepoint_id_to_path(). The bug manisfested itself as a memory error reported by perf record. I ran into it with perf sched: $ perf sched rec noploop 2 noploop for 2 seconds [ perf record: Woken up 14 times to write data ] [ perf record: Captured and wrote 42.701 MB perf.data (~1865622 samples) ] Fatal: No memory to alloc tracepoints list It turned out that tracepoint_id_to_path() was reading the tracepoint id using read() but the buffer was not large enough to include the \n terminator for id with 4 digits or more. The patch fixes the problem by extending the buffer to a more reasonable size covering all possible id length include \n terminator. Note that atoll() stops at the first non digit character, thus it is not necessary to clear the buffer between each read. Signed-off-by: Stephane Eranian Acked-by: Arnaldo Carvalho de Melo Acked-by: Peter Zijlstra Cc: fweisbec@gmail.com Cc: dsahern@gmail.com Link: http://lkml.kernel.org/r/20120313155102.GA6465@quad Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b029296d20d..c7a6f6faf91 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -165,7 +165,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) struct tracepoint_path *path = NULL; DIR *sys_dir, *evt_dir; struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; - char id_buf[4]; + char id_buf[24]; int fd; u64 id; char evt_path[MAXPATHLEN];