diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index e2f6a99f14a..cc29086e30c 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -22,6 +22,7 @@ #define ARCH_P4_CNTRVAL_BITS (40) #define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1) +#define ARCH_P4_UNFLAGGED_BIT ((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1)) #define P4_ESCR_EVENT_MASK 0x7e000000U #define P4_ESCR_EVENT_SHIFT 25 diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index f7a0993c1e7..ff751a9f182 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -770,9 +770,14 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) return 1; } - /* it might be unflagged overflow */ - rdmsrl(hwc->event_base + hwc->idx, v); - if (!(v & ARCH_P4_CNTRVAL_MASK)) + /* + * In some circumstances the overflow might issue an NMI but did + * not set P4_CCCR_OVF bit. Because a counter holds a negative value + * we simply check for high bit being set, if it's cleared it means + * the counter has reached zero value and continued counting before + * real NMI signal was received: + */ + if (!(v & ARCH_P4_UNFLAGGED_BIT)) return 1; return 0; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 999835b6112..656222fcf76 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -782,6 +782,10 @@ retry: raw_spin_unlock_irq(&ctx->lock); } +#define MAX_INTERRUPTS (~0ULL) + +static void perf_log_throttle(struct perf_event *event, int enable); + static int event_sched_in(struct perf_event *event, struct perf_cpu_context *cpuctx, @@ -794,6 +798,17 @@ event_sched_in(struct perf_event *event, event->state = PERF_EVENT_STATE_ACTIVE; event->oncpu = smp_processor_id(); + + /* + * Unthrottle events, since we scheduled we might have missed several + * ticks already, also for a heavily scheduling task there is little + * guarantee it'll get a tick in a timely manner. + */ + if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) { + perf_log_throttle(event, 1); + event->hw.interrupts = 0; + } + /* * The new state must be visible before we turn it on in the hardware: */ @@ -1596,10 +1611,6 @@ void __perf_event_task_sched_in(struct task_struct *task) } } -#define MAX_INTERRUPTS (~0ULL) - -static void perf_log_throttle(struct perf_event *event, int enable); - static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) { u64 frequency = event->attr.sample_freq;