Commit 84e5b549 authored by Ingo Molnar

Merge tag 'perf-core-for-mingo-4.11-20170306' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

- Allow sorting by symbol_size in 'perf report' and 'perf top' (Charles Baylis)

  E.g.:

  # perf report -s symbol_size,symbol

  Samples: 9K of event 'cycles:k', Event count (approx.): 2870461623
  Overhead  Symbol size  Symbol
    14.55%          326  [k] flush_tlb_mm_range
     7.20%         1045  [k] filemap_map_pages
     5.82%          124  [k] vma_interval_tree_insert
     5.18%         2430  [k] unmap_page_range
     2.57%          571  [k] vma_interval_tree_remove
     1.94%          494  [k] page_add_file_rmap
     1.82%          740  [k] page_remove_rmap
     1.66%         1017  [k] release_pages
     1.57%         1636  [k] update_blocked_averages
     1.57%           76  [k] unlock_page

- Add support for -p/--pid, -a/--all-cpus and -C/--cpu in 'perf ftrace' (Namhyung Kim)
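
  E.g. (illustrative invocations; the PID and the CPU numbers below are just
  placeholders):

  # perf ftrace -p 1234            # trace an existing task by PID
  # perf ftrace -a sleep 1         # trace system wide while the workload runs
  # perf ftrace -C 0,1 sleep 1     # restrict tracing to CPUs 0 and 1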

Change in behaviour:

- Make system wide (-a) the default option if no target was specified and one
  of the following conditions is met:

  - No workload specified (current behaviour)

  - A workload is specified, but all requested events are system wide ones,
    like uncore events (Jiri Olsa)
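
  E.g. (illustrative; the uncore event alias depends on the PMUs present on
  the system):

  # perf stat -e uncore_imc/data_reads/ sleep 1

  now counts system wide, as if -a had been passed, since an uncore event
  cannot be attributed to the workload's threads anyway.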

Fixes:

- Add missing initialization to the instruction decoder used in the
  Intel PT/BTS code, which was causing lots of failures in 'perf test',
  looking for a value when there was none (Adrian Hunter)

Infrastructure changes:

- Add the arch code needed to adopt the kernel's refcount_t, which helps catch
  bugs where an atomic_t is used as a reference counter; basically the cmpxchg
  related functions (Arnaldo Carvalho de Melo)

- Convert code that uses atomic_t as a reference count to refcount_t
  (Elena Reshetova)
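
  A minimal sketch of the kind of pattern this enables (illustrative only; the
  function name is made up, and the real refcount_inc_not_zero() adds
  saturation and warning semantics on top of a loop like this):

    static inline bool refcount_inc_not_zero_sketch(atomic_t *r)
    {
            int old = atomic_read(r);

            while (old) {                     /* never resurrect a freed object */
                    int prev = atomic_cmpxchg(r, old, old + 1);

                    if (prev == old)          /* our increment won the race */
                            return true;
                    old = prev;               /* lost the race, retry with the new value */
            }
            return false;                     /* refcount already dropped to zero */
    }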

- Add a feature test for sched_getcpu() to more easily check for its
  presence across the many libc implementations and different versions
  of such C libraries (Arnaldo Carvalho de Melo)
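
  The feature probe itself can be as small as this sketch (assuming the usual
  tools/build/feature layout, in a file along the lines of test-sched_getcpu.c):

    #define _GNU_SOURCE
    #include <sched.h>

    int main(void)
    {
            /* compiles and links only if this libc exposes sched_getcpu() */
            return sched_getcpu();
    }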

- Issue a hint in 'perf stat' suggesting the HW watchdog be disabled when some
  of the requested events can't be counted because a PMU counter is taken by
  that watchdog (Borislav Petkov).
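
  The hint is along these lines (wording approximate):

    Some events weren't counted. Try disabling the NMI watchdog:
            echo 0 > /proc/sys/kernel/nmi_watchdog
            perf stat ...
            echo 1 > /proc/sys/kernel/nmi_watchdog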

- Add mapping for Intel's KnightsMill PMU events (Karol Wachowski)

Documentation changes:

- Clarify the term 'convergence' in:

   perf bench numa numa-mem -h --show_convergence (Jiri Olsa)

Kernel code changes:

- Ensure probe location is at function entry in kretprobes (Naveen N. Rao)

- Allow return probes with offsets and absolute addresses (Naveen N. Rao)
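
  E.g. (illustrative; the address below is a placeholder and must point at a
  function entry on architectures without special entry point handling), a
  return probe can now be set on an absolute address through kprobe_events:

  # echo 'r:myretprobe 0xffffffff811d0010' >> /sys/kernel/debug/tracing/kprobe_events
  # echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable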

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents c1ae3cfa 001916b9
@@ -12,7 +12,7 @@ kprobes can probe (this means, all functions body except for __kprobes
 functions). Unlike the Tracepoint based event, this can be added and removed
 dynamically, on the fly.
-To enable this feature, build your kernel with CONFIG_KPROBE_EVENT=y.
+To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y.
 Similar to the events tracer, this doesn't need to be activated via
 current_tracer. Instead of that, add probe points via
@@ -7,7 +7,7 @@
 Overview
 --------
 Uprobe based trace events are similar to kprobe based trace events.
-To enable this feature, build your kernel with CONFIG_UPROBE_EVENT=y.
+To enable this feature, build your kernel with CONFIG_UPROBE_EVENTS=y.
 Similar to the kprobe-event tracer, this doesn't need to be activated via
 current_tracer. Instead of that, add probe points via
@@ -609,7 +609,7 @@ CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
@@ -560,7 +560,7 @@ CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
@@ -558,7 +558,7 @@ CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
@@ -179,7 +179,7 @@ CONFIG_FTRACE_SYSCALLS=y
 CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
 CONFIG_KPROBES_SANITY_TEST=y
@@ -67,7 +67,7 @@
 #endif
 /* Ensure if the instruction can be boostable */
-extern int can_boost(kprobe_opcode_t *instruction);
+extern int can_boost(kprobe_opcode_t *instruction, void *addr);
 /* Recover instruction if given address is probed */
 extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
                                                 unsigned long addr);
@@ -167,12 +167,12 @@ NOKPROBE_SYMBOL(skip_prefixes);
  * Returns non-zero if opcode is boostable.
  * RIP relative instructions are adjusted at copying time in 64 bits mode
  */
-int can_boost(kprobe_opcode_t *opcodes)
+int can_boost(kprobe_opcode_t *opcodes, void *addr)
 {
         kprobe_opcode_t opcode;
         kprobe_opcode_t *orig_opcodes = opcodes;
-        if (search_exception_tables((unsigned long)opcodes))
+        if (search_exception_tables((unsigned long)addr))
                 return 0; /* Page fault may occur on this address. */
 retry:
@@ -417,7 +417,7 @@ static int arch_copy_kprobe(struct kprobe *p)
          * __copy_instruction can modify the displacement of the instruction,
          * but it doesn't affect boostable check.
          */
-        if (can_boost(p->ainsn.insn))
+        if (can_boost(p->ainsn.insn, p->addr))
                 p->ainsn.boostable = 0;
         else
                 p->ainsn.boostable = -1;
@@ -178,7 +178,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src)
         while (len < RELATIVEJUMP_SIZE) {
                 ret = __copy_instruction(dest + len, src + len);
-                if (!ret || !can_boost(dest + len))
+                if (!ret || !can_boost(dest + len, src + len))
                         return -EINVAL;
                 len += ret;
         }
@@ -267,6 +267,7 @@ extern int arch_init_kprobes(void);
 extern void show_registers(struct pt_regs *regs);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
 extern bool arch_within_kprobe_blacklist(unsigned long addr);
+extern bool arch_function_offset_within_entry(unsigned long offset);
 extern bool within_kprobe_blacklist(unsigned long addr);
@@ -1875,12 +1875,25 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(pre_handler_kretprobe);
+bool __weak arch_function_offset_within_entry(unsigned long offset)
+{
+        return !offset;
+}
 int register_kretprobe(struct kretprobe *rp)
 {
         int ret = 0;
         struct kretprobe_instance *inst;
         int i;
         void *addr;
+        unsigned long offset;
+        addr = kprobe_addr(&rp->kp);
+        if (!kallsyms_lookup_size_offset((unsigned long)addr, NULL, &offset))
+                return -EINVAL;
+        if (!arch_function_offset_within_entry(offset))
+                return -EINVAL;
         if (kretprobe_blacklist_size) {
                 addr = kprobe_addr(&rp->kp);
@@ -429,7 +429,7 @@ config BLK_DEV_IO_TRACE
           If unsure, say N.
-config KPROBE_EVENT
+config KPROBE_EVENTS
         depends on KPROBES
         depends on HAVE_REGS_AND_STACK_ACCESS_API
         bool "Enable kprobes-based dynamic events"
@@ -447,7 +447,7 @@ config KPROBE_EVENT
           This option is also required by perf-probe subcommand of perf tools.
           If you want to use perf tools, this option is strongly recommended.
-config UPROBE_EVENT
+config UPROBE_EVENTS
         bool "Enable uprobes-based dynamic events"
         depends on ARCH_SUPPORTS_UPROBES
         depends on MMU
@@ -466,7 +466,7 @@ config UPROBE_EVENT
 config BPF_EVENTS
         depends on BPF_SYSCALL
-        depends on (KPROBE_EVENT || UPROBE_EVENT) && PERF_EVENTS
+        depends on (KPROBE_EVENTS || UPROBE_EVENTS) && PERF_EVENTS
         bool
         default y
         help
@@ -57,7 +57,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
 obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o
 obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o
-obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
+obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o
 obj-$(CONFIG_TRACEPOINTS) += power-traces.o
 ifeq ($(CONFIG_PM),y)
 obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
@@ -66,7 +66,7 @@ ifeq ($(CONFIG_TRACING),y)
 obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
 endif
 obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
-obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
+obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
 obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
@@ -4341,22 +4341,23 @@ static const char readme_msg[] =
         "\t\t\t traces\n"
 #endif
 #endif /* CONFIG_STACK_TRACER */
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
         " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
         "\t\t\t Write into this file to define/undefine new trace events.\n"
 #endif
-#ifdef CONFIG_UPROBE_EVENT
+#ifdef CONFIG_UPROBE_EVENTS
         " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
         "\t\t\t Write into this file to define/undefine new trace events.\n"
 #endif
-#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
+#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
         "\t accepts: event-definitions (one definition per line)\n"
         "\t Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
         "\t -:[<group>/]<event>\n"
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
         "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
+        "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
 #endif
-#ifdef CONFIG_UPROBE_EVENT
+#ifdef CONFIG_UPROBE_EVENTS
         "\t place: <path>:<offset>\n"
 #endif
         "\t args: <name>=fetcharg[:type]\n"
@@ -681,10 +681,6 @@ static int create_trace_kprobe(int argc, char **argv)
                 return -EINVAL;
         }
         if (isdigit(argv[1][0])) {
-                if (is_return) {
-                        pr_info("Return probe point must be a symbol.\n");
-                        return -EINVAL;
-                }
                 /* an address specified */
                 ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
                 if (ret) {
@@ -700,8 +696,9 @@
                         pr_info("Failed to parse symbol.\n");
                         return ret;
                 }
-                if (offset && is_return) {
-                        pr_info("Return probe must be used without offset.\n");
+                if (offset && is_return &&
+                    !arch_function_offset_within_entry(offset)) {
+                        pr_info("Given offset is not valid for return probe.\n");
                         return -EINVAL;
                 }
         }
@@ -248,7 +248,7 @@ ASSIGN_FETCH_FUNC(file_offset, ftype), \
 #define FETCH_TYPE_STRING 0
 #define FETCH_TYPE_STRSIZE 1
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
 struct symbol_cache;
 unsigned long update_symbol_cache(struct symbol_cache *sc);
 void free_symbol_cache(struct symbol_cache *sc);
@@ -278,7 +278,7 @@ alloc_symbol_cache(const char *sym, long offset)
 {
         return NULL;
 }
-#endif /* CONFIG_KPROBE_EVENT */
+#endif /* CONFIG_KPROBE_EVENTS */
 struct probe_arg {
         struct fetch_param fetch;
@@ -7,6 +7,8 @@
 #define LOCK_PREFIX "\n\tlock; "
+#include <asm/cmpxchg.h>
 /*
  * Atomic operations that C can't guarantee us. Useful for
  * resource counting etc..
@@ -62,4 +64,9 @@ static inline int atomic_dec_and_test(atomic_t *v)
         GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
 }
+static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+        return cmpxchg(&v->counter, old, new);
+}
 #endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */
+#ifndef TOOLS_ASM_X86_CMPXCHG_H
+#define TOOLS_ASM_X86_CMPXCHG_H
+#include <linux/compiler.h>
+/*
+ * Non-existant functions to indicate usage errors at link time
+ * (or compile-time if the compiler implements __compiletime_error().
+ */
+extern void __cmpxchg_wrong_size(void)
+        __compiletime_error("Bad argument size for cmpxchg");
+/*
+ * Constants for operation sizes. On 32-bit, the 64-bit size it set to
+ * -1 because sizeof will never return -1, thereby making those switch
+ * case statements guaranteeed dead code which the compiler will
+ * eliminate, and allowing the "missing symbol in the default case" to
+ * indicate a usage error.
+ */
+#define __X86_CASE_B 1
+#define __X86_CASE_W 2
+#define __X86_CASE_L 4
+#ifdef __x86_64__
+#define __X86_CASE_Q 8
+#else
+#define __X86_CASE_Q -1 /* sizeof will never return -1 */
+#endif
+/*
+ * Atomic compare and exchange. Compare OLD with MEM, if identical,
+ * store NEW in MEM. Return the initial value in MEM. Success is
+ * indicated by comparing RETURN with OLD.
+ */
+#define __raw_cmpxchg(ptr, old, new, size, lock) \
+({ \
+        __typeof__(*(ptr)) __ret; \
+        __typeof__(*(ptr)) __old = (old); \
+        __typeof__(*(ptr)) __new = (new); \
+        switch (size) { \
+        case __X86_CASE_B: \
+        { \
+                volatile u8 *__ptr = (volatile u8 *)(ptr); \
+                asm volatile(lock "cmpxchgb %2,%1" \
+                             : "=a" (__ret), "+m" (*__ptr) \
+                             : "q" (__new), "0" (__old) \
+                             : "memory"); \
+                break; \
+        } \
+        case __X86_CASE_W: \
+        { \
+                volatile u16 *__ptr = (volatile u16 *)(ptr); \
+                asm volatile(lock "cmpxchgw %2,%1" \
+                             : "=a" (__ret), "+m" (*__ptr) \
+                             : "r" (__new), "0" (__old) \
+                             : "memory"); \
+                break; \
+        } \
+        case __X86_CASE_L: \
+        { \
+                volatile u32 *__ptr = (volatile u32 *)(ptr); \
+                asm volatile(lock "cmpxchgl %2,%1" \
+                             : "=a" (__ret), "+m" (*__ptr) \
+                             : "r" (__new), "0" (__old) \
+                             : "memory"); \
+                break; \
+        } \
+        case __X86_CASE_Q: \
+        { \
+                volatile u64 *__ptr = (volatile u64 *)(ptr); \
+                asm volatile(lock "cmpxchgq %2,%1" \
+                             : "=a" (__ret), "+m" (*__ptr) \
+                             : "r" (__new), "0" (__old) \
+                             : "memory"); \
+                break; \
+        } \
+        default: \
+                __cmpxchg_wrong_size(); \
+        } \
+        __ret; \
+})
+#define __cmpxchg(ptr, old, new, size) \
+        __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
+#define cmpxchg(ptr, old, new) \
+        __cmpxchg(ptr, old, new, sizeof(*(ptr)))
+#endif /* TOOLS_ASM_X86_CMPXCHG_H */
@@ -63,6 +63,7 @@ FEATURE_TESTS_BASIC := \
         lzma \
         get_cpuid \
         bpf \
+        sched_getcpu \
         sdt
 # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
@@ -48,12 +48,13 @@ FILES= \
         test-get_cpuid.bin \
         test-sdt.bin \
         test-cxx.bin \
-        test-jvmti.bin
+        test-jvmti.bin \
+        test-sched_getcpu.bin
 FILES := $(addprefix $(OUTPUT),$(FILES))
-CC := $(CROSS_COMPILE)gcc -MD
-CXX := $(CROSS_COMPILE)g++ -MD
+CC ?= $(CROSS_COMPILE)gcc -MD
+CXX ?= $(CROSS_COMPILE)g++ -MD
 PKG_CONFIG := $(CROSS_COMPILE)pkg-config
 LLVM_CONFIG ?= llvm-config
@@ -91,6 +92,9 @@ $(OUTPUT)test-libelf.bin:
 $(OUTPUT)test-glibc.bin:
         $(BUILD)
+$(OUTPUT)test-sched_getcpu.bin:
+        $(BUILD)
 DWARFLIBS := -ldw
 ifeq ($(findstring -static,${LDFLAGS}),-static)
 DWARFLIBS += -lelf -lebl -lz -llzma -lbz2