Commit ffa86c2f authored by Ingo Molnar's avatar Ingo Molnar
Browse files

Merge tag 'perf-core-for-mingo-4.12-20170314' of...

Merge tag 'perf-core-for-mingo-4.12-20170314' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

 into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

- Add PERF_RECORD_NAMESPACES so that the kernel can record information
  required to associate samples to namespaces, helping in container
  problem characterization.

  Now the 'perf record has a --namespace' option to ask for such info,
  and when present, it can be used, initially, via a new sort order,
  'cgroup_id', allowing histogram entry bucketization by a (device, inode)
  based cgroup identifier (Hari Bathini)

- Add --next option to 'perf sched timehist', showing what is the next
  thread to run (Brendan Gregg)

Fixes:

- Fix segfault with basic block 'cycles' sort dimension (Changbin Du)

- Add c2c to command-list.txt, making it appear in the 'perf help'
  output (Changbin Du)

- Fix zeroing of 'abs_path' variable in the perf hists browser switch
  file code (Changbin Du)

- Hide tips messages when -q/--quiet is given to 'perf report' (Namhyung Kim)

Infrastructure changes:

- Use ref_reloc_sym + offset to setup kretprobes (Naveen Rao)

- Ignore generated files pmu-events/{jevents,pmu-events.c} for git (Changbin Du)

Documentation changes:

- Document +field style argument support for --field option (Changbin Du)

- Clarify 'perf c2c --stats' help message (Namhyung Kim)
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 84e5b549 5f6bee34
......@@ -8,8 +8,9 @@ Overview
--------
These events are similar to tracepoint based events. Instead of Tracepoint,
this is based on kprobes (kprobe and kretprobe). So it can probe wherever
kprobes can probe (this means, all functions body except for __kprobes
functions). Unlike the Tracepoint based event, this can be added and removed
kprobes can probe (this means, all functions except those with
__kprobes/nokprobe_inline annotation and those marked NOKPROBE_SYMBOL).
Unlike the Tracepoint based event, this can be added and removed
dynamically, on the fly.
To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y.
......
......@@ -1112,6 +1112,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
extern void perf_event_namespaces(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);
/* Callchains */
......@@ -1315,6 +1316,7 @@ static inline int perf_unregister_guest_info_callbacks
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
static inline void perf_event_exec(void) { }
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
static inline void perf_event_namespaces(struct task_struct *tsk) { }
static inline void perf_event_fork(struct task_struct *tsk) { }
static inline void perf_event_init(void) { }
static inline int perf_swevent_get_recursion_context(void) { return -1; }
......
......@@ -344,7 +344,8 @@ struct perf_event_attr {
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
__reserved_1 : 36;
namespaces : 1, /* include namespaces data */
__reserved_1 : 35;
union {
__u32 wakeup_events; /* wakeup every n events */
......@@ -610,6 +611,23 @@ struct perf_event_header {
__u16 size;
};
struct perf_ns_link_info {
__u64 dev;
__u64 ino;
};
enum {
NET_NS_INDEX = 0,
UTS_NS_INDEX = 1,
IPC_NS_INDEX = 2,
PID_NS_INDEX = 3,
USER_NS_INDEX = 4,
MNT_NS_INDEX = 5,
CGROUP_NS_INDEX = 6,
NR_NAMESPACES, /* number of available namespaces */
};
enum perf_event_type {
/*
......@@ -862,6 +880,18 @@ enum perf_event_type {
*/
PERF_RECORD_SWITCH_CPU_WIDE = 15,
/*
* struct {
* struct perf_event_header header;
* u32 pid;
* u32 tid;
* u64 nr_namespaces;
* { u64 dev, inode; } [nr_namespaces];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_NAMESPACES = 16,
PERF_RECORD_MAX, /* non-ABI */
};
......
......@@ -48,6 +48,8 @@
#include <linux/parser.h>
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
#include <linux/proc_ns.h>
#include <linux/mount.h>
#include "internal.h"
......@@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_namespaces_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
......@@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
atomic_dec(&nr_mmap_events);
if (event->attr.comm)
atomic_dec(&nr_comm_events);
if (event->attr.namespaces)
atomic_dec(&nr_namespaces_events);
if (event->attr.task)
atomic_dec(&nr_task_events);
if (event->attr.freq)
......@@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
void perf_event_fork(struct task_struct *task)
{
perf_event_task(task, NULL, 1);
perf_event_namespaces(task);
}
/*
......@@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
perf_event_comm_event(&comm_event);
}
/*
* namespaces tracking
*/
struct perf_namespaces_event {
struct task_struct *task;
struct {
struct perf_event_header header;
u32 pid;
u32 tid;
u64 nr_namespaces;
struct perf_ns_link_info link_info[NR_NAMESPACES];
} event_id;
};
static int perf_event_namespaces_match(struct perf_event *event)
{
return event->attr.namespaces;
}
static void perf_event_namespaces_output(struct perf_event *event,
void *data)
{
struct perf_namespaces_event *namespaces_event = data;
struct perf_output_handle handle;
struct perf_sample_data sample;
int ret;
if (!perf_event_namespaces_match(event))
return;
perf_event_header__init_id(&namespaces_event->event_id.header,
&sample, event);
ret = perf_output_begin(&handle, event,
namespaces_event->event_id.header.size);
if (ret)
return;
namespaces_event->event_id.pid = perf_event_pid(event,
namespaces_event->task);
namespaces_event->event_id.tid = perf_event_tid(event,
namespaces_event->task);
perf_output_put(&handle, namespaces_event->event_id);
perf_event__output_id_sample(event, &handle, &sample);
perf_output_end(&handle);
}
static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
struct task_struct *task,
const struct proc_ns_operations *ns_ops)
{
struct path ns_path;
struct inode *ns_inode;
void *error;
error = ns_get_path(&ns_path, task, ns_ops);
if (!error) {
ns_inode = ns_path.dentry->d_inode;
ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
ns_link_info->ino = ns_inode->i_ino;
}
}
void perf_event_namespaces(struct task_struct *task)
{
struct perf_namespaces_event namespaces_event;
struct perf_ns_link_info *ns_link_info;
if (!atomic_read(&nr_namespaces_events))
return;
namespaces_event = (struct perf_namespaces_event){
.task = task,
.event_id = {
.header = {
.type = PERF_RECORD_NAMESPACES,
.misc = 0,
.size = sizeof(namespaces_event.event_id),
},
/* .pid */
/* .tid */
.nr_namespaces = NR_NAMESPACES,
/* .link_info[NR_NAMESPACES] */
},
};
ns_link_info = namespaces_event.event_id.link_info;
perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
task, &mntns_operations);
#ifdef CONFIG_USER_NS
perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
task, &userns_operations);
#endif
#ifdef CONFIG_NET_NS
perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
task, &netns_operations);
#endif
#ifdef CONFIG_UTS_NS
perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
task, &utsns_operations);
#endif
#ifdef CONFIG_IPC_NS
perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
task, &ipcns_operations);
#endif
#ifdef CONFIG_PID_NS
perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
task, &pidns_operations);
#endif
#ifdef CONFIG_CGROUPS
perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
task, &cgroupns_operations);
#endif
perf_iterate_sb(perf_event_namespaces_output,
&namespaces_event,
NULL);
}
/*
* mmap tracking
*/
......@@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_mmap_events);
if (event->attr.comm)
atomic_inc(&nr_comm_events);
if (event->attr.namespaces)
atomic_inc(&nr_namespaces_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
if (event->attr.freq)
......@@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
return -EACCES;
}
if (attr.namespaces) {
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
}
if (attr.freq) {
if (attr.sample_freq > sysctl_perf_event_sample_rate)
return -EINVAL;
......
......@@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
}
}
perf_event_namespaces(current);
bad_unshare_cleanup_cred:
if (new_cred)
put_cred(new_cred);
......
......@@ -1740,11 +1740,12 @@ void unregister_kprobes(struct kprobe **kps, int num)
}
EXPORT_SYMBOL_GPL(unregister_kprobes);
int __weak __kprobes kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
int __weak kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
return NOTIFY_DONE;
}
NOKPROBE_SYMBOL(kprobe_exceptions_notify);
static struct notifier_block kprobe_exceptions_nb = {
.notifier_call = kprobe_exceptions_notify,
......
......@@ -26,6 +26,7 @@
#include <linux/file.h>
#include <linux/syscalls.h>
#include <linux/cgroup.h>
#include <linux/perf_event.h>
static struct kmem_cache *nsproxy_cachep;
......@@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
goto out;
}
switch_task_namespaces(tsk, new_nsproxy);
perf_event_namespaces(tsk);
out:
fput(file);
return err;
......
......@@ -344,7 +344,8 @@ struct perf_event_attr {
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
__reserved_1 : 36;
namespaces : 1, /* include namespaces data */
__reserved_1 : 35;
union {
__u32 wakeup_events; /* wakeup every n events */
......@@ -610,6 +611,23 @@ struct perf_event_header {
__u16 size;
};
struct perf_ns_link_info {
__u64 dev;
__u64 ino;
};
enum {
NET_NS_INDEX = 0,
UTS_NS_INDEX = 1,
IPC_NS_INDEX = 2,
PID_NS_INDEX = 3,
USER_NS_INDEX = 4,
MNT_NS_INDEX = 5,
CGROUP_NS_INDEX = 6,
NR_NAMESPACES, /* number of available namespaces */
};
enum perf_event_type {
/*
......@@ -862,6 +880,18 @@ enum perf_event_type {
*/
PERF_RECORD_SWITCH_CPU_WIDE = 15,
/*
* struct {
* struct perf_event_header header;
* u32 pid;
* u32 tid;
* u64 nr_namespaces;
* { u64 dev, inode; } [nr_namespaces];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_NAMESPACES = 16,
PERF_RECORD_MAX, /* non-ABI */
};
......
......@@ -31,3 +31,5 @@ config.mak.autogen
.config-detected
util/intel-pt-decoder/inat-tables.c
arch/*/include/generated/
pmu-events/pmu-events.c
pmu-events/jevents
......@@ -347,6 +347,9 @@ Enable weightened sampling. An additional weight is recorded per sample and can
displayed with the weight and local_weight sort keys. This currently works for TSX
abort events and some memory events in precise mode on modern Intel CPUs.
--namespaces::
Record events of type PERF_RECORD_NAMESPACES.
--transaction::
Record transaction flags for transaction related events.
......
......@@ -72,7 +72,8 @@ OPTIONS
--sort=::
Sort histogram entries by given key(s) - multiple keys can be specified
in CSV format. Following sort keys are available:
pid, comm, dso, symbol, parent, cpu, socket, srcline, weight, local_weight.
pid, comm, dso, symbol, parent, cpu, socket, srcline, weight,
local_weight, cgroup_id.
Each key has following meaning:
......@@ -92,6 +93,7 @@ OPTIONS
- weight: Event specific weight, e.g. memory latency or transaction
abort cost. This is the global weight.
- local_weight: Local weight version of the weight above.
- cgroup_id: ID derived from cgroup namespace device and inode numbers.
- transaction: Transaction abort flags.
- overhead: Overhead percentage of sample
- overhead_sys: Overhead percentage of sample running in system mode
......@@ -173,6 +175,9 @@ OPTIONS
By default, every sort keys not specified in -F will be appended
automatically.
If the keys starts with a prefix '+', then it will append the specified
field(s) to the default field order. For example: perf report -F +period,sample.
-p::
--parent=<regex>::
A regex filter to identify parent. The parent is a caller of this
......
......@@ -132,6 +132,10 @@ OPTIONS for 'perf sched timehist'
--migrations::
Show migration events.
-n::
--next::
Show next task.
-I::
--idle-hist::
Show idle-related events only.
......
......@@ -248,6 +248,9 @@ OPTIONS
--show-mmap-events
Display mmap related events (e.g. MMAP, MMAP2).
--show-namespace-events
Display namespace events i.e. events of type PERF_RECORD_NAMESPACES.
--show-switch-events
Display context switch events i.e. events of type PERF_RECORD_SWITCH or
PERF_RECORD_SWITCH_CPU_WIDE.
......
......@@ -10,6 +10,7 @@
#include "symbol.h"
#include "map.h"
#include "probe-event.h"
#include "probe-file.h"
#ifdef HAVE_LIBELF_SUPPORT
bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
......@@ -79,13 +80,18 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
* However, if the user specifies an offset, we fall back to using the
* GEP since all userspace applications (objdump/readelf) show function
* disassembly with offsets from the GEP.
*
* In addition, we shouldn't specify an offset for kretprobes.
*/
if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) ||
!map || !sym)
if (pev->point.offset || !map || !sym)
return;
/* For kretprobes, add an offset only if the kernel supports it */
if (!pev->uprobes && pev->point.retprobe) {
#ifdef HAVE_LIBELF_SUPPORT
if (!kretprobe_offset_is_supported())
#endif
return;
}
lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS)
......
......@@ -393,6 +393,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
.comm = perf_event__process_comm,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.namespaces = perf_event__process_namespaces,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
......
......@@ -2334,7 +2334,7 @@ static int perf_c2c__hists_browse(struct hists *hists)
static void perf_c2c_display(struct perf_session *session)
{
if (c2c.use_stdio)
if (use_browser == 0)
perf_c2c__hists_fprintf(stdout, session);
else
perf_c2c__hists_browse(&c2c.hists.hists);
......@@ -2536,7 +2536,7 @@ static int perf_c2c__report(int argc, const char **argv)
OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"),
#endif
OPT_BOOLEAN(0, "stats", &c2c.stats_only,
"Use the stdio interface"),
"Display only statistic tables (implies --stdio)"),
OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full,
"Display full length of symbols"),
OPT_BOOLEAN(0, "no-source", &no_source,
......
......@@ -364,6 +364,7 @@ static struct perf_tool tool = {
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.lost = perf_event__process_lost,
.namespaces = perf_event__process_namespaces,
.ordered_events = true,
.ordering_requires_timestamps = true,
};
......
......@@ -333,6 +333,18 @@ static int perf_event__repipe_comm(struct perf_tool *tool,
return err;
}
static int perf_event__repipe_namespaces(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
int err = perf_event__process_namespaces(tool, event, sample, machine);
perf_event__repipe(tool, event, sample, machine);
return err;
}
static int perf_event__repipe_exit(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
......@@ -660,6 +672,7 @@ static int __cmd_inject(struct perf_inject *inject)
session->itrace_synth_opts = &inject->itrace_synth_opts;
inject->itrace_synth_opts.inject = true;
inject->tool.comm = perf_event__repipe_comm;
inject->tool.namespaces = perf_event__repipe_namespaces;
inject->tool.exit = perf_event__repipe_exit;
inject->tool.id_index = perf_event__repipe_id_index;
inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
......
......@@ -964,6 +964,7 @@ static struct perf_tool perf_kmem = {
.comm = perf_event__process_comm,
.mmap = perf_event__process_mmap,
.mmap2 = perf_event__process_mmap2,
.namespaces = perf_event__process_namespaces,
.ordered_events = true,
};
......
......@@ -1044,6 +1044,7 @@ static int read_events(struct perf_kvm_stat *kvm)
struct perf_tool eops = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
.namespaces = perf_event__process_namespaces,
.ordered_events = true,
};
struct perf_data_file file = {
......@@ -1348,6 +1349,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
kvm->tool.exit = perf_event__process_exit;
kvm->tool.fork = perf_event__process_fork;
kvm->tool.lost = process_lost_event;
kvm->tool.namespaces = perf_event__process_namespaces;
kvm->tool.ordered_events = true;
perf_tool__fill_defaults(&kvm->tool);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment