Commit e50e654b authored by Stewart Smith's avatar Stewart Smith Committed by Andrew Jeffery

Add --trace support (in blktrace format)

In an effort understand what PNOR requests come from the host, it'd be
good to be able to trace what requests come in and visualise them.
blktrace is some Linux infrastructure for tracing block device activity
all the way through the linux block layer, for which there is a variety
of existing tooling. These tools process the (typically) kernel produced
blktrace output. We can produce this same output programatically from
mboxd though.

This patch gives us the (option) to start mboxd in a mode where it will
write a blktrace file out, which can be fed into tools like blkparse(1)
or tools like iowatcher[1] to generate charts (and video).

A quirk of the blktrace format is that it's very geared towards a full
IO subsystem, so we can't directly map window operations (what we know
in mboxd) to specific IO ops (i.e. we don't get "firmware read one page
out of this window before closing it"). So, for each Window opening (or
reusing a cached one), we write THREE blktrace events: a Queue,
Dispatch, and Complete.

We can usk tools like blkparse to do everything from get a detailed list
of what windows were opened and for how long:

  0,0    0        1     0.000000000     0  Q   R 0 + 8 [(null)]
  0,0    0        2     0.000000000     0  D   R 0 + 8 [(null)]
  0,0    0        3     0.000182022     0  C   R 0 + 8 [0]
  0,0    0        4     0.042416351     0  Q   R 4144 + 2040 [(null)]
  0,0    0        5     0.042416351     0  D   R 4144 + 2040 [(null)]
  0,0    0        6     0.060802662     0  C   R 4144 + 2040 [0]
  0,0    0        7     0.084775813     0  Q   R 64 + 288 [(null)]
  0,0    0        8     0.084775813     0  D   R 64 + 288 [(null)]
  0,0    0        9     0.087835720     0  C   R 64 + 288 [0]
  0,0    0       10     1.429234244     0  Q   R 8488 + 2048 [(null)]

to getting a simple summary at the end of how many windows were opened
read and read/write:

CPU0 (0,0):
 Reads Queued:          90,   74,040KiB	 Writes Queued:           6,    2,664KiB
 Read Dispatches:       90,   74,040KiB	 Write Dispatches:        6,    2,664KiB
 Reads Requeued:         0		 Writes Requeued:         0
 Reads Completed:       90,   74,040KiB	 Writes Completed:        6,    2,664KiB
 Read Merges:            0,        0KiB	 Write Merges:            0,        0KiB
 Read depth:             1        	 Write depth:             1
 IO unplugs:             0        	 Timer unplugs:           0

If you change the window size to something tiny, like 4096 bytes, you
can get detailed paging information for hostboot at the expense of IPL
time.

Pretty graphs and animations:
https://www.flamingspork.com/blog/?p=4419

[1] iowatcher: http://masoncoding.com/iowatcher/

Change-Id: I5dd02b6bc616c441abf54d87a5d67c972cbaf228
Signed-off-by: default avatarStewart Smith <stewart@linux.ibm.com>
[AJ: Resolve merge conflicts, some tidy ups]
Signed-off-by: Andrew Jeffery's avatarAndrew Jeffery <andrew@aj.id.au>
parent 4519bb82
......@@ -54,7 +54,8 @@ const char* USAGE =
"\t\t\t\t(default: fill the reserved memory region)\n"
"\t-w | --window-size\tThe window size (power of 2) in MB\n"
"\t\t\t\t(default: 1MB)\n"
"\t-f | --flash\t\tSize of flash in [K|M] bytes\n\n";
"\t-f | --flash\t\tSize of flash in [K|M] bytes\n\n"
"\t-t | --trace\t\tFile to write trace data to (in blktrace format)\n\n";
static int dbus_init(struct mbox_context *context,
const struct transport_ops **ops)
......@@ -236,6 +237,7 @@ static bool parse_cmdline(int argc, char **argv,
{ "window-num", optional_argument, 0, 'n' },
{ "verbose", no_argument, 0, 'v' },
{ "syslog", no_argument, 0, 's' },
{ "trace", optional_argument, 0, 't' },
{ "version", no_argument, 0, 'V' },
{ "help", no_argument, 0, 'h' },
{ 0, 0, 0, 0 }
......@@ -309,6 +311,17 @@ static bool parse_cmdline(int argc, char **argv,
case 'V':
printf("%s V%s\n", THIS_NAME, PACKAGE_VERSION);
exit(0);
case 't':
context->blktracefd = open(argv[optind],
O_CREAT|O_TRUNC|O_WRONLY,
0666);
MSG_INFO("Recording blktrace output to %s\n",
argv[optind]);
if (context->blktracefd == -1) {
perror("Couldn't open blktrace file for writing");
exit(2);
}
break;
case 'h':
return false; /* This will print the usage message */
default:
......
......@@ -5,6 +5,7 @@
#define MBOX_H
#include <assert.h>
#include <linux/blktrace_api.h>
#include <mtd/mtd-abi.h>
#include <systemd/sd-bus.h>
#include <poll.h>
......@@ -101,6 +102,11 @@ struct mbox_context {
uint32_t mem_size;
/* LPC Bus Base Address (bytes) */
uint32_t lpc_base;
/* Tracing */
int blktracefd;
struct blk_io_trace trace;
int64_t blktrace_start;
};
#endif /* MBOX_H */
......@@ -5,6 +5,7 @@
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include "backend.h"
#include "common.h"
......@@ -13,6 +14,7 @@
#include "protocol.h"
#include "windows.h"
#define BLOCK_SIZE_SHIFT_V1 12 /* 4K */
static inline uint8_t protocol_get_bmc_event_mask(struct mbox_context *context)
......@@ -155,6 +157,108 @@ static inline uint16_t get_lpc_addr_shifted(struct mbox_context *context)
return lpc_addr >> context->backend.block_size_shift;
}
static inline int64_t blktrace_gettime(void)
{
struct timespec ts;
int64_t n;
clock_gettime(CLOCK_REALTIME, &ts);
n = (int64_t)(ts.tv_sec) * (int64_t)1000000000 + (int64_t)(ts.tv_nsec);
return n;
}
static void blktrace_flush_start(struct mbox_context *context)
{
struct blk_io_trace *trace = &context->trace;
struct timespec now;
if (!context->blktracefd)
return;
if (!context->blktrace_start) {
clock_gettime(CLOCK_REALTIME, &now);
context->blktrace_start = blktrace_gettime();
}
trace->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
trace->sequence++;
trace->time = blktrace_gettime() - context->blktrace_start;
trace->sector = context->current->flash_offset / 512;
trace->bytes = context->current->size;
if (context->current_is_write)
trace->action = BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_WRITE);
else
trace->action = BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_READ);
trace->pid = 0;
trace->device = 0;
trace->cpu = 0;
trace->error = 0;
trace->pdu_len = 0;
write(context->blktracefd, &trace, sizeof(trace));
trace->sequence++;
trace->time = blktrace_gettime() - context->blktrace_start;
trace->action &= ~BLK_TA_QUEUE;
trace->action |= BLK_TA_ISSUE;
write(context->blktracefd, &trace, sizeof(trace));}
static void blktrace_flush_done(struct mbox_context *context)
{
struct blk_io_trace *trace = &context->trace;
if (!context->blktracefd)
return;
trace->sequence++;
trace->time = blktrace_gettime() - context->blktrace_start;
trace->action &= ~BLK_TA_ISSUE;
trace->action |= BLK_TA_COMPLETE;
write(context->blktracefd, &trace, sizeof(trace));
}
static void blktrace_window_start(struct mbox_context *context)
{
struct blk_io_trace *trace = &context->trace;
if (!context->blktracefd)
return;
if (!context->blktrace_start)
context->blktrace_start = blktrace_gettime();
trace->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
trace->sequence++;
trace->time = blktrace_gettime() - context->blktrace_start;
trace->action = BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_READ);
trace->pid = 0;
trace->device = 0;
trace->cpu = 0;
trace->error = 0;
trace->pdu_len = 0;
}
static void blktrace_window_done(struct mbox_context *context)
{
struct blk_io_trace *trace = &context->trace;
if (!context->blktracefd)
return;
trace->sector = context->current->flash_offset / 512;
trace->bytes = context->current->size;
write(context->blktracefd, &trace, sizeof(trace));
trace->sequence++;
trace->action &= ~BLK_TA_QUEUE;
trace->action |= BLK_TA_ISSUE;
write(context->blktracefd, &trace, sizeof(trace));
trace->sequence++;
trace->time = blktrace_gettime() - context->blktrace_start;
trace->action &= ~BLK_TA_ISSUE;
trace->action |= BLK_TA_COMPLETE;
write(context->blktracefd, &trace, sizeof(trace));
}
static int protocol_v1_create_window(struct mbox_context *context,
struct protocol_create_window *io)
{
......@@ -180,7 +284,9 @@ static int protocol_v1_create_window(struct mbox_context *context,
* write_flush() to make sure we pick the right one.
*/
if (context->current_is_write) {
blktrace_flush_start(context);
rc = context->protocol->flush(context, NULL);
blktrace_flush_done(context);
if (rc < 0) {
MSG_ERR("Couldn't Flush Write Window\n");
return rc;
......@@ -192,6 +298,7 @@ static int protocol_v1_create_window(struct mbox_context *context,
/* Offset the host has requested */
MSG_INFO("Host requested flash @ 0x%.8x\n", offset);
/* Check if we have an existing window */
blktrace_window_start(context);
context->current = windows_search(context, offset,
context->version == API_VERSION_1);
......@@ -206,6 +313,7 @@ static int protocol_v1_create_window(struct mbox_context *context,
return rc;
}
}
blktrace_window_done(context);
context->current_is_write = !io->req.ro;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment