perf: Optimize perf_output_begin() -- lost_event case
author     Peter Zijlstra <[email protected]>
Thu, 31 Oct 2013 16:29:29 +0000 (17:29 +0100)
committer  Ingo Molnar <[email protected]>
Wed, 6 Nov 2013 11:34:21 +0000 (12:34 +0100)
Avoid touching the lost_event and sample_data cachelines twice. It's
not like we end up doing less work, but it might help to keep all
accesses to these cachelines in one place.

Due to code shuffle, this loses 4 bytes on x86_64-defconfig.
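
For illustration, the shape of the change: account for the extra record
size up front, but defer every write to lost_event and sample_data to the
single block where the record is emitted, so those cachelines are touched
in one place. A minimal stand-alone sketch of that pattern (hypothetical
names, not the kernel code itself):

    #include <stddef.h>

    struct lost_record {
            size_t header_size;
            long   id;
            long   lost;
    };

    /*
     * Sketch of the "after" shape: only the size is accounted for early;
     * the record itself is first written in the block below, next to
     * every other access to it.
     */
    static size_t reserve_and_emit(int have_lost, long id, long lost,
                                   struct lost_record *out)
    {
            size_t size = 0;

            if (have_lost)
                    size += sizeof(*out);   /* no write to *out yet */

            /* ... space reservation and unrelated work ... */

            if (have_lost) {
                    /* All writes to the record happen here, together. */
                    out->header_size = sizeof(*out);
                    out->id          = id;
                    out->lost        = lost;
            }

            return size;
    }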

Signed-off-by: Peter Zijlstra <[email protected]>
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Michael Neuling <[email protected]>
Cc: "Paul E. McKenney" <[email protected]>
Cc: [email protected]
Cc: Vince Weaver <[email protected]>
Cc: Victor Kaplansky <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Cc: Anton Blanchard <[email protected]>
Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
kernel/events/ring_buffer.c

index 6ed16ecfd0a38dc70681690674d73ebd5ff4584a..e4d70f33792f136ead7736bd102c3c0f51c82729 100644
@@ -106,7 +106,6 @@ int perf_output_begin(struct perf_output_handle *handle,
        struct ring_buffer *rb;
        unsigned long tail, offset, head;
        int have_lost;
-       struct perf_sample_data sample_data;
        struct {
                struct perf_event_header header;
                u64                      id;
@@ -132,10 +131,9 @@ int perf_output_begin(struct perf_output_handle *handle,
 
        have_lost = local_read(&rb->lost);
        if (unlikely(have_lost)) {
-               lost_event.header.size = sizeof(lost_event);
-               perf_event_header__init_id(&lost_event.header, &sample_data,
-                                          event);
-               size += lost_event.header.size;
+               size += sizeof(lost_event);
+               if (event->attr.sample_id_all)
+                       size += event->id_header_size;
        }
 
        perf_output_get_handle(handle);
@@ -169,11 +167,16 @@ int perf_output_begin(struct perf_output_handle *handle,
        handle->size = (PAGE_SIZE << page_order(rb)) - handle->size;
 
        if (unlikely(have_lost)) {
+               struct perf_sample_data sample_data;
+
+               lost_event.header.size = sizeof(lost_event);
                lost_event.header.type = PERF_RECORD_LOST;
                lost_event.header.misc = 0;
                lost_event.id          = event->id;
                lost_event.lost        = local_xchg(&rb->lost, 0);
 
+               perf_event_header__init_id(&lost_event.header,
+                                          &sample_data, event);
                perf_output_put(handle, lost_event);
                perf_event__output_id_sample(event, handle, &sample_data);
        }