Modern multicore chips show complex behavior with respect to performance and
power. Starting with the Intel Sandy Bridge processor, it has become possible
to directly measure the power dissipation of a CPU chip and correlate this data
with the performance properties of the running code. Going beyond a simple
bottleneck analysis, we employ the recently published Execution-Cache-Memory
(ECM) model to describe the single- and multi-core performance of streaming
kernels. The model refines the well-known roofline model, since it can predict
the scaling and the saturation behavior of bandwidth-limited loop kernels on a
multicore chip. The saturation point is especially relevant for considerations
of energy consumption. From power dissipation measurements of benchmark
programs with vastly different requirements to the hardware, we derive a
simple, phenomenological power model for the Sandy Bridge processor. Together
with the ECM model, we are able to explain many peculiarities in the
performance and power behavior of multicore processors, and derive guidelines
for energy-efficient execution of parallel programs. Finally, we show that the
ECM and power models can be successfully used to describe the scaling and power
behavior of a lattice-Boltzmann flow solver code.
Description
Exploring performance and power properties of modern multicore chips via
simple machine models
%0 Generic
%1 hager2012exploring
%A Hager, Georg
%A Treibig, Jan
%A Habich, Johannes
%A Wellein, Gerhard
%D 2012
%K papers
%R 10.1002/cpe.3180
%T Exploring performance and power properties of modern multicore chips via
simple machine models
%U http://arxiv.org/abs/1208.2908
%X Modern multicore chips show complex behavior with respect to performance and
power. Starting with the Intel Sandy Bridge processor, it has become possible
to directly measure the power dissipation of a CPU chip and correlate this data
with the performance properties of the running code. Going beyond a simple
bottleneck analysis, we employ the recently published Execution-Cache-Memory
(ECM) model to describe the single- and multi-core performance of streaming
kernels. The model refines the well-known roofline model, since it can predict
the scaling and the saturation behavior of bandwidth-limited loop kernels on a
multicore chip. The saturation point is especially relevant for considerations
of energy consumption. From power dissipation measurements of benchmark
programs with vastly different requirements to the hardware, we derive a
simple, phenomenological power model for the Sandy Bridge processor. Together
with the ECM model, we are able to explain many peculiarities in the
performance and power behavior of multicore processors, and derive guidelines
for energy-efficient execution of parallel programs. Finally, we show that the
ECM and power models can be successfully used to describe the scaling and power
behavior of a lattice-Boltzmann flow solver code.
% Record exported from BibSonomy (see biburl). The arXiv identifier is kept in
% eprint/archiveprefix instead of being embedded in the note text, so styles
% that understand eprint fields can format and link it themselves.
@misc{hager2012exploring,
  author        = {Hager, Georg and Treibig, Jan and Habich, Johannes and Wellein, Gerhard},
  title         = {Exploring performance and power properties of modern multicore chips via simple machine models},
  year          = {2012},
  eprint        = {1208.2908},
  archiveprefix = {arXiv},
  doi           = {10.1002/cpe.3180},
  url           = {http://arxiv.org/abs/1208.2908},
  note          = {23 pages, 10 figures. Typos corrected, DOI added},
  abstract      = {Modern multicore chips show complex behavior with respect to performance and
power. Starting with the Intel Sandy Bridge processor, it has become possible
to directly measure the power dissipation of a CPU chip and correlate this data
with the performance properties of the running code. Going beyond a simple
bottleneck analysis, we employ the recently published Execution-Cache-Memory
(ECM) model to describe the single- and multi-core performance of streaming
kernels. The model refines the well-known roofline model, since it can predict
the scaling and the saturation behavior of bandwidth-limited loop kernels on a
multicore chip. The saturation point is especially relevant for considerations
of energy consumption. From power dissipation measurements of benchmark
programs with vastly different requirements to the hardware, we derive a
simple, phenomenological power model for the Sandy Bridge processor. Together
with the ECM model, we are able to explain many peculiarities in the
performance and power behavior of multicore processors, and derive guidelines
for energy-efficient execution of parallel programs. Finally, we show that the
ECM and power models can be successfully used to describe the scaling and power
behavior of a lattice-Boltzmann flow solver code.},
  description   = {Exploring performance and power properties of modern multicore chips via simple machine models},
  keywords      = {papers},
  added-at      = {2014-04-22T16:56:29.000+0200},
  timestamp     = {2014-04-22T16:56:29.000+0200},
  biburl        = {https://www.bibsonomy.org/bibtex/2217dfce8671166931437474267a51b6c/shailen.sobhee},
  interhash     = {aad9bf36986ffd392f2a0a1cee155819},
  intrahash     = {217dfce8671166931437474267a51b6c},
}