The rapid pace and successful application of machine learning research and development has seen widespread deployment of deep convolutional neural networks (CNNs). Alongside these algorithmic efforts, the compute- and memory-intensive nature of CNNs has stimulated a large amount of work in the field of hardware acceleration for these networks. In this paper, we profile the memory requirements of CNNs in terms of both on-chip memory size and off-chip memory bandwidth, in order to understand the impact of the memory system on accelerator design. We show that there are fundamental tradeoffs between performance, bandwidth, and on-chip memory. Further, this paper explores how the wide variety of CNNs for different application domains each have fundamentally different characteristics. We show that bandwidth and memory requirements for different networks, and occasionally for different layers within a network, can each vary by multiple orders of magnitude. This makes designing fast and efficient hardware for all CNN applications difficult. To remedy this, we outline heuristic design points that attempt to optimize for select dataflow scenarios.
%0 Conference Paper
%1 8573527
%A Siu, K.
%A Stuart, D. M.
%A Mahmoud, M.
%A Moshovos, A.
%B 2018 IEEE International Symposium on Workload Characterization (IISWC)
%D 2018
%K order1 real-time
%P 111-121
%R 10.1109/IISWC.2018.8573527
%T Memory Requirements for Convolutional Neural Network Hardware Accelerators
%U https://ieeexplore.ieee.org/document/8573527
%X The rapid pace and successful application of machine learning research and development has seen widespread deployment of deep convolutional neural networks (CNNs). Alongside these algorithmic efforts, the compute- and memory-intensive nature of CNNs has stimulated a large amount of work in the field of hardware acceleration for these networks. In this paper, we profile the memory requirements of CNNs in terms of both on-chip memory size and off-chip memory bandwidth, in order to understand the impact of the memory system on accelerator design. We show that there are fundamental tradeoffs between performance, bandwidth, and on-chip memory. Further, this paper explores how the wide variety of CNNs for different application domains each have fundamentally different characteristics. We show that bandwidth and memory requirements for different networks, and occasionally for different layers within a network, can each vary by multiple orders of magnitude. This makes designing fast and efficient hardware for all CNN applications difficult. To remedy this, we outline heuristic design points that attempt to optimize for select dataflow scenarios.
@inproceedings{8573527,
  abstract    = {The rapid pace and successful application of machine learning research and development has seen widespread deployment of deep convolutional neural networks (CNNs). Alongside these algorithmic efforts, the compute- and memory-intensive nature of CNNs has stimulated a large amount of work in the field of hardware acceleration for these networks. In this paper, we profile the memory requirements of CNNs in terms of both on-chip memory size and off-chip memory bandwidth, in order to understand the impact of the memory system on accelerator design. We show that there are fundamental tradeoffs between performance, bandwidth, and on-chip memory. Further, this paper explores how the wide variety of CNNs for different application domains each have fundamentally different characteristics. We show that bandwidth and memory requirements for different networks, and occasionally for different layers within a network, can each vary by multiple orders of magnitude. This makes designing fast and efficient hardware for all CNN applications difficult. To remedy this, we outline heuristic design points that attempt to optimize for select dataflow scenarios.},
  added-at    = {2020-04-21T20:55:57.000+0200},
  author      = {Siu, K. and Stuart, D. M. and Mahmoud, M. and Moshovos, A.},
  biburl      = {https://www.bibsonomy.org/bibtex/2656d72f6fd27589a1c793a067e533144/sohnki},
  booktitle   = {2018 {IEEE} International Symposium on Workload Characterization ({IISWC})},
  description = {Memory Requirements for Convolutional Neural Network Hardware Accelerators - IEEE Conference Publication},
  doi         = {10.1109/IISWC.2018.8573527},
  interhash   = {f21dc90e7a928bb53c893e2d7dcbcf99},
  intrahash   = {656d72f6fd27589a1c793a067e533144},
  keywords    = {order1 real-time},
  month       = sep,
  pages       = {111--121},
  timestamp   = {2020-06-02T20:00:22.000+0200},
  title       = {Memory Requirements for {Convolutional Neural Network} Hardware Accelerators},
  url         = {https://ieeexplore.ieee.org/document/8573527},
  year        = {2018},
}