{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T05:10:08Z","timestamp":1771045808470,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":69,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,11,2]],"date-time":"2020-11-02T00:00:00Z","timestamp":1604275200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSF","award":["1909900"],"award-info":[{"award-number":["1909900"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,11,2]]},"DOI":"10.1145\/3400302.3415639","type":"proceedings-article","created":{"date-parts":[[2020,12,18]],"date-time":"2020-12-18T01:16:38Z","timestamp":1608254198000},"page":"1-9","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":89,"title":["GAMMA"],"prefix":"10.1145","author":[{"given":"Sheng-Chun","family":"Kao","sequence":"first","affiliation":[{"name":"Georgia Institute of Technology"}]},{"given":"Tushar","family":"Krishna","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology"}]}],"member":"320","published-online":{"date-parts":[[2020,12,17]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2017. NVDLA Deep Learning Accelerator. http:\/\/nvdla.org.  2017. NVDLA Deep Learning Accelerator. http:\/\/nvdla.org."},{"key":"e_1_3_2_1_2_1","unstructured":"2020. MAESTRO tool. http:\/\/maestro.ece.gatech.edu\/.  2020. MAESTRO tool. http:\/\/maestro.ece.gatech.edu\/."},{"key":"e_1_3_2_1_3_1","volume-title":"Tensorflow: A system for large-scale machine learning. In OSDI 16. 265--283.","author":"Mart\u00edn Abadi","year":"2016"},{"key":"e_1_3_2_1_4_1","unstructured":"Byung Hoon Ahn etal 2019. Reinforcement Learning and Adaptive Sampling for Optimized DNN Compilation. arXiv preprint arXiv:1905.12799 (2019).  Byung Hoon Ahn et al. 2019. Reinforcement Learning and Adaptive Sampling for Optimized DNN Compilation. arXiv preprint arXiv:1905.12799 (2019)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2742060.2743766"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Srimat Chakradhar et al. 2010. A dynamically configurable coprocessor for convolutional neural networks. In ISCA. 247--257.  Srimat Chakradhar et al. 2010. A dynamically configurable coprocessor for convolutional neural networks. In ISCA. 247--257.","DOI":"10.1145\/1816038.1815993"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2654822.2541967"},{"key":"e_1_3_2_1_8_1","unstructured":"Tianqi Chen et al. 2018. Learning to optimize tensor programs. In Advances in Neural Information Processing Systems. 3389--3400.  Tianqi Chen et al. 2018. Learning to optimize tensor programs. In Advances in Neural Information Processing Systems. 3389--3400."},{"key":"e_1_3_2_1_9_1","volume-title":"TVM: An automated end-to-end optimizing compiler for deep learning. In OSDI 18. 578--594.","author":"Tianqi Chen","year":"2018"},{"key":"e_1_3_2_1_10_1","first-page":"367","article-title":"Eyeriss: A spatial architecture for energy-efficient dataflow for convolutional neural networks","volume":"44","author":"Chen Yu-Hsin","year":"2016","journal-title":"ISCA"},{"key":"e_1_3_2_1_11_1","volume-title":"Yu-Hsin and others","author":"Chen","year":"2016"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/71.790600"},{"key":"e_1_3_2_1_13_1","unstructured":"Christopher C Cox. 2017. A Comparison of Active and Passive Portfolio Management. (2017).  Christopher C Cox. 2017. A Comparison of Active and Passive Portfolio Management. (2017)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3358198"},{"key":"e_1_3_2_1_15_1","unstructured":"Zidong Du et al. 2015. ShiDianNao: Shifting vision processing closer to the sensor. In ISCA.  Zidong Du et al. 2015. ShiDianNao: Shifting vision processing closer to the sensor. In ISCA."},{"key":"e_1_3_2_1_16_1","volume-title":"Cvpr 2011 Workshops. IEEE, 109--116","author":"Cl\u00e9ment"},{"key":"e_1_3_2_1_17_1","volume-title":"Tetris: Scalable and efficient neural network acceleration with 3d memory. In ASPLOS. 751--764.","author":"Mingyu Gao","year":"2017"},{"key":"e_1_3_2_1_18_1","volume-title":"Tangram: Optimized coarse-grained dataflow for scalable NN accelerators. In ASPLOS. 807--820.","author":"Mingyu Gao","year":"2019"},{"key":"e_1_3_2_1_19_1","unstructured":"Suyog Gupta et al. 2015. Deep learning with limited numerical precision. In ICML. 1737--1746.  Suyog Gupta et al. 2015. Deep learning with limited numerical precision. In ICML. 1737--1746."},{"key":"e_1_3_2_1_20_1","volume-title":"Towards a new evolutionary computation","author":"Hansen Nikolaus"},{"key":"e_1_3_2_1_21_1","unstructured":"Kaiming He et al. 2016. Deep residual learning for image recognition. In CVPR. 770--778.  Kaiming He et al. 2016. Deep residual learning for image recognition. In CVPR. 770--778."},{"key":"e_1_3_2_1_22_1","volume-title":"International Conference on PPSN3. Springer, 26--36","author":"Michael"},{"key":"e_1_3_2_1_23_1","volume-title":"Genetic algorithms. Scientific american 267, 1","author":"Holland John H","year":"1992"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/71.265940"},{"key":"e_1_3_2_1_25_1","unstructured":"Norman P Jouppi etal 2017. In-datacenter performance analysis of a tensor processing unit. In ISCA. IEEE 1--12.  Norman P Jouppi et al. 2017. In-datacenter performance analysis of a tensor processing unit. In ISCA. IEEE 1--12."},{"key":"e_1_3_2_1_26_1","volume-title":"ICNN","volume":"4","author":"James","year":"1942"},{"key":"e_1_3_2_1_27_1","unstructured":"Diederik P Kingma etal 2013. Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013).  Diederik P Kingma et al. 2013. Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)."},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the ACM on Programming Languages 1, OOPSLA","author":"Fredrik","year":"2017"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2627373.2627387"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3296957.3173176"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Hyoukjun Kwon et al. 2019. Understanding Reuse Performance and Hardware Cost of DNN Dataflow: A Data-Centric Approach. In MICRO. 754--768.  Hyoukjun Kwon et al. 2019. Understanding Reuse Performance and Hardware Cost of DNN Dataflow: A Data-Centric Approach. In MICRO. 754--768.","DOI":"10.1145\/3352460.3358252"},{"key":"e_1_3_2_1_32_1","volume-title":"Deepx: A software accelerator for low-power deep learning inference on mobile devices","author":"Lane Nicholas D","year":"2016"},{"key":"e_1_3_2_1_33_1","volume-title":"Flexflow: A flexible dataflow accelerator architecture for convolutional neural networks","author":"Wenyan Lu","year":"2017"},{"key":"e_1_3_2_1_34_1","unstructured":"Yufei Ma et al. 2017. Optimizing loop operation and dataflow in FPGA acceleration of deep convolutional neural networks. In FPGA'17. 45--54.  Yufei Ma et al. 2017. Optimizing loop operation and dataflow in FPGA acceleration of deep convolutional neural networks. In FPGA'17. 45--54."},{"key":"e_1_3_2_1_35_1","volume-title":"Timeloop: A systematic approach to dnn accelerator evaluation","author":"Angshuman Parashar","year":"2019"},{"key":"e_1_3_2_1_36_1","unstructured":"Seongwook Park et al. 2015. 4.6 A1. 93TOPS\/W scalable deep learning\/inference processor with tetra-parallel MIMD architecture for big-data applications. In 2015 ISSCC Digest of Technical Papers. IEEE 1--3.  Seongwook Park et al. 2015. 4.6 A1. 93TOPS\/W scalable deep learning\/inference processor with tetra-parallel MIMD architecture for big-data applications. In 2015 ISSCC Digest of Technical Papers. IEEE 1--3."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Maurice Peemen et al. 2013. Memory-centric accelerator design for convolutional neural networks. In 31st ICCD. IEEE 13--19.  Maurice Peemen et al. 2013. Memory-centric accelerator design for convolutional neural networks. In 31st ICCD. IEEE 13--19.","DOI":"10.1109\/ICCD.2013.6657019"},{"key":"e_1_3_2_1_38_1","volume-title":"Handbook of Optimization","author":"Price Kenneth V"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/2499370.2462176"},{"key":"e_1_3_2_1_40_1","unstructured":"J. Rapin and O. Teytaud. 2018. Nevergrad - A gradient-free optimization platform. https:\/\/GitHub.com\/FacebookResearch\/Nevergrad.  J. Rapin and O. Teytaud. 2018. Nevergrad - A gradient-free optimization platform. https:\/\/GitHub.com\/FacebookResearch\/Nevergrad."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Brandon Reagen et al. 2017. A case for efficient accelerator design space exploration via Bayesian optimization. In ISLPED. IEEE 1--6.  Brandon Reagen et al. 2017. A case for efficient accelerator design space exploration via Bayesian optimization. In ISLPED. IEEE 1--6.","DOI":"10.1109\/ISLPED.2017.8009208"},{"key":"e_1_3_2_1_42_1","volume-title":"Stuttgart, 1973","author":"Rechenberg Ingo","year":"1994"},{"key":"e_1_3_2_1_43_1","unstructured":"Tim Salimans et al. 2017. Evolution strategies as a scalable alternative to reinforcement learning. arXiv (2017).  Tim Salimans et al. 2017. Evolution strategies as a scalable alternative to reinforcement learning. arXiv (2017)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"Mark Sandler et al. 2018. Mobilenetv2: Inverted residuals and linear bottlenecks. In CVPR. 4510--4520.  Mark Sandler et al. 2018. Mobilenetv2: Inverted residuals and linear bottlenecks. In CVPR. 4510--4520.","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/2490301.2451150"},{"key":"e_1_3_2_1_46_1","volume-title":"Simba: Scaling deep-learning inference with multi-chip-module-based architecture. In MICRO. 14--27.","author":"Shao Yakun Sophia","year":"2019"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"Yongming Shen et al. 2017. Maximizing CNN accelerator efficiency through resource partitioning. In ISCA. IEEE 535--547.  Yongming Shen et al. 2017. Maximizing CNN accelerator efficiency through resource partitioning. In ISCA. IEEE 535--547.","DOI":"10.1145\/3140659.3080221"},{"key":"e_1_3_2_1_48_1","volume-title":"5th Heterogeneous Computing Workshop (HCW'96)","author":"Pankaj"},{"key":"e_1_3_2_1_49_1","unstructured":"Karen Simonyan et al. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014).  Karen Simonyan et al. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_50_1","unstructured":"Harmel Singh et al. 1996. Mapping and scheduling heterogeneous task graphs using genetic algorithms. In 5th IEEE heterogeneous computing workshop (HCW'96). 86--97.  Harmel Singh et al. 1996. Mapping and scheduling heterogeneous task graphs using genetic algorithms. In 5th IEEE heterogeneous computing workshop (HCW'96). 86--97."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","unstructured":"Linghao Song et al. 2019. HyPar: Towards hybrid parallelism for deep learning accelerator array. In HPCA. IEEE 56--68.  Linghao Song et al. 2019. HyPar: Towards hybrid parallelism for deep learning accelerator array. In HPCA. IEEE 56--68.","DOI":"10.1109\/HPCA.2019.00027"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"Mingcong Song et al. 2018. Towards efficient microarchitectural design for accelerating unsupervised gan-based deep learning. In HPCA. IEEE 66--77.  Mingcong Song et al. 2018. Towards efficient microarchitectural design for accelerating unsupervised gan-based deep learning. In HPCA. IEEE 66--77.","DOI":"10.1109\/HPCA.2018.00016"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"crossref","unstructured":"Michel Steuwer et al. 2017. Lift: a functional data-parallel IR for high-performance GPU code generation. In CGO. IEEE 74--85.  Michel Steuwer et al. 2017. Lift: a functional data-parallel IR for high-performance GPU code generation. In CGO. IEEE 74--85.","DOI":"10.1109\/CGO.2017.7863730"},{"key":"e_1_3_2_1_54_1","unstructured":"Arthur Stoutchinin et al. 2019. Optimally scheduling CNN convolutions for efficient memory access. arXiv preprint arXiv:1902.01492 (2019).  Arthur Stoutchinin et al. 2019. Optimally scheduling CNN convolutions for efficient memory access. arXiv preprint arXiv:1902.01492 (2019)."},{"key":"e_1_3_2_1_55_1","unstructured":"Felipe Petroski Such etal 2017. Deep neuroevolution: Genetic algorithms are a competitive alternative for training deep neural networks for reinforcement learning. arXiv (2017).  Felipe Petroski Such et al. 2017. Deep neuroevolution: Genetic algorithms are a competitive alternative for training deep neural networks for reinforcement learning. arXiv (2017)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"Naveen Suda et al. 2016. Throughput-optimized OpenCL-based FPGA accelerator for large-scale convolutional neural networks. In FPGA'16. 16--25.  Naveen Suda et al. 2016. Throughput-optimized OpenCL-based FPGA accelerator for large-scale convolutional neural networks. In FPGA'16. 16--25.","DOI":"10.1145\/2847263.2847276"},{"key":"e_1_3_2_1_57_1","unstructured":"Ilya Sutskever et al. 2014. Sequence to sequence learning with neural networks. In Advances in neural information processing systems. 3104--3112.  Ilya Sutskever et al. 2014. Sequence to sequence learning with neural networks. In Advances in neural information processing systems. 3104--3112."},{"key":"e_1_3_2_1_58_1","volume-title":"21st AAAI conference on artificial intelligence.","author":"Christian"},{"key":"e_1_3_2_1_59_1","volume-title":"Efficientnet: Rethinking model scaling for convolutional neural networks. arXiv preprint arXiv:1905.11946","author":"Mingxing Tan","year":"2019"},{"key":"e_1_3_2_1_60_1","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2820--2828","author":"Mingxing"},{"key":"e_1_3_2_1_61_1","unstructured":"Nicolas Vasilache et al. 2018. Tensor comprehensions: Framework-agnostic high-performance machine learning abstractions. arXiv preprint arXiv:1802.04730 (2018).  Nicolas Vasilache et al. 2018. Tensor comprehensions: Framework-agnostic high-performance machine learning abstractions. arXiv preprint arXiv:1802.04730 (2018)."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080244"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2019.2931584"},{"key":"e_1_3_2_1_64_1","volume-title":"Proc. Heterogeneous Computing Workshop. 72--85","author":"Lee"},{"key":"e_1_3_2_1_65_1","volume-title":"DLVM: A modern compiler infrastructure for deep learning systems. arXiv preprint arXiv:1711.03016","author":"Richard Wei","year":"2017"},{"key":"e_1_3_2_1_66_1","unstructured":"Xuechao Wei et al. 2017. Automated systolic array architecture synthesis for high throughput CNN inference on FPGAs. In DAC. 1--6.  Xuechao Wei et al. 2017. Automated systolic array architecture synthesis for high throughput CNN inference on FPGAs. In DAC. 1--6."},{"key":"e_1_3_2_1_67_1","volume-title":"Interstellar: Using Halide's Scheduling Language to Analyze DNN Accelerators. In ASPLOS. 369--383.","author":"Xuan Yang","year":"2020"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"crossref","unstructured":"Chen Zhang et al. 2015. Optimizing fpga-based accelerator design for deep convolutional neural networks. In FPGA'15. 161--170.  Chen Zhang et al. 2015. Optimizing fpga-based accelerator design for deep convolutional neural networks. In FPGA'15. 161--170.","DOI":"10.1145\/2684746.2689060"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"crossref","unstructured":"Chen Zhang et al. 2015. Optimizing fpga-based accelerator design for deep convolutional neural networks. In FPGA'15. 161--170.  Chen Zhang et al. 2015. Optimizing fpga-based accelerator design for deep convolutional neural networks. In FPGA'15. 161--170.","DOI":"10.1145\/2684746.2689060"}],"event":{"name":"ICCAD '20: IEEE\/ACM International Conference on Computer-Aided Design","location":"Virtual Event USA","acronym":"ICCAD '20","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE CAS","IEEE CEDA","IEEE CS"]},"container-title":["Proceedings of the 39th International Conference on Computer-Aided Design"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3400302.3415639","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3400302.3415639","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3400302.3415639","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:31:41Z","timestamp":1750195901000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3400302.3415639"}},"subtitle":["automating the HW mapping of DNN models on accelerators via genetic algorithm"],"short-title":[],"issued":{"date-parts":[[2020,11,2]]},"references-count":69,"alternative-id":["10.1145\/3400302.3415639","10.1145\/3400302"],"URL":"https:\/\/doi.org\/10.1145\/3400302.3415639","relation":{},"subject":[],"published":{"date-parts":[[2020,11,2]]},"assertion":[{"value":"2020-12-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}