{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T14:45:17Z","timestamp":1776782717102,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,4,4]],"date-time":"2019-04-04T00:00:00Z","timestamp":1554336000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","award":["SHF-1408911"],"award-info":[{"award-number":["SHF-1408911"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"SRC Center for Research on Intelligent Storage and Processing-in-memory (CRISP)"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,4,4]]},"DOI":"10.1145\/3297858.3304014","type":"proceedings-article","created":{"date-parts":[[2019,4,4]],"date-time":"2019-04-04T18:38:43Z","timestamp":1554403123000},"page":"807-820","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":152,"title":["TANGRAM"],"prefix":"10.1145","author":[{"given":"Mingyu","family":"Gao","sequence":"first","affiliation":[{"name":"Stanford University &amp; Tsinghua University, Stanford, CA, USA"}]},{"given":"Xuan","family":"Yang","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, CA, USA"}]},{"given":"Jing","family":"Pu","sequence":"additional","affiliation":[{"name":"Stanford University &amp; Google, Inc., Stanford, CA, USA"}]},{"given":"Mark","family":"Horowitz","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, CA, USA"}]},{"given":"Christos","family":"Kozyrakis","sequence":"additional","affiliation":[{"name":"Stanford University &amp; Google, Inc., Stanford, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2019,4,4]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.11"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.5555\/3195638.3195664"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2004.21"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541967"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.40"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001177"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2016.2616357"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.13"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3124552"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750389"},{"key":"e_1_3_2_1_12_1","volume-title":"Neuflow: A Runtime Reconfigurable Dataflow Processor for Vision. In 2011 IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW) . 109--116","author":"Farabet Cl\u00e9ment","year":"2011","unstructured":"Cl\u00e9ment Farabet, Berin Martini, Benoit Corda, Polina Akselrod, Eugenio Culurciello, and Yann LeCun. 2011. Neuflow: A Runtime Reconfigurable Dataflow Processor for Vision. In 2011 IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW) . 109--116."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00012"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037702"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.30"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1555754.1555779"},{"key":"e_1_3_2_1_17_1","volume-title":"Deep Residual Learning for Image Recognition. In 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 770--778","author":"He Kaiming","year":"2016","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2016. Deep Residual Learning for Image Recognition. In 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 770--778."},{"key":"e_1_3_2_1_18_1","volume-title":"Densely Connected Convolutional Networks. In 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 4700--4708","author":"Huang Gao","unstructured":"Gao Huang, Zhuang Liu, Laurens van der Maaten, and Kilian Q. Weinberger. 2017. Densely Connected Convolutional Networks. In 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 4700--4708."},{"key":"e_1_3_2_1_19_1","volume-title":"Exploring Hidden Dimensions in Parallelizing Convolutional Neural Networks. In 35th International Conference on Machine Learning (ICML) .","author":"Jia Zhihao","year":"2018","unstructured":"Zhihao Jia, Sina Lin, Charles R. Qi, and Alex Aiken. 2018. Exploring Hidden Dimensions in Parallelizing Convolutional Neural Networks. In 35th International Conference on Machine Learning (ICML) ."},{"key":"e_1_3_2_1_20_1","volume-title":"Beyond Data and Model Parallelism for Deep Neural Networks. In 2nd Conference on Systems and Machine Learning (SysML) .","author":"Jia Zhihao","year":"2019","unstructured":"Zhihao Jia, Matei Zaharia, and Alex Aiken. 2019. Beyond Data and Model Parallelism for Deep Neural Networks. In 2nd Conference on Systems and Machine Learning (SysML) ."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.41"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.5555\/2999134.2999257"},{"key":"e_1_3_2_1_24_1","volume-title":"Nature","volume":"521","author":"LeCun Yann","year":"2015","unstructured":"Yann LeCun, Yoshua Bengio, and Geoffrey Hinton. 2015. Deep Learning . Nature , Vol. 521, 7553 (2015), 436--444."},{"key":"e_1_3_2_1_25_1","volume-title":"26th International Conference on Field Programmable Logic and Applications (FPL). 1--9.","author":"Li Huimin","year":"2016","unstructured":"Huimin Li, Xitian Fan, Li Jiao, Wei Cao, Xuegong Zhou, and Lingli Wang. 2016. A High Performance FPGA-based Accelerator for Large-Scale Convolutional Neural Networks. In 26th International Conference on Field Programmable Logic and Applications (FPL). 1--9."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669172"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.42"},{"key":"e_1_3_2_1_28_1","volume-title":"FlexFlow: A Flexible Dataflow Accelerator Architecture for Convolutional Neural Networks. In 23rd International Symposium on High Performance Computer Architecture (HPCA). 553--564","author":"Lu Wenyan","year":"2017","unstructured":"Wenyan Lu, Guihai Yan, Jiajun Li, Shijun Gong, Yinhe Han, and Xiaowei Li. 2017. FlexFlow: A Flexible Dataflow Accelerator Architecture for Convolutional Neural Networks. In 23rd International Symposium on High Performance Computer Architecture (HPCA). 553--564."},{"key":"e_1_3_2_1_29_1","unstructured":"Micron Technology Inc. 2007. TN-41-01: Calculating Memory System Power for DDR3 . https:\/\/www.micron.com\/support\/tools-and-utilities\/power-calc ."},{"key":"e_1_3_2_1_30_1","unstructured":"Micron Technology Inc. 2014. Mobile LPDDR4 SDRAM: 272b: x64 Mobile LPDDR4 SDRAM Features ."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080254"},{"key":"e_1_3_2_1_32_1","volume-title":"Memory-Centric Accelerator Design for Convolutional Neural Networks. In 31st International Conference on Computer Design (ICCD) . 13--19","author":"Peemen Maurice","year":"2013","unstructured":"Maurice Peemen, Arnaud AA Setio, Bart Mesman, and Henk Corporaal. 2013. Memory-Centric Accelerator Design for Convolutional Neural Networks. In 31st International Conference on Computer Design (ICCD) . 13--19."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.32"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/L-CA.2011.4"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485963"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.12"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.5555\/3195638.3195659"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080221"},{"key":"e_1_3_2_1_39_1","volume-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition . arXiv preprint arXiv:1409.1556 (Sept","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition . arXiv preprint arXiv:1409.1556 (Sept 2014)."},{"key":"e_1_3_2_1_40_1","volume-title":"PipeLayer: A Pipelined ReRAM-Based Accelerator for Deep Learning. In 23rd International Symposium on High Performance Computer Architecture (HPCA). 541--552","author":"Song Linghao","year":"2017","unstructured":"Linghao Song, Xuehai Qian, Hai Li, and Yiran Chen. 2017. PipeLayer: A Pipelined ReRAM-Based Accelerator for Deep Learning. In 23rd International Symposium on High Performance Computer Architecture (HPCA). 541--552."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.5555\/2969033.2969173"},{"key":"e_1_3_2_1_42_1","volume-title":"Going Deeper with Convolutions. In 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 1--9.","author":"Szegedy Christian","year":"2015","unstructured":"Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, and Andrew Rabinovich. 2015. Going Deeper with Convolutions. In 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 1--9."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080214"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080244"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_2_1_46_1","volume-title":"Nikhil Bhagdikar, Stephen Richardson, Shahar Kvatinsky, Jonathan Ragan-Kelley, Ardavan Pedram, and Mark Horowitz.","author":"Yang Xuan","year":"2016","unstructured":"Xuan Yang, Jing Pu, Blaine Burton Rister, Nikhil Bhagdikar, Stephen Richardson, Shahar Kvatinsky, Jonathan Ragan-Kelley, Ardavan Pedram, and Mark Horowitz. 2016. A Systematic Approach to Blocking Convolutional Neural Networks . arXiv preprint arXiv:1606.04209 (Jun 2016)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080215"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/2684746.2689060"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.5555\/3195638.3195662"}],"event":{"name":"ASPLOS '19: Architectural Support for Programming Languages and Operating Systems","location":"Providence RI USA","acronym":"ASPLOS '19","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the Twenty-Fourth International Conference on Architectural Support for Programming Languages and Operating Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3297858.3304014","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3297858.3304014","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3297858.3304014","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:53:14Z","timestamp":1750204394000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3297858.3304014"}},"subtitle":["Optimized Coarse-Grained Dataflow for Scalable NN Accelerators"],"short-title":[],"issued":{"date-parts":[[2019,4,4]]},"references-count":49,"alternative-id":["10.1145\/3297858.3304014","10.1145\/3297858"],"URL":"https:\/\/doi.org\/10.1145\/3297858.3304014","relation":{},"subject":[],"published":{"date-parts":[[2019,4,4]]},"assertion":[{"value":"2019-04-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}