{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:38:42Z","timestamp":1740123522641,"version":"3.37.3"},"reference-count":34,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2021,5,25]],"date-time":"2021-05-25T00:00:00Z","timestamp":1621900800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,5,25]],"date-time":"2021-05-25T00:00:00Z","timestamp":1621900800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2022,1]]},"DOI":"10.1007\/s11227-021-03893-3","type":"journal-article","created":{"date-parts":[[2021,5,25]],"date-time":"2021-05-25T14:02:57Z","timestamp":1621951377000},"page":"238-257","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Mapping and virtual neuron assignment algorithms for MAERI accelerator"],"prefix":"10.1007","volume":"78","author":[{"given":"Midia","family":"Reshadi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8147-5221","authenticated-orcid":false,"given":"Seyedeh Yasaman Hosseini","family":"Mirmahaleh","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,5,25]]},"reference":[{"key":"3893_CR1","doi-asserted-by":"crossref","unstructured":"Andri R, Cavigelli L, Rossi D, Benini L (2018) Hyperdrive: a systolically scalable binary-weight CNN inference engine for mW IoT end-nodes. In: 2018 IEEE Computer Society Annual Symposium on VLSI (ISVLSI). IEEE, pp 509\u2013515","DOI":"10.1109\/ISVLSI.2018.00099"},{"issue":"12","key":"3893_CR2","doi-asserted-by":"publisher","first-page":"2295","DOI":"10.1109\/JPROC.2017.2761740","volume":"105","author":"V Sze","year":"2017","unstructured":"Sze V, Chen YH, Yang TJ, Emer JS (2017) Efficient processing of deep neural networks: a tutorial and survey. Proc IEEE 105(12):2295\u20132329","journal-title":"Proc IEEE"},{"key":"3893_CR3","doi-asserted-by":"crossref","unstructured":"Qin E, Samajdar A, Kwon H, Nadella V, Srinivasan S, Das D, Kaul B, Krishna T (2020) Sigma: a sparse and irregular gemm accelerator with flexible interconnects for dnn training. In: 2020 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE, pp 58\u201370","DOI":"10.1109\/HPCA47549.2020.00015"},{"key":"3893_CR4","doi-asserted-by":"crossref","unstructured":"Ascia G, Catania V, Jose J, Monteleone S, Palesi M, Patti D (2020) Improving inference latency and energy of network-on-chip based convolutional neural networks through weights compression. In: 2020 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW). IEEE, pp 54\u201363","DOI":"10.1109\/IPDPSW50202.2020.00017"},{"issue":"2","key":"3893_CR5","doi-asserted-by":"publisher","first-page":"461","DOI":"10.1145\/3296957.3173176","volume":"53","author":"H Kwon","year":"2018","unstructured":"Kwon H, Samajdar A, Krishna T (2018) Maeri: enabling flexible dataflow mapping over dnn accelerators via reconfigurable interconnects. ACM SIGPLAN Notices 53(2):461\u2013475","journal-title":"ACM SIGPLAN Notices"},{"issue":"144","key":"3893_CR6","doi-asserted-by":"publisher","first-page":"80","DOI":"10.1016\/j.jpdc.2020.04.011","volume":"1","author":"SY Mirmahaleh","year":"2020","unstructured":"Mirmahaleh SY, Reshadi M, Bagherzadeh N (2020) Flow mapping on mesh-based deep learning accelerator. J Parallel and Distrib Comput 1(144):80\u201397","journal-title":"J Parallel and Distrib Comput"},{"key":"3893_CR7","doi-asserted-by":"crossref","unstructured":"Chen KC, Ebrahimi M, Wang TY, Yang YC. NoC-based DNN accelerator (2019) A future design paradigm. In: Proceedings of the 13th IEEE\/ACM International Symposium on Networks-on-Chip. pp 1\u20138","DOI":"10.1145\/3313231.3352376"},{"key":"3893_CR8","doi-asserted-by":"crossref","unstructured":"Chen KC, Wang TY, Yang YC (2019) Cycle-accurate noc-based convolutional neural network simulator. In: Proceedings of the International Conference on Omni-Layer Intelligent Systems. pp 199\u2013204","DOI":"10.1145\/3312614.3312655"},{"key":"3893_CR9","doi-asserted-by":"publisher","first-page":"103145","DOI":"10.1016\/j.micpro.2020.103145","volume":"3","author":"KC Chen","year":"2020","unstructured":"Chen KC, Ebrahimi M, Wang TY, Yang YC, Liao YH (2020) A NoC-based simulator for design and evaluation of deep neural networks. Microprocess Microsyst 3:103145","journal-title":"Microprocess Microsyst"},{"key":"3893_CR10","unstructured":"Samajdar A, Zhu Y, Whatmough P, Mattina M, Krishna T (2018) Scale-sim: systolic cnn accelerator simulator. arXiv preprint arXiv:1811.02883"},{"key":"3893_CR11","doi-asserted-by":"publisher","unstructured":"Lahdhiri H, Palesi M, Monteleone S, Patti D, Ascia G, Lorandel J, Bourdel E, Catania V (2020) DNNZip: selective layers compression technique in deep neural network accelerators. In: 2020 23rd Euromicro Conference on Digital System Design (DSD). pp 526\u2013533. https:\/\/doi.org\/10.1109\/DSD51259.2020.00088","DOI":"10.1109\/DSD51259.2020.00088"},{"key":"3893_CR12","unstructured":"Kwon H, Pellauer M, Krishna T (2018) MAESTRO: an open-source infrastructure for modeling dataflows within deep learning accelerators. arXiv preprint arXiv:1805.02566v1"},{"key":"3893_CR13","doi-asserted-by":"crossref","unstructured":"Zhao Z, Kwon H, Kuhar S, Sheng W, Mao Z, Krishna T (2019) mRNA: enabling efficient mapping space exploration for a reconfiguration neural accelerator. In: 2019 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS). IEEE, pp 282\u2013292","DOI":"10.1109\/ISPASS.2019.00040"},{"issue":"1","key":"3893_CR14","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1109\/JSSC.2016.2616357","volume":"52","author":"YH Chen","year":"2016","unstructured":"Chen YH, Krishna T, Emer JS, Sze V (2016) Eyeriss: an energy-efficient reconfigurable accelerator for deep convolutional neural networks. IEEE J Solid-State Circuits 52(1):127\u2013138","journal-title":"IEEE J Solid-State Circuits"},{"key":"3893_CR15","unstructured":"Chen YH, Emer J, Sze V (2018) Eyeriss v2: a flexible and high-performance accelerator for emerging deep neural networks. arXiv preprint arxiv:1807.07928"},{"key":"3893_CR16","doi-asserted-by":"crossref","unstructured":"Kwon H, Samajdar A, Krishna T (2017) Rethinking nocs for spatial neural network accelerators. In: 2017 Eleventh IEEE\/ACM International Symposium on Networks-on-Chip (NOCS). IEEE","DOI":"10.1145\/3130218.3130230"},{"key":"3893_CR17","doi-asserted-by":"crossref","unstructured":"Du Z, Fasthuber R, Chen T, Ienne P, Li L, Luo T, Feng X, Chen Y, Temam O (2015) ShiDianNao: Shifting vision processing closer to the sensor. In: Proceedings of the 42nd Annual International Symposium on Computer Architecture. pp 92\u2013104","DOI":"10.1145\/2749469.2750389"},{"key":"3893_CR18","doi-asserted-by":"crossref","unstructured":"Jouppi NP, Young C, Patil N, Patterson D, Agrawal G, Bajwa R, Bates S, Bhatia S, Boden N, Borchers A, Boyle R (2017) In-datacenter performance analysis of a tensor processing unit. In: Proceedings of the 44th Annual International Symposium on Computer Architecture. pp 1\u201312","DOI":"10.1145\/3079856.3080246"},{"key":"3893_CR19","unstructured":"Abadi M, Agarwal A, Barham P, Brevdo E, Chen Z, Citro C, Corrado GS, Davis A, Dean J, Devin M, Ghemawat S (2016) Tensorflow: large-scale machine learning on heterogeneous distributed systems. arXiv preprint arXiv:1603.04467"},{"key":"3893_CR20","doi-asserted-by":"crossref","unstructured":"Kwon H, Chatarasi P, Pellauer M, Parashar A, Sarkar V, Krishna T (2019) Understanding reuse, performance, and hardware cost of dnn dataflow: a data-centric approach. In: Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture. pp 754\u2013768","DOI":"10.1145\/3352460.3358252"},{"key":"3893_CR21","doi-asserted-by":"crossref","unstructured":"Lu W, Yan G, Li J, Gong S, Han Y, Li X (2017) Flexflow: A flexible dataflow accelerator architecture for convolutional neural networks. In: 2017 IEEE International Symposium on High Performance Computer Architecture (HPCA). IEEE, pp 553\u2013564","DOI":"10.1109\/HPCA.2017.29"},{"issue":"6","key":"3893_CR22","doi-asserted-by":"publisher","first-page":"495","DOI":"10.1145\/2499370.2462163","volume":"48","author":"T Nowatzki","year":"2013","unstructured":"Nowatzki T, Sartin-Tarm M, De Carli L, Sankaralingam K, Estan C, Robatmili B (2013) A general constraint-centric scheduling framework for spatial architectures. ACM SIGPLAN Notices 48(6):495\u2013506","journal-title":"ACM SIGPLAN Notices"},{"key":"3893_CR23","doi-asserted-by":"crossref","unstructured":"Nowatzki T, Gangadhar V, Ardalani N, Sankaralingam K (2017) Stream-dataflow acceleration. In: 2017 ACM\/IEEE 44th Annual International Symposium on Computer Architecture (ISCA). IEEE, pp 416\u2013429","DOI":"10.1145\/3079856.3080255"},{"key":"3893_CR24","unstructured":"Tang T, Xie Y (2018) Mlpat: a power area timing modeling framework for machine learning accelerators. In: Proc. DOSSA Workshop. pp 1\u20133"},{"key":"3893_CR25","doi-asserted-by":"crossref","unstructured":"Gao M, Pu J, Yang X, Horowitz M, Kozyrakis C (2017) Tetris: scalable and efficient neural network acceleration with 3d memory. In: Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems. pp 751\u2013764","DOI":"10.1145\/3093315.3037702"},{"key":"3893_CR26","doi-asserted-by":"crossref","unstructured":"Firuzan A, Modarressi M, Daneshtalab M, Reshadi M (2018) Reconfigurable network-on-chip for 3D neural network accelerators. In: 2018 Twelfth IEEE\/ACM International Symposium on Networks-on-Chip (NOCS). IEEE, pp 1\u20138","DOI":"10.1109\/NOCS.2018.8512170"},{"issue":"3","key":"3893_CR27","first-page":"513","volume":"36","author":"C Wang","year":"2016","unstructured":"Wang C, Gong L, Yu Q, Li X, Xie Y, Zhou X (2016) DLAU: a scalable deep learning accelerator unit on FPGA. IEEE Trans Comput Aided Des Integr Circuits Syst 36(3):513\u2013517","journal-title":"IEEE Trans Comput Aided Des Integr Circuits Syst"},{"key":"3893_CR28","doi-asserted-by":"crossref","unstructured":"Mirmahaleh SY, Reshadi M, Shabani H, Guo X, Bagherzadeh N (2019) Flow mapping and data distribution on mesh-based deep learning accelerator. In: Proceedings of the 13th IEEE\/ACM International Symposium on Networks-on-Chip. pp 1\u20138","DOI":"10.1145\/3313231.3352378"},{"issue":"1","key":"3893_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2953878","volume":"27","author":"V Catania","year":"2016","unstructured":"Catania V, Mineo A, Monteleone S, Palesi M, Patti D (2016) Cycle-accurate network on chip simulation with noxim. ACM Trans Model Comput Simula (TOMACS) 27(1):1\u201325","journal-title":"ACM Trans Model Comput Simula (TOMACS)"},{"issue":"1","key":"3893_CR30","doi-asserted-by":"publisher","first-page":"269","DOI":"10.1145\/2654822.2541967","volume":"42","author":"T Chen","year":"2014","unstructured":"Chen T, Du Z, Sun N, Wang J, Wu C, Chen Y, Temam O (2014) Diannao: a small-footprint high-throughput accelerator for ubiquitous machine-learning. ACM SIGARCH Comput Archit News 42(1):269\u2013284","journal-title":"ACM SIGARCH Comput Archit News"},{"key":"3893_CR31","doi-asserted-by":"crossref","unstructured":"Chen Y, Luo T, Liu S, Zhang S, He L, Wang J, Li L, Chen T, Xu Z, Sun N, Temam O (2014) Dadiannao: a machine-learning supercomputer. In: 2014 47th Annual IEEE\/ACM International Symposium on Microarchitecture. IEEE, pp 609\u2013622","DOI":"10.1109\/MICRO.2014.58"},{"issue":"1","key":"3893_CR32","doi-asserted-by":"publisher","first-page":"369","DOI":"10.1145\/2786763.2694358","volume":"43","author":"D Liu","year":"2015","unstructured":"Liu D, Chen T, Liu S, Zhou J, Zhou S, Teman O, Feng X, Zhou X, Chen Y (2015) Pudiannao: a polyvalent machine learning accelerator. ACM SIGARCH Comput Archit News 43(1):369\u2013381","journal-title":"ACM SIGARCH Comput Archit News"},{"key":"3893_CR33","unstructured":"Chatarasi P, Kwon H, Raina N, Malik S, Haridas V, Parashar A, Pellauer M, Krishna T, Sarkar V (2020) Marvel: a data-centric compiler for DNN operators on spatial accelerators. arXiv preprint arXiv:2002.07752"},{"key":"3893_CR34","unstructured":"https:\/\/github.com\/georgia-tech-synergy-lab\/mRNA"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-021-03893-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-021-03893-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-021-03893-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,4]],"date-time":"2022-01-04T12:14:21Z","timestamp":1641298461000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-021-03893-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,25]]},"references-count":34,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2022,1]]}},"alternative-id":["3893"],"URL":"https:\/\/doi.org\/10.1007\/s11227-021-03893-3","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"type":"print","value":"0920-8542"},{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2021,5,25]]},"assertion":[{"value":"13 May 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 May 2021","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}