{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T17:13:47Z","timestamp":1774631627705,"version":"3.50.1"},"reference-count":61,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2018,2,1]],"date-time":"2018-02-01T00:00:00Z","timestamp":1517443200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"name":"Swiss National Science Foundation MicroLearn: Micropower Deep Learning armasuisse Science and Technology","award":["162524"],"award-info":[{"award-number":["162524"]}]},{"name":"ERC MultiTherman project","award":["ERC-AdG-291125"],"award-info":[{"award-number":["ERC-AdG-291125"]}]},{"name":"European Unions Horizon 2020 research and innovation programme OPRECOMP","award":["732631"],"award-info":[{"award-number":["732631"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2018,2,1]]},"DOI":"10.1109\/tpds.2017.2752706","type":"journal-article","created":{"date-parts":[[2017,9,15]],"date-time":"2017-09-15T18:40:35Z","timestamp":1505500835000},"page":"420-434","source":"Crossref","is-referenced-by-count":68,"title":["Neurostream: Scalable and Energy Efficient Deep Learning with Smart Memory Cubes"],"prefix":"10.1109","volume":"29","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4934-0332","authenticated-orcid":false,"given":"Erfan","family":"Azarkhish","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0651-5393","authenticated-orcid":false,"given":"Davide","family":"Rossi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3852-4662","authenticated-orcid":false,"given":"Igor","family":"Loi","sequence":"additional","affiliation":[]},{"given":"Luca","family":"Benini","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","year":"2016"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2016.2616357"},{"key":"ref33","year":"0"},{"key":"ref32","article-title":"Identity mappings in deep residual networks","volume":"abs 1603 5027","author":"he","year":"2016","journal-title":"CoRR"},{"key":"ref31","article-title":"Deep residual learning for image recognition","volume":"abs 1512 3385","author":"he","year":"2015","journal-title":"CoRR"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/2744769.2744788"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2015.2412549"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/2503210.2503292"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.3850\/9783981537079_0849"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/MWSCAS.2012.6292202"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2016.7418034"},{"key":"ref61","article-title":"GPU-based deep learning inference: A performance and power analysis","year":"2015"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2016.7417941"},{"key":"ref29","author":"goodfellow","year":"2016","journal-title":"Deep Learning"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.220"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref20","article-title":"Scaling deep learning on multiple in-memory processors","author":"xu","year":"2015","journal-title":"Workshop on Near-Data Processing"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001140"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001178"},{"key":"ref24","first-page":"19","author":"azarkhish","year":"2016","journal-title":"Design and Evaluation of a Processing-in-Memory Architecture for the Smart Memory Cube"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3093337.3037702"},{"key":"ref26","year":"2015","journal-title":"Hybrid Memory Cube Specification 2 1"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2016.2570283"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/2742854.2747288"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2014.2340013"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.2172\/1104707"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2014.6757327"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1049\/iet-cdt.2013.0031"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/VLSIT.2012.6242474"},{"key":"ref55","first-page":"145","article-title":"Memory-centric system interconnect design with hybrid memory cubes","author":"kim","year":"2013","journal-title":"Proc Int l Conf Parallel Architectures and Compilation Techniques"},{"key":"ref54","year":"2017"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2013.6738831"},{"key":"ref52","year":"2016"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2014.2357019"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001179"},{"key":"ref12","article-title":"cuDNN: Efficient primitives for deep learning","volume":"abs 1410 759","author":"chetlur","year":"2014","journal-title":"CoRR"},{"key":"ref13","first-page":"161","article-title":"Optimizing FPGA-based accelerator design for deep convolutional neural networks","author":"zhang","year":"2015","journal-title":"Proc ACM\/SIGDA Int Symp Field-Programmable Gate Arrays"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2011.5981829"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/2966986.2967011"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001163"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2015.41"},{"key":"ref18","first-page":"1","article-title":"A 803 GOp\/s\/W convolutional network accelerator","volume":"pp","author":"cavigelli","year":"2016","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"92","DOI":"10.1145\/2872887.2750389","article-title":"ShiDianNao: Shifting vision processing closer to the sensor","volume":"43","author":"du","year":"2015","journal-title":"SIGARCH Comput Archit News"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2015.00036"},{"key":"ref3","author":"lin","year":"2014","journal-title":"Microsoft coco Common objects in context"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"543","DOI":"10.1145\/2522848.2531745","article-title":"Combining modality specific deep neural networks for emotion recognition in video","author":"kahou","year":"2013","journal-title":"Proc 15th ACM Int Conf Multimodal Interaction"},{"key":"ref5","article-title":"Very deep convolutional networks for large-scale image recognition","volume":"abs 1409 1556","author":"simonyan","year":"2014","journal-title":"CoRR"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2016.7581275"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/2987354.2987355"},{"key":"ref49","year":"2016"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2012.289"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2015.2442980"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2015.2414943"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1016\/j.sse.2015.11.015"},{"key":"ref47","article-title":"A near-threshold RISC-V core with DSP extensions for scalable IoT endpoint devices","volume":"abs 1608 8376","author":"gautschi","year":"2016","journal-title":"CoRR"},{"key":"ref42","article-title":"TensorFlow: Large-scale machine learning on heterogeneous distributed systems","volume":"abs 1603 4467","author":"abadi","year":"2016","journal-title":"CoRR"},{"key":"ref41","article-title":"Deep learning benchmarks of NVIDIA tesla P100 PCIe, tesla K80, and tesla M40 GPUs","author":"murphy","year":"2017"},{"key":"ref44","first-page":"1235","article-title":"MLlib: Machine learning in apache spark","volume":"17","author":"meng","year":"2016","journal-title":"J Mach Learn Res"},{"key":"ref43","year":"2016"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/71\/8258926\/08038819.pdf?arnumber=8038819","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,25]],"date-time":"2023-08-25T23:12:37Z","timestamp":1693005157000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8038819\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,2,1]]},"references-count":61,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2017.2752706","relation":{},"ISSN":["1045-9219"],"issn-type":[{"value":"1045-9219","type":"print"}],"subject":[],"published":{"date-parts":[[2018,2,1]]}}}