{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T19:34:31Z","timestamp":1771702471216,"version":"3.50.1"},"reference-count":27,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,5]]},"DOI":"10.1109\/ipdps.2017.20","type":"proceedings-article","created":{"date-parts":[[2017,7,3]],"date-time":"2017-07-03T20:41:58Z","timestamp":1499114518000},"page":"615-624","source":"Crossref","is-referenced-by-count":68,"title":["swDNN: A Library for Accelerating Deep Learning Applications on Sunway TaihuLight"],"prefix":"10.1109","author":[{"given":"Jiarui","family":"Fang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haohuan","family":"Fu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenlai","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bingwei","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weijie","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guangwen","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","first-page":"1097","article-title":"Im-agenet classification with deep convolutional neural networks","author":"alex","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"269","DOI":"10.1145\/2644865.2541967","article-title":"Diannao: A small-footprint high-throughput accelerator for ubiquitous machine-learning","volume":"49","author":"tianshi","year":"2014","journal-title":"ACM SIGPLAN Notices"},{"key":"ref12","first-page":"609","article-title":"Dadiannao: A machine-learning supercomputer","author":"yunji","year":"2014","journal-title":"Proc the annual IEEE\/ACM International Symposium on Microarchitecture"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"369","DOI":"10.1145\/2786763.2694358","article-title":"Pudiannao: A polyvalent machine learning accelerator","volume":"43","author":"daofu","year":"2015","journal-title":"ACM SIGARCH Comput Arch News"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"92","DOI":"10.1145\/2872887.2750389","article-title":"Shidiannao: shifting vision processing closer to the sensor","volume":"43","author":"zidong","year":"2015","journal-title":"ACM SIGARCH Comput Arch News"},{"key":"ref15","first-page":"1","article-title":"The sunway taihulight supercomputer: system and applications","author":"haohuan","year":"2016","journal-title":"Science China Information Sciences"},{"key":"ref16","year":"0"},{"key":"ref17","first-page":"675","article-title":"Caffe: Convolutional architecture for fast feature embedding","author":"yangqing","year":"2014","journal-title":"Proceedings of the 2nd ACM International Multimedia Conference"},{"key":"ref18","author":"martin","year":"2016","journal-title":"Tensorflow Large-scale machine learning on heterogeneous distributed systems"},{"key":"ref19","author":"andrew","year":"2015","journal-title":"maxDNN an efficient convolution kernel for deep learning with maxwell gpus"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","article-title":"Deep neural networks for acoustic modeling in speech recognition: The shared views of four researchgroups","volume":"29","author":"geoffrey","year":"2012","journal-title":"IEEE Signal Processing Magazine"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"},{"key":"ref3","first-page":"2892","article-title":"Deeply learned face representations are sparse, selective, and robust","author":"yi","year":"2015","journal-title":"Proceedings of the IEEE Conference on ComputerVision and Pattern Recognition"},{"key":"ref6","author":"volodymyr","year":"2013","journal-title":"Playing atari with deep reinforcement learning"},{"key":"ref5","doi-asserted-by":"crossref","first-page":"30","DOI":"10.1109\/TASL.2011.2134090","article-title":"Context-dependent pre-trained deep neural networks for large- vocabulary speech recognition","volume":"20","author":"george","year":"2012","journal-title":"IEEE Transactions on Audio Speech and Language Processing"},{"key":"ref8","year":"2015","journal-title":"NVIDIA Nvidia tegra drive px Self-driving car computer"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"david","year":"2016","journal-title":"Nature"},{"key":"ref2","author":"karen","year":"2014","journal-title":"Very Deep Convolutional Networks for Large-scale Image Recognition"},{"key":"ref9","author":"sharan","year":"2014","journal-title":"cuDNN Efficient Primitives for Deep Learning"},{"key":"ref1","first-page":"818","article-title":"Visualizing and understanding convolutional networks","author":"matthew","year":"2014","journal-title":"European Conference on Computer Vision"},{"key":"ref20","first-page":"2","article-title":"Caffe con troll: Shallow ideas to speed up deep learning","author":"stefan","year":"2015","journal-title":"Proceedings of the Fourth Workshop on Data analytics in the Cloud"},{"key":"ref22","author":"andrew","year":"2015","journal-title":"Fast algorithms for convolutional neural networks"},{"key":"ref21","author":"nicolas","year":"2014","journal-title":"Fast convolutional nets with fbfft A GPU performance evaluation"},{"key":"ref24","first-page":"26","article-title":"Going deeper with embedded fpga platform for convolutional neural network","author":"jiantao","year":"2016","journal-title":"Proceedings of the 2016 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays"},{"key":"ref23","first-page":"161","article-title":"Optimizing fpga-based accelerator design for deep convolutional neural networks","author":"chen","year":"2015","journal-title":"Proceedings of the 2015 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays"},{"key":"ref26","first-page":"326","article-title":"Energy-efficient cnn implementation on a deeply pipelined fpga cluster","author":"chen","year":"2016","journal-title":"Proceedings of the 2016 International Symposium on Low Power Electronics and Design"},{"key":"ref25","first-page":"16","article-title":"Throughput -optimized opencl-based fpga accelerator for large-scale convolutional neural networks","author":"naveen","year":"2016","journal-title":"Proceedings of the 2016 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays"}],"event":{"name":"2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)","location":"Orlando, FL, USA","start":{"date-parts":[[2017,5,29]]},"end":{"date-parts":[[2017,6,2]]}},"container-title":["2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7965806\/7967079\/07967152.pdf?arnumber=7967152","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,10,11]],"date-time":"2020-10-11T12:36:32Z","timestamp":1602419792000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7967152\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,5]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/ipdps.2017.20","relation":{},"subject":[],"published":{"date-parts":[[2017,5]]}}}