{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:18:20Z","timestamp":1777655900308,"version":"3.51.4"},"reference-count":37,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,4]]},"DOI":"10.1109\/ispass.2017.7975270","type":"proceedings-article","created":{"date-parts":[[2017,7,13]],"date-time":"2017-07-13T20:47:13Z","timestamp":1499978833000},"page":"55-64","source":"Crossref","is-referenced-by-count":89,"title":["Performance analysis of CNN frameworks for GPUs"],"prefix":"10.1109","author":[{"given":"Heehoon","family":"Kim","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hyoungwook","family":"Nam","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wookeun","family":"Jung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jaejin","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","first-page":"1223","article-title":"Large scale distributed deep networks","volume":"25","author":"dean","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611970364"},{"key":"ref31","first-page":"27","volume":"15","author":"nvidia","year":"2008","journal-title":"CUBLAS Library"},{"key":"ref30","article-title":"Online algorithms and stochastic approximations","author":"bottou","year":"1998","journal-title":"Online Learning and Neural Networks"},{"key":"ref37","article-title":"Optimizing parallel reduction in cuda","volume":"2","author":"harris","year":"2007","journal-title":"NVIDIA Developer Technology"},{"key":"ref36","author":"chintala","year":"0","journal-title":"convnet-benchmarks"},{"key":"ref35","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2014-274","article-title":"1-bit stochastic gradient descent and application to data-parallel distributed training of speech dnns","author":"seide","year":"2014","journal-title":"Proc INTERSPEECH 2014"},{"key":"ref34","article-title":"Deep compression: Compressing deep neural network with pruning, trained quantization and huffman coding","volume":"abs 1510 149","author":"han","year":"2015","journal-title":"CoRR"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"ref11","article-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","year":"2015","journal-title":"software available from tensorflow org"},{"key":"ref12","article-title":"Torch7: A matlab-like environment for machine learning","author":"collobert","year":"2011","journal-title":"BigLearn NIPS Workshop"},{"key":"ref13","article-title":"An introduction to computational networks and the computational network toolkit","year":"2014","journal-title":"Tech Rep"},{"key":"ref14","article-title":"cudnn: Efficient primitives for deep learning","volume":"abs 1410 759","author":"chetlur","year":"2014","journal-title":"CoRR"},{"key":"ref15","article-title":"Fast training of convolutional networks through ffts","volume":"abs 1312 5851","author":"mathieu","year":"2013","journal-title":"CoRR"},{"key":"ref16","article-title":"Fast convolutional nets with fbfft: A GPU performance evaluation","volume":"abs 1412 7580","author":"vasilache","year":"2014","journal-title":"CoRR"},{"key":"ref17","article-title":"Fast algorithms for convolutional neural networks","volume":"abs 1509 9308","author":"lavin","year":"2015","journal-title":"CoRR"},{"key":"ref18","article-title":"Multi-gpu training of convnets","volume":"abs 1312 5853","author":"yadan","year":"2013","journal-title":"CoRR"},{"key":"ref19","article-title":"Comparative study of caffe, neon, theano, and torch for deep learning","volume":"abs 1511 6435","author":"bahrampour","year":"2015","journal-title":"CoRR"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2339736"},{"key":"ref27","article-title":"End to end learning for self-driving cars","volume":"abs 1604 7316","year":"2016","journal-title":"CoRR"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"ref29","article-title":"Going deeper with convolutions","volume":"abs 1409 4842","author":"szegedy","year":"2014","journal-title":"CoRR"},{"key":"ref5","article-title":"Neural machine translation by jointly learning to align and translate","volume":"abs 1409 473","author":"bahdanau","year":"2014","journal-title":"CoRR"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"},{"key":"ref7","article-title":"Imagenet large scale visual recognition challenge","volume":"abs 1409 575","year":"2014","journal-title":"CoRR"},{"key":"ref2","article-title":"Very deep convolutional networks for large-scale image recognition","volume":"abs 1409 1556","author":"simonyan","year":"2014","journal-title":"CoRR"},{"key":"ref9","article-title":"Theano: A python framework for fast computation of mathematical expressions","volume":"abs 1605 2688","author":"al-rfou","year":"2016","journal-title":"CoRR"},{"key":"ref1","first-page":"1097","article-title":"Imagenet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CCBD.2016.029"},{"key":"ref22","year":"0","journal-title":"Cuda-convnet"},{"key":"ref21","year":"0","journal-title":"Github"},{"key":"ref24","article-title":"Pylearn2: a machine learning research library","author":"goodfellow","year":"2013","journal-title":"arXiv preprint arXiv 1308 4214"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref26","year":"0","journal-title":"Lasagne documentation"},{"key":"ref25","author":"chollet","year":"2015","journal-title":"Keras"}],"event":{"name":"2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)","location":"Santa Rosa, CA, USA","start":{"date-parts":[[2017,4,24]]},"end":{"date-parts":[[2017,4,25]]}},"container-title":["2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7965804\/7975258\/07975270.pdf?arnumber=7975270","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,24]],"date-time":"2023-08-24T13:44:16Z","timestamp":1692884656000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7975270\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,4]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/ispass.2017.7975270","relation":{},"subject":[],"published":{"date-parts":[[2017,4]]}}}