{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T12:49:06Z","timestamp":1751374146934,"version":"3.28.0"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,11]]},"DOI":"10.1109\/iiswc47752.2019.9041972","type":"proceedings-article","created":{"date-parts":[[2020,3,20]],"date-time":"2020-03-20T08:55:14Z","timestamp":1584694514000},"page":"131-142","source":"Crossref","is-referenced-by-count":5,"title":["Deep Learning Language Modeling Workloads: Where Time Goes on Graphics Processors"],"prefix":"10.1109","author":[{"given":"Ali Hadi","family":"Zadeh","sequence":"first","affiliation":[]},{"given":"Zissis","family":"Poulos","sequence":"additional","affiliation":[]},{"given":"Andreas","family":"Moshovos","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","article-title":"Adaptive input representations for neural language modeling","author":"baevski","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref11","first-page":"1081","article-title":"A scalable hierarchical distributed language model","author":"mnih","year":"2009","journal-title":"Advances in Neural Information 
Processing Systems 21"},{"article-title":"Exploring the limits of language modeling","year":"2016","author":"jozefowicz","key":"ref12"},{"key":"ref13","article-title":"Efficient estimation of word representations in vector space","volume":"2013","author":"mikolov","year":"0","journal-title":"Proceedings of Workshop at ICLR"},{"journal-title":"Softmax Optimizations for Intel Xeon Processor-based Platforms","year":"2019","author":"czaja","key":"ref14"},{"key":"ref15","first-page":"561","volume":"1","author":"goodman","year":"2001","journal-title":"Classes for fast maximum entropy training"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1103"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947611"},{"key":"ref18","article-title":"Blackout: Speeding up recurrent neural network language models with very large vocabularies","volume":"abs 1511 6909","author":"ji","year":"2016","journal-title":"CoRR"},{"key":"ref19","article-title":"Quick training of probabilistic neural nets by importance sampling","author":"bengio","year":"0","journal-title":"Proceedings of the conference on Artificial Intelligence and Statistics (AISTATS) 2003"},{"key":"ref4","article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","volume":"abs 1412 3555","author":"chung","year":"2014","journal-title":"CoRR"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref6","first-page":"1243","article-title":"Convolutional sequence to sequence learning","author":"gehring","year":"0","journal-title":"Proceedings of the 34th International Conference on Machine Learning - Volume 70 ser ICML'17 JMLR org"},{"key":"ref5","first-page":"933","article-title":"Language modeling with gated convolutional networks","author":"dauphin","year":"0","journal-title":"Proceedings of the 34th International Conference on Machine Learning - Volume 70 ser ICML'17 JMLR 
org"},{"key":"ref8","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1082"},{"key":"ref2","first-page":"3104","author":"sutskever","year":"2014","journal-title":"Sequence to sequence learning with neural networks"},{"key":"ref9","first-page":"1302","article-title":"Efficient softmax approximation for gpus","author":"grave","year":"0","journal-title":"Proceedings of the 34th International Conference on Machine Learning-Volume 70 JMLR org"},{"key":"ref1","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2012-65","article-title":"Lstm neural networks for language modeling","author":"sundermeyer","year":"2012","journal-title":"InterSpeech"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.3115\/1075812.1075835"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1086\/220872"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-4009"},{"key":"ref23","article-title":"Pointer sentinel mixture models","author":"merity","year":"0","journal-title":"5th International Conference on Learning Representations ICLR 2017 Toulon France April 24\u201326 2017 Conference Track Proceedings"},{"key":"ref26","article-title":"Automatic differentiation in PyTorch","author":"paszke","year":"0","journal-title":"Proc NIPS Autodiff Workshop"},{"key":"ref25","article-title":"One billion word benchmark for measuring progress in statistical language modeling","author":"chelba","year":"2013","journal-title":"Google Tech Rep"}],"event":{"name":"2019 IEEE International Symposium on Workload Characterization (IISWC)","start":{"date-parts":[[2019,11,3]]},"location":"Orlando, FL, USA","end":{"date-parts":[[2019,11,5]]}},"container-title":["2019 IEEE International Symposium on Workload Characterization 
(IISWC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9027808\/9041927\/09041972.pdf?arnumber=9041972","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,19]],"date-time":"2022-10-19T08:24:00Z","timestamp":1666167840000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9041972\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/iiswc47752.2019.9041972","relation":{},"subject":[],"published":{"date-parts":[[2019,11]]}}}