{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:24:53Z","timestamp":1775229893426,"version":"3.50.1"},"reference-count":23,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016,8]]},"DOI":"10.1109\/dsmp.2016.7583516","type":"proceedings-article","created":{"date-parts":[[2016,10,6]],"date-time":"2016-10-06T21:54:27Z","timestamp":1475790867000},"page":"100-103","source":"Crossref","is-referenced-by-count":36,"title":["Accelerating recurrent neural network training using sequence bucketing and multi-GPU data parallelization"],"prefix":"10.1109","author":[{"given":"Viacheslav","family":"Khomenko","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Oleg","family":"Shyshkov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Olga","family":"Radyvonenko","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kostiantyn","family":"Bokhan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","first-page":"2595","article-title":"Parallelized stochastic gradient descent","volume":"23","author":"zinkevich","year":"2010","journal-title":"Advances in neural information processing systems"},{"key":"ref11","article-title":"BlackOut: Speeding up Recurrent Neural Network Language Models With Very Large Vocabularies","author":"ji","year":"2015"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICFHR.2014.55"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref14","article-title":"Learning to execute","author":"zaremba","year":"2014"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref16","article-title":"DARPA TIMIT acoustic-phonetic continuous speech corpus CD-ROM","author":"garofolo","year":"1993","journal-title":"National Institute of Standards and Technology NISTIR 4930"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.1994.576870"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/1815330.1815343"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-1036"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2015.07.012"},{"key":"ref3","first-page":"577","article-title":"Unconstrained on-line handwriting recognition with recurrent neural networks","author":"graves","year":"2008","journal-title":"Advances in neural information processing systems"},{"key":"ref6","first-page":"2285","article-title":"Deep convex net: A scalable architecture for speech pattern classification","author":"yu","year":"2011","journal-title":"Proceedings of Interspeech"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1080"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"1203","DOI":"10.21437\/Interspeech.2013-327","article-title":"Speed Up of Recurrent Neural Network Language Models With Sentence Independent Subsampling Stochastic Gradient Descent","author":"shi","year":"2013","journal-title":"InterSpeech"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178816"},{"key":"ref2","article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","author":"chung","year":"2014"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1162\/089976600300015015"},{"key":"ref9","article-title":"Accelerating recurrent neural network training via two stage classes and parallelization","author":"zhiheng","year":"2013","journal-title":"2013 IEEE Workshop on Automatic Speech Recognition and Understanding"},{"key":"ref20","first-page":"1223","article-title":"Large scale distributed deep networks","author":"dean","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref22","article-title":"Theano: A Python framework for fast computation of mathematical expressions","author":"team","year":"2016"},{"key":"ref21","article-title":"ADADELTA: an adaptive learning rate method","author":"zeiler","year":"2012"},{"key":"ref23","year":"2016","journal-title":"Lasagne Lightweight library to build and train neural networks in Theano"}],"event":{"name":"2016 IEEE First International Conference on Data Stream Mining & Processing (DSMP)","location":"Lviv, Ukraine","start":{"date-parts":[[2016,8,23]]},"end":{"date-parts":[[2016,8,27]]}},"container-title":["2016 IEEE First International Conference on Data Stream Mining &amp; Processing (DSMP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7578909\/7583485\/07583516.pdf?arnumber=7583516","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,11]],"date-time":"2025-06-11T02:11:25Z","timestamp":1749607885000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7583516\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,8]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/dsmp.2016.7583516","relation":{},"subject":[],"published":{"date-parts":[[2016,8]]}}}