{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T02:37:43Z","timestamp":1730342263857,"version":"3.28.0"},"reference-count":30,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,11]]},"DOI":"10.23919\/apsipa.2018.8659527","type":"proceedings-article","created":{"date-parts":[[2019,3,18]],"date-time":"2019-03-18T23:11:49Z","timestamp":1552950709000},"page":"1934-1939","source":"Crossref","is-referenced-by-count":0,"title":["Reward Only Training of Encoder-Decoder Digit Recognition Systems Based on Policy Gradient Methods"],"prefix":"10.23919","author":[{"given":"Yilong","family":"Peng","sequence":"first","affiliation":[]},{"given":"Hayato","family":"Shibata","sequence":"additional","affiliation":[]},{"given":"Takahiro","family":"Shinozaki","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref30","first-page":"244","article-title":"A Japanese national project on spontaneous speech corpus and processing technology","author":"furui","year":"2000","journal-title":"Proc ASR'00"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992696"},{"key":"ref11","first-page":"1","article-title":"Task loss estimation for sequence prediction","author":"bahdanau","year":"2016","journal-title":"International Conference on Learning Representations"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"3537","DOI":"10.21437\/Interspeech.2017-639","article-title":"Optimizing expected word error rate via sampling for speech recognition","author":"shannon","year":"2017","journal-title":"Proc Interspeech 2017"},{"journal-title":"Improving end-to-end speech recognition with policy learning","year":"2017","author":"zhou","key":"ref13"},{"journal-title":"Sequence-to-sequence asr optimization via reinforcement learning","year":"2017","author":"tjandra","key":"ref14"},{"key":"ref15","article-title":"Reinforcement learning of speech recognition system based on policy gradient and hypothesis selection","author":"kato","year":"2018","journal-title":"Proc ICASSP"},{"key":"ref16","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"1999","journal-title":"Proceedings of the 12th International Conference on Neural Information Processing Systems"},{"key":"ref17","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume":"48","author":"mnih","year":"2016","journal-title":"Proc 33rd Int Conf Mach Learn"},{"journal-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref18"},{"key":"ref19","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"Proceedings of the 27th International Conference on Neural Information Processing Systems - Volume 2"},{"journal-title":"Adam A method for stochastic optimization","year":"2014","author":"kingma","key":"ref28"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2011.6163976"},{"key":"ref27","first-page":"265","article-title":"Tensorflow: A system for large-scale machine learning","author":"abadi","year":"2016","journal-title":"Proc USENIX Conf Operating System Design and Implementations"},{"key":"ref3","first-page":"3169","article-title":"The zero resource speech challenge 2015","author":"versteegh","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TAAI.2010.29"},{"key":"ref29","first-page":"747","article-title":"Composite embedding system for zerospeech 2017 track1","author":"shibata","year":"2017","journal-title":"Proc ASRU"},{"key":"ref5","first-page":"2007","article-title":"Learning from real users: rating dialogue success with neural networks for reinforcement learning in spoken dialogue systems","author":"su","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2009.2032618"},{"key":"ref2","first-page":"137","article-title":"Comparing human and machine errors in conversational speech transcription","author":"stolcke","year":"2017","journal-title":"Proc Interspeech ISCA - International Speech Communication Association"},{"key":"ref9","first-page":"1764","article-title":"Towards end-to-end speech recognition with recurrent neural networks","author":"graves","year":"2014","journal-title":"International Conference on Machine Learning"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"132","DOI":"10.21437\/Interspeech.2017-405","article-title":"English conversational telephone speech recognition by humans and machines","author":"saon","year":"2017","journal-title":"Proc Interspeech 2017"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1162\/106365603321828970"},{"key":"ref21","first-page":"949","article-title":"Natural evolution strategies","volume":"15","author":"wierstra","year":"2014","journal-title":"J Mach Learn Res"},{"journal-title":"Neural machine translation by jointly learning to align and translate","year":"2014","author":"bahdanau","key":"ref24"},{"key":"ref23","first-page":"1329","article-title":"Bench-marking deep reinforcement learning for continuous control","author":"duan","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref26","first-page":"538","article-title":"The optimal reward baseline for gradient-based reinforcement learning","author":"weaver","year":"2001","journal-title":"Proceedings of the Seventeenth Conference on Uncertainty in Artificial Intelligence"},{"key":"ref25","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1007\/978-1-4615-3618-5_2","article-title":"Simple statistical gradient-following algorithms for connectionist reinforcement learning","author":"williams","year":"1992","journal-title":"Reinforcement Learning"}],"event":{"name":"2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","start":{"date-parts":[[2018,11,12]]},"location":"Honolulu, HI, USA","end":{"date-parts":[[2018,11,15]]}},"container-title":["2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8648538\/8659446\/08659527.pdf?arnumber=8659527","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,23]],"date-time":"2020-08-23T22:19:49Z","timestamp":1598221189000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8659527\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,11]]},"references-count":30,"URL":"https:\/\/doi.org\/10.23919\/apsipa.2018.8659527","relation":{},"subject":[],"published":{"date-parts":[[2018,11]]}}}