{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T10:38:45Z","timestamp":1730198325967,"version":"3.28.0"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,11]]},"DOI":"10.1109\/apsipaasc47483.2019.9023288","type":"proceedings-article","created":{"date-parts":[[2020,3,6]],"date-time":"2020-03-06T12:03:54Z","timestamp":1583496234000},"page":"302-306","source":"Crossref","is-referenced-by-count":3,"title":["Classification of causes of speech recognition errors using attention-based bidirectional long short-term memory and modulation spectrum"],"prefix":"10.1109","author":[{"given":"Jennifer","family":"Santoso","sequence":"first","affiliation":[]},{"given":"Takeshi","family":"Yamada","sequence":"additional","affiliation":[]},{"given":"Shoji","family":"Makino","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(98)00027-2"},{"key":"ref11","first-page":"668","article-title":"Joint acoustic and modulation frequency","author":"atlas","year":"2003","journal-title":"EURASIP Journal on Applied Signal Processing"},{"key":"ref12","first-page":"1243","article-title":"Exploring modulation spectrum features for speech-based depression level classification","author":"bozkurt","year":"0","journal-title":"Proc INTERSPEECH 2014"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1250\/ast.39.234"},{"key":"ref14","first-page":"30","article-title":"Dimension reduction of the modulation spectrogram for speaker verification","author":"kinnunen","year":"2008","journal-title":"Odyssey"},{"key":"ref15","first-page":"799","article-title":"Bidirectional LSTM networks for improved phoneme classification and recognition","volume":"2","author":"graves","year":"2005","journal-title":"Proceedings of ICANNGA-2005"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1994.8753425"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref18","article-title":"Bidirectional LSTM-HMM hybrid system for polyphonic sound event detection","author":"hayashi","year":"2016","journal-title":"Tech Rep DCASE2016 Challenge"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461990"},{"key":"ref4","first-page":"61","article-title":"Automatic pronunciation evaluation and mispronunciation detection using CMUSphinx","author":"srikanth","year":"0","journal-title":"CICLing 2012"},{"key":"ref3","first-page":"192","article-title":"Novel speech recognition interface based on notification of utterance volume required in changing noisy environment","author":"goto","year":"0","journal-title":"Proc NCSP'18"},{"key":"ref6","first-page":"514","article-title":"Categorizing error causes related to utterance characteristics in speech recognition","volume":"19","author":"santoso","year":"0","journal-title":"Proc of NCSP"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-20609-7_17"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1995.479672"},{"key":"ref7","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2014","journal-title":"ICLR 2015"},{"key":"ref2","first-page":"132","article-title":"Exploring features for localized detection of speech recognition errors","author":"pincus","year":"0","journal-title":"Proceedings of SIGDIAL 2013"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2009.10.001"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639103"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952552"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-486"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.23919\/APSIPA.2018.8659587"},{"journal-title":"Simulated Spoken Dialogue","article-title":"Spoken Dialogue","year":"0","key":"ref24"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-2034"},{"journal-title":"Open-Source Large Vocabulary CSR Engine Julius","year":"0","key":"ref26"},{"journal-title":"Utsunomiya University Spoken Dialogue Database for Paralinguistic Information Studies","year":"0","key":"ref25"}],"event":{"name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","start":{"date-parts":[[2019,11,18]]},"location":"Lanzhou, China","end":{"date-parts":[[2019,11,21]]}},"container-title":["2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8989870\/9023008\/09023288.pdf?arnumber=9023288","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,17]],"date-time":"2022-07-17T17:52:56Z","timestamp":1658080376000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9023288\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/apsipaasc47483.2019.9023288","relation":{},"subject":[],"published":{"date-parts":[[2019,11]]}}}