{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T20:15:37Z","timestamp":1776888937169,"version":"3.51.2"},"reference-count":50,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Basic Research Program of China","doi-asserted-by":"publisher","award":["2017YFB1002102"],"award-info":[{"award-number":["2017YFB1002102"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/taslp.2020.3009477","type":"journal-article","created":{"date-parts":[[2020,7,15]],"date-time":"2020-07-15T21:21:07Z","timestamp":1594848067000},"page":"2174-2183","source":"Crossref","is-referenced-by-count":11,"title":["Modular End-to-End Automatic Speech Recognition Framework for Acoustic-to-Word Model"],"prefix":"10.1109","volume":"28","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3067-8333","authenticated-orcid":false,"given":"Qi","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4400-5340","authenticated-orcid":false,"given":"Zhehuai","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hao","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingkun","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yizhou","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7102-9826","authenticated-orcid":false,"given":"Kai","family":"Yu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","article-title":"OOV words extension for modular neural acoustics-to-word model","author":"li","year":"0","journal-title":"Proc Nat Conf Man-Mach Speech Commun"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1392"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-546"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1118"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8268935"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003906"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682336"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1452"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461935"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472641"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1284"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472152"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/34.62605"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1990.115720"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639619"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1162"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2456"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1090\/S0002-9904-1967-11751-8"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461361"},{"key":"ref26","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.2140\/pjm.1968.27.211"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953075"},{"key":"ref10","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"0","journal-title":"Proc Neural Inf Process Syst Conf"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1055"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1992.225858"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472618"},{"key":"ref14","article-title":"Sequence transduction with recurrent neural networks","author":"graves","year":"2012"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8268937"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-831"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639693"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1006\/csla.2001.0184"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/5.381844"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2602884"},{"key":"ref5","first-page":"338","article-title":"Long short-term memory recurrent neural network architectures for large scale acoustic modeling","author":"sak","year":"0","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref8","first-page":"1764","article-title":"Towards end-to-end speech recognition with recurrent neural networks","author":"graves","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2015.7333804"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639664"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953069"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178787"},{"key":"ref47","first-page":"3586","article-title":"Audio augmentation for speech recognition","author":"ko","year":"0","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref42","author":"paszke","year":"2017","journal-title":"Automatic differentiation in pytorch[J]"},{"key":"ref41","article-title":"The Kaldi speech recognition toolkit","author":"povey","year":"0","journal-title":"Proc IEEE Workshop on Automatic Speech Recognition and Understanding"},{"key":"ref44","article-title":"Feedforward sequential memory networks: A new structure to learn long-term dependency","author":"zhang","year":"2015","journal-title":"arXiv 1512 08301"},{"key":"ref43","article-title":"MXNet: A flexible and efficient machine learning library for heterogeneous distributed systems","author":"chen","year":"2015","journal-title":"arXiv 1512 01274"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/8938144\/09141380.pdf?arnumber=9141380","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T17:31:25Z","timestamp":1651080685000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9141380\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":50,"URL":"https:\/\/doi.org\/10.1109\/taslp.2020.3009477","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]}}}