{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,12]],"date-time":"2025-08-12T21:52:33Z","timestamp":1755035553918,"version":"3.37.3"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100006190","name":"Research and Development","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006190","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006190","name":"Research and Development","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006190","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,5,23]]},"DOI":"10.1109\/icassp43922.2022.9746227","type":"proceedings-article","created":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T19:50:34Z","timestamp":1651089034000},"page":"8292-8296","source":"Crossref","is-referenced-by-count":5,"title":["Alignment-Learning Based Single-Step Decoding for Accurate and Fast Non-Autoregressive Speech Recognition"],"prefix":"10.1109","author":[{"given":"Yonghe","family":"Wang","sequence":"first","affiliation":[{"name":"Inner Mongolia University,College of Computer Science,Hohhot,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rui","family":"Liu","sequence":"additional","affiliation":[{"name":"Inner Mongolia University,College of Computer Science,Hohhot,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Feilong","family":"Bao","sequence":"additional","affiliation":[{"name":"Inner Mongolia University,College of Computer Science,Hohhot,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hui","family":"Zhang","sequence":"additional","affiliation":[{"name":"Inner Mongolia University,College of Computer Science,Hohhot,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guanglai","family":"Gao","sequence":"additional","affiliation":[{"name":"Inner Mongolia University,College of Computer Science,Hohhot,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref31","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"ICLRE"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414198"},{"key":"ref11","article-title":"Non-autoregressive transformer automatic speech recognition","author":"chen","year":"2020","journal-title":"Signal Processing Letters"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1619"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.154"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2086"},{"key":"ref15","first-page":"3770","article-title":"Aligndenoise: Single-pass non-autoregressive speech recognition","author":"chen","year":"2021","journal-title":"InterSpeech"},{"key":"ref16","first-page":"5889","article-title":"Cassnat: Ctc alignment-based single step non-autoregressive trans-former for speech recognition","author":"fan","year":"2021","journal-title":"ICASSP"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1955"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1600"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3082299"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1162"},{"key":"ref4","first-page":"1","article-title":"On the comparison of popular end-to-end models for large scale speech recognition","author":"li","year":"2020","journal-title":"InterSpeech"},{"key":"ref27","article-title":"The kaldi speech recognition toolkit","author":"povey","year":"2011","journal-title":"ASRU"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462105"},{"key":"ref6","first-page":"5884","article-title":"Speech-transformer: a no-recurrence sequence-to-sequence model for speech recognition","author":"dong","year":"2018","journal-title":"ICASSP"},{"key":"ref29","first-page":"3586","article-title":"Audio augmentation for speech recognition","author":"ko","year":"2015","journal-title":"InterSpeech"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1107"},{"key":"ref8","first-page":"1403","article-title":"Imputer: Sequence modelling via imputation and dynamic programming","author":"chan","year":"2020","journal-title":"ICML"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054345"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953075"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2404"},{"key":"ref20","first-page":"5206","article-title":"Librispeech: an asr corpus based on public domain audio books","author":"panayotov","year":"2015","journal-title":"ICASSP"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICSDA.2017.8384449"},{"key":"ref24","first-page":"690","article-title":"Scalable modified kneserney language model estimation","author":"heafield","year":"2013","journal-title":"ACL"},{"key":"ref23","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"ICONIP"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICFHR-2018.2018.00052"},{"key":"ref25","first-page":"2978","article-title":"Transformer-xl: Attentive language models beyond a fixed-length context","author":"dai","year":"2020","journal-title":"ACL"}],"event":{"name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2022,5,23]]},"location":"Singapore, Singapore","end":{"date-parts":[[2022,5,27]]}},"container-title":["ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9745891\/9746004\/09746227.pdf?arnumber=9746227","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,22]],"date-time":"2022-08-22T20:12:55Z","timestamp":1661199175000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9746227\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,23]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/icassp43922.2022.9746227","relation":{},"subject":[],"published":{"date-parts":[[2022,5,23]]}}}