{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,27]],"date-time":"2025-07-27T07:34:30Z","timestamp":1753601670331},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,4,14]]},"DOI":"10.1109\/icassp48485.2024.10445950","type":"proceedings-article","created":{"date-parts":[[2024,3,18]],"date-time":"2024-03-18T18:56:31Z","timestamp":1710788191000},"page":"12221-12225","source":"Crossref","is-referenced-by-count":3,"title":["Augmenting Conformers With Structured State-Space Sequence Models For Online Speech Recognition"],"prefix":"10.1109","author":[{"given":"Haozhe","family":"Shan","sequence":"first","affiliation":[{"name":"Harvard University"}]},{"given":"Albert","family":"Gu","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}]},{"given":"Zhong","family":"Meng","sequence":"additional","affiliation":[{"name":"Google LLC"}]},{"given":"Weiran","family":"Wang","sequence":"additional","affiliation":[{"name":"Google LLC"}]},{"given":"Krzysztof","family":"Choromanski","sequence":"additional","affiliation":[{"name":"Google LLC"}]},{"given":"Tara","family":"Sainath","sequence":"additional","affiliation":[{"name":"Google LLC"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"article-title":"Attention is all you need","volume-title":"NeurIPS","author":"Vaswani","key":"ref4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3015"},{"article-title":"Branchformer: Parallel mlp-attention architectures to capture local and global context for speech recognition and understanding","volume-title":"ICML","author":"Peng","key":"ref6"},{"article-title":"Efficiently modeling long sequences with structured state spaces","year":"2021","author":"Gu","key":"ref7"},{"article-title":"Diagonal state spaces are as effective as structured state spaces","volume-title":"NeurIPS","author":"Gupta","key":"ref8"},{"article-title":"On the parameterization and initialization of diagonal state space models","volume-title":"NeurIPS","author":"Gu","key":"ref9"},{"article-title":"Mega: Moving average equipped gated attention","volume-title":"The Eleventh International Conference on Learning Representations","author":"Ma","key":"ref10"},{"article-title":"Simplified state space layers for sequence modeling","volume-title":"The Eleventh International Conference on Learning Representations","author":"Smith","key":"ref11"},{"article-title":"Combining recurrent, convolutional, and continuous-time models with linear state space layers","volume-title":"NeurIPS","author":"Gu","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1036"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096271"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096135"},{"article-title":"Parallelizing linear recurrent neural nets over sequence length","year":"2017","author":"Martin","key":"ref16"},{"article-title":"Hyena hierarchy: Towards larger convolutional language models","year":"2023","author":"Poli","key":"ref17"},{"article-title":"Hungry hungry hippos: Towards language modeling with state space models","year":"2022","author":"Dao","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-24797-2"},{"article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume-title":"NeurIPS","author":"Baevski","key":"ref21"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.5555\/2986459.2986721"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1285"}],"event":{"name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2024,4,14]]},"location":"Seoul, Korea, Republic of","end":{"date-parts":[[2024,4,19]]}},"container-title":["ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10445798\/10445803\/10445950.pdf?arnumber=10445950","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,2]],"date-time":"2024-08-02T04:59:28Z","timestamp":1722574768000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10445950\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,14]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/icassp48485.2024.10445950","relation":{},"subject":[],"published":{"date-parts":[[2024,4,14]]}}}