{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,7]],"date-time":"2025-04-07T20:36:14Z","timestamp":1744058174881,"version":"3.28.0"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,12,13]],"date-time":"2021-12-13T00:00:00Z","timestamp":1639353600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,12,13]],"date-time":"2021-12-13T00:00:00Z","timestamp":1639353600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,12,13]]},"DOI":"10.1109\/asru51503.2021.9687902","type":"proceedings-article","created":{"date-parts":[[2022,2,3]],"date-time":"2022-02-03T20:31:00Z","timestamp":1643920260000},"page":"824-829","source":"Crossref","is-referenced-by-count":2,"title":["ChannelAugment: Improving Generalization of Multi-Channel ASR by Training with Input Channel Randomization"],"prefix":"10.1109","author":[{"given":"Marco","family":"Gaudesi","sequence":"first","affiliation":[{"name":"Nuance Communications"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Felix","family":"Weninger","sequence":"additional","affiliation":[{"name":"Nuance Communications"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dushyant","family":"Sharma","sequence":"additional","affiliation":[{"name":"Nuance Communications"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Puming","family":"Zhan","sequence":"additional","affiliation":[{"name":"Nuance Communications"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1337"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.21437\/Odyssey.2020-27"},{"key":"ref31","article-title":"Sound event localization and detection using activity-coupled cartesian DOA vector and RD3Net","author":"shimada","year":"2020","journal-title":"ArXiv"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2017.09.023"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1121\/1.382599"},{"key":"ref36","article-title":"Non intrusive estimation of speech signal parameters using a frame based machine learning approach","author":"sharma","year":"2020","journal-title":"Proc of eusipco"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref34","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2015","journal-title":"Proc of ICLR"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-552"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2549"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2006.881676"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2009.2025790"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-234"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"1274","DOI":"10.1109\/JSTSP.2017.2764276","article-title":"Unified architecture for multichan-nel end-to-end speech recognition with neural beamforming","volume":"11","author":"tsubasa","year":"2017","journal-title":"IEEE Journal of Selected Topics in Signal Processing"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003986"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683706"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054130"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref28","first-page":"2968","article-title":"Meeting transcription using asynchronous distant mi-crophones","author":"yoshioka","year":"2019","journal-title":"Proc of Interspeech"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9004025"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947477"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2719"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"ref29","first-page":"421","article-title":"Examining the combination of multi-band pro-cessing and channel dropout for robust speech recognition","author":"kov\u00e1cs","year":"2019","journal-title":"Proc of Interspeech"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053896"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2672401"},{"key":"ref7","first-page":"551","article-title":"Single headed attention based sequence-to-sequence model for state-of-the-art results on Switchboard","author":"t\u00fcske","year":"2020","journal-title":"Proc of Interspeech"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref1","article-title":"Sequence transduction with recurrent neural networks","author":"graves","year":"2012","journal-title":"Proc of ICML Workshop on Representation Learning"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7471664"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053205"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-733"},{"key":"ref21","first-page":"18","article-title":"Deep beamforming and data augmentation for robust speech recog-nition: Results of the 4th CHiME Challenge","author":"schrank","year":"2016","journal-title":"Proc of CHiME-4 Workshop"},{"key":"ref24","first-page":"6640","article-title":"Frequency domain multi-channel acoustic modeling for distant speech recognition","author":"wu","year":"2019","journal-title":"Proc of ICASSP"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO.2019.8902524"},{"key":"ref26","first-page":"6635","article-title":"Multi-geometry spatial acous-tic modeling for distant speech recognition","author":"kumatani","year":"2019","journal-title":"Proc of ICASSP"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053940"}],"event":{"name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","start":{"date-parts":[[2021,12,13]]},"location":"Cartagena, Colombia","end":{"date-parts":[[2021,12,17]]}},"container-title":["2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9687821\/9687855\/09687902.pdf?arnumber=9687902","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,16]],"date-time":"2022-05-16T20:42:23Z","timestamp":1652733743000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9687902\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,13]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/asru51503.2021.9687902","relation":{},"subject":[],"published":{"date-parts":[[2021,12,13]]}}}