{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,27]],"date-time":"2026-02-27T00:07:24Z","timestamp":1772150844644,"version":"3.50.1"},"reference-count":35,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,12,13]],"date-time":"2021-12-13T00:00:00Z","timestamp":1639353600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,12,13]],"date-time":"2021-12-13T00:00:00Z","timestamp":1639353600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,12,13]]},"DOI":"10.1109\/asru51503.2021.9688231","type":"proceedings-article","created":{"date-parts":[[2022,2,3]],"date-time":"2022-02-03T20:31:00Z","timestamp":1643920260000},"page":"283-288","source":"Crossref","is-referenced-by-count":5,"title":["Speaker Conditioning of Acoustic Models Using Affine Transformation for Multi-Speaker Speech Recognition"],"prefix":"10.1109","author":[{"given":"Midia","family":"Yousefi","sequence":"first","affiliation":[{"name":"Center for Robust Speech Systems (CRSS), Erik Jonsson School of Engineering, University of Texas at Dallas,Richardson,Texas,USA"}]},{"given":"John H.L.","family":"Hansen","sequence":"additional","affiliation":[{"name":"Center for Robust Speech Systems (CRSS), Erik Jonsson School of Engineering, University of Texas at Dallas,Richardson,Texas,USA"}]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9003993"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1750"},{"key":"ref31","article-title":"Optimal completion distillation for sequence learning","author":"sabour","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053998"},{"key":"ref35","article-title":"wav2vec: Unsupervised pretraining for speech recognition","author":"schneider","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref34","author":"zhang","year":"2016","journal-title":"Understanding deep learning requires rethinking generalization"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ISTEL.2016.7881870"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053108"},{"key":"ref12","first-page":"67","article-title":"Front-end, back-end, and hybrid techniques for noise-robust speech recognition","author":"li","year":"2011","journal-title":"Robust Speech Recognition of Uncertain or Missing Data"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2305833"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2014-581"},{"key":"ref15","article-title":"Probabilistic permutation invariant training for speech separation","author":"khorram","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682273"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2444659"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU46091.2019.9004009"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1244"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2404"},{"key":"ref4","first-page":"2117","article-title":"Assessing speaker engagement in 2-person debates: Overlap detection in united states presidential debates","author":"shokouhi","year":"2018","journal-title":"InterSpeech"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414594"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-331"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1631\/FITEE.1700814"},{"key":"ref29","article-title":"Align-refine: Non-autoregressive speech recognition via iterative realignment","author":"chi","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1768"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3036237"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2018.10.010"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/CHiME.2020-1"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21437\/CHiME.2018-8"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054029"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"803","DOI":"10.1109\/LSP.2021.3070817","article-title":"Streaming end-to-end multi-talker speech recognition","volume":"28","author":"liang","year":"2021","journal-title":"IEEE Signal Processing Letters"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1130"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2765834"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053692"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682866"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"ref25","article-title":"Film: Visual reasoning with a general conditioning layer","volume":"32","author":"perez","year":"0","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"}],"event":{"name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Cartagena, Colombia","start":{"date-parts":[[2021,12,13]]},"end":{"date-parts":[[2021,12,17]]}},"container-title":["2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9687821\/9687855\/09688231.pdf?arnumber=9688231","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,16]],"date-time":"2022-05-16T20:42:24Z","timestamp":1652733744000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9688231\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,13]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/asru51503.2021.9688231","relation":{},"subject":[],"published":{"date-parts":[[2021,12,13]]}}}