{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T05:24:57Z","timestamp":1769923497530,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,3,9]],"date-time":"2024-03-09T00:00:00Z","timestamp":1709942400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Chinese Scholarship Council","award":["202008610239"],"award-info":[{"award-number":["202008610239"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,9]]},"DOI":"10.1145\/3648536.3648539","type":"proceedings-article","created":{"date-parts":[[2024,5,9]],"date-time":"2024-05-09T12:04:20Z","timestamp":1715256260000},"page":"20-28","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Single-Channel Robot Ego-Speech Filtering during Human-Robot Interaction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5624-7235","authenticated-orcid":false,"given":"Yue","family":"Li","sequence":"first","affiliation":[{"name":"Social AI, Vrije Universiteit, Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5707-5236","authenticated-orcid":false,"given":"Koen","family":"Hindriks","sequence":"additional","affiliation":[{"name":"VU University, Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1932-3200","authenticated-orcid":false,"given":"Florian","family":"Kunneman","sequence":"additional","affiliation":[{"name":"Department for Computer Science, Vrije Universiteit, Netherlands"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,5,9]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Noise reduction in speech processing. Vol.\u00a02","author":"Benesty Jacob","unstructured":"Jacob Benesty, Jingdong Chen, Yiteng Huang, and Israel Cohen. 2009. Noise reduction in speech processing. Vol.\u00a02. Springer Science & Business Media."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1979.1163209"},{"key":"e_1_3_2_1_3_1","volume-title":"Premotor cortex modulates somatosensory cortex during voluntary movements without proprioceptive feedback. Nature neuroscience 10, 4","author":"Christensen Mark\u00a0Schram","year":"2007","unstructured":"Mark\u00a0Schram Christensen, Jesper Lundbye-Jensen, Svend\u00a0Sparre Geertsen, Tue\u00a0Hvass Petersen, Olaf\u00a0B Paulson, and Jens\u00a0Bo Nielsen. 2007. Premotor cortex modulates somatosensory cortex during voluntary movements without proprioceptive feedback. Nature neuroscience 10, 4 (2007), 417\u2013419."},{"key":"e_1_3_2_1_4_1","volume-title":"Noise reduction in speech processing","author":"Cohen Israel","unstructured":"Israel Cohen, Yiteng Huang, Jingdong Chen, and Jacob Benesty. 2009. Noise reduction in speech processing. Springer."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1017\/ATSIP.2015.22"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICOSP.2014.7015050"},{"key":"e_1_3_2_1_7_1","unstructured":"Meng Ge Chenglin Xu Longbiao Wang Eng\u00a0Siong Chng Jianwu Dang and Haizhou Li. 2020. SpEx+: A Complete Time Domain Speaker Extraction Network. arxiv:2005.04686\u00a0[eess.AS]"},{"key":"e_1_3_2_1_8_1","volume-title":"A complete time domain speaker extraction network. arXiv preprint arXiv:2005.04686","author":"Ge Meng","year":"2020","unstructured":"Meng Ge, Chenglin Xu, Longbiao Wang, Eng\u00a0Siong Chng, Jianwu Dang, and Haizhou Li. 2020. Spex+: A complete time domain speaker extraction network. arXiv preprint arXiv:2005.04686 (2020)."},{"key":"e_1_3_2_1_9_1","unstructured":"Google. 2023. Google Cloud Text-to-Speech AI. https:\/\/cloud.google.com\/text-to-speech\/?hl=en"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2020.2980956"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054222"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7471631"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.5555\/1733343.1733397"},{"key":"e_1_3_2_1_14_1","first-page":"1490","article-title":"Blind source separation and ICA techniques: a review","volume":"4","author":"Jain N","year":"2012","unstructured":"Sanjeev\u00a0N Jain and Chandrashekhar Rai. 2012. Blind source separation and ICA techniques: a review. International Journal of Engineering Science and Technology 4, 4 (2012), 1490\u20131503.","journal-title":"International Journal of Engineering Science and Technology"},{"key":"e_1_3_2_1_15_1","unstructured":"Jakub Jansk\u00fd Ji\u0159\u00ed M\u00e1lek Jaroslav \u010cmejla Tom\u00e1\u0161 Kounovsk\u00fd Zbyn\u011bk Koldovsk\u00fd and Jind\u0159ich \u017d\u010f\u00e1nsk\u00fd. 2019. Adaptive blind audio source extraction supervised by dominant speaker identification using x-vectors. arxiv:1910.11824\u00a0[eess.AS]"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683855"},{"key":"e_1_3_2_1_17_1","unstructured":"Haohe Liu Qiuqiang Kong Qiao Tian Yan Zhao DeLiang Wang Chuanzeng Huang and Yuxuan Wang. 2021. VoiceFixer: Toward General Speech Restoration With Neural Vocoder. arxiv:2109.13731\u00a0[cs.SD]"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053327"},{"key":"e_1_3_2_1_19_1","volume-title":"Deep filtering: Signal extraction using complex time-frequency filters. arXiv preprint arXiv:1904.08369","author":"Mack Wolfgang","year":"2019","unstructured":"Wolfgang Mack and Emanu\u00ebl\u00a0AP Habets. 2019. Deep filtering: Signal extraction using complex time-frequency filters. arXiv preprint arXiv:1904.08369 (2019)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-44533-1_55"},{"key":"e_1_3_2_1_21_1","volume-title":"Blind source separation with parameter-free adaptive step-size method for robot audition","author":"Nakajima Hirofumi","year":"2009","unstructured":"Hirofumi Nakajima, Kazuhiro Nakadai, Yuji Hasegawa, and Hiroshi Tsujino. 2009. Blind source separation with parameter-free adaptive step-size method for robot audition. IEEE transactions on audio, speech, and language processing 18, 6 (2009), 1476\u20131485."},{"key":"e_1_3_2_1_22_1","unstructured":"Nathan Lively. [n. d.]. Audio Analyzers: Pink Noise vs Sine Sweep. https:\/\/www.sounddesignlive.com\/audio-analyzers-pink-noise-vs-sine-sweep\/"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1513"},{"key":"e_1_3_2_1_24_1","unstructured":"Thomas Orden Mike\u00a0EU Ligthart Koen Hindriks Thomas Wiggers and Karen Chiang. [n. d.]. The Social Interaction Cloud (SIC). https:\/\/socialrobotics.atlassian.net\/wiki\/spaces\/CBSR\/overview?homepageId=2186870789"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.3390\/s20082376"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","unstructured":"Alec Radford Jong\u00a0Wook Kim Tao Xu Greg Brockman Christine McLeavey and Ilya Sutskever. 2022. Robust Speech Recognition via Large-Scale Weak Supervision. https:\/\/doi.org\/10.48550\/ARXIV.2212.04356","DOI":"10.48550\/ARXIV.2212.04356"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/HSNMC.2002.1032545"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.2977372"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2020.101178"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.3115\/1708376.1708421"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1002\/tee.23008"},{"key":"e_1_3_2_1_33_1","volume-title":"Selective attention reduces physiological noise in the external ear canals of humans. I: Auditory attention. Hearing research 312","author":"Walsh P","year":"2014","unstructured":"Kyle\u00a0P Walsh, Edward\u00a0G Pasanen, and Dennis McFadden. 2014. Selective attention reduces physiological noise in the external ear canals of humans. I: Auditory attention. Hearing research 312 (2014), 143\u2013159."},{"key":"e_1_3_2_1_34_1","volume-title":"VoiceFilter-Lite: Streaming targeted voice separation for on-device speech recognition. arXiv preprint arXiv:2009.04323","author":"Wang Quan","year":"2020","unstructured":"Quan Wang, Ignacio\u00a0Lopez Moreno, Mert Saglam, Kevin Wilson, Alan Chiao, Renjie Liu, Yanzhang He, Wei Li, Jason Pelecanos, Marily Nika, 2020. VoiceFilter-Lite: Streaming targeted voice separation for on-device speech recognition. arXiv preprint arXiv:2009.04323 (2020)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1101"},{"key":"e_1_3_2_1_36_1","volume-title":"Complex ratio masking for monaural speech separation","author":"Williamson S","year":"2015","unstructured":"Donald\u00a0S Williamson, Yuxuan Wang, and DeLiang Wang. 2015. Complex ratio masking for monaural speech separation. IEEE\/ACM transactions on audio, speech, and language processing 24, 3 (2015), 483\u2013492."},{"key":"e_1_3_2_1_37_1","unstructured":"Seung won Park. [n. d.]. Unofficial PyTorch implementation of Google AI\u2019s VoiceFilter system."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952154"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2019.2922820"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2023.3240008"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2019.2922820"}],"event":{"name":"TAHRI 2024: 2024 International Symposium on Technological Advances in Human-Robot Interaction","location":"Boulder CO USA","acronym":"TAHRI 2024"},"container-title":["Proceedings of the 2024 International Symposium on Technological Advances in Human-Robot Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3648536.3648539","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3648536.3648539","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T18:06:20Z","timestamp":1755972380000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3648536.3648539"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,9]]},"references-count":41,"alternative-id":["10.1145\/3648536.3648539","10.1145\/3648536"],"URL":"https:\/\/doi.org\/10.1145\/3648536.3648539","relation":{},"subject":[],"published":{"date-parts":[[2024,3,9]]},"assertion":[{"value":"2024-05-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}