{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T02:12:17Z","timestamp":1774404737480,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":82,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T00:00:00Z","timestamp":1745539200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,26]]},"DOI":"10.1145\/3706598.3713745","type":"proceedings-article","created":{"date-parts":[[2025,4,24]],"date-time":"2025-04-24T04:35:25Z","timestamp":1745469325000},"page":"1-19","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Spatial Speech Translation: Translating Across Space With Binaural Hearables"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8031-5066","authenticated-orcid":false,"given":"Tuochao","family":"Chen","sequence":"first","affiliation":[{"name":"University of Washington, Computer Science and Engineering, Seattle, Washington, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0841-7296","authenticated-orcid":false,"given":"Qirui","family":"Wang","sequence":"additional","affiliation":[{"name":"Mobile Intelligence Lab, Paul G. Allen School of Computer Science and Engineering, Seattle, Washington, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9564-6271","authenticated-orcid":false,"given":"Runlin","family":"He","sequence":"additional","affiliation":[{"name":"Mobile Intelligence Lab, University of Washington, Seattle, Washington, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9863-3054","authenticated-orcid":false,"given":"Shyamnath","family":"Gollakota","sequence":"additional","affiliation":[{"name":"university of Washington, Seattle, Washington, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,4,25]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Alex Agranovich Eliya Nachmani Oleg Rybakov Yifan Ding Ye Jia Nadav Bar Heiga Zen and Michelle\u00a0Tadmor Ramanovich. 2024. SimulTron: On-Device Simultaneous Speech to Speech Translation. arxiv:https:\/\/arXiv.org\/abs\/2406.02133\u00a0[eess.AS] https:\/\/arxiv.org\/abs\/2406.02133"},{"key":"e_1_3_3_1_3_2","unstructured":"Dmitry Alexandrovsky Susanne Putze Michael Bonfert Sebastian H\u00f6ffner Pitt Michelmann Dirk Wenig Rainer Malaka and Jan\u00a0David Smeddinck. 2020. Unmet Needs and Opportunities for Mobile Translation AI. CHI (2020)."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","unstructured":"V.R. Algazi R.O. Duda D.M. Thompson and C. Avendano. 2001. The CIPIC HRTF database. 99-102\u00a0pages. 10.1109\/ASPAA.2001.969552","DOI":"10.1109\/ASPAA.2001.969552"},{"key":"e_1_3_3_1_5_2","unstructured":"Apple. 2024. Listen with Personalized Spatial Audio for AirPods and Beats. https:\/\/support.apple.com\/en-us\/102596"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2007.366611"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1126"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-143"},{"key":"e_1_3_3_1_9_2","unstructured":"Dzmitry Bahdanau Kyunghyun Cho and Yoshua Bengio. 2014. Neural Machine Translation by Jointly Learning to Align and Translate. CoRR abs\/1409.0473 (2014). https:\/\/api.semanticscholar.org\/CorpusID:11212020"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","unstructured":"Lo\u00efc Barrault Yu-An Chung Mariano Meglioli David Dale Ning Dong Paul-Ambroise Duquenne Hady Elsahar Hongyu Gong Kevin Heffernan John Hoffman Christopher Klaiber Pengwei Li Daniel Licht Jean Maillard Alice Rakotoarison Kaushik Sadagopan Guillaume Wenzek Ethan Ye Bapi Akula and Skyler Wang. 2025. Joint speech and text machine translation for up to 100 languages. Nature 637 (01 2025) 587\u2013593. 10.1038\/s41586-024-08359-z","DOI":"10.1038\/s41586-024-08359-z"},{"key":"e_1_3_3_1_11_2","unstructured":"Google Blog. 2024. Translate with Google Pixel Buds. https:\/\/support.google.com\/googlepixelbuds\/answer\/7573100?hl=en"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Zal\u00e1n Borsos Rapha\u00ebl Marinier Damien Vincent Eugene Kharitonov Olivier Pietquin Matt Sharifi Dominik Roblek Olivier Teboul David Grangier Marco Tagliasacchi and Neil Zeghidour. 2023. AudioLM: a Language Modeling Approach to Audio Generation. arxiv:https:\/\/arXiv.org\/abs\/2209.03143\u00a0[cs.SD]","DOI":"10.1109\/TASLP.2023.3288409"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"crossref","unstructured":"Alessandro Carlini Camille Bordeau and Maxime Ambard. 2024. Auditory localization: a comprehensive practical review. Frontiers Psycholo. (2024).","DOI":"10.3389\/fpsyg.2024.1408073"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538933"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","unstructured":"Sanyuan Chen Chengyi Wang Zhengyang Chen Yu Wu Shujie Liu Zhuo Chen Jinyu Li Naoyuki Kanda Takuya Yoshioka Xiong Xiao Jian Wu Long Zhou Shuo Ren Yanmin Qian Yao Qian Jian Wu Michael Zeng Xiangzhan Yu and Furu Wei. 2022. WavLM: Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing. IEEE Journal of Selected Topics in Signal Processing 16 6 (Oct. 2022) 1505\u20131518. 10.1109\/jstsp.2022.3188113","DOI":"10.1109\/jstsp.2022.3188113"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","unstructured":"Tuochao Chen Malek Itani Sefik Eskimez Takuya Yoshioka and Shyamnath Gollakota. 2024. Hearable devices with sound bubbles. Nature Electronics 7 (11 2024) 1047\u20131058. 10.1038\/s41928-024-01276-z","DOI":"10.1038\/s41928-024-01276-z"},{"key":"e_1_3_3_1_17_2","volume-title":"InterSpeech","author":"Chen Tuochao","year":"2024","unstructured":"Tuochao Chen, Qirui Wang, Bohan Wu, Malek Itani, Sefik\u00a0Emre Eskimez, Takuya Yoshioka, and Shyamnath Gollakota. 2024. Target conversation extraction: Source separation using turn-taking dynamics. In InterSpeech."},{"key":"e_1_3_3_1_18_2","unstructured":"Shanbo Cheng Zhichao Huang Tom Ko Hang Li Ningxin Peng Lu Xu and Qini Zhang. 2024. Towards Achieving Human Parity on End-to-end Simultaneous Speech Translation via LLM Agent. arxiv:https:\/\/arXiv.org\/abs\/2407.21646\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2407.21646"},{"key":"e_1_3_3_1_19_2","unstructured":"Kyunghyun Cho and Masha Esipova. 2016. Can neural machine translation do simultaneous translation? arxiv:https:\/\/arXiv.org\/abs\/1606.02012\u00a0[cs.CL]"},{"key":"e_1_3_3_1_20_2","unstructured":"Seamless Communication Lo\u00efc Barrault Yu-An Chung Mariano\u00a0Coria Meglioli David Dale Ning Dong Mark Duppenthaler Paul-Ambroise Duquenne Brian Ellis Hady Elsahar Justin Haaheim John Hoffman Min-Jae Hwang Hirofumi Inaguma Christopher Klaiber Ilia Kulikov Pengwei Li Daniel Licht Jean Maillard Ruslan Mavlyutov Alice Rakotoarison Kaushik\u00a0Ram Sadagopan Abinesh Ramakrishnan Tuan Tran Guillaume Wenzek Yilin Yang Ethan Ye Ivan Evtimov Pierre Fernandez Cynthia Gao Prangthip Hansanti Elahe Kalbassi Amanda Kallet Artyom Kozhevnikov Gabriel\u00a0Mejia Gonzalez Robin\u00a0San Roman Christophe Touret Corinne Wong Carleigh Wood Bokai Yu Pierre Andrews Can Balioglu Peng-Jen Chen Marta\u00a0R. Costa-juss\u00e0 Maha Elbayad Hongyu Gong Francisco Guzm\u00e1n Kevin Heffernan Somya Jain Justine Kao Ann Lee Xutai Ma Alex Mourachko Benjamin Peloquin Juan Pino Sravya Popuri Christophe Ropers Safiyyah Saleem Holger Schwenk Anna Sun Paden Tomasello Changhan Wang Jeff Wang Skyler Wang and Mary Williamson. 2023. Seamless: Multilingual Expressive and Streaming Speech Translation. arxiv:https:\/\/arXiv.org\/abs\/2312.05187\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2312.05187"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"crossref","unstructured":"Samuele Cornell Zhong-Qiu Wang Yoshiki Masuyama Shinji Watanabe Manuel Pariente and Nobutaka Ono. 2023. Multi-Channel Target Speaker Extraction with Refinement: The WavLab Submission to the Second Clarity Enhancement Challenge. arxiv:https:\/\/arXiv.org\/abs\/2302.07928","DOI":"10.1109\/ICASSP49357.2023.10095961"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-2079"},{"key":"e_1_3_3_1_23_2","unstructured":"Qianqian Dong Zhiying Huang Qiao Tian Chen Xu Tom Ko Yunlong Zhao Siyuan Feng Tang Li Kexin Wang Xuxin Cheng Fengpeng Yue Ye Bai Xi Chen Lu Lu Zejun Ma Yuping Wang Mingxuan Wang and Yuxuan Wang. 2023. PolyVoice: Language Models for Speech to Speech Translation. arxiv:https:\/\/arXiv.org\/abs\/2306.02982\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2306.02982"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.899"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1241"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746962"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.543"},{"key":"e_1_3_3_1_28_2","volume-title":"InterSpeech","author":"Giri Ritwik","year":"2021","unstructured":"Ritwik Giri, Shrikant Venkataramani, Jean-Marc Valin, Umut Isik, and Arvindh Krishnaswamy. 2021. Personalized PercepNet: Real-time, Low-complexity Target Voice Separation and Enhancement. In InterSpeech."},{"key":"e_1_3_3_1_29_2","unstructured":"Jiatao Gu James Bradbury Caiming Xiong Victor\u00a0OK Li and Richard Socher. 2017. Non-autoregressive neural machine translation. (2017)."},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053215"},{"key":"e_1_3_3_1_32_2","unstructured":"IoSR-Surrey. 2016. IoSR-surrey\/realroombrirs: Binaural impulse responses captured in real rooms. https:\/\/github.com\/IoSR-Surrey\/RealRoomBRIRs."},{"key":"e_1_3_3_1_33_2","unstructured":"IoSR-Surrey. 2023. Simulated Room Impulse Responses. https:\/\/iosr.uk\/software\/index.php."},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","unstructured":"Malek Itani Tuochao Chen Takuya Yoshioka and Shyamnath Gollakota. 2023. Creating speech zones with self-distributing acoustic swarms. Nature Communications 14 (09 2023). 10.1038\/s41467-023-40869-8","DOI":"10.1038\/s41467-023-40869-8"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606782"},{"key":"e_1_3_3_1_36_2","volume-title":"Advances in Neural Information Processing Systems","author":"Jenrungrot Teerapat","year":"2020","unstructured":"Teerapat Jenrungrot, Vivek Jayaram, Steve Seitz, and Ira Kemelmacher-Shlizerman. 2020. The Cone of Silence: Speech Separation by Localization. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_3_1_37_2","unstructured":"Chunyu Kit and Tak\u00a0Ming Wong. 2008. Comparative Evaluation of Online Machine Translation Systems with Legal Texts. Law Library Journal 100 (2008) 299\u2013321. https:\/\/api.semanticscholar.org\/CorpusID:13481458"},{"key":"e_1_3_3_1_38_2","unstructured":"Jungil Kong Jaehyeon Kim and Jaekyoung Bae. 2020. Hifi-gan: Generative adversarial networks for efficient and high fidelity speech synthesis. Advances in neural information processing systems 33 (2020) 17022\u201317033."},{"key":"e_1_3_3_1_39_2","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Le Matthew","year":"2024","unstructured":"Matthew Le, Apoorv Vyas, Bowen Shi, Brian Karrer, Leda Sari, Rashel Moritz, Mary Williamson, Vimal Manohar, Yossi Adi, Jay Mahadeokar, and Wei-Ning Hsu. 2024. Voicebox: text-guided multilingual universal speech generation at scale. In Proceedings of the 37th International Conference on Neural Information Processing Systems."},{"key":"e_1_3_3_1_40_2","unstructured":"Marie Lebert. 2022. A short history of translation through the ages. https:\/\/www.iapti.org\/iaptiarticle\/a-short-history-of-translation-through-the-ages-marie-lebert-2\/"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.235"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095939"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"crossref","unstructured":"Stephen\u00a0C. Levinson. 2016. Turn-taking in Human Communication \u2013 Origins and Implications for Language Processing. Trends in Cog. Sci. (2016).","DOI":"10.1016\/j.tics.2015.10.010"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-naacl.17"},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1289"},{"key":"e_1_3_3_1_46_2","volume-title":"ICLR","author":"Ma Xutai","year":"2020","unstructured":"Xutai Ma, Juan Pino, James Cross, Liezl Puzon, and Jiatao Gu. 2020. Monotonic Multihead Attention. In ICLR."},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"publisher","unstructured":"Evgeny Matusov Stephan Kanthak and Hermann Ney. 2005. On the Integration of Speech Recognition and Statistical Machine Translation. 3177\u20133180. 10.21437\/Interspeech.2005-726","DOI":"10.21437\/Interspeech.2005-726"},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"crossref","unstructured":"Tobias May Steven Van De\u00a0Par and Armin Kohlrausch. 2010. A probabilistic model for robust localization based on a binaural auditory front-end. IEEE Transactions on audio speech and language processing 19 1 (2010) 1\u201313.","DOI":"10.1109\/TASL.2010.2042128"},{"key":"e_1_3_3_1_49_2","unstructured":"Mymanu. 2024. Mymanu Click S. https:\/\/mymanu.com\/products\/mymanu-clik-s"},{"key":"e_1_3_3_1_50_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1999.758176"},{"key":"e_1_3_3_1_51_2","unstructured":"NLLB Team Marta\u00a0R. Costa-juss\u00e0 James Cross Onur \u00c7elebi Maha Elbayad Kenneth Heafield Kevin Heffernan Elahe Kalbassi Janice Lam Daniel Licht Jean Maillard Anna Sun Skyler Wang Guillaume Wenzek Al Youngblood Bapi Akula Loic Barrault Gabriel Mejia-Gonzalez Prangthip Hansanti John Hoffman Semarley Jarrett Kaushik\u00a0Ram Sadagopan Dirk Rowe Shannon Spruit Chau Tran Pierre Andrews Necip\u00a0Fazil Ayan Shruti Bhosale Sergey Edunov Angela Fan Cynthia Gao Vedanuj Goswami Francisco Guzm\u00e1n Philipp Koehn Alexandre Mourachko Christophe Ropers Safiyyah Saleem Holger Schwenk and Jeff Wang. 2022. No Language Left Behind: Scaling Human-Centered Machine Translation. (2022)."},{"key":"e_1_3_3_1_52_2","unstructured":"NPR. 2023. Finding your place in the galaxy with the help of Star Trek. https:\/\/www.npr.org\/2023\/10\/14\/1205714903\/star-trek"},{"key":"e_1_3_3_1_53_2","unstructured":"All Things\u00a0Considered NPR. 1998. Babelfish a Translator Inspired by \u2019The Hitchhiker\u2019s Guide\u2019. https:\/\/www.npr.org\/1998\/02\/12\/1036190\/babelfish-a-translator-inspired-by-the-hitchhikers-guide"},{"key":"e_1_3_3_1_54_2","doi-asserted-by":"publisher","unstructured":"Lucas Nunes\u00a0Vieira Minako O\u2019Hagan and Carol O\u2019Sullivan. 2020. Understanding the societal impacts of machine translation: a critical review of the literature on medical and legal use cases. Information Communication and Society (06 2020). 10.1080\/1369118X.2020.1776370","DOI":"10.1080\/1369118X.2020.1776370"},{"key":"e_1_3_3_1_55_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-11032"},{"key":"e_1_3_3_1_56_2","volume-title":"International Workshop on Spoken Language Translation","author":"Post Matt","year":"2013","unstructured":"Matt Post, G.\u00a0Santhosh Kumar, Adam Lopez, Damianos\u00a0G. Karakos, Chris Callison-Burch, and Sanjeev Khudanpur. 2013. Improved speech-to-text translation with the Fisher and Callhome Spanish-English speech translation corpus. In International Workshop on Spoken Language Translation."},{"key":"e_1_3_3_1_57_2","doi-asserted-by":"publisher","DOI":"10.5555\/3305890.3305974"},{"key":"e_1_3_3_1_58_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.350"},{"key":"e_1_3_3_1_59_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462417"},{"key":"e_1_3_3_1_60_2","volume-title":"Three Directions for the Design of Human-Centered Machine Translation","author":"Robertson Samantha","year":"2021","unstructured":"Samantha Robertson, Wesley Deng, Timnit Gebru, Margaret Mitchell, Daniel\u00a0J. Liebling, Michal Lahav, Katherine Heller, Mark D\u00edaz, Samy Bengio, and Niloufar Salehi (Eds.). 2021. Three Directions for the Design of Human-Centered Machine Translation."},{"key":"e_1_3_3_1_61_2","unstructured":"Paul\u00a0K. Rubenstein Chulayuth Asawaroengchai Duc\u00a0Dung Nguyen Ankur Bapna Zal\u00e1n Borsos F\u00e9lix de Chaumont\u00a0Quitry Peter Chen Dalia\u00a0El Badawy Wei Han Eugene Kharitonov Hannah Muckenhirn Dirk Padfield James Qin Danny Rozenberg Tara Sainath Johan Schalkwyk Matt Sharifi Michelle\u00a0Tadmor Ramanovich Marco Tagliasacchi Alexandru Tudor M Velimirovi\u0107 Damien Vincent Jiahui Yu Y Wang Vicky Zayats N Zeghidour Yu Zhang Zhishuai Zhang Lukas Zilka and Christian Frank. 2023. AudioPaLM: A Large Language Model That Can Speak and Listen."},{"key":"e_1_3_3_1_62_2","unstructured":"SDK. 2023. Steam Audio. https:\/\/valvesoftware.github.io\/steam-audio\/."},{"key":"e_1_3_3_1_63_2","unstructured":"ShanonPearce. 2022. Shanonpearce\/ash-listening-set: A dataset of filters for headphone correction and binaural synthesis of spatial audio systems on headphones. https:\/\/github.com\/ShanonPearce\/ASH-Listening-Set\/tree\/main"},{"key":"e_1_3_3_1_64_2","unstructured":"Kai Shen Zeqian Ju Xu Tan Yanqing Liu Yichong Leng Lei He Tao Qin Sheng Zhao and Jiang Bian. 2023. NaturalSpeech 2: Latent Diffusion Models are Natural and Zero-Shot Speech and Singing Synthesizers. arxiv:https:\/\/arXiv.org\/abs\/2304.09116\u00a0[eess.AS] https:\/\/arxiv.org\/abs\/2304.09116"},{"key":"e_1_3_3_1_65_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.661"},{"key":"e_1_3_3_1_66_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413901"},{"key":"e_1_3_3_1_67_2","unstructured":"Timekettle. [n. d.]. Timekettle WT2 Edge\/W3 Real-time Translator Earbuds 2-way simultaneous interpretation. https:\/\/www.timekettle.co\/products\/wt2-edge-online-voice-language-translator-earbuds"},{"key":"e_1_3_3_1_68_2","volume-title":"Advances in Neural Information Processing Systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141\u00a0ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems , Vol.\u00a030."},{"key":"e_1_3_3_1_69_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10094573"},{"key":"e_1_3_3_1_70_2","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606779"},{"key":"e_1_3_3_1_71_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642057"},{"key":"e_1_3_3_1_72_2","doi-asserted-by":"crossref","unstructured":"Anran Wang Maruchi Kim Hao Zhang and Shyamnath Gollakota. 2022. Hybrid Neural Networks for On-device Directional Hearing. AAAI (2022).","DOI":"10.1609\/aaai.v36i10.21394"},{"key":"e_1_3_3_1_73_2","unstructured":"Chengyi Wang Sanyuan Chen Yu Wu Ziqiang Zhang Long Zhou Shujie Liu Zhuo Chen Yanqing Liu Huaming Wang Jinyu Li Lei He Sheng Zhao and Furu Wei. 2023. Neural Codec Language Models are Zero-Shot Text to Speech Synthesizers. arxiv:https:\/\/arXiv.org\/abs\/2301.02111\u00a0[cs.CL]"},{"key":"e_1_3_3_1_74_2","volume-title":"Interspeech","author":"Wang Peidong","year":"2022","unstructured":"Peidong Wang, Eric Sun, Jian Xue, Yu Wu, Long Zhou, Yashesh Gaur, Shujie Liu, and Jinyu Li. 2022. LAMASSU: A Streaming Language-Agnostic Multilingual Speech Recognition and Translation Model Using Neural Transducers. In Interspeech. https:\/\/api.semanticscholar.org\/CorpusID:258968116"},{"key":"e_1_3_3_1_75_2","unstructured":"Waverly. 2024. Waverly labs Earbuds. https:\/\/www.waverlylabs.com\/"},{"key":"e_1_3_3_1_76_2","unstructured":"Yonghui Wu Mike Schuster Zhifeng Chen Quoc\u00a0V. Le Mohammad Norouzi Wolfgang Macherey Maxim Krikun Yuan Cao Qin Gao Klaus Macherey Jeff Klingner Apurva Shah Melvin Johnson Xiaobing Liu \u0141ukasz Kaiser Stephan Gouws Yoshikiyo Kato Taku Kudo Hideto Kazawa Keith Stevens George Kurian Nishant Patil Wei Wang Cliff Young Jason Smith Jason Riesa Alex Rudnick Oriol Vinyals Greg Corrado Macduff Hughes and Jeffrey Dean. 2016. Google\u2019s Neural Machine Translation System: Bridging the Gap between Human and Machine Translation. CoRR abs\/1609.08144 (2016)."},{"key":"e_1_3_3_1_77_2","unstructured":"Zhongweiyang Xu and Romit\u00a0Roy Choudhury. 2022. Learning to Separate Voices by Spatial Regions. ICML (2022)."},{"key":"e_1_3_3_1_78_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10953"},{"key":"e_1_3_3_1_79_2","volume-title":"Proceedings of the EMNLP","author":"Javad\u00a0Dousti Changhan Wang Jiatao Gu Juan\u00a0Pino Xutai\u00a0Ma, Mohammad","year":"2020","unstructured":"Changhan Wang Jiatao Gu Juan\u00a0Pino Xutai\u00a0Ma, Mohammad Javad\u00a0Dousti. 2020. Simuleval: An evaluation toolkit for simultaneous translation. In Proceedings of the EMNLP."},{"key":"e_1_3_3_1_80_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10446050"},{"key":"e_1_3_3_1_81_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.485"},{"key":"e_1_3_3_1_82_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.581"},{"key":"e_1_3_3_1_83_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-667"}],"event":{"name":"CHI 2025: CHI Conference on Human Factors in Computing Systems","location":"Yokohama Japan","acronym":"CHI '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2025 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706598.3713745","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3706598.3713745","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:57:19Z","timestamp":1750298239000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706598.3713745"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,25]]},"references-count":82,"alternative-id":["10.1145\/3706598.3713745","10.1145\/3706598"],"URL":"https:\/\/doi.org\/10.1145\/3706598.3713745","relation":{},"subject":[],"published":{"date-parts":[[2025,4,25]]},"assertion":[{"value":"2025-04-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}