{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,14]],"date-time":"2025-06-14T04:05:54Z","timestamp":1749873954411,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,16]]},"DOI":"10.1145\/3699682.3728347","type":"proceedings-article","created":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T13:05:37Z","timestamp":1749819937000},"page":"194-203","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Task-specific, personalized Automatic Speech Recognition"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2927-3841","authenticated-orcid":false,"given":"Fahrettin","family":"G\u00f6kg\u00f6z","sequence":"first","affiliation":[{"name":"Fraunhofer FKIE, Wachtberg, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1673-9964","authenticated-orcid":false,"given":"Hussein","family":"Hasso","sequence":"additional","affiliation":[{"name":"Fraunhofer FKIE, Wachtberg, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,6,13]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Harsh Ahlawat Naveen Aggarwal and Deepti Gupta. 2025. Automatic Speech Recognition: A survey of deep learning techniques and approaches. International Journal of Cognitive Computing in Engineering (2025).","DOI":"10.1016\/j.ijcce.2024.12.007"},{"key":"e_1_3_3_1_3_2","series-title":"(NIPS \u201920)","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"Baevski Alexei","year":"2020","unstructured":"Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, and Michael Auli. 2020. wav2vec 2.0: a framework for self-supervised learning of speech representations. In Proceedings of the 34th International Conference on Neural Information Processing Systems (Vancouver, BC, Canada) (NIPS \u201920). Curran Associates Inc., Red Hook, NY, USA, Article 1044, 12\u00a0pages."},{"key":"e_1_3_3_1_4_2","series-title":"(NIPS\u201920)","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"Baevski Alexei","year":"2020","unstructured":"Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, and Michael Auli. 2020. wav2vec 2.0: a framework for self-supervised learning of speech representations. In Proceedings of the 34th International Conference on Neural Information Processing Systems(NIPS\u201920). Curran Associates Inc., Red Hook, NY, USA. event-place: Vancouver, BC, Canada."},{"key":"e_1_3_3_1_5_2","unstructured":"Joshua Ball. 2023. Voice Activity Detection (VAD) in Noisy Environments. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.05815 (2023)."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","unstructured":"C.F. Barnes S.A. Rizvi and N.M. Nasrabadi. 1996. Advances in residual vector quantization: a review. IEEE Transactions on Image Processing 5 2 (1996) 226\u2013262. 10.1109\/83.480761","DOI":"10.1109\/83.480761"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","unstructured":"Peter Bell Joachim Fainberg Ondrej Klejch Jinyu Li Steve Renals and Pawel Swietojanski. 2021. Adaptation Algorithms for Neural Network-Based Speech Recognition: An Overview. IEEE Open Journal of Signal Processing 2 (2021) 33\u201366. 10.1109\/OJSP.2020.3045349","DOI":"10.1109\/OJSP.2020.3045349"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","unstructured":"Mathieu Bernard and Hadrien Titeux. 2021. Phonemizer: Text to Phones Transcription for Multiple Languages in Python. Journal of Open Source Software 6 68 (2021) 3958. 10.21105\/joss.03958","DOI":"10.21105\/joss.03958"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Sanyuan Chen Chengyi Wang Zhengyang Chen Yu Wu Shujie Liu Zhuo Chen Jinyu Li Naoyuki Kanda Takuya Yoshioka Xiong Xiao et\u00a0al. 2022. Wavlm: Large-scale self-supervised pre-training for full stack speech processing. IEEE Journal of Selected Topics in Signal Processing 16 6 (2022) 1505\u20131518.","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","unstructured":"Alessia Cornaggia-Urrigshardt Fahrettin G\u00f6kg\u00f6z Frank Kurth Hans-Christian Schmitz and Kevin Wilkinghoff. 2022. Speech Recognition Lab. Procedia Comput. Sci. 205 C (Jan 2022) 218\u2013228. 10.1016\/j.procs.2022.09.023","DOI":"10.1016\/j.procs.2022.09.023"},{"key":"e_1_3_3_1_11_2","volume-title":"Natural Language Processing for Prolog Programmers","author":"Covington Michael\u00a0A.","year":"1994","unstructured":"Michael\u00a0A. Covington. 1994. Natural Language Processing for Prolog Programmers. Prentice Hall, Upper Saddle River."},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","unstructured":"Ross Cutler Ando Saabas Tanel P\u00e4rnamaa Marju Purin Evgenii Indenbom Nicolae-C\u0103t\u0103lin Ristea Jegor Gu\u017evin Hannes Gamper Sebastian Braun and Robert Aichner. 2024. ICASSP 2023 Acoustic Echo Cancellation Challenge. IEEE Open Journal of Signal Processing 5 (2024) 675\u2013685. 10.1109\/OJSP.2024.3376289","DOI":"10.1109\/OJSP.2024.3376289"},{"key":"e_1_3_3_1_13_2","volume-title":"Proceedings of the ISMIR 2021 Workshop on Music Source Separation","author":"D\u00e9fossez Alexandre","year":"2021","unstructured":"Alexandre D\u00e9fossez. 2021. Hybrid Spectrogram and Waveform Source Separation. In Proceedings of the ISMIR 2021 Workshop on Music Source Separation."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1532"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICMCIS61231.2024.10540613"},{"key":"e_1_3_3_1_17_2","first-page":"120","volume-title":"Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2021","author":"G\u00f6kg\u00f6z Fahrettin","year":"2021","unstructured":"Fahrettin G\u00f6kg\u00f6z and Mahmoud Hashem. 2021. Investigating the scarce data and resources problem for speech recognition using transfer learning and data augmentation. In Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2021, Stefan Hillmann, Benjamin Weiss, Thilo Michael, and Sebastian M\u00f6ller (Eds.). TUDpress, Dresden, 120\u2013127. https:\/\/www.essv.de\/pdf\/pdf\/2021_120_127.pdf ISSN: 0940-6832."},{"key":"e_1_3_3_1_18_2","first-page":"187","volume-title":"Proceedings of the Sixth Workshop on Statistical Machine Translation","author":"Heafield Kenneth","year":"2011","unstructured":"Kenneth Heafield. 2011. KenLM: Faster and Smaller Language Model Queries. In Proceedings of the Sixth Workshop on Statistical Machine Translation. Association for Computational Linguistics, Edinburgh, Scotland, 187\u2013197. https:\/\/aclanthology.org\/W11-2123"},{"key":"e_1_3_3_1_19_2","unstructured":"Dan Hendrycks and Kevin Gimpel. 2016. Gaussian error linear units (gelus). arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1606.08415 (2016)."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1535"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053545"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053104"},{"key":"e_1_3_3_1_23_2","unstructured":"ITU-T. 2003. ITU-T Recommendation P.835 Subjective test methodology for evaluating speech communication systems that include noise suppression algorithm. https:\/\/www.itu.int\/rec\/T-REC-P.835-200311-I\/en."},{"key":"e_1_3_3_1_24_2","unstructured":"ITU-T. 2021. ITU-T Recommendation P.808 Subjective evaluation of speech quality with a crowdsourcing approach. https:\/\/www.itu.int\/rec\/T-REC-P.808-202106-I\/en."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","unstructured":"Herve J\u00e9gou Matthijs Douze and Cordelia Schmid. 2011. Product Quantization for Nearest Neighbor Search. IEEE Transactions on Pattern Analysis and Machine Intelligence 33 1 (2011) 117\u2013128. 10.1109\/TPAMI.2010.57","DOI":"10.1109\/TPAMI.2010.57"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","unstructured":"Eugene Kharitonov Damien Vincent Zal\u00e1n Borsos Rapha\u00ebl Marinier Sertan Girgin Olivier Pietquin Matt Sharifi Marco Tagliasacchi and Neil Zeghidour. 2023. Speak Read and Prompt: High-Fidelity Text-to-Speech with Minimal Supervision. Transactions of the Association for Computational Linguistics 11 (12 2023) 1703\u20131718. 10.1162\/tacl_a_00618 arXiv:https:\/\/direct.mit.edu\/tacl\/article-pdf\/doi\/10.1162\/tacl_a_00618\/2200655\/tacl_a_00618.pdf","DOI":"10.1162\/tacl_a_00618"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2015-711"},{"key":"e_1_3_3_1_28_2","unstructured":"Jinyu Li. 2021. Recent Advances in End-to-End Automatic Speech Recognition. ArXiv abs\/2111.01690 (2021). https:\/\/api.semanticscholar.org\/CorpusID:240419899"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-299"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"e_1_3_3_1_31_2","first-page":"12311","volume-title":"Advances in Neural Information Processing Systems","author":"Potapczynski Andres","year":"2020","unstructured":"Andres Potapczynski, Gabriel Loaiza-Ganem, and John\u00a0P Cunningham. 2020. Invertible Gaussian Reparameterization: Revisiting the Gumbel-Softmax. In Advances in Neural Information Processing Systems, H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.F. Balcan, and H.\u00a0Lin (Eds.), Vol.\u00a033. Curran Associates, Inc., 12311\u201312321. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/90c34175923a36ab7a5de4b981c1972f-Paper.pdf"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414878"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSPW62465.2024.10626457"},{"key":"e_1_3_3_1_34_2","volume-title":"Proceedings of the 24th International Command & Control Research and Technology Symposium","author":"Schmitz Hans-Christian","year":"2019","unstructured":"Hans-Christian Schmitz, Alessia Cornaggia-Urrigshardt, Fahrettin G\u00f6kg\u00f6z, Samantha Kent, and Kevin Wilkinghoff. 2019. Calm Interfaces for Integrated C2 Systems. In Proceedings of the 24th International Command & Control Research and Technology Symposium. Laurel. Maryland."},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/3379336.3381496"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","unstructured":"Nahian Siddique Sidike Paheding Colin\u00a0P. Elkin and Vijay Devabhaktuni. 2021. U-Net and Its Variants for Medical Image Segmentation: A Review of Theory and Applications. IEEE Access 9 (2021) 82031\u201382057. 10.1109\/ACCESS.2021.3086020","DOI":"10.1109\/ACCESS.2021.3086020"},{"key":"e_1_3_3_1_37_2","first-page":"2002","volume-title":"Interspeech","author":"Stolcke Andreas","year":"2002","unstructured":"Andreas Stolcke et\u00a0al. 2002. SRILM-an extensible language modeling toolkit.. In Interspeech, Vol.\u00a02002. 2002."},{"key":"e_1_3_3_1_38_2","volume-title":"Advances in Neural Information Processing Systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.), Vol.\u00a030. Curran Associates, Inc.https:\/\/proceedings.neurips.cc\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","unstructured":"E. Vincent R. Gribonval and C. Fevotte. 2006. Performance measurement in blind audio source separation. IEEE Transactions on Audio Speech and Language Processing 14 4 (2006) 1462\u20131469. 10.1109\/TSA.2005.858005","DOI":"10.1109\/TSA.2005.858005"},{"key":"e_1_3_3_1_40_2","first-page":"9","volume-title":"14th ITG Conference on Speech Communication (ITG Speech)","author":"Wilkinghoff Kevin","year":"2021","unstructured":"Kevin Wilkinghoff, Alessia Cornaggia-Urrigshardt, and Fahrettin G\u00f6kg\u00f6z. 2021. Two-Dimensional Embeddings for Low-Resource Keyword Spotting Based on Dynamic Time Warping. In 14th ITG Conference on Speech Communication (ITG Speech). VDE-Verlag, 9\u201313."},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","unstructured":"Neil Zeghidour Alejandro Luebs Ahmed Omran Jan Skoglund and Marco Tagliasacchi. 2022. SoundStream: An End-to-End Neural Audio Codec. IEEE\/ACM Transactions on Audio Speech and Language Processing 30 (2022) 495\u2013507. 10.1109\/TASLP.2021.3129994","DOI":"10.1109\/TASLP.2021.3129994"}],"event":{"name":"UMAP '25: 33rd ACM Conference on User Modeling, Adaptation and Personalization","location":"New York City USA","acronym":"UMAP '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the 33rd ACM Conference on User Modeling, Adaptation and Personalization"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3699682.3728347","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,13]],"date-time":"2025-06-13T13:06:30Z","timestamp":1749819990000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3699682.3728347"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,13]]},"references-count":40,"alternative-id":["10.1145\/3699682.3728347","10.1145\/3699682"],"URL":"https:\/\/doi.org\/10.1145\/3699682.3728347","relation":{},"subject":[],"published":{"date-parts":[[2025,6,13]]},"assertion":[{"value":"2025-06-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}