{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T20:51:59Z","timestamp":1776113519693,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,28]]},"DOI":"10.1145\/3746059.3747613","type":"proceedings-article","created":{"date-parts":[[2025,9,27]],"date-time":"2025-09-27T07:49:12Z","timestamp":1758959352000},"page":"1-15","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["RhythmTA: A Visual-Aided Interactive System for ESL Rhythm Training via Dubbing Practice"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-5876-7219","authenticated-orcid":false,"given":"Chang","family":"Chen","sequence":"first","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong SAR, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2158-0353","authenticated-orcid":false,"given":"Sicheng","family":"Song","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong SAR, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7642-9044","authenticated-orcid":false,"given":"Shuchang","family":"Xu","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong SAR, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0365-8997","authenticated-orcid":false,"given":"Zhicheng","family":"Li","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong SAR, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3344-9694","authenticated-orcid":false,"given":"Huamin","family":"Qu","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong SAR, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3730-0827","authenticated-orcid":false,"given":"Yanna","family":"Lin","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong SAR, China"}]}],"member":"320","published-online":{"date-parts":[[2025,9,27]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1515\/9781474463775"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"crossref","unstructured":"George\u00a0D Allen. 1972. The Location of Rhythmic Stress Beats in English: An Experimental Study I. Language and speech 15 1 (1972) 72\u2013100.","DOI":"10.1177\/002383097201500110"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.21437\/SpeechProsody.2004-130"},{"key":"e_1_3_3_2_5_2","unstructured":"Alexei Baevski Yuhao Zhou Abdelrahman Mohamed and Michael Auli. 2020. wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations. Advances in neural information processing systems 33 (2020) 12449\u201312460."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"crossref","unstructured":"Aaron Bangor Philip\u00a0T Kortum and James\u00a0T Miller. 2008. An Empirical Evaluation of the System Usability Scale. Intl. Journal of Human\u2013Computer Interaction 24 6 (2008) 574\u2013594.","DOI":"10.1080\/10447310802205776"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/964696.964698"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-44"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"crossref","unstructured":"Nia Cason Corine Ast\u00e9sano and Daniele Sch\u00f6n. 2015. Bridging music and speech rhythm: Rhythmic priming and audio\u2013motor training affect speech perception. Acta psychologica 155 (2015) 43\u201350.","DOI":"10.1016\/j.actpsy.2014.12.002"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3447526.3472057"},{"key":"e_1_3_3_2_11_2","unstructured":"Chi-Fen Chen et\u00a0al. 1996. 
A New Perspective on Teaching English Pronunciation: Rhythm. (1996)."},{"key":"e_1_3_3_2_12_2","unstructured":"Yunfei Chu Jin Xu Qian Yang Haojie Wei Xipin Wei Zhifang Guo Yichong Leng Yuanjun Lv Jinzheng He Junyang Lin et\u00a0al. 2024. Qwen2-audio technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.10759 (2024)."},{"key":"e_1_3_3_2_13_2","volume-title":"An Introduction to English Prosody","author":"Couper-Kuhlen E.","year":"1986","unstructured":"E. Couper-Kuhlen. 1986. An Introduction to English Prosody. Edward Arnold. https:\/\/books.google.co.jp\/books?id=fKyMQgAACAAJ"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"Rebecca\u00a0M Dauer. 1983. Stress-Timing and Syllable-Timing Reanalyzed. Journal of phonetics 11 1 (1983) 51\u201362.","DOI":"10.1016\/S0095-4470(19)30776-4"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Giulio Degano Peter\u00a0W Donhauser Laura Gwilliams Paola Merlo and Narly Golestani. 2024. Speech Prosody Enhances the Neural Processing of Syntax. Communications Biology 7 1 (2024) 748.","DOI":"10.1038\/s42003-024-06444-7"},{"key":"e_1_3_3_2_16_2","first-page":"682","volume-title":"Proceedings of the Tenth International Congress of Phonetic Sciences","author":"Fokes J","year":"1984","unstructured":"J Fokes, ZS Bond, and M Steinberg. 1984. Patterns of English Word Stress by Native and Non-Native Speakers. In Proceedings of the Tenth International Congress of Phonetic Sciences. 682\u2013686."},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"crossref","unstructured":"Shinya Fujii and Catherine\u00a0Y Wan. 2014. The Role of Rhythm in Speech and Language Rehabilitation: The SEP Hypothesis. Frontiers in Human Neuroscience 8 (2014) 777.","DOI":"10.3389\/fnhum.2014.00777"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"crossref","unstructured":"Esther Grabe Ee\u00a0Ling Low et\u00a0al. 2002. Durational Variability in Speech and the Rhythm Class Hypothesis. 
Papers in laboratory phonology 7 515-546 (2002) 1\u201316.","DOI":"10.1515\/9783110197105.2.515"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.31274\/isudp.2024.161"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"crossref","unstructured":"Yo Hamada. 2012. An Effective Way to Improve Listening Skills Through Shadowing. The Language Teacher 36 (1) 3\u201310.","DOI":"10.37546\/JALTTLT36.1-1"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Soon Hau\u00a0Chua Haimo Zhang Muhammad Hammad Shengdong Zhao Sahil Goyal and Karan Singh. 2015. ColorBless: Augmenting Visual Information for Colorblind People with Binocular Luster Effect. ACM Transactions on Computer-Human Interaction (TOCHI) 21 6 (2015) 1\u201320.","DOI":"10.1145\/2687923"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-58316-7_11"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"crossref","unstructured":"Rodney\u00a0H Jones. 1997. Beyond \u201cListen and Repeat\u201d: Pronunciation Teaching Materials and Theories of Second Language Acquisition. System 25 1 (1997) 103\u2013112.","DOI":"10.1016\/S0346-251X(96)00064-4"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"crossref","unstructured":"Gary\u00a0Geunbae Lee Ho-Young Lee Jieun Song Byeongchang Kim Sechun Kang Jinsik Lee and Hyosung Hwang. 2017. Automatic Sentence Stress Feedback for Non-Native English Learners. Computer Speech & Language 41 (2017) 29\u201342.","DOI":"10.1016\/j.csl.2016.04.003"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3126594.3126627"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383455"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"crossref","unstructured":"Yang Liu and Godfried\u00a0T Toussaint. 2012. Mathematical Notation Representation and Visualization of Musical Rhythm: A Comparative Perspective. 
International Journal of Machine Learning and Computing 2 3 (2012) 261.","DOI":"10.7763\/IJMLC.2012.V2.127"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-623"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"crossref","unstructured":"Justyna Maculewicz Cumhur Erkut and Stefania Serafin. 2016. An investigation on the impact of auditory and haptic feedback on rhythmic walking interactions. International Journal of Human-Computer Studies 85 (2016) 40\u201346.","DOI":"10.1016\/j.ijhcs.2015.07.003"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-2404"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-48312-7_26"},{"key":"e_1_3_3_2_33_2","unstructured":"Tetsuo Nishihara and Adrian Leis. 2014. Rhythm in English: Implications for Teaching. Toohoku Eigo Kyooiku Gakkai Kiyoo 34 (2014) 65\u201376."},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"crossref","unstructured":"Francis Nolan and Hae-Sung Jeon. 2014. Speech Rhythm: A Metaphor? Philosophical Transactions of the Royal Society B: Biological Sciences 369 1658 (2014) 20130396.","DOI":"10.1098\/rstb.2013.0396"},{"key":"e_1_3_3_2_35_2","first-page":"809","volume-title":"Proceedings of Interspeech 2017","author":"\u00d6ktem Alp","year":"2017","unstructured":"Alp \u00d6ktem, Mireia Farr\u00fas, and Leo Wanner. 2017. Prosograph: A Tool for Prosody Visualisation of Large Speech Corpora. In Proceedings of Interspeech 2017. 809\u2013810."},{"key":"e_1_3_3_2_36_2","first-page":"3203","volume-title":"Proceedings of the 2011 CHI Conference on Human Factors in Computing Systems","author":"Patel Rupal","year":"2011","unstructured":"Rupal Patel and William Furr. 2011. ReadN\u2019Karaoke: Visualizing Prosody in Children\u2019s Books for Expressive Oral Reading. In Proceedings of the 2011 CHI Conference on Human Factors in Computing Systems. 
3203\u20133206."},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"crossref","unstructured":"Jonathan\u00a0E Peelle and Matthew\u00a0H Davis. 2012. Neural Oscillations Carry Speech Rhythm Through to Comprehension. Frontiers in Psychology 3 (2012) 320.","DOI":"10.3389\/fpsyg.2012.00320"},{"key":"e_1_3_3_2_38_2","unstructured":"Kenneth\u00a0L Pike. 1945. The Intonation of American English. (1945)."},{"key":"e_1_3_3_2_39_2","unstructured":"qupeiyin.cn. 2024. Lingodub. https:\/\/www.qupeiyin.com\/index.html Accessed: 2025-04-10."},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445190"},{"key":"e_1_3_3_2_41_2","unstructured":"Peter Roach. 1982. On the Distinction Between \u2018Stress-Timed\u2019 and \u2018Syllable-Timed\u2019 Languages. Linguistic controversies 73 (1982) 79."},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"crossref","unstructured":"Maria\u00a0Paula Roncaglia-Denissen Maren Schmidt-Kassow and Sonja\u00a0A Kotz. 2013. Speech Rhythm Facilitates Syntactic Ambiguity Resolution: ERP Evidence. PLOS ONE 8 2 (2013) e56000.","DOI":"10.1371\/journal.pone.0056000"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/632716.632872"},{"key":"e_1_3_3_2_44_2","unstructured":"Yong Ruan Xiangdong Wang Hong Liu Zhigang Ou Yun Gao Jianfeng Cheng and Yueliang Qian. 2019. An End-to-End Approach for Lexical Stress Detection Based on Transformer. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1911.04862 (2019)."},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/2807442.2807464"},{"key":"e_1_3_3_2_46_2","volume-title":"Quantifying the User Experience: Practical Statistics for User Research","author":"Sauro Jeff","year":"2016","unstructured":"Jeff Sauro and James\u00a0R Lewis. 2016. Quantifying the User Experience: Practical Statistics for User Research. Morgan Kaufmann."},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"crossref","unstructured":"Maria-Josep Sol\u00e9\u00a0Sabater. 1991. Stress and Rhythm in English. 
Revista alicantina de estudios ingleses No. 04 (Nov. 1991); pp. 145-162 (1991).","DOI":"10.14198\/raei.1991.4.13"},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"publisher","DOI":"10.21437\/SpeechProsody.2010-73"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","DOI":"10.1145\/3126594.3126661"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376726"},{"key":"e_1_3_3_2_51_2","unstructured":"Weizhen Technology (Beijing) Co. Ltd. 2021. Mofunshow. https:\/\/www.mofunenglish.com\/ Accessed: 2025-04-10."},{"key":"e_1_3_3_2_52_2","volume-title":"Critical Values and Probability Levels for the Wilcoxon Rank Sum Test and the Wilcoxon Signed Rank Test","author":"Wilcoxon Frank","year":"1963","unstructured":"Frank Wilcoxon, SK Katti, Roberta\u00a0A Wilcox, et\u00a0al. 1963. Critical Values and Probability Levels for the Wilcoxon Rank Sum Test and the Wilcoxon Signed Rank Test. Vol.\u00a01. American Cyanamid Pearl River, NY."},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1109\/GlobalSIP45357.2019.8969232"},{"key":"e_1_3_3_2_54_2","doi-asserted-by":"publisher","DOI":"10.1145\/3399715.3399922"},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"publisher","DOI":"10.1145\/2642918.2647390"},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"publisher","DOI":"10.1109\/BIGCOMP.2019.8679261"},{"key":"e_1_3_3_2_57_2","unstructured":"Yuguan Information Technology (Shanghai) LLC. 2022. Liulishuo. 
https:\/\/www.liulishuo.com\/ Accessed: 2025-04-10."},{"key":"e_1_3_3_2_58_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376322"}],"event":{"name":"UIST '25: The 38th Annual ACM Symposium on User Interface Software and Technology","location":"Busan Republic of Korea","acronym":"UIST '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the 38th Annual ACM Symposium on User Interface Software and Technology"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746059.3747613","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,27]],"date-time":"2025-09-27T22:14:15Z","timestamp":1759011255000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746059.3747613"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,27]]},"references-count":57,"alternative-id":["10.1145\/3746059.3747613","10.1145\/3746059"],"URL":"https:\/\/doi.org\/10.1145\/3746059.3747613","relation":{},"subject":[],"published":{"date-parts":[[2025,9,27]]},"assertion":[{"value":"2025-09-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}