{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T19:02:31Z","timestamp":1774638151332,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":83,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,19]],"date-time":"2023-04-19T00:00:00Z","timestamp":1681862400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Capes\/Fulbright","award":["23038.017809\/2019-67"],"award-info":[{"award-number":["23038.017809\/2019-67"]}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1954284,2125362,2235405"],"award-info":[{"award-number":["1954284,2125362,2235405"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,19]]},"DOI":"10.1145\/3544548.3581511","type":"proceedings-article","created":{"date-parts":[[2023,4,20]],"date-time":"2023-04-20T04:27:20Z","timestamp":1681964840000},"page":"1-15","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":27,"title":["Visualization of Speech Prosody and Emotion in Captions: Accessibility\u00a0for\u00a0Deaf\u00a0and\u00a0Hard-of-Hearing Users"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5046-9884","authenticated-orcid":false,"given":"Calu\u00e3","family":"de Lacerda Pataca","sequence":"first","affiliation":[{"name":"Computing and Information Sciences, Rochester Institute of Technology, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3133-8664","authenticated-orcid":false,"given":"Matthew","family":"Watkins","sequence":"additional","affiliation":[{"name":"School of Information, Rochester Institute of Technology, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4191-3565","authenticated-orcid":false,"given":"Roshan","family":"Peiris","sequence":"additional","affiliation":[{"name":"School of Information, Rochester Institute of Technology, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4971-2004","authenticated-orcid":false,"given":"Sooyeon","family":"Lee","sequence":"additional","affiliation":[{"name":"Informatics\/Ying Wu College of Computing, New Jersey Institute of Technology, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6290-2681","authenticated-orcid":false,"given":"Matt","family":"Huenerfauth","sequence":"additional","affiliation":[{"name":"School of Information, Rochester Institute of Technology, United States"}]}],"member":"320","published-online":{"date-parts":[[2023,4,19]]},"reference":[{"key":"e_1_3_3_3_1_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_3_3_2_1","doi-asserted-by":"publisher","DOI":"10.21437\/SpeechProsody.2018-162"},{"key":"e_1_3_3_3_3_1","volume-title":"Perception of Captioned Video Quality. In Universal Access in Human-Computer Interaction. Access to Media","author":"Amin Akhter\u00a0Al","unstructured":"Akhter\u00a0Al Amin, Saad Hassan, and Matt Huenerfauth. 2021. Effect of Occlusion on Deaf and Hard of Hearing Users\u2019 Perception of Captioned Video Quality. In Universal Access in Human-Computer Interaction. Access to Media, Learning and Assistive Environments, Margherita Antona and Constantine Stephanidis (Eds.). Springer International Publishing, Cham, 202\u2013220."},{"key":"e_1_3_3_3_4_1","doi-asserted-by":"publisher","DOI":"10.5594\/J02244"},{"key":"e_1_3_3_3_5_1","doi-asserted-by":"publisher","DOI":"10.1075\/pc.19.2.01att"},{"key":"e_1_3_3_3_6_1","unstructured":"Pl\u00ednio\u00a0A. Barbosa. 2019. Pros\u00f3dia. Par\u00e1bola Editorial Brazil."},{"key":"e_1_3_3_3_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3026041"},{"key":"e_1_3_3_3_8_1","volume-title":"Readability Research: An Interdisciplinary Approach. CoRR abs\/2107.09615(2021), 85\u00a0pages. arXiv:2107.09615https:\/\/arxiv.org\/abs\/2107.09615","author":"Beier Sofie","year":"2021","unstructured":"Sofie Beier, Sam Berlow, Esat Boucaud, Zoya Bylinskii, Tianyuan Cai, Jenae Cohn, Kathy Crowley, Stephanie\u00a0L. Day, Tilman Dingler, Jonathan Dobres, Jennifer Healey, Rajiv Jain, Marjorie Jordan, Bernard Kerr, Qisheng Li, Dave\u00a0B. Miller, Susanne Nobles, Alexandra Papoutsaki, Jing Qian, Tina Rezvanian, Shelley Rodrigo, Ben\u00a0D. Sawyer, Shannon\u00a0M. Sheppard, Bram Stein, Rick Treitman, Jen Vanek, Shaun Wallace, and Benjamin Wolfe. 2021. Readability Research: An Interdisciplinary Approach. CoRR abs\/2107.09615(2021), 85\u00a0pages. arXiv:2107.09615https:\/\/arxiv.org\/abs\/2107.09615"},{"key":"e_1_3_3_3_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290607.3312921"},{"key":"e_1_3_3_3_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132525.3132541"},{"key":"e_1_3_3_3_11_1","first-page":"28","article-title":"Visual prosody supports reading aloud expressively","volume":"53","author":"Bessemans Ann","year":"2019","unstructured":"Ann Bessemans, Maarten Renckens, Kevin Bormans, Erik Nuyts, and Kevin Larson. 2019. Visual prosody supports reading aloud expressively. Visible Language 53, 3 (2019), 28\u201349.","journal-title":"Visible Language"},{"key":"e_1_3_3_3_12_1","unstructured":"Natesh\u00a0M Bhat. 2021. Text-to-speech x-platform\u00b6. https:\/\/pyttsx3.readthedocs.io\/en\/latest\/"},{"key":"e_1_3_3_3_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1094562.1094570"},{"key":"e_1_3_3_3_14_1","unstructured":"Paul Boersma. 2006. Praat: doing phonetics by computer. http:\/\/www.praat.org\/. Accessed on August 24 2022."},{"key":"e_1_3_3_3_15_1","volume-title":"Using thematic analysis in psychology. Qualitative research in psychology 3, 2","author":"Braun Virginia","year":"2006","unstructured":"Virginia Braun and Victoria Clarke. 2006. Using thematic analysis in psychology. Qualitative research in psychology 3, 2 (2006), 77\u2013101."},{"key":"e_1_3_3_3_16_1","doi-asserted-by":"publisher","DOI":"10.1080\/20557132.2015.1057373"},{"key":"e_1_3_3_3_17_1","doi-asserted-by":"publisher","DOI":"10.1177\/1329878X8101900113"},{"key":"e_1_3_3_3_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3359852.3359892"},{"key":"e_1_3_3_3_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/1639642.1639656"},{"key":"e_1_3_3_3_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411763.3451698"},{"key":"e_1_3_3_3_21_1","first-page":"56","article-title":"Designing voice-aware text in voice media with background color and typography","volume":"28","author":"Chen Qinyue","year":"2022","unstructured":"Qinyue Chen, Yuchun Yan, and Hyeon-Jeong Suk. 2022. Designing voice-aware text in voice media with background color and typography. Journal of the International Colour Association 28 (2022), 56\u201362.","journal-title":"Journal of the International Colour Association"},{"key":"e_1_3_3_3_22_1","doi-asserted-by":"publisher","unstructured":"Wellington da Silva Plinio\u00a0Almeida Barbosa and \u00c5sa Abelin. 2016. Cross-Cultural and Cross-Linguistic Perception of Authentic Emotions through Speech: An Acoustic-Phonetic Study with Brazilian and Swedish Listeners. DELTA: Documenta\u00e7\u00e3o de Estudos em Ling\u00fc\u00edstica Te\u00f3rica e Aplicada 32 2 (Aug. 2016) 449\u2013480. https:\/\/doi.org\/10.1590\/0102-445003263701432483","DOI":"10.1590\/0102-445003263701432483"},{"key":"e_1_3_3_3_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3377325.3377526"},{"key":"e_1_3_3_3_24_1","doi-asserted-by":"publisher","DOI":"10.1075\/ihll.6.07mor"},{"key":"e_1_3_3_3_25_1","doi-asserted-by":"publisher","DOI":"10.1093\/elt\/ccr019"},{"key":"e_1_3_3_3_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2982142.2982198"},{"key":"e_1_3_3_3_27_1","doi-asserted-by":"publisher","unstructured":"Siyuan Feng Olya Kudina Bence\u00a0Mark Halpern and Odette Scharenborg. 2021. Quantifying Bias in Automatic Speech Recognition. https:\/\/doi.org\/10.48550\/ARXIV.2103.15122","DOI":"10.48550\/ARXIV.2103.15122"},{"key":"e_1_3_3_3_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/642611.642677"},{"key":"e_1_3_3_3_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/642611.642653"},{"key":"e_1_3_3_3_30_1","doi-asserted-by":"publisher","DOI":"10.1177\/2372732215602130"},{"key":"e_1_3_3_3_31_1","volume-title":"Colour and emotion: children also associate red with negative valence. Developmental science 19, 6","author":"Gil Sandrine","year":"2016","unstructured":"Sandrine Gil and Ludovic Le\u00a0Bigot. 2016. Colour and emotion: children also associate red with negative valence. Developmental science 19, 6 (2016), 1087\u20131094."},{"key":"e_1_3_3_3_32_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2108.00084"},{"key":"e_1_3_3_3_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/1357054.1357157"},{"key":"e_1_3_3_3_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290607.3313071"},{"key":"e_1_3_3_3_35_1","volume-title":"The Complete Interviews: The Design Trilogy Interviews","author":"Hustwit Gary","unstructured":"Gary Hustwit. 2015. Helvetica\/Objectified\/Urbanized: The Complete Interviews: The Design Trilogy Interviews. Versions Publishing, London, UK."},{"key":"e_1_3_3_3_36_1","volume-title":"Time spent viewing captions on television programs. American annals of the deaf 145, 5","author":"Jensema J","year":"2000","unstructured":"Carl\u00a0J Jensema, Ramalinga\u00a0Sarma Danturthi, and Robert Burch. 2000. Time spent viewing captions on television programs. American annals of the deaf 145, 5 (2000), 464\u2013468."},{"key":"e_1_3_3_3_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3325862"},{"key":"e_1_3_3_3_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3517428.3550375"},{"key":"e_1_3_3_3_39_1","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2018.01284"},{"key":"e_1_3_3_3_40_1","doi-asserted-by":"publisher","DOI":"10.1075\/btl.128.09kru"},{"key":"e_1_3_3_3_41_1","volume-title":"Joint effects of emotion and color on memory.Emotion 13, 3","author":"Kuhbandner Christof","year":"2013","unstructured":"Christof Kuhbandner and Reinhard Pekrun. 2013. Joint effects of emotion and color on memory.Emotion 13, 3 (2013), 375."},{"key":"e_1_3_3_3_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373625.3417299"},{"key":"e_1_3_3_3_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/2596695.2596701"},{"key":"e_1_3_3_3_44_1","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1145\/1279540.1279551","article-title":"Emotive captioning","volume":"5","author":"Lee G","year":"2007","unstructured":"Daniel\u00a0G Lee, Deborah\u00a0I Fels, and John\u00a0Patrick Udo. 2007. Emotive captioning. Computers in Entertainment (CIE) 5, 2 (2007), 11.","journal-title":"Computers in Entertainment (CIE)"},{"key":"e_1_3_3_3_45_1","volume-title":"The language, tone and prosody of emotions: neural substrates and dynamics of spoken-word emotion perception. Frontiers in neuroscience 10","author":"Liebenthal Einat","year":"2016","unstructured":"Einat Liebenthal, David\u00a0A Silbersweig, and Emily Stern. 2016. The language, tone and prosody of emotions: neural substrates and dynamics of spoken-word emotion perception. Frontiers in neuroscience 10 (2016), 506."},{"key":"e_1_3_3_3_46_1","volume-title":"Closed Captioning Challenges for IP Video Delivery. In The 2012 Annual Technical Conference & Exhibition. SMPTE","author":"Livingston Jason","year":"2012","unstructured":"Jason Livingston. 2012. Closed Captioning Challenges for IP Video Delivery. In The 2012 Annual Technical Conference & Exhibition. SMPTE, Hollywood, CA, USA, 1\u20139."},{"key":"e_1_3_3_3_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373625.3417300"},{"key":"e_1_3_3_3_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3359852.3359874"},{"key":"e_1_3_3_3_49_1","unstructured":"Tim Mahrt. 2022. PraatIO. https:\/\/github.com\/timmahrt\/praatIO. Accessed on August 3 2022."},{"key":"e_1_3_3_3_50_1","volume-title":"Communicating emotion with animated text. visual communication 8, 4","author":"Malik Sabrina","year":"2009","unstructured":"Sabrina Malik, Jonathan Aitken, and Judith\u00a0Kelly Waalen. 2009. Communicating emotion with animated text. visual communication 8, 4 (2009), 469\u2013479."},{"key":"e_1_3_3_3_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3132525.3134779"},{"key":"e_1_3_3_3_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3479578"},{"key":"e_1_3_3_3_53_1","doi-asserted-by":"publisher","DOI":"10.21437\/SpeechProsody.2010-38"},{"key":"e_1_3_3_3_54_1","volume-title":"Caption quality: Approaches to standards and measurement. Media Access Australia","author":"Mikul Chris","unstructured":"Chris Mikul. 2014. Caption quality: Approaches to standards and measurement. Media Access Australia, Sydney, Australia."},{"key":"e_1_3_3_3_55_1","doi-asserted-by":"publisher","DOI":"10.1177\/002246698301700107"},{"key":"e_1_3_3_3_56_1","volume-title":"Types of color blindness","author":"National Eye Institute 2019.","year":"2022","unstructured":"National Eye Institute 2019. Types of color blindness. National Eye Institute. https:\/\/www.nei.nih.gov\/learn-about-eye-health\/eye-conditions-and-diseases\/color-blindness\/types-color-blindnessAccessed on August 3, 2022."},{"key":"e_1_3_3_3_57_1","doi-asserted-by":"publisher","DOI":"10.1002\/9781444335262.wbctp0048"},{"key":"e_1_3_3_3_58_1","unstructured":"Robert\u00a0M Ochshorn and Max Hawkins. 2015. Gentle: a robust yet lenient forced aligner built on Kaldi.https:\/\/lowerquality.com\/gentle\/"},{"key":"e_1_3_3_3_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2019.2955949"},{"key":"e_1_3_3_3_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2022.3174721"},{"key":"e_1_3_3_3_61_1","volume-title":"Emotional Type: Emotional expression in text message. Master\u2019s thesis.","author":"Promphan Suksumek","year":"2017","unstructured":"Suksumek Promphan. 2017. Emotional Type: Emotional expression in text message. Master\u2019s thesis. Basel School of Design, Switzerland."},{"key":"e_1_3_3_3_62_1","doi-asserted-by":"publisher","DOI":"10.1080\/0907676X.2012.722651"},{"key":"e_1_3_3_3_63_1","doi-asserted-by":"publisher","DOI":"10.1007\/11788713_5"},{"key":"e_1_3_3_3_64_1","doi-asserted-by":"crossref","unstructured":"Raisa Rashid Quoc Vy Richard Hunt and Deborah\u00a0I Fels. 2008. Dancing with words: Using animated text for captioning. Intl. Journal of Human\u2013Computer Interaction 24 5(2008) 505\u2013519.","DOI":"10.1080\/10447310802142342"},{"key":"e_1_3_3_3_65_1","doi-asserted-by":"crossref","unstructured":"Nancy\u00a0A Remington Leandre\u00a0R Fabrigar and Penny\u00a0S Visser. 2000. Reexamining the circumplex model of affect.Journal of personality and social psychology 79 2(2000) 286.","DOI":"10.1037\/\/0022-3514.79.2.286"},{"key":"e_1_3_3_3_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/632716.632872"},{"key":"e_1_3_3_3_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517488"},{"key":"e_1_3_3_3_68_1","doi-asserted-by":"crossref","unstructured":"James\u00a0A Russell. 1980. A circumplex model of affect.Journal of personality and social psychology 39 6(1980) 1161.","DOI":"10.1037\/h0077714"},{"key":"e_1_3_3_3_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/ITHET.2016.7760711"},{"key":"e_1_3_3_3_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/CW49994.2020.00039"},{"key":"e_1_3_3_3_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/3234695.3236355"},{"key":"e_1_3_3_3_72_1","doi-asserted-by":"publisher","DOI":"10.1145\/3334480.3383083"},{"key":"e_1_3_3_3_73_1","volume-title":"Emotional response to color across media. Color Research & Application: Endorsed by Inter-Society Color Council","author":"Suk Hyeon-Jeong","year":"2010","unstructured":"Hyeon-Jeong Suk and Hans Irtel. 2010. Emotional response to color across media. Color Research & Application: Endorsed by Inter-Society Color Council, The Colour Group (Great Britain), Canadian Society for Color, Color Science Association of Japan, Dutch Society for the Study of Color, The Swedish Colour Centre Foundation, Colour Society of Australia, Centre Fran\u00e7ais de la Couleur 35, 1 (2010), 64\u201377."},{"key":"e_1_3_3_3_74_1","unstructured":"[74] Andreas Triantafyllopoulos.2022. Personal communication."},{"key":"e_1_3_3_3_75_1","doi-asserted-by":"publisher","unstructured":"Andreas Triantafyllopoulos Johannes Wagner Hagen Wierstorf Maximilian Schmitt Uwe Reichel Florian Eyben Felix Burkhardt and Bj\u00f6rn\u00a0W. Schuller. 2022. Probing Speech Emotion Recognition Transformers for Linguistic Knowledge. https:\/\/doi.org\/10.48550\/ARXIV.2204.00400","DOI":"10.48550\/ARXIV.2204.00400"},{"key":"e_1_3_3_3_76_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1352"},{"key":"e_1_3_3_3_77_1","unstructured":"Walda Verbaenen. 2019. Phonotype. The visual identity of a language according to its phonology. Master\u2019s thesis. pxl-mad."},{"key":"e_1_3_3_3_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/2513383.2517035"},{"key":"e_1_3_3_3_79_1","volume-title":"Crossmodal Benefits to Vocal Emotion Perception in Cochlear Implant Users. iScience 25, 12","author":"von Eiff Celina\u00a0Isabelle","year":"2022","unstructured":"Celina\u00a0Isabelle von Eiff, Sascha Fr\u00fchholz, Daniela Korth, Orlando Guntinas-Lichius, and Stefan\u00a0Robert Schweinberger. 2022. Crossmodal Benefits to Vocal Emotion Perception in Cochlear Implant Users. iScience 25, 12 (2022), 105711."},{"key":"e_1_3_3_3_80_1","doi-asserted-by":"publisher","unstructured":"Johannes Wagner Andreas Triantafyllopoulos Hagen Wierstorf Maximilian Schmitt Felix Burkhardt Florian Eyben and Bj\u00f6rn\u00a0W. Schuller. 2022. Dawn of the transformer era in speech emotion recognition: closing the valence gap. https:\/\/doi.org\/10.48550\/ARXIV.2203.07378","DOI":"10.48550\/ARXIV.2203.07378"},{"key":"e_1_3_3_3_81_1","unstructured":"Matthew Wickline. 2001. Coblis \u2013 Color blindness simulator. https:\/\/www.color-blindness.com\/coblis-color-blindness-simulator\/"},{"key":"e_1_3_3_3_82_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.pragma.2005.04.012"},{"key":"e_1_3_3_3_83_1","doi-asserted-by":"publisher","DOI":"10.1109\/SPED.2015.7343095"}],"event":{"name":"CHI '23: CHI Conference on Human Factors in Computing Systems","location":"Hamburg Germany","acronym":"CHI '23","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2023 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3544548.3581511","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3544548.3581511","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3544548.3581511","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:54Z","timestamp":1750178814000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3544548.3581511"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,19]]},"references-count":83,"alternative-id":["10.1145\/3544548.3581511","10.1145\/3544548"],"URL":"https:\/\/doi.org\/10.1145\/3544548.3581511","relation":{},"subject":[],"published":{"date-parts":[[2023,4,19]]},"assertion":[{"value":"2023-04-19","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}