{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T21:48:34Z","timestamp":1776116914645,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":65,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,27]],"date-time":"2024-10-27T00:00:00Z","timestamp":1729987200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,27]]},"DOI":"10.1145\/3663548.3675658","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T18:37:25Z","timestamp":1729449445000},"page":"1-17","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":23,"title":["Context-Aware Image Descriptions for Web Accessibility"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-1014-4385","authenticated-orcid":false,"given":"Ananya","family":"Gubbi Mohanbabu","sequence":"first","affiliation":[{"name":"Department of Computer Science, The University of Texas at Austin, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3908-4366","authenticated-orcid":false,"given":"Amy","family":"Pavel","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Texas, Austin, United States"}]}],"member":"320","published-online":{"date-parts":[[2024,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"d.]. BeMyAI. https:\/\/www.bemyeyes.com\/blog\/introducing-be-my-ai. Accessed: April 20th","year":"2024","unstructured":"[n. d.]. BeMyAI. https:\/\/www.bemyeyes.com\/blog\/introducing-be-my-ai. Accessed: April 20th, 2024."},{"key":"e_1_3_2_1_2_1","volume-title":"Accessed: April 20th","year":"2024","unstructured":"[n. d.]. Google Chrome Image Descriptions. https:\/\/support.google.com\/chrome\/answer\/9311597?hl=en. Accessed: April 20th, 2024."},{"key":"e_1_3_2_1_3_1","volume-title":"d.]. Google Gemini. https:\/\/gemini.google.com\/app. Accessed: April 20th","year":"2024","unstructured":"[n. d.]. Google Gemini. https:\/\/gemini.google.com\/app. Accessed: April 20th, 2024."},{"key":"e_1_3_2_1_4_1","volume-title":"d.]. OpenAI GPT-V. https:\/\/www.openai.com. Accessed: April 20th","year":"2024","unstructured":"[n. d.]. OpenAI GPT-V. https:\/\/www.openai.com. Accessed: April 20th, 2024."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173650"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.17487\/RFC1866"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1866029.1866080"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1168987.1169018"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01275"},{"key":"e_1_3_2_1_10_1","unstructured":"Laura\u00a0Begley Bloom. [n. d.]. Bucket List Travel: The Top 20 Places In The World."},{"key":"e_1_3_2_1_11_1","volume-title":"d.]. General Guidelines for Accessible Content","author":"DIAGRAM Center","unstructured":"DIAGRAM Center. [n. d.]. General Guidelines for Accessible Content. http:\/\/diagramcenter.org\/general-guidelines-final-draft.html#key. Accessed: 2024-04-21."},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition. 895\u2013903","author":"Chunseong\u00a0Park Cesc","year":"2017","unstructured":"Cesc Chunseong\u00a0Park, Byeongchang Kim, and Gunhee Kim. 2017. Attend to you: Personalized image captioning with context sequence memory networks. In Proceedings of the IEEE conference on computer vision and pattern recognition. 895\u2013903."},{"key":"e_1_3_2_1_13_1","volume-title":"d.]. OpenAI NVDA add on. https:\/\/github.com\/aaclause\/nvda-OpenAI. Accessed","author":"Clause Andr\u00e9-Abush","year":"2024","unstructured":"Andr\u00e9-Abush Clause. [n. d.]. OpenAI NVDA add on. https:\/\/github.com\/aaclause\/nvda-OpenAI. Accessed: 2024."},{"key":"e_1_3_2_1_14_1","unstructured":"No Conformity. [n. d.]. No Conformity on X: \"After Brand Transformation: With a brand transformation Billie Eilish became a global phenomenon. She captivated audiences w\/ her sound aesthetic & authentic storytelling. Her brand evolution positioned her as an icon earning her Grammy Awards & critical acclaim\"."},{"key":"e_1_3_2_1_15_1","unstructured":"W3 Consortium. 2018. Web Content Accesisbility Guidelines (WCAG) 2.1. https:\/\/www.w3.org\/TR\/WCAG21\/"},{"key":"e_1_3_2_1_16_1","unstructured":"James Crawford-Smith. [n. d.]. Prince Harry\u2019s \u2019Protective\u2019 Gesture Over Meghan Markle Caught on Camera."},{"key":"e_1_3_2_1_17_1","unstructured":"Dillard\u2019s. [n. d.]. Free People Bluebell Floral Print V-Neck Sleeveless Maxi Dress."},{"key":"e_1_3_2_1_18_1","unstructured":"[18] Sophie Dodd. [n. d.]."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Mark Everingham Josef Sivic and Andrew Zisserman. 2006. Hello! My name is... Buffy\u201d\u2013Automatic Naming of Characters in TV Video.. In BMVC Vol.\u00a02. 6.","DOI":"10.5244\/C.20.92"},{"key":"e_1_3_2_1_20_1","unstructured":"Explosion. [n. d.]. EntityRecognizer. https:\/\/spacy.io\/api\/entityrecognizer."},{"key":"e_1_3_2_1_21_1","unstructured":"American\u00a0Foundation for\u00a0the Blind. [n. d.]. Improving Your Website Accessibility. https:\/\/www.afb.org\/consulting\/afb-accessibility-resources\/improving-your-web-site. Accessed: 2024-04-21."},{"key":"e_1_3_2_1_22_1","unstructured":"Perkins\u00a0School for\u00a0the Blind. [n. d.]. Creating Image Descriptions and Alt Text. https:\/\/www.perkins.org\/resource\/creating-image-descriptions-alt-text\/. Accessed: 2024-04-21."},{"key":"e_1_3_2_1_23_1","unstructured":"Perkins\u00a0School for\u00a0the Blind. [n. d.]. How to Write Alt Text and Image Descriptions for the Visually Impaired. https:\/\/www.perkins.org\/resource\/how-write-alt-text-and-image-descriptions-visually-impaired\/. Accessed: 2024-04-21."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.108"},{"key":"e_1_3_2_1_25_1","volume-title":"Determining question-answer plausibility in crowdsourced datasets using multi-task learning. arXiv preprint arXiv:2011.04883","author":"Gardner Rachel","year":"2020","unstructured":"Rachel Gardner, Maya Varma, Clare Zhu, and Ranjay Krishna. 2020. Determining question-answer plausibility in crowdsourced datasets using multi-task learning. arXiv preprint arXiv:2011.04883 (2020)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313605"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373625.3417027"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308561.3353792"},{"key":"e_1_3_2_1_29_1","volume-title":"Making Memes Accessible. In The 21st International ACM SIGACCESS Conference on Computers and Accessibility. 367\u2013376","author":"Gleason Cole","year":"2019","unstructured":"Cole Gleason, Amy Pavel, Xingyu Liu, Patrick Carrington, Lydia\u00a0B Chilton, and Jeffrey\u00a0P Bigham. 2019. Making Memes Accessible. In The 21st International ACM SIGACCESS Conference on Computers and Accessibility. 367\u2013376."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376728"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376728"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1080\/19331681.2010.508011"},{"key":"e_1_3_2_1_33_1","unstructured":"Google. [n. d.]. Firebase Realtime Database Documentation. https:\/\/firebase.google.com\/docs\/database. Accessed: 2024-04-23."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3174092"},{"key":"e_1_3_2_1_35_1","volume-title":"Clipscore: A reference-free evaluation metric for image captioning. arXiv preprint arXiv:2104.08718","author":"Hessel Jack","year":"2021","unstructured":"Jack Hessel, Ari Holtzman, Maxwell Forbes, Ronan\u00a0Le Bras, and Yejin Choi. 2021. Clipscore: A reference-free evaluation metric for image captioning. arXiv preprint arXiv:2104.08718 (2021)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3502081"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606735"},{"key":"e_1_3_2_1_38_1","volume-title":"Context Matters for Image Descriptions for Accessibility: Challenges for Referenceless Evaluation Metrics. arXiv preprint arXiv:2205.10646","author":"Kreiss Elisa","year":"2022","unstructured":"Elisa Kreiss, Cynthia Bennett, Shayan Hooshmand, Eric Zelikman, Meredith\u00a0Ringel Morris, and Christopher Potts. 2022. Context Matters for Image Descriptions for Accessibility: Challenges for Referenceless Evaluation Metrics. arXiv preprint arXiv:2205.10646 (2022)."},{"key":"e_1_3_2_1_39_1","volume-title":"ContextRef: Evaluating Referenceless Metrics For Image Description Generation. arXiv preprint arXiv:2309.11710","author":"Kreiss Elisa","year":"2023","unstructured":"Elisa Kreiss, Eric Zelikman, Christopher Potts, and Nick Haber. 2023. ContextRef: Evaluating Referenceless Metrics For Image Description Generation. arXiv preprint arXiv:2309.11710 (2023)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3501966"},{"key":"e_1_3_2_1_41_1","unstructured":"Veronica Lewis. [n. d.]. How to Write Alt Text and Image Descriptions for Photojournalism Images. https:\/\/veroniiiica.com\/how-to-write-alt-text-and-image-descriptions-for-photojournalism-images\/. Accessed: 2024-04-21."},{"key":"e_1_3_2_1_42_1","unstructured":"Veronica Lewis. [n. d.]. Writing Image Descriptions for Red Carpet Outfits. https:\/\/veroniiiica.com\/writing-image-descriptions-for-red-carpet-outfits\/. Accessed: 2024-04-21."},{"key":"e_1_3_2_1_43_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730\u201319742."},{"key":"e_1_3_2_1_44_1","unstructured":"Haotian Liu Chunyuan Li Qingyang Wu and Yong\u00a0Jae Lee. 2023. Visual Instruction Tuning. In NeurIPS."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/1562164.1562197"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3441852.3471207"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3025814"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10475"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/2764916"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173633"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/2858036.2858116"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581302"},{"key":"e_1_3_2_1_53_1","unstructured":"NY\u00a0Furniture Outlets. [n. d.]. French Beige Chenille Cherry Carved Wood Sofa Traditioanal McFerran SF8700."},{"key":"e_1_3_2_1_54_1","volume-title":"DreamStruct: Understanding Slides and UIs via Synthetic Data Generation. To Appear at ECCV","author":"Peng Yi-Hao","year":"2024","unstructured":"Yi-Hao Peng, Faria Huq, Jason Wu, Amanda Li, Jeffrey\u00a0P Bigham, and Amy Pavel. 2024. DreamStruct: Understanding Slides and UIs via Synthetic Data Generation. To Appear at ECCV 2024."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1609\/hcomp.v5i1.13301"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01280"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3640543.3645212"},{"key":"e_1_3_2_1_58_1","volume-title":"Alt-Text with Context: Improving Accessibility for Images on Twitter. In The Twelfth International Conference on Learning Representations.","author":"Srivatsan Nikita","year":"2023","unstructured":"Nikita Srivatsan, Sofia Samaniego, Omar Florez, and Taylor Berg-Kirkpatrick. 2023. Alt-Text with Context: Improving Accessibility for Images on Twitter. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376404"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3441852.3471233"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3234695.3236337"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/1124772.1124785"},{"key":"e_1_3_2_1_63_1","unstructured":"W3C. [n. d.]. Tips for Creating Accessible Images. https:\/\/www.w3.org\/WAI\/tutorials\/images\/tips\/. Accessed: 2024-04-21."},{"key":"e_1_3_2_1_64_1","unstructured":"WebAIM. [n. d.]. Techniques for Writing Effective Alt Text. https:\/\/webaim.org\/techniques\/alttext\/. Accessed: 2024-04-21."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/2998181.2998364"}],"event":{"name":"ASSETS '24: The 26th International ACM SIGACCESS Conference on Computers and Accessibility","location":"St. John's NL Canada","acronym":"ASSETS '24","sponsor":["SIGACCESS ACM Special Interest Group on Accessible Computing"]},"container-title":["The 26th International ACM SIGACCESS Conference on Computers and Accessibility"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3663548.3675658","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T23:57:17Z","timestamp":1750291037000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3663548.3675658"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,27]]},"references-count":65,"alternative-id":["10.1145\/3663548.3675658","10.1145\/3663548"],"URL":"https:\/\/doi.org\/10.1145\/3663548.3675658","relation":{},"subject":[],"published":{"date-parts":[[2024,10,27]]},"assertion":[{"value":"2024-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}