{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,13]],"date-time":"2026-06-13T07:22:17Z","timestamp":1781335337354,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":85,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,22]],"date-time":"2023-10-22T00:00:00Z","timestamp":1697932800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Ability Central"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,22]]},"DOI":"10.1145\/3597638.3608402","type":"proceedings-article","created":{"date-parts":[[2023,10,19]],"date-time":"2023-10-19T14:03:06Z","timestamp":1697724186000},"page":"1-17","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":18,"title":["The Potential of a Visual Dialogue Agent In a Tandem Automated Audio Description System for Videos"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2761-1429","authenticated-orcid":false,"given":"Abigale","family":"Stangl","sequence":"first","affiliation":[{"name":"Human Centered Design and Engineering, University of Washington, United States"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4255-578X","authenticated-orcid":false,"given":"Shasta","family":"Ihorn","sequence":"additional","affiliation":[{"name":"Department of Psychology, San Francisco State University, United States"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9478-1391","authenticated-orcid":false,"given":"Yue-Ting","family":"Siu","sequence":"additional","affiliation":[{"name":"Northwest Center for Assistive Technology Training (CATT-NW), United States"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2653-0129","authenticated-orcid":false,"given":"Aditya","family":"Bodi","sequence":"additional","affiliation":[{"name":"Department of Computer Science, San Francisco State University, United States"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7996-901X","authenticated-orcid":false,"given":"Mar","family":"Castanon","sequence":"additional","affiliation":[{"name":"Department of Computer Science, San Francisco State University, United States"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4030-7186","authenticated-orcid":false,"given":"Lothar D","family":"Narins","sequence":"additional","affiliation":[{"name":"Department of Computer Science, San Francisco State University, United States"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2418-5287","authenticated-orcid":false,"given":"Ilmi","family":"Yoon","sequence":"additional","affiliation":[{"name":"Department of Computer Science, San Francisco State University, United States"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,10,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3355390"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.5241\/8-148"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445498"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11145-020-10026-4"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411763.3451810"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1177\/0145482X1210600304"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1191\/1478088706qp063oa"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.4324\/9781003122388"},{"key":"e_1_3_2_1_11_1","volume-title":"COCO-Stuff: Thing and Stuff Classes in Context. CoRR abs\/1612.03716","author":"Caesar Holger","year":"2016","unstructured":"Holger Caesar, Jasper R.\u00a0R. Uijlings, and Vittorio Ferrari. 2016. COCO-Stuff: Thing and Stuff Classes in Context. CoRR abs\/1612.03716 (2016). http:\/\/arxiv.org\/abs\/1612.03716 arXiv:1612.03716."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Joao Carreira and Andrew Zisserman. 2017. Quo Vadis Action Recognition? A New Model and the Kinetics Dataset. 6299\u20136308. https:\/\/openaccess.thecvf.com\/content_cvpr_2017\/html\/Carreira_Quo_Vadis_Action_CVPR_2017_paper.html","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_13_1","unstructured":"Google Cloud. [n. d.]. Vision AI | Cloud Vision API. https:\/\/cloud.google.com\/vision"},{"key":"e_1_3_2_1_14_1","unstructured":"Listen\u00a0By Code. [n. d.]. Home Page. https:\/\/tracxn.com\/d\/companies\/listen-by-code\/__GK6TP3f6EIo4oD3KKpeuCUBStSGTyfGpZuKMr2ML6EY\/"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1177\/0145482X9909300602"},{"key":"e_1_3_2_1_16_1","unstructured":"Abhishek Das Satwik Kottur Khushi Gupta Avi Singh Deshraj Yadav Jose M.\u00a0F. Moura Devi Parikh and Dhruv Batra. 2017. Visual Dialog. 326\u2013335. https:\/\/openaccess.thecvf.com\/content_cvpr_2017\/html\/Das_Visual_Dialog_CVPR_2017_paper.html"},{"key":"e_1_3_2_1_17_1","unstructured":"DCMP. 2023. The Described and Captioned Media Program. https:\/\/dcmp.org\/"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2599174"},{"key":"e_1_3_2_1_21_1","unstructured":"Melanie Ehrenkranz. 2020. Vital Coronavirus Information Is Failing the Blind and Visually Impaired. https:\/\/www.vice.com\/en\/article\/4ag9wb\/vital-coronavirus-information-is-failing-the-blind-and-visually-impaired"},{"key":"e_1_3_2_1_22_1","unstructured":"Hugging Face. 2023. Transformers. https:\/\/huggingface.co\/docs\/transformers\/index"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3386296.3386304"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/0010-0277(94)90024-8"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2010.5543575"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10209-008-0141-0"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","unstructured":"Timnit Gebru Jamie Morgenstern Briana Vecchione Jennifer\u00a0Wortman Vaughan Hanna Wallach Hal Daum\u00e9\u00a0III and Kate Crawford. 2021. Datasheets for Datasets. https:\/\/doi.org\/10.48550\/arXiv.1803.09010 arXiv:1803.09010 [cs].","DOI":"10.48550\/arXiv.1803.09010"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1037\/0033-295X.101.3.371"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3461702.3462620"},{"key":"e_1_3_2_1_30_1","volume-title":"Including Those with Additional Disabilities. RE:view 28, 1","author":"Hatlen Phil","year":"1996","unstructured":"Phil Hatlen. 1996. The Core Curriculum for Blind and Visually Impaired Students, Including Those with Additional Disabilities. RE:view 28, 1 (1996), 25\u201332. ERIC Number: EJ532379."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2741510"},{"key":"e_1_3_2_1_32_1","volume-title":"Continuum Companion to Discourse Analysis","author":"Hyland Ken","unstructured":"Ken Hyland. 2011. Continuum Companion to Discourse Analysis. Bloomsbury Publishing. Google-Books-ID: FgoSBwAAQBAJ."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3517428.3550394"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3597638.3608381"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1807.09956"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1037\/0033-295X.95.2.163"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1177\/0145482X0509900806"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","unstructured":"Alexander Kirillov Eric Mintun Nikhila Ravi Hanzi Mao Chloe Rolland Laura Gustafson Tete Xiao Spencer Whitehead Alexander\u00a0C. Berg Wan-Yen Lo Piotr Doll\u00e1r and Ross Girshick. 2023. Segment Anything. https:\/\/doi.org\/10.48550\/arXiv.2304.02643 arXiv:2304.02643 [cs].","DOI":"10.48550\/arXiv.2304.02643"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","unstructured":"Elisa Kreiss Cynthia Bennett Shayan Hooshmand Eric Zelikman Meredith\u00a0Ringel Morris and Christopher Potts. 2022. Context Matters for Image Descriptions for Accessibility: Challenges for Referenceless Evaluation Metrics. https:\/\/doi.org\/10.48550\/arXiv.2205.10646 arXiv:2205.10646 [cs].","DOI":"10.48550\/arXiv.2205.10646"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-020-01316-z"},{"key":"e_1_3_2_1_42_1","volume-title":"Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision. Springer, 740\u2013755","author":"Lin Tsung-Yi","year":"2014","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C\u00a0Lawrence Zitnick. 2014. Microsoft COCO: Common Objects in Context. In European Conference on Computer Vision. Springer, 740\u2013755."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445233"},{"key":"e_1_3_2_1_44_1","volume-title":"Proceedings of the 35th Annual ACM Symposium on User Interface Software and Technology. 1\u201314","author":"Wang Ruolin","year":"2022","unstructured":"Xingyu\"\u00a0Bruce\" Liu, Ruolin Wang, Dingzeyu Li, Xiang\u00a0Anthony Chen, and Amy Pavel. 2022. CrossA11y: Identifying Video Accessibility Issues via Cross-modal Grounding. In Proceedings of the 35th Annual ACM Symposium on User Interface Software and Technology. 1\u201314."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","unstructured":"Jiasen Lu Dhruv Batra Devi Parikh and Stefan Lee. 2019. ViLBERT: Pretraining Task-Agnostic Visiolinguistic Representations for Vision-and-Language Tasks. https:\/\/doi.org\/10.48550\/arXiv.1908.02265 arXiv:1908.02265 [cs].","DOI":"10.48550\/arXiv.1908.02265"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1177\/0145482X19887620"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1080\/15596893.2017.1361689"},{"key":"e_1_3_2_1_48_1","volume-title":"Applied Multivariate Research: Design and Interpretation","author":"Meyers S.","unstructured":"Lawrence\u00a0S. Meyers, Glenn Gamst, and A.\u00a0J. Guarino. 2016. Applied Multivariate Research: Design and Interpretation. SAGE Publications. Google-Books-ID: bm51DQAAQBAJ."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511973017.061"},{"key":"e_1_3_2_1_50_1","first-page":"12","article-title":"Guiding novice web workers in making image descriptions using templates","volume":"7","author":"Morash S","year":"2015","unstructured":"Valerie\u00a0S Morash, Yue-Ting Siu, Joshua\u00a0A Miele, Lucia Hasty, and Steven Landau. 2015. Guiding novice web workers in making image descriptions using templates. ACM Transactions on Accessible Computing (TACCESS) 7, 4 (2015), 12. Publisher: ACM.","journal-title":"ACM Transactions on Accessible Computing (TACCESS)"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173633"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373625.3418030"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581023"},{"key":"e_1_3_2_1_54_1","unstructured":"NCAM. 2023. NCAM WhatWeDo. https:\/\/www.wgbh.org\/foundation\/what-we-do\/ncam"},{"key":"e_1_3_2_1_55_1","unstructured":"OpenAI. 2021. OpenAI API. https:\/\/openai.com\/api\/"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379337.3415864"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","unstructured":"Yi-Hao Peng JiWoong Jang Jeffrey\u00a0P Bigham and Amy Pavel. [n. d.]. Say It All: Feedback for Improving Non-Visual Presentation Accessibility. ([n. d.]) 12. https:\/\/doi.org\/10.1145\/3411764.3445572","DOI":"10.1145\/3411764.3445572"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/2745555.2746653"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","unstructured":"Joseph Redmon and Ali Farhadi. 2018. YOLOv3: An Incremental Improvement. https:\/\/doi.org\/10.48550\/arXiv.1804.02767 arXiv:1804.02767 [cs].","DOI":"10.48550\/arXiv.1804.02767"},{"key":"e_1_3_2_1_60_1","unstructured":"Thomas Reid. 2023. Blind Centered Audio Description Chat: Blind Professionals in AD. http:\/\/reidmymind.com\/blind-centered-audio-description-chat-blind-professionals-in-ad\/"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298940"},{"key":"e_1_3_2_1_62_1","volume-title":"Their Families, and Professionals in the United States and Canada","author":"Rosenblum LP","year":"2020","unstructured":"LP Rosenblum, TS Herzberg, T Wild, KD Botsford, D Fast, JT Kaiser, LK Cook, MAC Hicks, JN DeGrant, and CR McBride. 2020. Access and Engagement: Examining the Impact of COVID-19 on Students Birth-21 with Visual Impairments, Their Families, and Professionals in the United States and Canada. American Foundation for the Blind (2020)."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373625.3417997"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1177\/0145482X0710100103"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3192714.3192830"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00497"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376404"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3441852.3471233"},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","unstructured":"Weijie Su Xizhou Zhu Yue Cao Bin Li Lewei Lu Furu Wei and Jifeng Dai. 2020. VL-BERT: Pre-training of Generic Visual-Linguistic Representations. https:\/\/doi.org\/10.48550\/arXiv.1908.08530 arXiv:1908.08530 [cs].","DOI":"10.48550\/arXiv.1908.08530"},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300768"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","unstructured":"Christian Szegedy Wei Liu Yangqing Jia Pierre Sermanet Scott Reed Dragomir Anguelov Dumitru Erhan Vincent Vanhoucke and Andrew Rabinovich. 2014. Going Deeper with Convolutions. https:\/\/doi.org\/10.48550\/arXiv.1409.4842 arXiv:1409.4842 [cs].","DOI":"10.48550\/arXiv.1409.4842"},{"key":"e_1_3_2_1_72_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N. Gomez Lukasz Kaiser and Illia Polosukhin. 2017. Attention Is All You Need. http:\/\/arxiv.org\/abs\/1706.03762 arXiv:1706.03762 [cs]."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.515"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_2_1_76_1","unstructured":"W3C. 2018. Web Content Accessibility Guidelines (WCAG) 2.1. https:\/\/www.w3.org\/TR\/WCAG21\/"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445347"},{"key":"e_1_3_2_1_78_1","volume-title":"Applied statistics: from bivariate through multivariate techniques","author":"Warner M.","unstructured":"Rebecca\u00a0M. Warner. 2008. Applied statistics: from bivariate through multivariate techniques. SAGE Publications, Los Angeles. OCLC: ocm72988475."},{"key":"e_1_3_2_1_79_1","first-page":"308","article-title":"Blindness and early childhood development. American Foundation for the Blind, New York, NY","author":"Warren H.","year":"1977","unstructured":"David\u00a0H. Warren. 1977. Blindness and early childhood development. American Foundation for the Blind, New York, NY, US. Pages: x, 308.","journal-title":"US. Pages"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"key":"e_1_3_2_1_81_1","unstructured":"YouDescribe. 2023. YouDescribe - Audio Description for YouTube Videos. https:\/\/youdescribe.org\/"},{"key":"e_1_3_2_1_82_1","unstructured":"YouTube. 2023. YouTube for Press. https:\/\/blog.youtube\/press\/"},{"key":"e_1_3_2_1_83_1","doi-asserted-by":"publisher","DOI":"10.1145\/3334480.3382821"},{"key":"e_1_3_2_1_84_1","volume-title":"ProcNets: Learning to Segment Procedures in Untrimmed and Unconstrained Videos. (March","author":"Zhou Luowei","year":"2017","unstructured":"Luowei Zhou, Chenliang Xu, and Jason Corso. 2017. ProcNets: Learning to Segment Procedures in Untrimmed and Unconstrained Videos. (March 2017)."},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12342"}],"event":{"name":"ASSETS '23: The 25th International ACM SIGACCESS Conference on Computers and Accessibility","location":"New York NY USA","acronym":"ASSETS '23","sponsor":["SIGACCESS ACM Special Interest Group on Accessible Computing"]},"container-title":["The 25th International ACM SIGACCESS Conference on Computers and Accessibility"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3597638.3608402","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3597638.3608402","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:51Z","timestamp":1750178211000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3597638.3608402"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,22]]},"references-count":85,"alternative-id":["10.1145\/3597638.3608402","10.1145\/3597638"],"URL":"https:\/\/doi.org\/10.1145\/3597638.3608402","relation":{},"subject":[],"published":{"date-parts":[[2023,10,22]]},"assertion":[{"value":"2023-10-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}