{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T15:02:23Z","timestamp":1777734143115,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,19]],"date-time":"2023-04-19T00:00:00Z","timestamp":1681862400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,19]]},"DOI":"10.1145\/3544548.3581566","type":"proceedings-article","created":{"date-parts":[[2023,4,20]],"date-time":"2023-04-20T04:26:08Z","timestamp":1681964768000},"page":"1-20","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":39,"title":["Visual Captions: Augmenting Verbal Communication with On-the-fly Visuals"],"prefix":"10.1145","author":[{"given":"Xingyu \"Bruce\"","family":"Liu","sequence":"first","affiliation":[]},{"given":"Vladimir","family":"Kirilyuk","sequence":"additional","affiliation":[]},{"given":"Xiuxiu","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Alex","family":"Olwal","sequence":"additional","affiliation":[]},{"given":"Peggy","family":"Chi","sequence":"additional","affiliation":[]},{"given":"Xiang \"Anthony\"","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Ruofei","family":"Du","sequence":"additional","affiliation":[]}],"member":"320","published-online":{"date-parts":[[2023,4,19]]},"reference":[{"key":"e_1_3_3_3_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3196709.3196734"},{"key":"e_1_3_3_3_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3272973.3272990"},{"key":"e_1_3_3_3_3_1","volume-title":"Non-verbal communication in human social interaction. Non-verbal communication 2","author":"Argyle Michael","year":"1972","unstructured":"Michael Argyle. 1972. Non-verbal communication in human social interaction. Non-verbal communication 2 (1972), 1."},{"key":"e_1_3_3_3_4_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.iwslt-1.27"},{"key":"e_1_3_3_3_5_1","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3495883"},{"key":"e_1_3_3_3_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/123078.128726"},{"key":"e_1_3_3_3_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1719970.1720014"},{"key":"e_1_3_3_3_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474778"},{"key":"e_1_3_3_3_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379337.3415814"},{"key":"e_1_3_3_3_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/1943403.1943438"},{"key":"e_1_3_3_3_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300915"},{"key":"e_1_3_3_3_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3329714.3338126"},{"key":"e_1_3_3_3_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/383952.383955"},{"key":"e_1_3_3_3_14_1","volume-title":"Advances in Psychology. Vol.\u00a052","author":"Hart G","unstructured":"Sandra\u00a0G Hart and Lowell\u00a0E Staveland. 1988. Development of NASA-TLX (Task Load Index): Results of Empirical and Theoretical Research. In Advances in Psychology. Vol.\u00a052. Elsevier, New York, NY, USA, 139\u2013183."},{"key":"e_1_3_3_3_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2932710"},{"key":"e_1_3_3_3_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474785"},{"key":"e_1_3_3_3_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3406865.3418585"},{"key":"e_1_3_3_3_18_1","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in Neural Information Processing Systems 33 (2020), 6840\u20136851.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_3_3_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/302979.303030"},{"key":"e_1_3_3_3_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517558"},{"key":"e_1_3_3_3_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2382336.2382364"},{"key":"e_1_3_3_3_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.dss.2015.03.001"},{"key":"e_1_3_3_3_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300641"},{"key":"e_1_3_3_3_24_1","doi-asserted-by":"publisher","unstructured":"Vladimir Kulikov Shahar Yadin Matan Kleiner and Tomer Michaeli. 2022. SinDDM: A Single Image Denoising Diffusion Model. https:\/\/doi.org\/10.48550\/ARXIV.2211.16582","DOI":"10.48550\/ARXIV.2211.16582"},{"key":"e_1_3_3_3_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376519"},{"key":"e_1_3_3_3_26_1","volume-title":"Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers). Asian Federation of Natural Language Processing","author":"Li Yanran","year":"2017","unstructured":"Yanran Li, Hui Su, Xiaoyu Shen, Wenjie Li, Ziqiang Cao, and Shuzi Niu. 2017. DailyDialog: A Manually Labelled Multi-turn Dialogue Dataset. In Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers). Asian Federation of Natural Language Processing, Taipei, Taiwan, 986\u2013995. https:\/\/aclanthology.org\/I17-1099"},{"key":"e_1_3_3_3_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3526113.3545702"},{"key":"e_1_3_3_3_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1029632.1029674"},{"key":"e_1_3_3_3_29_1","volume-title":"Psychology of Learning and Motivation. Vol.\u00a041","author":"Mayer E","unstructured":"Richard\u00a0E Mayer. 2002. Multimedia Learning. In Psychology of Learning and Motivation. Vol.\u00a041. Elsevier, New York, NY, USA, 85\u2013139."},{"key":"e_1_3_3_3_30_1","volume-title":"Hearing Lips and Seeing Voices. Nature 264, 5588","author":"McGurk Harry","year":"1976","unstructured":"Harry McGurk and John MacDonald. 1976. Hearing Lips and Seeing Voices. Nature 264, 5588 (1976), 746\u2013748."},{"key":"e_1_3_3_3_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3432193"},{"key":"e_1_3_3_3_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10590-009-9050-0"},{"key":"e_1_3_3_3_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3131277.3132173"},{"key":"e_1_3_3_3_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3410531.3414310"},{"key":"e_1_3_3_3_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173867"},{"key":"e_1_3_3_3_36_1","volume-title":"International Conference on Machine Learning. PMLR, Association for Computing Machinery","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning. PMLR, Association for Computing Machinery, New York, NY, USA, 8748\u20138763."},{"key":"e_1_3_3_3_37_1","unstructured":"Aditya Ramesh Prafulla Dhariwal Alex Nichol Casey Chu and Mark Chen. 2022. Hierarchical Text-Conditional Image Generation with CLIP Latents. ArXiv abs\/2204.06125(2022) 10."},{"key":"e_1_3_3_3_38_1","doi-asserted-by":"crossref","unstructured":"Nils Reimers and Iryna Gurevych. 2019. Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. CoRR abs\/1908.10084(2019) 10. arXiv:1908.10084http:\/\/arxiv.org\/abs\/1908.10084","DOI":"10.18653\/v1\/D19-1410"},{"key":"e_1_3_3_3_39_1","volume-title":"The Proceedings of the First International Conference on the Practical Application of Intelligent Agents and Multi Agent Technology, Vol.\u00a01. Association for Computing Machinery","author":"Rhodes Bradley","year":"1996","unstructured":"Bradley Rhodes and Thad Starner. 1996. Remembrance Agent: a Continuously Running Automated Information Retrieval System. In The Proceedings of the First International Conference on the Practical Application of Intelligent Agents and Multi Agent Technology, Vol.\u00a01. Association for Computing Machinery, New York, NY, USA, 487\u2013495."},{"key":"e_1_3_3_3_40_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2205.11487"},{"key":"e_1_3_3_3_41_1","volume-title":"Image super-resolution via iterative refinement. arXiv preprint arXiv:2104.07636 1","author":"Saharia Chitwan","year":"2021","unstructured":"Chitwan Saharia, Jonathan Ho, William Chan, Tim Salimans, David\u00a0J Fleet, and Mohammad Norouzi. 2021. Image super-resolution via iterative refinement. arXiv preprint arXiv:2104.07636 1 (2021), 10."},{"key":"e_1_3_3_3_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300852"},{"key":"e_1_3_3_3_43_1","volume-title":"Improved techniques for training score-based generative models. Advances in neural information processing systems 33","author":"Song Yang","year":"2020","unstructured":"Yang Song and Stefano Ermon. 2020. Improved techniques for training score-based generative models. Advances in neural information processing systems 33 (2020), 12438\u201312448."},{"key":"e_1_3_3_3_44_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0028-3932(03)00185-4"},{"key":"e_1_3_3_3_45_1","volume-title":"The Merging of the Senses","author":"Stein E","unstructured":"Barry\u00a0E Stein and M\u00a0Alex Meredith. 1993. The Merging of the Senses. The MIT press, New York, NY, USA."},{"key":"e_1_3_3_3_46_1","doi-asserted-by":"publisher","DOI":"10.1121\/1.1907309"},{"key":"e_1_3_3_3_47_1","doi-asserted-by":"publisher","DOI":"10.1177\/107769900007700302"},{"key":"e_1_3_3_3_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143967"},{"key":"e_1_3_3_3_49_1","volume-title":"Language Within Language: Immediacy, a Channel in Verbal Communication","author":"Wiener Morton","unstructured":"Morton Wiener and Albert Mehrabian. 1968. Language Within Language: Immediacy, a Channel in Verbal Communication. Ardent Media, New York, NY, USA."},{"key":"e_1_3_3_3_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379337.3415845"},{"key":"e_1_3_3_3_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379337.3415882"},{"key":"e_1_3_3_3_52_1","doi-asserted-by":"publisher","unstructured":"Jiahui Yu Yuanzhong Xu Jing\u00a0Yu Koh Thang Luong Gunjan Baid Zirui Wang Vijay Vasudevan Alexander Ku Yinfei Yang Burcu\u00a0Karagol Ayan Ben Hutchinson Wei Han Zarana Parekh Xin Li Han Zhang Jason Baldridge and Yonghui Wu. 2022. Scaling Autoregressive Models for Content-Rich Text-to-Image Generation. https:\/\/doi.org\/10.48550\/ARXIV.2206.10789","DOI":"10.48550\/ARXIV.2206.10789"},{"key":"e_1_3_3_3_53_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-019-7541-4"},{"key":"e_1_3_3_3_54_1","doi-asserted-by":"publisher","DOI":"10.5555\/1619797.1619900"},{"key":"e_1_3_3_3_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DUI.2013.6550198"},{"key":"e_1_3_3_3_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3355089.3356561"}],"event":{"name":"CHI '23: CHI Conference on Human Factors in Computing Systems","location":"Hamburg Germany","acronym":"CHI '23","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2023 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3544548.3581566","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3544548.3581566","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:56Z","timestamp":1750178816000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3544548.3581566"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,19]]},"references-count":56,"alternative-id":["10.1145\/3544548.3581566","10.1145\/3544548"],"URL":"https:\/\/doi.org\/10.1145\/3544548.3581566","relation":{},"subject":[],"published":{"date-parts":[[2023,4,19]]},"assertion":[{"value":"2023-04-19","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}