{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T01:50:35Z","timestamp":1777427435406,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":85,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,11]],"date-time":"2024-10-11T00:00:00Z","timestamp":1728604800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,13]]},"DOI":"10.1145\/3654777.3676408","type":"proceedings-article","created":{"date-parts":[[2024,10,11]],"date-time":"2024-10-11T10:50:36Z","timestamp":1728643836000},"page":"1-16","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":27,"title":["UIClip: A Data-driven Model for Assessing User Interface Design"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5101-0557","authenticated-orcid":false,"given":"Jason","family":"Wu","sequence":"first","affiliation":[{"name":"Human-Computer Interaction Institute, Carnegie Mellon University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6335-5904","authenticated-orcid":false,"given":"Yi-Hao","family":"Peng","sequence":"additional","affiliation":[{"name":"Human-Computer Interaction Institute, Carnegie Mellon University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0215-4526","authenticated-orcid":false,"given":"Xin Yue Amanda","family":"Li","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0935-4745","authenticated-orcid":false,"given":"Amanda","family":"Swearngin","sequence":"additional","affiliation":[{"name":"Apple, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2072-0625","authenticated-orcid":false,"given":"Jeffrey P","family":"Bigham","sequence":"additional","affiliation":[{"name":"Apple, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6880-8546","authenticated-orcid":false,"given":"Jeffrey","family":"Nichols","sequence":"additional","affiliation":[{"name":"Apple, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,11]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Gpt-4 technical report. arXiv preprint arXiv:2303.08774","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia\u00a0Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_2_2_1","unstructured":"Anthropic. 2023. Introducing the next generation of Claude. https:\/\/www.anthropic.com\/news\/claude-3-family. Accessed: 2024-04-01."},{"key":"e_1_3_2_2_3_1","volume-title":"Qwen-vl: A versatile vision-language model for understanding, localization, text reading, and beyond.","author":"Bai Jinze","year":"2023","unstructured":"Jinze Bai, Shuai Bai, Shusheng Yang, Shijie Wang, Sinan Tan, Peng Wang, Junyang Lin, Chang Zhou, and Jingren Zhou. 2023. Qwen-vl: A versatile vision-language model for understanding, localization, text reading, and beyond. (2023)."},{"key":"e_1_3_2_2_4_1","volume-title":"Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65\u201372","author":"Banerjee Satanjeev","year":"2005","unstructured":"Satanjeev Banerjee and Alon Lavie. 2005. METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65\u201372."},{"key":"e_1_3_2_2_5_1","volume-title":"International Conference on Machine Learning. PMLR, 2397\u20132430","author":"Biderman Stella","year":"2023","unstructured":"Stella Biderman, Hailey Schoelkopf, Quentin\u00a0Gregory Anthony, Herbie Bradley, Kyle O\u2019Brien, Eric Hallahan, Mohammad\u00a0Aflah Khan, Shivanshu Purohit, USVSN\u00a0Sai Prashanth, Edward Raff, 2023. Pythia: A suite for analyzing large language models across training and scaling. In International Conference on Machine Learning. PMLR, 2397\u20132430."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445762"},{"key":"e_1_3_2_2_7_1","volume-title":"Jared Kaplan, Harri Edwards, Yuri Burda","author":"Chen Mark","year":"2021","unstructured":"Mark Chen, Jerry Tworek, Heewoo Jun, Qiming Yuan, Henrique Ponde de\u00a0Oliveira Pinto, Jared Kaplan, Harri Edwards, Yuri Burda, Nicholas Joseph, Greg Brockman, 2021. Evaluating large language models trained on code. arXiv preprint arXiv:2107.03374 (2021)."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00276"},{"key":"e_1_3_2_2_9_1","volume-title":"Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90%* ChatGPT Quality. https:\/\/lmsys.org\/blog\/2023-03-30-vicuna\/","author":"Chiang Wei-Lin","year":"2023","unstructured":"Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph\u00a0E. Gonzalez, Ion Stoica, and Eric\u00a0P. Xing. 2023. Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90%* ChatGPT Quality. https:\/\/lmsys.org\/blog\/2023-03-30-vicuna\/"},{"key":"e_1_3_2_2_10_1","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery Aakanksha","year":"2023","unstructured":"Aakanksha Chowdhery, Sharan Narang, Jacob Devlin, Maarten Bosma, Gaurav Mishra, Adam Roberts, Paul Barham, Hyung\u00a0Won Chung, Charles Sutton, Sebastian Gehrmann, 2023. Palm: Scaling language modeling with pathways. Journal of Machine Learning Research 24, 240 (2023), 1\u2013113.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3126594.3126651"},{"key":"e_1_3_2_2_12_1","volume-title":"Documenting large webtext corpora: A case study on the colossal clean crawled corpus. arXiv preprint arXiv:2104.08758","author":"Dodge Jesse","year":"2021","unstructured":"Jesse Dodge, Maarten Sap, Ana Marasovi\u0107, William Agnew, Gabriel Ilharco, Dirk Groeneveld, Margaret Mitchell, and Matt Gardner. 2021. Documenting large webtext corpora: A case study on the colossal clean crawled corpus. arXiv preprint arXiv:2104.08758 (2021)."},{"key":"e_1_3_2_2_13_1","volume-title":"Generating Automatic Feedback on UI Mockups with Large Language Models. arXiv preprint arXiv:2403.13139","author":"Duan Peitong","year":"2024","unstructured":"Peitong Duan, Jeremy Warner, Yang Li, and Bjoern Hartmann. 2024. Generating Automatic Feedback on UI Mockups with Large Language Models. arXiv preprint arXiv:2403.13139 (2024)."},{"key":"e_1_3_2_2_14_1","unstructured":"Martin Ester Hans-Peter Kriegel J\u00f6rg Sander Xiaowei Xu 1996. A density-based algorithm for discovering clusters in large spatial databases with noise. In kdd Vol.\u00a096. 226\u2013231."},{"key":"e_1_3_2_2_15_1","volume-title":"Datacomp: In search of the next generation of multimodal datasets. Advances in Neural Information Processing Systems 36","author":"Gadre Samir\u00a0Yitzhak","year":"2024","unstructured":"Samir\u00a0Yitzhak Gadre, Gabriel Ilharco, Alex Fang, Jonathan Hayase, Georgios Smyrnis, Thao Nguyen, Ryan Marten, Mitchell Wortsman, Dhruba Ghosh, Jieyu Zhang, 2024. Datacomp: In search of the next generation of multimodal datasets. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/964442.964461"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1095034.1095063"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/1294211.1294253"},{"key":"e_1_3_2_2_19_1","unstructured":"George Giannakopoulos and Vangelis Karkaletsis. 2011. AutoSummENG and MeMoG in Evaluating Guided Summaries.. In TAC."},{"key":"e_1_3_2_2_20_1","unstructured":"Kelley Gordon. 2020. 5 Principles of Visual Design in UX. https:\/\/www.nngroup.com\/articles\/principles-visual-design\/. Accessed: 2024-03-25."},{"key":"e_1_3_2_2_21_1","volume-title":"Reinforced self-training (rest) for language modeling. arXiv preprint arXiv:2308.08998","author":"Gulcehre Caglar","year":"2023","unstructured":"Caglar Gulcehre, Tom\u00a0Le Paine, Srivatsan Srinivasan, Ksenia Konyushkova, Lotte Weerts, Abhishek Sharma, Aditya Siddhant, Alex Ahern, Miaosen Wang, Chenjie Gu, 2023. Reinforced self-training (rest) for language modeling. arXiv preprint arXiv:2308.08998 (2023)."},{"key":"e_1_3_2_2_22_1","volume-title":"Vol.\u00a02","author":"Hadsell Raia","unstructured":"Raia Hadsell, Sumit Chopra, and Yann LeCun. 2006. Dimensionality reduction by learning an invariant mapping. In 2006 IEEE computer society conference on computer vision and pattern recognition (CVPR\u201906), Vol.\u00a02. IEEE, 1735\u20131742."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/1753326.1753400"},{"key":"e_1_3_2_2_24_1","volume-title":"Clipscore: A reference-free evaluation metric for image captioning. arXiv preprint arXiv:2104.08718","author":"Hessel Jack","year":"2021","unstructured":"Jack Hessel, Ari Holtzman, Maxwell Forbes, Ronan\u00a0Le Bras, and Yejin Choi. 2021. Clipscore: A reference-free evaluation metric for image captioning. arXiv preprint arXiv:2104.08718 (2021)."},{"key":"e_1_3_2_2_25_1","volume-title":"Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems 30","author":"Heusel Martin","year":"2017","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01866"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300334"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/279044.279051"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1121\/1.2016299"},{"key":"e_1_3_2_2_30_1","volume-title":"Florian Bressand, Gianna Lengyel","author":"Jiang Q","year":"2023","unstructured":"Albert\u00a0Q Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra\u00a0Singh Chaplot, Diego de\u00a0las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, 2023. Mistral 7B. arXiv preprint arXiv:2310.06825 (2023)."},{"key":"e_1_3_2_2_31_1","volume-title":"Emma\u00a0Bou Hanna","author":"Jiang Q","year":"2024","unstructured":"Albert\u00a0Q Jiang, Alexandre Sablayrolles, Antoine Roux, Arthur Mensch, Blanche Savary, Chris Bamford, Devendra\u00a0Singh Chaplot, Diego de\u00a0las Casas, Emma\u00a0Bou Hanna, Florian Bressand, 2024. Mixtral of experts. arXiv preprint arXiv:2401.04088 (2024)."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.2190\/TW.43.1.b"},{"key":"e_1_3_2_2_33_1","volume-title":"Large language models are zero-shot reasoners. Advances in neural information processing systems 35","author":"Kojima Takeshi","year":"2022","unstructured":"Takeshi Kojima, Shixiang\u00a0Shane Gu, Machel Reid, Yutaka Matsuo, and Yusuke Iwasawa. 2022. Large language models are zero-shot reasoners. Advances in neural information processing systems 35 (2022), 22199\u201322213."},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2470654.2466420"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/257089.257396"},{"key":"e_1_3_2_2_36_1","volume-title":"Bloom: A 176b-parameter open-access multilingual language model.","author":"Le\u00a0Scao Teven","year":"2022","unstructured":"Teven Le\u00a0Scao, Angela Fan, Christopher Akiki, Ellie Pavlick, Suzana Ili\u0107, Daniel Hesslow, Roman Castagn\u00e9, Alexandra\u00a0Sasha Luccioni, Fran\u00e7ois Yvon, Matthias Gall\u00e9, 2022. Bloom: A 176b-parameter open-access multilingual language model. (2022)."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376327"},{"key":"e_1_3_2_2_38_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Lee Kenton","year":"2023","unstructured":"Kenton Lee, Mandar Joshi, Iulia\u00a0Raluca Turc, Hexiang Hu, Fangyu Liu, Julian\u00a0Martin Eisenschlos, Urvashi Khandelwal, Peter Shaw, Ming-Wei Chang, and Kristina Toutanova. 2023. Pix2struct: Screenshot parsing as pretraining for visual language understanding. In International Conference on Machine Learning. PMLR, 18893\u201318912."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3406324.3410710"},{"key":"e_1_3_2_2_40_1","volume-title":"Spotlight: Mobile ui understanding using vision-language models with a focus. arXiv preprint arXiv:2209.14927","author":"Li Gang","year":"2022","unstructured":"Gang Li and Yang Li. 2022. Spotlight: Mobile ui understanding using vision-language models with a focus. arXiv preprint arXiv:2209.14927 (2022)."},{"key":"e_1_3_2_2_41_1","unstructured":"William Lidwell Kritina Holden and Jill Butler. 2010. Universal principles of design revised and updated: 125 ways to enhance usability influence perception increase appeal make better design decisions and teach through design. Rockport Pub."},{"key":"e_1_3_2_2_42_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74\u201381.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74\u201381."},{"key":"e_1_3_2_2_43_1","volume-title":"In Proceedings of The 8th International Conference on Distributed Multimedia Systems (2002 International Workshop on Visual Computing). Citeseer, 573\u2013580","author":"Lin James","year":"2002","unstructured":"James Lin and James\u00a0A Landay. 2002. Damask: A tool for early-stage design and prototyping of multi-device user interfaces. In In Proceedings of The 8th International Conference on Distributed Multimedia Systems (2002 International Workshop on Visual Computing). Citeseer, 573\u2013580."},{"key":"e_1_3_2_2_44_1","volume-title":"Improved baselines with visual instruction tuning. arXiv preprint arXiv:2310.03744","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Yuheng Li, and Yong\u00a0Jae Lee. 2023. Improved baselines with visual instruction tuning. arXiv preprint arXiv:2310.03744 (2023)."},{"key":"e_1_3_2_2_45_1","unstructured":"Haotian Liu Chunyuan Li Yuheng Li Bo Li Yuanhan Zhang Sheng Shen and Yong\u00a0Jae Lee. 2024. LLaVA-NeXT: Improved reasoning OCR and world knowledge. https:\/\/llava-vl.github.io\/blog\/2024-01-30-llava-next\/"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2675133.2675283"},{"key":"e_1_3_2_2_47_1","volume-title":"Self-refine: Iterative refinement with self-feedback. Advances in Neural Information Processing Systems 36","author":"Madaan Aman","year":"2024","unstructured":"Aman Madaan, Niket Tandon, Prakhar Gupta, Skyler Hallinan, Luyu Gao, Sarah Wiegreffe, Uri Alon, Nouha Dziri, Shrimai Prabhumoye, Yiming Yang, 2024. Self-refine: Iterative refinement with self-feedback. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/2702123.2702575"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3180155.3180246"},{"key":"e_1_3_2_2_50_1","volume-title":"DENIM: An informal web site design tool inspired by observations of practice. Human-computer interaction 18, 3","author":"Newman W","year":"2003","unstructured":"Mark\u00a0W Newman, James Lin, Jason\u00a0I Hong, and James\u00a0A Landay. 2003. DENIM: An informal web site design tool inspired by observations of practice. Human-computer interaction 18, 3 (2003), 259\u2013324."},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/964442.964507"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/142750.142834"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/191666.191729"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3266037.3266087"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517739"},{"key":"e_1_3_2_2_56_1","volume-title":"Training language models to follow instructions with human feedback. Advances in neural information processing systems 35","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, 2022. Training language models to follow instructions with human feedback. Advances in neural information processing systems 35 (2022), 27730\u201327744."},{"key":"e_1_3_2_2_57_1","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318."},{"key":"e_1_3_2_2_58_1","volume-title":"International conference on machine learning. PMLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_2_2_59_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_2_60_1","first-page":"1","article-title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter\u00a0J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. Journal of Machine Learning Research 21, 140 (2020), 1\u201367. http:\/\/jmlr.org\/papers\/v21\/20-074.html","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_61_1","volume-title":"Sentence-bert: Sentence embeddings using siamese bert-networks. arXiv preprint arXiv:1908.10084","author":"Reimers Nils","year":"2019","unstructured":"Nils Reimers and Iryna Gurevych. 2019. Sentence-bert: Sentence embeddings using siamese bert-networks. arXiv preprint arXiv:1908.10084 (2019)."},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"publisher","DOI":"10.1093\/iwc\/iwy008"},{"key":"e_1_3_2_2_63_1","volume-title":"Improved techniques for training gans. Advances in neural information processing systems 29","author":"Salimans Tim","year":"2016","unstructured":"Tim Salimans, Ian Goodfellow, Wojciech Zaremba, Vicki Cheung, Alec Radford, and Xi Chen. 2016. Improved techniques for training gans. Advances in neural information processing systems 29 (2016)."},{"key":"e_1_3_2_2_64_1","volume-title":"Stay on topic with classifier-free guidance. arXiv preprint arXiv:2306.17806","author":"Sanchez Guillaume","year":"2023","unstructured":"Guillaume Sanchez, Honglu Fan, Alexander Spangher, Elad Levi, Pawan\u00a0Sasanka Ammanamanchi, and Stella Biderman. 2023. Stay on topic with classifier-free guidance. arXiv preprint arXiv:2306.17806 (2023)."},{"key":"e_1_3_2_2_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517497"},{"key":"e_1_3_2_2_66_1","volume-title":"A Vision Check-up for Language Models. arXiv preprint arXiv:2401.01862","author":"Sharma Pratyusha","year":"2024","unstructured":"Pratyusha Sharma, Tamar\u00a0Rott Shaham, Manel Baradad, Stephanie Fu, Adrian Rodriguez-Munoz, Shivam Duggal, Phillip Isola, and Antonio Torralba. 2024. A Vision Check-up for Language Models. arXiv preprint arXiv:2401.01862 (2024)."},{"key":"e_1_3_2_2_67_1","unstructured":"Ben Shneiderman Catherine Plaisant Maxine Cohen Steven Jacobs Niklas Elmqvist and Nicholas Diakopoulos. 2016. Designing the user interface: strategies for effective human-computer interaction. Pearson Education."},{"key":"e_1_3_2_2_68_1","volume-title":"Design2Code: How Far Are We From Automating Front-End Engineering?arXiv preprint arXiv:2403.03163","author":"Si Chenglei","year":"2024","unstructured":"Chenglei Si, Yanzhe Zhang, Zhengyuan Yang, Ruibo Liu, and Diyi Yang. 2024. Design2Code: How Far Are We From Automating Front-End Engineering?arXiv preprint arXiv:2403.03163 (2024)."},{"key":"e_1_3_2_2_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300305"},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376593"},{"key":"e_1_3_2_2_71_1","volume-title":"Towards Automated Accessibility Report Generation for Mobile Apps. arXiv preprint arXiv:2310.00091","author":"Swearngin Amanda","year":"2023","unstructured":"Amanda Swearngin, Jason Wu, Xiaoyi Zhang, Esteban Gomez, Jen Coughenour, Rachel Stukenborg, Bhavya Garg, Greg Hughes, Adriana Hilliard, Jeffrey\u00a0P Bigham, 2023. Towards Automated Accessibility Report Generation for Mobile Apps. arXiv preprint arXiv:2310.00091 (2023)."},{"key":"e_1_3_2_2_72_1","volume-title":"Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805","author":"Team Gemini","year":"2023","unstructured":"Gemini Team, Rohan Anil, Sebastian Borgeaud, Yonghui Wu, Jean-Baptiste Alayrac, Jiahui Yu, Radu Soricut, Johan Schalkwyk, Andrew\u00a0M Dai, Anja Hauth, 2023. Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805 (2023)."},{"key":"e_1_3_2_2_73_1","doi-asserted-by":"publisher","DOI":"10.1145\/2901790.2901817"},{"key":"e_1_3_2_2_74_1","volume-title":"Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Louis Martin, Kevin Stone, Peter Albert, Amjad Almahairi, Yasmine Babaei, Nikolay Bashlykov, Soumya Batra, Prajjwal Bhargava, Shruti Bhosale, 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_2_75_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474765"},{"key":"e_1_3_2_2_76_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25353"},{"key":"e_1_3_2_2_77_1","unstructured":"Robin Williams. 2015. The non-designer\u2019s design book: Design and typographic principles for the visual novice. Pearson Education."},{"key":"e_1_3_2_2_78_1","unstructured":"Euphemia Wong. 2024. User Interface Design Guidelines: 10 Rules of Thumb. https:\/\/www.interaction-design.org\/literature\/article\/user-interface-design-guidelines-10-rules-of-thumb. Accessed: 2024-03-25."},{"key":"e_1_3_2_2_79_1","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606824"},{"key":"e_1_3_2_2_80_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581158"},{"key":"e_1_3_2_2_81_1","volume-title":"Demystifying clip data. arXiv preprint arXiv:2309.16671","author":"Xu Hu","year":"2023","unstructured":"Hu Xu, Saining Xie, Xiaoqing\u00a0Ellen Tan, Po-Yao Huang, Russell Howes, Vasu Sharma, Shang-Wen Li, Gargi Ghosh, Luke Zettlemoyer, and Christoph Feichtenhofer. 2023. Demystifying clip data. arXiv preprint arXiv:2309.16671 (2023)."},{"key":"e_1_3_2_2_82_1","doi-asserted-by":"publisher","DOI":"10.1145\/1622176.1622213"},{"key":"e_1_3_2_2_83_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01179"},{"key":"e_1_3_2_2_84_1","volume-title":"Judging llm-as-a-judge with mt-bench and chatbot arena. Advances in Neural Information Processing Systems 36","author":"Zheng Lianmin","year":"2024","unstructured":"Lianmin Zheng, Wei-Lin Chiang, Ying Sheng, Siyuan Zhuang, Zhanghao Wu, Yonghao Zhuang, Zi Lin, Zhuohan Li, Dacheng Li, Eric Xing, 2024. Judging llm-as-a-judge with mt-bench and chatbot arena. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_2_85_1","volume-title":"Li\u00a0F Fei-Fei, and Michael Bernstein.","author":"Zhou Sharon","year":"2019","unstructured":"Sharon Zhou, Mitchell Gordon, Ranjay Krishna, Austin Narcomey, Li\u00a0F Fei-Fei, and Michael Bernstein. 2019. Hype: A benchmark for human eye perceptual evaluation of generative models. Advances in neural information processing systems 32 (2019)."}],"event":{"name":"UIST '24: The 37th Annual ACM Symposium on User Interface Software and Technology","location":"Pittsburgh PA USA","acronym":"UIST '24"},"container-title":["Proceedings of the 37th Annual ACM Symposium on User Interface Software and Technology"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3654777.3676408","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3654777.3676408","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,4]],"date-time":"2025-08-04T21:10:32Z","timestamp":1754341832000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3654777.3676408"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,11]]},"references-count":85,"alternative-id":["10.1145\/3654777.3676408","10.1145\/3654777"],"URL":"https:\/\/doi.org\/10.1145\/3654777.3676408","relation":{},"subject":[],"published":{"date-parts":[[2024,10,11]]},"assertion":[{"value":"2024-10-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}