{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T12:22:05Z","timestamp":1776082925197,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":64,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,11]],"date-time":"2024-05-11T00:00:00Z","timestamp":1715385600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,11]]},"DOI":"10.1145\/3613905.3636316","type":"proceedings-article","created":{"date-parts":[[2024,5,11]],"date-time":"2024-05-11T08:15:21Z","timestamp":1715415321000},"page":"1-7","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Computational Methodologies for Understanding, Automating, and Evaluating User Interfaces"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0022-6512","authenticated-orcid":false,"given":"Yue","family":"Jiang","sequence":"first","affiliation":[{"name":"Aalto University, Finland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0845-5563","authenticated-orcid":false,"given":"Yuwen","family":"Lu","sequence":"additional","affiliation":[{"name":"Computer Science and Engineering, University of Notre Dame, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4928-6225","authenticated-orcid":false,"given":"Tiffany","family":"Knearem","sequence":"additional","affiliation":[{"name":"Google, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8484-841X","authenticated-orcid":false,"given":"Clara E","family":"Kliman-Silver","sequence":"additional","affiliation":[{"name":"Google, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0634-7569","authenticated-orcid":false,"given":"Christof","family":"Lutteroth","sequence":"additional","affiliation":[{"name":"Computer Science, University of Bath, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7902-7625","authenticated-orcid":false,"given":"Toby Jia-Jun","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, University of Notre Dame, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6880-8546","authenticated-orcid":false,"given":"Jeffrey","family":"Nichols","sequence":"additional","affiliation":[{"name":"Apple Inc, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7110-5024","authenticated-orcid":false,"given":"Wolfgang","family":"Stuerzlinger","sequence":"additional","affiliation":[{"name":"School of Interactive Arts + Technology (SIAT), Simon Fraser University, Canada"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,5,11]]},"reference":[{"key":"e_1_3_3_2_1_1","unstructured":"Jean-Baptiste Alayrac Jeff Donahue Pauline Luc Antoine Miech Iain Barr Yana Hasson Karel Lenc Arthur Mensch Katie Millican Malcolm Reynolds Roman Ring Eliza Rutherford Serkan Cabi Tengda Han Zhitao Gong Sina Samangooei Marianne Monteiro Jacob Menick Sebastian Borgeaud Andrew Brock Aida Nematzadeh Sahand Sharifzadeh Mikolaj Binkowski Ricardo Barreira Oriol Vinyals Andrew Zisserman and Karen Simonyan. 2022. Flamingo: a Visual Language Model for Few-Shot Learning. arxiv:2204.14198\u00a0[cs.CV]"},{"key":"e_1_3_3_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/504704.504705"},{"key":"e_1_3_3_2_3_1","volume-title":"Uibert: Learning generic multimodal representations for ui understanding. arXiv preprint arXiv:2107.13731","author":"Bai Chongyang","year":"2021","unstructured":"Chongyang Bai, Xiaoxue Zang, Ying Xu, Srinivas Sunkara, Abhinav Rastogi, Jindong Chen, 2021. Uibert: Learning generic multimodal representations for ui understanding. arXiv preprint arXiv:2107.13731 (2021)."},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3276526"},{"key":"e_1_3_3_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/27623.29354"},{"key":"e_1_3_3_2_6_1","unstructured":"Xi Chen Xiao Wang Soravit Changpinyo AJ Piergiovanni Piotr Padlewski Daniel Salz Sebastian Goodman Adam Grycner Basil Mustafa Lucas Beyer Alexander Kolesnikov Joan Puigcerver Nan Ding Keran Rong Hassan Akbari Gaurav Mishra Linting Xue Ashish Thapliyal James Bradbury Weicheng Kuo Mojtaba Seyedhosseini Chao Jia Burcu\u00a0Karagol Ayan Carlos Riquelme Andreas Steiner Anelia Angelova Xiaohua Zhai Neil Houlsby and Radu Soricut. 2023. PaLI: A Jointly-Scaled Multilingual Language-Image Model. arxiv:2209.06794\u00a0[cs.CV]"},{"key":"e_1_3_3_2_7_1","volume-title":"Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality. See https:\/\/vicuna. lmsys. org (accessed","author":"Chiang Wei-Lin","year":"2023","unstructured":"Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph\u00a0E Gonzalez, 2023. Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality. See https:\/\/vicuna. lmsys. org (accessed 14 April 2023) (2023)."},{"key":"e_1_3_3_2_8_1","volume-title":"Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, and Steven Hoi.","author":"Dai Wenliang","year":"2023","unstructured":"Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng\u00a0Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale Fung, and Steven Hoi. 2023. InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning. arxiv:2305.06500\u00a0[cs.CV]"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3126594.3126651"},{"key":"e_1_3_3_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/1753326.1753554"},{"key":"e_1_3_3_2_11_1","article-title":"Practices and Challenges of Using Think-Aloud Protocols in Industry: An International Survey.","volume":"15","author":"Fan Mingming","year":"2020","unstructured":"Mingming Fan, Serina Shi, and Khai\u00a0N Truong. 2020. Practices and Challenges of Using Think-Aloud Protocols in Industry: An International Survey.Journal of Usability Studies 15, 2 (2020).","journal-title":"Journal of Usability Studies"},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1095034.1095063"},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/964442.964461"},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2010.05.005"},{"key":"e_1_3_3_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544549.3583960"},{"key":"e_1_3_3_2_16_1","unstructured":"Ben Heuwing Thomas Mandl and Christa Womser-Hacker. 2016. Combining contextual interviews and participative design to define requirements for text analysis of historical media. (2016)."},{"key":"e_1_3_3_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300334"},{"key":"e_1_3_3_2_18_1","doi-asserted-by":"publisher","unstructured":"Gabriel Ilharco Mitchell Wortsman Ross Wightman Cade Gordon Nicholas Carlini Rohan Taori Achal Dave Vaishaal Shankar Hongseok Namkoong John Miller Hannaneh Hajishirzi Ali Farhadi and Ludwig Schmidt. 2021. OpenCLIP. https:\/\/doi.org\/10.5281\/zenodo.5143773 If you use this software please cite it as below.","DOI":"10.5281\/zenodo.5143773"},{"key":"e_1_3_3_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300643"},{"key":"e_1_3_3_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581096"},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544549.3573805"},{"key":"e_1_3_3_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491101.3504030"},{"key":"e_1_3_3_2_23_1","unstructured":"Yue Jiang Eldon Schoop Amanda Swearngin and Jeffrey Nichols. 2023. ILuvUI: Instruction-tuned LangUage-Vision modeling of UIs from Machine Conversations. arxiv:2310.04869\u00a0[cs.HC]"},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445043"},{"key":"e_1_3_3_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376610"},{"key":"e_1_3_3_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544549.3573874"},{"key":"e_1_3_3_2_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58580-8_29"},{"key":"e_1_3_3_2_28_1","volume-title":"Proc. MobileHCI Adjunct.","author":"Leiva A.","year":"2020","unstructured":"Luis\u00a0A. Leiva, Asutosh Hota, and Antti Oulasvirta. 2020. Enrico: A High-quality Dataset for Topic Modeling of Mobile UI Designs. In Proc. MobileHCI Adjunct."},{"key":"e_1_3_3_2_29_1","volume-title":"Spotlight: Mobile UI Understanding using Vision-Language Models with a Focus. arXiv preprint arXiv:2209.14927","author":"Li Gang","year":"2022","unstructured":"Gang Li and Yang Li. 2022. Spotlight: Mobile UI Understanding using Vision-Language Models with a Focus. arXiv preprint arXiv:2209.14927 (2022)."},{"key":"e_1_3_3_2_30_1","volume-title":"Spotlight: Mobile UI Understanding using Vision-Language Models with a Focus. ArXiv abs\/2209.14927","author":"Li Gang","year":"2022","unstructured":"Gang Li and Yang Li. 2022. Spotlight: Mobile UI Understanding using Vision-Language Models with a Focus. ArXiv abs\/2209.14927 (2022). https:\/\/api.semanticscholar.org\/CorpusID:252595735"},{"key":"e_1_3_3_2_31_1","volume-title":"Layoutgan: Generating graphic layouts with wireframe discriminators. arXiv preprint arXiv:1901.06767","author":"Li Jianan","year":"2019","unstructured":"Jianan Li, Jimei Yang, Aaron Hertzmann, Jianming Zhang, and Tingfa Xu. 2019. Layoutgan: Generating graphic layouts with wireframe discriminators. arXiv preprint arXiv:1901.06767 (2019)."},{"key":"e_1_3_3_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2020.2999335"},{"key":"e_1_3_3_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3025483"},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/VLHCC.2018.8506506"},{"key":"e_1_3_3_2_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_3_2_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445049"},{"key":"e_1_3_3_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3332165.3347899"},{"key":"e_1_3_3_2_38_1","volume-title":"Proceedings of the 16th ACM International Conference on Mobile Systems, Applications, and Services (MobiSys","author":"Jia-Jun Li Toby","year":"2018","unstructured":"Toby Jia-Jun Li and Oriana Riva. 2018. KITE: Building conversational bots from mobile apps. In Proceedings of the 16th ACM International Conference on Mobile Systems, Applications, and Services (MobiSys 2018). ACM."},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.729"},{"key":"e_1_3_3_2_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.443"},{"key":"e_1_3_3_2_41_1","volume-title":"Visual instruction tuning. arXiv preprint arXiv:2304.08485","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong\u00a0Jae Lee. 2023. Visual instruction tuning. arXiv preprint arXiv:2304.08485 (2023)."},{"key":"e_1_3_3_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3242587.3242650"},{"key":"e_1_3_3_2_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491101.3519809"},{"key":"e_1_3_3_2_44_1","doi-asserted-by":"publisher","DOI":"10.5555\/1378337.1378350"},{"key":"e_1_3_3_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/SANER53432.2022.00069"},{"key":"e_1_3_3_2_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/571985.572008"},{"key":"e_1_3_3_2_48_1","volume-title":"Android in the Wild: A Large-Scale Dataset for Android Device Control. arXiv preprint arXiv:2307.10088","author":"Rawles Christopher","year":"2023","unstructured":"Christopher Rawles, Alice Li, Daniel Rodriguez, Oriana Riva, and Timothy Lillicrap. 2023. Android in the Wild: A Large-Scale Dataset for Android Device Control. arXiv preprint arXiv:2307.10088 (2023)."},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379337.3415848"},{"key":"e_1_3_3_2_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3377325.3377515"},{"key":"e_1_3_3_2_51_1","unstructured":"Y Societ. [n.d.]. What Bard is. https:\/\/ai.google\/static\/documents\/google-about-bard.pdf. Accessed: 2023-10-10."},{"key":"e_1_3_3_2_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/1166253.1166301"},{"key":"e_1_3_3_2_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3025506"},{"key":"e_1_3_3_2_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376593"},{"key":"e_1_3_3_2_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/169059.169305"},{"key":"e_1_3_3_2_56_1","doi-asserted-by":"crossref","unstructured":"Maryam Taeb Amanda Swearngin Eldon Schoop Ruijia Cheng Yue Jiang and Jeffrey Nichols. 2023. AXNav: Replaying Accessibility Tests from Natural Language. arxiv:2310.02424\u00a0[cs.HC]","DOI":"10.1145\/3613904.3642777"},{"key":"e_1_3_3_2_57_1","volume-title":"Stanford Alpaca: An Instruction-following LLaMA model. https:\/\/github.com\/tatsu-lab\/stanford_alpaca.","author":"Taori Rohan","year":"2023","unstructured":"Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li, Carlos Guestrin, Percy Liang, and Tatsunori\u00a0B. Hashimoto. 2023. Stanford Alpaca: An Instruction-following LLaMA model. https:\/\/github.com\/tatsu-lab\/stanford_alpaca."},{"key":"e_1_3_3_2_58_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_3_2_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580895"},{"key":"e_1_3_3_2_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474765"},{"key":"e_1_3_3_2_61_1","volume-title":"Shiqi Jiang, Yunhao Liu, Yaqin Zhang, and Yunxin Liu.","author":"Wen Hao","year":"2023","unstructured":"Hao Wen, Yuanchun Li, Guohong Liu, Shanhui Zhao, Tao Yu, Toby Jia-Jun Li, Shiqi Jiang, Yunhao Liu, Yaqin Zhang, and Yunxin Liu. 2023. Empowering LLM to use Smartphone for Intelligent Task Automation. arXiv preprint arXiv:2308.15272 (2023)."},{"key":"e_1_3_3_2_62_1","volume-title":"DroidBot-GPT: GPT-powered UI Automation for Android. arXiv preprint arXiv:2304.07061","author":"Wen Hao","year":"2023","unstructured":"Hao Wen, Hongming Wang, Jiaxuan Liu, and Yuanchun Li. 2023. DroidBot-GPT: GPT-powered UI Automation for Android. arXiv preprint arXiv:2304.07061 (2023)."},{"key":"e_1_3_3_2_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581158"},{"key":"e_1_3_3_2_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/97243.97248"},{"key":"e_1_3_3_2_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445186"}],"event":{"name":"CHI '24: CHI Conference on Human Factors in Computing Systems","location":"Honolulu HI USA","acronym":"CHI '24","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGACCESS ACM Special Interest Group on Accessible Computing"]},"container-title":["Extended Abstracts of the CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3613905.3636316","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3613905.3636316","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T23:57:18Z","timestamp":1750291038000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3613905.3636316"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,11]]},"references-count":64,"alternative-id":["10.1145\/3613905.3636316","10.1145\/3613905"],"URL":"https:\/\/doi.org\/10.1145\/3613905.3636316","relation":{},"subject":[],"published":{"date-parts":[[2024,5,11]]},"assertion":[{"value":"2024-05-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}