{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,10]],"date-time":"2026-07-10T13:27:52Z","timestamp":1783690072502,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":77,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,24]],"date-time":"2025-03-24T00:00:00Z","timestamp":1742774400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,24]]},"DOI":"10.1145\/3708359.3712129","type":"proceedings-article","created":{"date-parts":[[2025,3,19]],"date-time":"2025-03-19T12:50:34Z","timestamp":1742388634000},"page":"861-877","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["ILuvUI: Instruction-tuned LangUage-Vision modeling of UIs from Machine Conversations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0022-6512","authenticated-orcid":false,"given":"Yue","family":"Jiang","sequence":"first","affiliation":[{"name":"Aalto University, Espoo, Finland,"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8951-2878","authenticated-orcid":false,"given":"Eldon","family":"Schoop","sequence":"additional","affiliation":[{"name":"Apple, Seattle, Washington, USA,"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0935-4745","authenticated-orcid":false,"given":"Amanda","family":"Swearngin","sequence":"additional","affiliation":[{"name":"Apple, Seattle, Washington, USA,"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6880-8546","authenticated-orcid":false,"given":"Jeffrey","family":"Nichols","sequence":"additional","affiliation":[{"name":"Apple Inc, San Diego, California, USA,"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,3,24]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"Jean-Baptiste Alayrac Jeff Donahue Pauline Luc Antoine Miech Iain Barr Yana Hasson Karel Lenc Arthur Mensch Katie Millican Malcolm Reynolds Roman Ring Eliza Rutherford Serkan Cabi Tengda Han Zhitao Gong Sina Samangooei Marianne Monteiro Jacob Menick Sebastian Borgeaud Andrew Brock Aida Nematzadeh Sahand Sharifzadeh Mikolaj Binkowski Ricardo Barreira Oriol Vinyals Andrew Zisserman and Karen Simonyan. 2022. Flamingo: a Visual Language Model for Few-Shot Learning. arxiv:https:\/\/arXiv.org\/abs\/2204.14198\u00a0[cs.CV]"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474808"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/2380116.2380129"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/3220134.3220135"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","unstructured":"M.\u00a0L. BERNARDI G.\u00a0A. DI\u00a0LUCCA and D. DISTANTE. 2009. RE-UWA approach to recover user centered conceptual models from Web applications. International Journal on Software Tools for Technology Transfer 11 6 (2009) 485\u2013501. 10.1007\/s10009-009-0126-1","DOI":"10.1007\/s10009-009-0126-1"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","unstructured":"Pavol Bielik Marc Fischer and Martin Vechev. 2018. Robust Relational Layout Synthesis from Examples for Android. Proc. ACM Program. Lang. 2 OOPSLA Article 156 (Oct. 2018) 29\u00a0pages. 10.1145\/3276526","DOI":"10.1145\/3276526"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445762"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/2047196.2047228"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","unstructured":"Chunyang Chen Sidong Feng Zhenchang Xing Linda Liu Shengdong Zhao and Jinshui Wang. 2019. Gallery D.C.: Design Search and Knowledge Discovery through Auto-Created GUI Component Gallery. Proc. ACM Hum.-Comput. Interact. 3 CSCW Article 180 (nov 2019) 22\u00a0pages. 10.1145\/3359282","DOI":"10.1145\/3359282"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3180155.3180240"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","unstructured":"Jieshan Chen Chunyang Chen Zhenchang Xing Xin Xia Liming Zhu John Grundy and Jinshui Wang. 2020. Wireframe-Based UI Design Search through Image Autoencoder. ACM Trans. Softw. Eng. Methodol. 29 3 Article 19 (jun 2020) 31\u00a0pages. 10.1145\/3391613","DOI":"10.1145\/3391613"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3409691"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","unstructured":"Sen Chen Lingling Fan Chunyang Chen Minhui Xue Yang Liu and Lihua Xu. 2021. GUI-Squatting Attack: Automated Generation of Android Phishing Apps. IEEE Transactions on Dependable and Secure Computing 18 6 (2021) 2551\u20132568. 10.1109\/TDSC.2019.2956035","DOI":"10.1109\/TDSC.2019.2956035"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/AI4Mobile.2019.8672718"},{"key":"e_1_3_3_2_16_2","unstructured":"Xi Chen Xiao Wang Soravit Changpinyo AJ Piergiovanni Piotr Padlewski Daniel Salz Sebastian Goodman Adam Grycner Basil Mustafa Lucas Beyer Alexander Kolesnikov Joan Puigcerver Nan Ding Keran Rong Hassan Akbari Gaurav Mishra Linting Xue Ashish Thapliyal James Bradbury Weicheng Kuo Mojtaba Seyedhosseini Chao Jia Burcu\u00a0Karagol Ayan Carlos Riquelme Andreas Steiner Anelia Angelova Xiaohua Zhai Neil Houlsby and Radu Soricut. 2023. PaLI: A Jointly-Scaled Multilingual Language-Image Model. arxiv:https:\/\/arXiv.org\/abs\/2209.06794\u00a0[cs.CV]"},{"key":"e_1_3_3_2_17_2","unstructured":"Wei-Lin Chiang Zhuohan Li Zi Lin Ying Sheng Zhanghao Wu Hao Zhang Lianmin Zheng Siyuan Zhuang Yonghao Zhuang Joseph\u00a0E Gonzalez et\u00a0al. 2023. Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality. See https:\/\/vicuna. lmsys. org (accessed 14 April 2023) (2023)."},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/108844.108850"},{"key":"e_1_3_3_2_19_2","unstructured":"Wenliang Dai Junnan Li Dongxu Li Anthony Meng\u00a0Huat Tiong Junqi Zhao Weisheng Wang Boyang Li Pascale Fung and Steven Hoi. 2023. InstructBLIP: Towards General-purpose Vision-Language Models with Instruction Tuning. arxiv:https:\/\/arXiv.org\/abs\/2305.06500\u00a0[cs.CV]"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3472301.3484340"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3126594.3126651"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","unstructured":"G.A. Di\u00a0Lucca P. Fasolino A.R.and\u00a0IGLINSKI and P. Tramontana. 2004. Reverse engineering Web applications: the WARE approach. SOFTWARE MAINTENANCE AND EVOLUTION: RESEARCH AND PRACTICE 12 (2004) 71\u2013101. 10.1002\/smr.281","DOI":"10.1002\/smr.281"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/1753326.1753554"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/1978942.1979086"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/2642918.2647412"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","unstructured":"W.\u00a0Keith Edwards Elizabeth\u00a0D. Mynatt and Kathryn Stockton. 1995. Access to Graphical Interfaces for Blind Users. Interactions 2 1 (jan 1995) 54\u201367. 10.1145\/208143.208161","DOI":"10.1145\/208143.208161"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3490099.3511109"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376728"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3174092"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300334"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.5143773"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613905.3638191"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300643"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613905.3636316"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544549.3573805"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491101.3504030"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/VL\/HCC60511.2024.00032"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445043"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376610"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642822"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.1145\/1141277.1141570"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"publisher","DOI":"10.1109\/VLHCC.2018.8506516"},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/1357054.1357323"},{"key":"e_1_3_3_2_44_2","unstructured":"Gang Li and Yang Li. 2022. Spotlight: Mobile UI Understanding using Vision-Language Models with a Focus. ArXiv abs\/2209.14927 (2022). https:\/\/api.semanticscholar.org\/CorpusID:252595735"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3025483"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3501992"},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_3_2_48_2","unstructured":"Haotian Liu Chunyuan Li Yuheng Li and Yong\u00a0Jae Lee. 2023. Improved baselines with visual instruction tuning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.03744 (2023)."},{"key":"e_1_3_3_2_49_2","unstructured":"Haotian Liu Chunyuan Li Qingyang Wu and Yong\u00a0Jae Lee. 2023. Visual instruction tuning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2304.08485 (2023)."},{"key":"e_1_3_3_2_50_2","unstructured":"Zhe Liu Chunyang Chen Junjie Wang Mengzhuo Chen Boyu Wu Xing Che Dandan Wang and Qing Wang. 2023. Chatting with GPT-3 for Zero-Shot Human-Like Mobile Automated GUI Testing. arxiv:https:\/\/arXiv.org\/abs\/2305.09434\u00a0[cs.SE]"},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"publisher","DOI":"10.5555\/1378337.1378350"},{"key":"e_1_3_3_2_52_2","doi-asserted-by":"publisher","DOI":"10.1109\/WPC.1997.601265"},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1109\/WCRE.1996.558844"},{"key":"e_1_3_3_2_54_2","unstructured":"Melody\u00a0Marie Moore James\u00a0D. Foley and Spencer Rugaber. 1998. User Interface Reengineering. Ph.D. Dissertation. USA. AAI9918460."},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"publisher","unstructured":"Kevin Moran Carlos Bernal-C\u00e1rdenas Michael Curcio Richard Bonett and Denys Poshyvanyk. 2020. Machine Learning-Based Prototyping of Graphical User Interfaces for Mobile Apps. IEEE Transactions on Software Engineering 46 2 (2020) 196\u2013221. 10.1109\/TSE.2018.2844788","DOI":"10.1109\/TSE.2018.2844788"},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2015.32"},{"key":"e_1_3_3_2_57_2","unstructured":"OpenAI. [n.d.]. ChatGPT. https:\/\/openai.com\/blog\/chatgpt."},{"key":"e_1_3_3_2_58_2","unstructured":"OpenAI. 2023. GPT-4 Technical Report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.08774 (2023)."},{"key":"e_1_3_3_2_59_2","unstructured":"Long Ouyang Jeff Wu Xu Jiang Diogo Almeida Carroll\u00a0L. Wainwright Pamela Mishkin Chong Zhang Sandhini Agarwal Katarina Slama Alex Ray John Schulman Jacob Hilton Fraser Kelton Luke Miller Maddie Simens Amanda Askell Peter Welinder Paul Christiano Jan Leike and Ryan Lowe. 2022. Training language models to follow instructions with human feedback. arxiv:https:\/\/arXiv.org\/abs\/2203.02155\u00a0[cs.CL]"},{"key":"e_1_3_3_2_60_2","unstructured":"Baolin Peng Chunyuan Li Pengcheng He Michel Galley and Jianfeng Gao. 2023. Instruction tuning with gpt-4. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2304.03277 (2023)."},{"key":"e_1_3_3_2_61_2","doi-asserted-by":"publisher","DOI":"10.1145\/2047196.2047213"},{"key":"e_1_3_3_2_62_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_3_2_63_2","doi-asserted-by":"crossref","unstructured":"A. Sanchez\u00a0Ramon J. Sanchez\u00a0Cuadrado and J. Garcia\u00a0Molina. 2012. Model-driven reverse engineering of legacy graphical user interfaces. Automated Software Engineering 21 2 (2012) 147\u2013186. https:\/\/doi.org\/DOI: 10.1007\/s10515-013-0130-2","DOI":"10.1007\/s10515-013-0130-2"},{"key":"e_1_3_3_2_64_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517497"},{"key":"e_1_3_3_2_65_2","doi-asserted-by":"publisher","unstructured":"E. Stroulia M. El-Ramly P. Iglinski and P. Sorenson. 2003. User Interface Reverse Engineering in Support of Interface Migration to the Web. Automated Software Engg. 10 3 (July 2003) 271\u2013301. 10.1023\/A:1024460315173","DOI":"10.1023\/A:1024460315173"},{"key":"e_1_3_3_2_66_2","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3174078"},{"key":"e_1_3_3_2_67_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300305"},{"key":"e_1_3_3_2_68_2","unstructured":"Rohan Taori Ishaan Gulrajani Tianyi Zhang Yann Dubois Xuechen Li Carlos Guestrin Percy Liang and Tatsunori\u00a0B. Hashimoto. 2023. Stanford Alpaca: An Instruction-following LLaMA model. https:\/\/github.com\/tatsu-lab\/stanford_alpaca."},{"key":"e_1_3_3_2_69_2","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar et\u00a0al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.13971 (2023)."},{"key":"e_1_3_3_2_70_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580895"},{"key":"e_1_3_3_2_71_2","unstructured":"Hao Wen Yuanchun Li Guohong Liu Shanhui Zhao Tao Yu Toby Jia-Jun Li Shiqi Jiang Yunhao Liu Yaqin Zhang and Yunxin Liu. 2023. Empowering LLM to use Smartphone for Intelligent Task Automation. arxiv:https:\/\/arXiv.org\/abs\/2308.15272\u00a0[cs.AI]"},{"key":"e_1_3_3_2_72_2","doi-asserted-by":"publisher","DOI":"10.1145\/3293882.3330551"},{"key":"e_1_3_3_2_73_2","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474763"},{"key":"e_1_3_3_2_74_2","doi-asserted-by":"publisher","DOI":"10.1145\/1622176.1622213"},{"key":"e_1_3_3_2_75_2","unstructured":"Chen Yongxin Zhang Tonghui and Chen Jie. 2019. UI2code: How to fine-tune background and foreground analysis. Retrieved Feb 23 (2019) 2020."},{"key":"e_1_3_3_2_76_2","doi-asserted-by":"publisher","DOI":"10.1145\/302979.303039"},{"key":"e_1_3_3_2_77_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445186"},{"key":"e_1_3_3_2_78_2","unstructured":"Lianmin Zheng Wei-Lin Chiang Ying Sheng Siyuan Zhuang Zhanghao Wu Yonghao Zhuang Zi Lin Zhuohan Li Dacheng Li Eric Xing et\u00a0al. 2023. Judging LLM-as-a-judge with MT-Bench and Chatbot Arena. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.05685 (2023)."}],"event":{"name":"IUI '25: 30th International Conference on Intelligent User Interfaces","location":"Cagliari Italy","acronym":"IUI '25","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 30th International Conference on Intelligent User Interfaces"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3708359.3712129","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3708359.3712129","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:57:06Z","timestamp":1750298226000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3708359.3712129"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,24]]},"references-count":77,"alternative-id":["10.1145\/3708359.3712129","10.1145\/3708359"],"URL":"https:\/\/doi.org\/10.1145\/3708359.3712129","relation":{},"subject":[],"published":{"date-parts":[[2025,3,24]]},"assertion":[{"value":"2025-03-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}