{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:12:21Z","timestamp":1777655541751,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,19]],"date-time":"2023-04-19T00:00:00Z","timestamp":1681862400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,19]]},"DOI":"10.1145\/3544548.3581158","type":"proceedings-article","created":{"date-parts":[[2023,4,20]],"date-time":"2023-04-20T04:26:08Z","timestamp":1681964768000},"page":"1-14","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":47,"title":["WebUI: A Dataset for Enhancing Visual UI Understanding with Web Semantics"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5101-0557","authenticated-orcid":false,"given":"Jason","family":"Wu","sequence":"first","affiliation":[{"name":"Human-Computer Interaction Institute, Carnegie Mellon University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0593-0667","authenticated-orcid":false,"given":"Siyan","family":"Wang","sequence":"additional","affiliation":[{"name":"Wellesley College, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0983-5202","authenticated-orcid":false,"given":"Siman","family":"Shen","sequence":"additional","affiliation":[{"name":"Grinnell College, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6335-5904","authenticated-orcid":false,"given":"Yi-Hao","family":"Peng","sequence":"additional","affiliation":[{"name":"Human-Computer Interaction Institute, Carnegie Mellon University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6880-8546","authenticated-orcid":false,"given":"Jeffrey","family":"Nichols","sequence":"additional","affiliation":[{"name":"Snooty Bird LLC, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2072-0625","authenticated-orcid":false,"given":"Jeffrey P","family":"Bigham","sequence":"additional","affiliation":[{"name":"Human-Computer Interaction Institute, Carnegie Mellon University, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,4,19]]},"reference":[{"key":"e_1_3_3_2_1_1","unstructured":"2022. AutoIt Function PixelSearch. https:\/\/www.autoitscript.com\/autoit3\/docs\/functions\/PixelSearch.htm."},{"key":"e_1_3_3_2_2_1","unstructured":"2022. Chrome DevTools engineering blog Full Accessibility Tree in Chrome DevTools. https:\/\/developer.chrome.com\/blog\/full-accessibility-tree\/. Accessed: 2022-09-15."},{"key":"e_1_3_3_2_3_1","unstructured":"2022. Puppeteer - Chrome. https:\/\/developer.chrome.com\/docs\/puppeteer\/. Accessed: 2022-09-15."},{"key":"e_1_3_3_2_4_1","unstructured":"2022. What is the ideal screen size for responsive design?https:\/\/www.browserstack.com\/guide\/ideal-screen-sizes-for-responsive-design. Accessed: 2022-09-15."},{"key":"e_1_3_3_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2022.3192279"},{"key":"e_1_3_3_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445762"},{"key":"e_1_3_3_2_7_1","unstructured":"Andrea Burns Deniz Arsan Sanjna Agrawal Ranjitha Kumar Kate Saenko and Bryan\u00a0A Plummer. 2022. Interactive Mobile App Navigation with Uncertain or Under-specified Natural Language Commands. arXiv preprint arXiv:2202.02312(2022)."},{"key":"e_1_3_3_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2047196.2047228"},{"key":"e_1_3_3_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2009.2015974"},{"key":"e_1_3_3_2_10_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.953"},{"key":"e_1_3_3_2_11_1","volume-title":"Towards Complete Icon Labeling in Mobile Applications. In CHI Conference on Human Factors in Computing Systems. 1\u201314","author":"Chen Jieshan","year":"2022","unstructured":"Jieshan Chen, Amanda Swearngin, Jason Wu, Titus Barik, Jeffrey Nichols, and Xiaoyi Zhang. 2022. Towards Complete Icon Labeling in Mobile Applications. In CHI Conference on Human Factors in Computing Systems. 1\u201314."},{"key":"e_1_3_3_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3409691"},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE43902.2021.00091"},{"key":"e_1_3_3_2_14_1","volume-title":"Artificial Intelligence for Human Computer Interaction: A Modern Approach","author":"Deka Biplab","unstructured":"Biplab Deka, Bardia Doosti, Forrest Huang, Chad Franzen, Joshua Hibschman, Daniel Afergan, Yang Li, Ranjitha Kumar, Tao Dong, and Jeffrey P Nichols. 2021. An Early Rico Retrospective: Three Years of Uses for a Mobile App Dataset. In Artificial Intelligence for Human Computer Interaction: A Modern Approach. Springer, 229\u2013256."},{"key":"e_1_3_3_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3126594.3126651"},{"key":"e_1_3_3_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2984511.2984581"},{"key":"e_1_3_3_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_3_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/1753326.1753554"},{"key":"e_1_3_3_2_19_1","volume-title":"Understanding how image quality affects deep neural networks. In 2016 eighth international conference on quality of multimedia experience (QoMEX)","author":"Dodge Samuel","unstructured":"Samuel Dodge and Lina Karam. 2016. Understanding how image quality affects deep neural networks. In 2016 eighth international conference on quality of multimedia experience (QoMEX). IEEE, 1\u20136."},{"key":"e_1_3_3_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3490099.3511109"},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3502143"},{"key":"e_1_3_3_2_22_1","volume-title":"Domain-adversarial training of neural networks. The journal of machine learning research 17, 1","author":"Ganin Yaroslav","year":"2016","unstructured":"Yaroslav Ganin, Evgeniya Ustinova, Hana Ajakan, Pascal Germain, Hugo Larochelle, Fran\u00e7ois Laviolette, Mario Marchand, and Victor Lempitsky. 2016. Domain-adversarial training of neural networks. The journal of machine learning research 17, 1 (2016), 2096\u20132030."},{"key":"e_1_3_3_2_23_1","unstructured":"Leo Gao Stella Biderman Sid Black Laurence Golding Travis Hoppe Charles Foster Jason Phang Horace He Anish Thite Noa Nabeshima 2020. The pile: An 800gb dataset of diverse text for language modeling. arXiv preprint arXiv:2101.00027(2020)."},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2006.100"},{"key":"e_1_3_3_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_2_26_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i7.16741"},{"key":"e_1_3_3_2_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_39"},{"key":"e_1_3_3_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2470654.2466420"},{"key":"e_1_3_3_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3406324.3410710"},{"key":"e_1_3_3_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3502042"},{"key":"e_1_3_3_2_31_1","volume-title":"Proceedings of the 2017 CHI conference on human factors in computing systems. 6038\u20136049","author":"Jia-Jun Li Toby","year":"2017","unstructured":"Toby Jia-Jun Li, Amos Azaria, and Brad\u00a0A Myers. 2017. SUGILITE: creating multimodal smartphone automation by demonstration. In Proceedings of the 2017 CHI conference on human factors in computing systems. 6038\u20136049."},{"key":"e_1_3_3_2_32_1","unstructured":"Yang Li Jiacong He Xin Zhou Yuan Zhang and Jason Baldridge. 2020. Mapping natural language instructions to mobile UI action sequences. arXiv preprint arXiv:2005.03776(2020)."},{"key":"e_1_3_3_2_33_1","volume-title":"2017 IEEE\/ACM 39th International Conference on Software Engineering Companion (ICSE-C). IEEE, 23\u201326","author":"Li Yuanchun","year":"2017","unstructured":"Yuanchun Li, Ziyue Yang, Yao Guo, and Xiangqun Chen. 2017. Droidbot: a lightweight ui-guided test input generator for android. In 2017 IEEE\/ACM 39th International Conference on Software Engineering Companion (ICSE-C). IEEE, 23\u201326."},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2019.00104"},{"key":"e_1_3_3_2_35_1","volume-title":"Multibench: Multiscale benchmarks for multimodal representation learning. arXiv preprint arXiv:2107.07502(2021).","author":"Liang Paul\u00a0Pu","year":"2021","unstructured":"Paul\u00a0Pu Liang, Yiwei Lyu, Xiang Fan, Zetian Wu, Yun Cheng, Jason Wu, Leslie Chen, Peter Wu, Michelle\u00a0A Lee, Yuke Zhu, 2021. Multibench: Multiscale benchmarks for multimodal representation learning. arXiv preprint arXiv:2107.07502(2021)."},{"key":"e_1_3_3_2_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_3_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3242587.3242650"},{"key":"e_1_3_3_2_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3191513"},{"key":"e_1_3_3_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2018.2844788"},{"key":"e_1_3_3_2_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3180155.3180246"},{"key":"e_1_3_3_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2009.191"},{"key":"e_1_3_3_2_43_1","unstructured":"Richard\u00a0S. Schwerdtfeger. 1991. Making the GUI Talk.ftp:\/\/service.boulder.ibm.com\/sns\/sr-os2\/sr2doc\/guitalk.txt."},{"key":"e_1_3_3_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397482.3450725"},{"key":"e_1_3_3_2_45_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556(2014)."},{"key":"e_1_3_3_2_46_1","volume-title":"Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research 15, 1","author":"Srivastava Nitish","year":"2014","unstructured":"Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever, and Ruslan Salakhutdinov. 2014. Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research 15, 1 (2014), 1929\u20131958."},{"key":"e_1_3_3_2_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300305"},{"key":"e_1_3_3_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00972"},{"key":"e_1_3_3_2_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474765"},{"key":"e_1_3_3_2_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474763"},{"key":"e_1_3_3_2_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3417940"},{"key":"e_1_3_3_2_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01070"},{"key":"e_1_3_3_2_53_1","unstructured":"I\u00a0Zeki Yalniz Herv\u00e9 J\u00e9gou Kan Chen Manohar Paluri and Dhruv Mahajan. 2019. Billion-scale semi-supervised learning for image classification. arXiv preprint arXiv:1905.00546(2019)."},{"key":"e_1_3_3_2_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/1622176.1622213"},{"key":"e_1_3_3_2_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445186"}],"event":{"name":"CHI '23: CHI Conference on Human Factors in Computing Systems","location":"Hamburg Germany","acronym":"CHI '23","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2023 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3544548.3581158","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3544548.3581158","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:46Z","timestamp":1750178806000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3544548.3581158"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,19]]},"references-count":55,"alternative-id":["10.1145\/3544548.3581158","10.1145\/3544548"],"URL":"https:\/\/doi.org\/10.1145\/3544548.3581158","relation":{},"subject":[],"published":{"date-parts":[[2023,4,19]]},"assertion":[{"value":"2023-04-19","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}