{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T17:06:48Z","timestamp":1776100008036,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":61,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T00:00:00Z","timestamp":1745539200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,26]]},"DOI":"10.1145\/3706598.3713356","type":"proceedings-article","created":{"date-parts":[[2025,4,24]],"date-time":"2025-04-24T03:33:32Z","timestamp":1745465612000},"page":"1-24","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["From Operation to Cognition: Automatic Modeling Cognitive Dependencies from User Demonstrations for GUI Task Automation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-0620-7902","authenticated-orcid":false,"given":"Yiwen","family":"Yin","sequence":"first","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3126-2974","authenticated-orcid":false,"given":"Yu","family":"Mei","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2591-7993","authenticated-orcid":false,"given":"Chun","family":"Yu","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7902-7625","authenticated-orcid":false,"given":"Toby Jia-Jun","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6975-1310","authenticated-orcid":false,"given":"Aamir Khan","family":"Jadoon","sequence":"additional","affiliation":[{"name":"Department of Computer science and Technology, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1723-0613","authenticated-orcid":false,"given":"Sixiang","family":"Cheng","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1351-9034","authenticated-orcid":false,"given":"Weinan","family":"Shi","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8576-7882","authenticated-orcid":false,"given":"Mohan","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2273-6927","authenticated-orcid":false,"given":"Yuanchun","family":"Shi","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China and Qinghai University, Xining, Qinghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,4,25]]},"reference":[{"key":"e_1_3_3_3_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICAEA60387.2023.10414471"},{"key":"e_1_3_3_3_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474808"},{"key":"e_1_3_3_3_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/2983990.2984020"},{"key":"e_1_3_3_3_5_2","doi-asserted-by":"crossref","unstructured":"Maxime B\u00e9dard Abderrahmane Leshob Imen Benzarti Hafedh Mili Raqeebir Rab and Omar Hussain. 2024. A rule-based method to effectively adopt robotic process automation. Journal of Software: Evolution and Process 36 11 (2024) e2709.","DOI":"10.1002\/smr.2709"},{"key":"e_1_3_3_3_6_2","volume-title":"Taxonomy of educational objectives: The classification of educational goals. Handbook 1: Cognitive domain","author":"Bloom Benjamin\u00a0S","year":"1956","unstructured":"Benjamin\u00a0S Bloom, Max\u00a0D Engelhart, Edward\u00a0J Furst, Walker\u00a0H Hill, and David\u00a0R Krathwohl. 1956. Taxonomy of educational objectives: The classification of educational goals. Handbook 1: Cognitive domain. McKay New York."},{"key":"e_1_3_3_3_7_2","doi-asserted-by":"crossref","unstructured":"Marc\u00a0H Bornstein. 1986. Frames of Mind: The theory of multiple intelligences.","DOI":"10.2307\/3332707"},{"key":"e_1_3_3_3_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3242587.3242661"},{"key":"e_1_3_3_3_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/1378773.1378794"},{"key":"e_1_3_3_3_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3453483.3454047"},{"key":"e_1_3_3_3_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606720"},{"key":"e_1_3_3_3_12_2","unstructured":"Kanzhi Cheng Qiushi Sun Yougang Chu Fangzhi Xu Yantao Li Jianbing Zhang and Zhiyong Wu. 2024. Seeclick: Harnessing gui grounding for advanced visual gui agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.10935 (2024)."},{"key":"e_1_3_3_3_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/108844.108850"},{"key":"e_1_3_3_3_14_2","volume-title":"Watch what I do: programming by demonstration","author":"Cypher Allen","year":"1993","unstructured":"Allen Cypher and Daniel\u00a0Conrad Halbert. 1993. Watch what I do: programming by demonstration. MIT press."},{"key":"e_1_3_3_3_15_2","unstructured":"Xiang Deng Yu Gu Boyuan Zheng Shijie Chen Sam Stevens Boshi Wang Huan Sun and Yu Su. 2024. Mind2web: Towards a generalist agent for the web. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_3_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3490099.3511109"},{"key":"e_1_3_3_3_17_2","unstructured":"Jerry Fodor. 1975. The language of thought."},{"key":"e_1_3_3_3_18_2","doi-asserted-by":"publisher","DOI":"10.1093\/0198236360.001.0001"},{"key":"e_1_3_3_3_19_2","volume-title":"An introduction to qualitative research","author":"Hancock Beverley","year":"2001","unstructured":"Beverley Hancock, Elizabeth Ockleford, and Kate Windridge. 2001. An introduction to qualitative research. Trent focus group London."},{"key":"e_1_3_3_3_20_2","unstructured":"Sandra\u00a0G Hart. 1986. NASA task load index (TLX). (1986)."},{"key":"e_1_3_3_3_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01354"},{"key":"e_1_3_3_3_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3170427.3188532"},{"key":"e_1_3_3_3_23_2","doi-asserted-by":"crossref","unstructured":"Thanapong Intharah Daniyar Turmukhambetov and Gabriel\u00a0J Brostow. 2019. Hilc: domain-independent pbd system via computer vision and follow-up questions. ACM Transactions on Interactive Intelligent Systems (TiiS) 9 2-3 (2019) 1\u201327.","DOI":"10.1145\/3234508"},{"key":"e_1_3_3_3_24_2","doi-asserted-by":"crossref","unstructured":"Ziwei Ji Nayeon Lee Rita Frieske Tiezheng Yu Dan Su Yan Xu Etsuko Ishii Ye\u00a0Jin Bang Andrea Madotto and Pascale Fung. 2023. Survey of hallucination in natural language generation. Comput. Surveys 55 12 (2023) 1\u201338.","DOI":"10.1145\/3571730"},{"key":"e_1_3_3_3_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3671620"},{"key":"e_1_3_3_3_26_2","doi-asserted-by":"crossref","unstructured":"Tessa Lau. 2009. Why programming-by-demonstration systems fail: Lessons learned for usable ai. AI Magazine 30 4 (2009) 65\u201365.","DOI":"10.1609\/aimag.v30i4.2262"},{"key":"e_1_3_3_3_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/1866029.1866067"},{"key":"e_1_3_3_3_28_2","doi-asserted-by":"crossref","unstructured":"Tessa Lau Steven\u00a0A Wolfman Pedro Domingos and Daniel\u00a0S Weld. 2003. Programming by demonstration using version space algebra. Machine Learning 53 (2003) 111\u2013156.","DOI":"10.1023\/A:1025671410623"},{"key":"e_1_3_3_3_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3641915"},{"key":"e_1_3_3_3_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3025483"},{"key":"e_1_3_3_3_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/VLHCC.2018.8506506"},{"key":"e_1_3_3_3_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445049"},{"key":"e_1_3_3_3_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3332165.3347899"},{"key":"e_1_3_3_3_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3210240.3210339"},{"key":"e_1_3_3_3_35_2","unstructured":"Yang Li Jiacong He Xin Zhou Yuan Zhang and Jason Baldridge. 2020. Mapping natural language instructions to mobile UI action sequences. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2005.03776 (2020)."},{"key":"e_1_3_3_3_36_2","volume-title":"Your wish is my command: Programming by example","author":"Lieberman Henry","year":"2001","unstructured":"Henry Lieberman. 2001. Your wish is my command: Programming by example. Morgan Kaufmann."},{"key":"e_1_3_3_3_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/1502650.1502667"},{"key":"e_1_3_3_3_38_2","doi-asserted-by":"publisher","DOI":"10.1145\/1240624.1240767"},{"key":"e_1_3_3_3_39_2","volume-title":"The new taxonomy of educational objectives","author":"Marzano Robert\u00a0J","year":"2006","unstructured":"Robert\u00a0J Marzano and John\u00a0S Kendall. 2006. The new taxonomy of educational objectives. Corwin Press."},{"key":"e_1_3_3_3_40_2","doi-asserted-by":"crossref","unstructured":"Benjamin Matthies. 2020. Assessing the automation potentials of management reporting processes. International Journal of Digital Accounting Research 20 (2020) 75\u2013101.","DOI":"10.4192\/1577-8517-v20_4"},{"key":"e_1_3_3_3_41_2","unstructured":"Microsoft. 2021. Hooks. https:\/\/learn.microsoft.com\/en-us\/windows\/win32\/winmsg\/hooks Accessed: 2024-04."},{"key":"e_1_3_3_3_42_2","unstructured":"Microsoft. 2021. IUIAutomation interface. https:\/\/learn.microsoft.com\/en-us\/windows\/win32\/api\/uiautomationclient\/nn-uiautomationclient-iuiautomation Accessed: 2024-04."},{"key":"e_1_3_3_3_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517459"},{"key":"e_1_3_3_3_44_2","unstructured":"Blue Prism. 2023. SS&C Blue Prism. https:\/\/www.blueprism.com\/ Accessed: 2024-04."},{"key":"e_1_3_3_3_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3526113.3545691"},{"key":"e_1_3_3_3_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606822"},{"key":"e_1_3_3_3_47_2","unstructured":"Christopher Rawles Alice Li Daniel Rodriguez Oriana Riva and Timothy Lillicrap. 2023. Android in the wild: A large-scale dataset for android device control. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.10088 (2023)."},{"key":"e_1_3_3_3_48_2","doi-asserted-by":"publisher","DOI":"10.1145\/2700648.2811322"},{"key":"e_1_3_3_3_49_2","unstructured":"Keita Saito Akifumi Wachi Koki Wataoka and Youhei Akimoto. 2023. Verbosity bias in preference labeling by large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.10076 (2023)."},{"key":"e_1_3_3_3_50_2","unstructured":"Sivan Schwartz Avi Yaeli and Segev Shlomov. 2023. Enhancing trust in LLM-based AI automation agents: New considerations and future challenges. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.05391 (2023)."},{"key":"e_1_3_3_3_51_2","doi-asserted-by":"publisher","DOI":"10.1145\/3377325.3377515"},{"key":"e_1_3_3_3_52_2","doi-asserted-by":"publisher","DOI":"10.1016\/B0-08-044854-2\/01056-7"},{"key":"e_1_3_3_3_53_2","doi-asserted-by":"publisher","DOI":"10.1145\/237091.237118"},{"key":"e_1_3_3_3_54_2","unstructured":"UIPath. 2023. UIPath. https:\/\/www.uipath.com\/ Accessed: 2024-04."},{"key":"e_1_3_3_3_55_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580895"},{"key":"e_1_3_3_3_56_2","doi-asserted-by":"publisher","DOI":"10.1145\/2556288.2557407"},{"key":"e_1_3_3_3_57_2","unstructured":"Jason Wei Xuezhi Wang Dale Schuurmans Maarten Bosma Fei Xia Ed Chi Quoc\u00a0V Le Denny Zhou et\u00a0al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems 35 (2022) 24824\u201324837."},{"key":"e_1_3_3_3_58_2","doi-asserted-by":"publisher","DOI":"10.1145\/1622176.1622213"},{"key":"e_1_3_3_3_59_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445186"},{"key":"e_1_3_3_3_60_2","unstructured":"Zirui Zhao Wee\u00a0Sun Lee and David Hsu. 2024. Large language models as commonsense knowledge for large-scale task planning. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_3_61_2","unstructured":"Boyuan Zheng Boyu Gou Jihyung Kil Huan Sun and Yu Su. 2024. Gpt-4v (ision) is a generalist web agent if grounded. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.01614 (2024)."},{"key":"e_1_3_3_3_62_2","unstructured":"Shuyan Zhou Frank\u00a0F Xu Hao Zhu Xuhui Zhou Robert Lo Abishek Sridhar Xianyi Cheng Tianyue Ou Yonatan Bisk Daniel Fried et\u00a0al. 2023. Webarena: A realistic web environment for building autonomous agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.13854 (2023)."}],"event":{"name":"CHI 2025: CHI Conference on Human Factors in Computing Systems","location":"Yokohama Japan","acronym":"CHI '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2025 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706598.3713356","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3706598.3713356","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,4]],"date-time":"2025-07-04T05:00:26Z","timestamp":1751605226000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706598.3713356"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,25]]},"references-count":61,"alternative-id":["10.1145\/3706598.3713356","10.1145\/3706598"],"URL":"https:\/\/doi.org\/10.1145\/3706598.3713356","relation":{},"subject":[],"published":{"date-parts":[[2025,4,25]]},"assertion":[{"value":"2025-04-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}