{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:16:06Z","timestamp":1765307766394,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","funder":[{"name":"Zhejiang Leading Innovative and Entrepreneur Team Introduction Program","award":["2024R01007"],"award-info":[{"award-number":["2024R01007"]}]},{"name":"Digital Trust Centre Innovation Grant","award":["DTC-IGC-02"],"award-info":[{"award-number":["DTC-IGC-02"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755559","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:44:48Z","timestamp":1761371088000},"page":"6996-7005","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning to Be a Doctor: Searching for Effective Medical Agent Architectures"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-5183-3934","authenticated-orcid":false,"given":"Yangyang","family":"Zhuang","sequence":"first","affiliation":[{"name":"AGI Lab, Westlake University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6067-1334","authenticated-orcid":false,"given":"Wenjia","family":"Jiang","sequence":"additional","affiliation":[{"name":"AGI Lab, Westlake University, Hangzhou, China and Henan University, Kaifeng, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9120-6340","authenticated-orcid":false,"given":"Jia-Yu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Affiliated Hospital of Xuzhou Medical University, Xuzhou, China and Xuzhou Medical University, Xuzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3786-8842","authenticated-orcid":false,"given":"Ze","family":"Yang","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4675-7055","authenticated-orcid":false,"given":"Joey Tianyi","family":"Zhou","sequence":"additional","affiliation":[{"name":"IHPC, Agency for Science, Technology and Research, Singapore, Singapore and CFAR, Agency for Science, Technology and Research, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6344-2824","authenticated-orcid":false,"given":"Chi","family":"Zhang","sequence":"additional","affiliation":[{"name":"AGI Lab, Westlake University, Hangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al., 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","unstructured":"Anthropic. 2024. Claude 3.5 Sonnet. https:\/\/www.anthropic.com\/news\/3-5-models-and-computer-use"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3630104"},{"key":"e_1_3_2_1_4_1","unstructured":"Guangyao Chen Siwei Dong Yu Shu Ge Zhang Jaward Sesay B\u00f6rje F. Karlsson Jie Fu and Yemin Shi. 2024a. AutoAgents: A Framework for Automatic Agent Generation. arXiv:2309.17288 [cs.AI] https:\/\/arxiv.org\/abs\/2309.17288"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.381"},{"key":"e_1_3_2_1_6_1","unstructured":"Shuaihang Chen Yuanxing Liu Wei Han Weinan Zhang and Ting Liu. 2025. A Survey on LLM-based Multi-Agent System: Recent Advances and New Frontiers in Application. arXiv:2412.17481 [cs.CL] https:\/\/arxiv.org\/abs\/2412.17481"},{"key":"e_1_3_2_1_7_1","unstructured":"Weize Chen Yusheng Su Jingwei Zuo Cheng Yang Chenfei Yuan Chi-Min Chan Heyang Yu Yaxi Lu Yi-Hsin Hung Chen Qian Yujia Qin Xin Cong Ruobing Xie Zhiyuan Liu Maosong Sun and Jie Zhou. 2023. AgentVerse: Facilitating Multi-Agent Collaboration and Exploring Emergent Behaviors. arXiv:2308.10848 [cs.CL] https:\/\/arxiv.org\/abs\/2308.10848"},{"key":"e_1_3_2_1_8_1","volume-title":"Roberto Novoa, and James Zou.","author":"Daneshjou Roxana","year":"2023","unstructured":"Roxana Daneshjou, Mert Yuksekgonul, Zhuo Ran Cai, Roberto Novoa, and James Zou. 2023. SkinCon: A skin disease dataset densely annotated by domain experts for fine-grained model debugging and analysis. arXiv:2302.00785 [cs.CV] https:\/\/arxiv.org\/abs\/2302.00785"},{"key":"e_1_3_2_1_9_1","volume-title":"Automated machine learning: State-of-the-art and open challenges. arXiv preprint arXiv:1906.02287","author":"Elshawi Radwa","year":"2019","unstructured":"Radwa Elshawi, Mohamed Maher, and Sherif Sakr. 2019. Automated machine learning: State-of-the-art and open challenges. arXiv preprint arXiv:1906.02287 (2019)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3555634"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00201"},{"key":"e_1_3_2_1_12_1","unstructured":"Daya Guo Dejian Yang Haowei Zhang Junxiao Song Ruoyu Zhang Runxin Xu Qihao Zhu Shirong Ma Peiyi Wang Xiao Bi et al. 2025. Deepseek-r1: Incentivizing reasoning capability in llms via reinforcement learning. arXiv preprint arXiv:2501.12948 (2025)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-024-03097-1"},{"key":"e_1_3_2_1_14_1","volume-title":"Zijuan Lin, Liyang Zhou, Chenyu Ran, Lingfeng Xiao, Chenglin Wu, and J\u00fcrgen Schmidhuber.","author":"Hong Sirui","year":"2024","unstructured":"Sirui Hong, Mingchen Zhuge, Jiaqi Chen, Xiawu Zheng, Yuheng Cheng, Ceyao Zhang, Jinlin Wang, Zili Wang, Steven Ka Shing Yau, Zijuan Lin, Liyang Zhou, Chenyu Ran, Lingfeng Xiao, Chenglin Wu, and J\u00fcrgen Schmidhuber. 2024. MetaGPT: Meta Programming for A Multi-Agent Collaborative Framework. arXiv:2308.00352 [cs.AI] https:\/\/arxiv.org\/abs\/2308.00352"},{"key":"e_1_3_2_1_15_1","unstructured":"Yue Hu Yuzhu Cai Yaxin Du Xinyu Zhu Xiangrui Liu Zijie Yu Yuchen Hou Shuo Tang and Siheng Chen. 2024. Self-Evolving Multi-Agent Collaboration Networks for Software Development. arXiv:2410.16946 [cs.SE] https:\/\/arxiv.org\/abs\/2410.16946"},{"key":"e_1_3_2_1_16_1","unstructured":"LangChain Inc. 2023. LangGraph. LangChain Inc. https:\/\/langchain-ai.github.io\/langgraph\/"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Dongfu Jiang Xiang Ren and Bill Yuchen Lin. 2023. LLM-Blender: Ensembling Large Language Models with Pairwise Ranking and Generative Fusion. arXiv:2306.02561 [cs.CL] https:\/\/arxiv.org\/abs\/2306.02561","DOI":"10.18653\/v1\/2023.acl-long.792"},{"key":"e_1_3_2_1_18_1","volume-title":"Joey Tianyi Zhou, and Chi Zhang","author":"Jiang Wenjia","year":"2025","unstructured":"Wenjia Jiang, Yangyang Zhuang, Chenxi Song, Xu Yang, Joey Tianyi Zhou, and Chi Zhang. 2025. Appagentx: Evolving gui agents as proficient smartphone users. arXiv preprint arXiv:2503.02268 (2025)."},{"key":"e_1_3_2_1_19_1","volume-title":"Xuhai Xu, Daniel McDuff, Hyeonhoon Lee, Marzyeh Ghassemi, Cynthia Breazeal, and Hae Won Park.","author":"Kim Yubin","year":"2024","unstructured":"Yubin Kim, Chanwoo Park, Hyewon Jeong, Yik Siu Chan, Xuhai Xu, Daniel McDuff, Hyeonhoon Lee, Marzyeh Ghassemi, Cynthia Breazeal, and Hae Won Park. 2024. MDAgents: An Adaptive Collaboration of LLMs for Medical Decision-Making. arXiv:2404.15155 [cs.CL] https:\/\/arxiv.org\/abs\/2404.15155"},{"key":"e_1_3_2_1_20_1","unstructured":"Knut Sveidqvist. 2024. Mermaid Documentation. https:\/\/mermaid.js.org\/. Accessed: 2025-07-30."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.510"},{"key":"e_1_3_2_1_22_1","volume-title":"Agent Hospital: A Simulacrum of Hospital with Evolvable Medical Agents. arXiv:2405.02957 [cs.AI] https:\/\/arxiv.org\/abs\/2405.02957","author":"Li Junkai","year":"2025","unstructured":"Junkai Li, Yunghwei Lai, Weitao Li, Jingyi Ren, Meng Zhang, Xinhui Kang, Siyu Wang, Peng Li, Ya-Qin Zhang, Weizhi Ma, and Yang Liu. 2025. Agent Hospital: A Simulacrum of Hospital with Evolvable Medical Agents. arXiv:2405.02957 [cs.AI] https:\/\/arxiv.org\/abs\/2405.02957"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.7759\/cureus.40895"},{"key":"e_1_3_2_1_24_1","volume-title":"Appagent v2: Advanced agent for flexible mobile interactions. arXiv preprint arXiv:2408.11824","author":"Li Yanda","year":"2024","unstructured":"Yanda Li, Chi Zhang, Wanqi Yang, Bin Fu, Pei Cheng, Xin Chen, Ling Chen, and Yunchao Wei. 2024b. Appagent v2: Advanced agent for flexible mobile interactions. arXiv preprint arXiv:2408.11824 (2024)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.992"},{"key":"e_1_3_2_1_26_1","unstructured":"Ben Mann Nick Ryder Melanie Subbiah J Kaplan P Dhariwal A Neelakantan P Shyam G Sastry A Askell S Agarwal et al. 2020. Language models are few-shot learners. arXiv preprint arXiv:2005.14165 Vol. 1 3 (2020) 3."},{"key":"e_1_3_2_1_27_1","volume-title":"CA, USA","author":"Menick Jacob","year":"2024","unstructured":"Jacob Menick, Kevin Lu, Shengjia Zhao, E Wallace, H Ren, H Hu, N Stathas, and F Petroski Such. 2024. GPT-4o mini: advancing cost-efficient intelligence. Open AI: San Francisco, CA, USA (2024)."},{"key":"e_1_3_2_1_28_1","unstructured":"Syed Ali Raza Naqvi. 2023. Augmented Skin Conditions Image Dataset. Kaggle. https:\/\/www.kaggle.com\/datasets\/syedalinaqvi\/augmented-skin-conditions-image-dataset"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606763"},{"key":"e_1_3_2_1_30_1","volume-title":"Pinecone Systems","author":"Inc.","year":"2023","unstructured":"Inc. Pinecone Systems. 2023. Pinecone: The Vector Database for AI Applications. Pinecone Systems, Inc. https:\/\/www.pinecone.io\/ A vector database designed for scalable and performant AI applications, enabling semantic search, metadata filtering, and real-time updates.."},{"key":"e_1_3_2_1_31_1","volume-title":"Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever.","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. arXiv:2103.00020 [cs.CV] https:\/\/arxiv.org\/abs\/2103.00020"},{"key":"e_1_3_2_1_32_1","volume-title":"AgentClinic: a multimodal agent benchmark to evaluate AI in simulated clinical environments. ArXiv","author":"Schmidgall Samuel","year":"2024","unstructured":"Samuel Schmidgall, Rojin Ziaei, Carl Harris, Eduardo Reis, Jeffrey Jopling, and Michael Moor. 2024. AgentClinic: a multimodal agent benchmark to evaluate AI in simulated clinical environments. ArXiv, Vol. abs\/2405.07960 (2024). https:\/\/api.semanticscholar.org\/CorpusID:269757778"},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Shinn Noah","year":"2023","unstructured":"Noah Shinn, Federico Cassano, Ashwin Gopinath, Karthik Narasimhan, and Shunyu Yao. 2023a. Reflexion: language agents with verbal reinforcement learning. In Proceedings of the 37th International Conference on Neural Information Processing Systems (New Orleans, LA, USA) (NIPS '23). Curran Associates Inc., Red Hook, NY, USA, Article 377, 19 pages."},{"key":"e_1_3_2_1_34_1","volume-title":"Reflexion: Language agents with verbal reinforcement learning","author":"Shinn Noah","year":"2023","unstructured":"Noah Shinn, Federico Cassano, Beck Labash, Ashwin Gopinath, Karthik Narasimhan, and Shunyu Yao. 2023b. Reflexion: Language agents with verbal reinforcement learning, 2023. URL https:\/\/arxiv.org\/abs\/2303.11366, Vol. 1 (2023)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-023-06291-2"},{"key":"e_1_3_2_1_36_1","volume-title":"Nguyen","author":"Tran Khanh-Tung","year":"2025","unstructured":"Khanh-Tung Tran, Dung Dao, Minh-Duong Nguyen, Quoc-Viet Pham, Barry O'Sullivan, and Hoang D. Nguyen. 2025. Multi-Agent Collaboration Mechanisms: A Survey of LLMs. arXiv:2501.06322 [cs.AI] https:\/\/arxiv.org\/abs\/2501.06322"},{"key":"e_1_3_2_1_37_1","volume-title":"Rethinking the bounds of llm reasoning: Are multi-agent discussions the key? arXiv preprint arXiv:2402.18272","author":"Wang Qineng","year":"2024","unstructured":"Qineng Wang, Zihao Wang, Ying Su, Hanghang Tong, and Yangqiu Song. 2024a. Rethinking the bounds of llm reasoning: Are multi-agent discussions the key? arXiv preprint arXiv:2402.18272 (2024)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1038\/s44172-024-00271-8"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.5555\/3600270.3602070"},{"key":"e_1_3_2_1_40_1","volume-title":"A survey on neural architecture search. arXiv preprint arXiv:1905.01392","author":"Wistuba Martin","year":"2019","unstructured":"Martin Wistuba, Ambrish Rawat, and Tejaswini Pedapati. 2019. A survey on neural architecture search. arXiv preprint arXiv:1905.01392 (2019)."},{"volume-title":"Workshop, Teven Le Scao, Angela Fan, Christopher Akiki, Ellie Pavlick, Suzana Ili\u0107, Daniel Hesslow, Roman Castagn\u00e9, Alexandra Sasha Luccioni, Fran\u00e7ois Yvon, et al., 2022","year":"2022","key":"e_1_3_2_1_41_1","unstructured":"BigScience Workshop, Teven Le Scao, Angela Fan, Christopher Akiki, Ellie Pavlick, Suzana Ili\u0107, Daniel Hesslow, Roman Castagn\u00e9, Alexandra Sasha Luccioni, Fran\u00e7ois Yvon, et al., 2022. Bloom: A 176b-parameter open-access multilingual language model. arXiv preprint arXiv:2211.05100 (2022)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00930"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3713600"},{"key":"e_1_3_2_1_44_1","unstructured":"Jiayi Zhang Jinyu Xiang Zhaoyang Yu Fengwei Teng Xionghui Chen Jiaqi Chen Mingchen Zhuge Xin Cheng Sirui Hong Jinlin Wang Bingnan Zheng Bang Liu Yuyu Luo and Chenglin Wu. 2025a. AFlow: Automating Agentic Workflow Generation. arXiv:2410.10762 [cs.AI] https:\/\/arxiv.org\/abs\/2410.10762"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-024-50043-3"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755559","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:13:27Z","timestamp":1765307607000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755559"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":45,"alternative-id":["10.1145\/3746027.3755559","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755559","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}