{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T17:53:59Z","timestamp":1772906039674,"version":"3.50.1"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032051134","type":"print"},{"value":"9783032051141","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,9,21]],"date-time":"2025-09-21T00:00:00Z","timestamp":1758412800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,21]],"date-time":"2025-09-21T00:00:00Z","timestamp":1758412800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-05114-1_14","type":"book-chapter","created":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T14:10:18Z","timestamp":1758377418000},"page":"139-148","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["CSAP-Assist: Instrument-Agent Dialogue Empowered Vision-Language Models for\u00a0Collaborative Surgical Action Planning"],"prefix":"10.1007","author":[{"given":"Jie","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Mengya","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Yiwei","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Qi","family":"Dou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,9,21]]},"reference":[{"issue":"1","key":"14_CR1","doi-asserted-by":"publisher","first-page":"6676","DOI":"10.1038\/s41467-023-42451-8","volume":"14","author":"J Cao","year":"2023","unstructured":"Cao, J., et al.: Intelligent surgical workflow recognition for endoscopic submucosal dissection with real-time animal study. Nat. Commun. 14(1), 6676 (2023)","journal-title":"Nat. Commun."},{"key":"14_CR2","unstructured":"Chen, Y., et al.: Egoplan-bench: benchmarking multimodal large language models for human-level planning. arXiv preprint arXiv:2312.06722 (2023)"},{"key":"14_CR3","doi-asserted-by":"crossref","unstructured":"Chen, Y., Arkin, J., Zhang, Y., Roy, N., Fan, C.: Scalable multi-robot collaboration with large language models: Centralized or decentralized systems? In: 2024 IEEE International Conference on Robotics and Automation (ICRA), pp. 4311\u20134317. IEEE (2024)","DOI":"10.1109\/ICRA57147.2024.10610676"},{"key":"14_CR4","doi-asserted-by":"crossref","unstructured":"Fu, J., Long, Y., Chen, K., Wei, W., Dou, Q.: Multi-objective cross-task learning via goal-conditioned gpt-based decision transformers for surgical robot task automation. arXiv preprint arXiv:2405.18757 (2024)","DOI":"10.1109\/ICRA57147.2024.10611051"},{"key":"14_CR5","unstructured":"Huang, D., Hilliges, O., Van\u00a0Gool, L., Wang, X.: Palm: predicting actions through language models @ ego4d long-term action anticipation challenge. arXiv preprint arXiv:2306.16545 (2023)"},{"key":"14_CR6","doi-asserted-by":"publisher","first-page":"106452","DOI":"10.1016\/j.cmpb.2021.106452","volume":"212","author":"A Huaulm\u00e9","year":"2021","unstructured":"Huaulm\u00e9, A., et al.: Micro-surgical anastomose workflow recognition challenge report. Comput. Methods Programs Biomed. 212, 106452 (2021)","journal-title":"Comput. Methods Programs Biomed."},{"key":"14_CR7","doi-asserted-by":"crossref","unstructured":"Islam, M.M., et al.: Propose, assess, search: harnessing llms for goal-oriented planning in instructional videos. arXiv preprint arXiv:2409.20557 (2024)","DOI":"10.1007\/978-3-031-72655-2_25"},{"key":"14_CR8","unstructured":"Khan, A., et al.: Debating with more persuasive llms leads to more truthful answers. arXiv preprint arXiv:2402.06782 (2024)"},{"key":"14_CR9","doi-asserted-by":"crossref","unstructured":"Kim, S., Huang, D., Xian, Y., Hilliges, O., Van\u00a0Gool, L., Wang, X.: Palm: predicting actions through language models. In: European Conference on Computer Vision, pp. 140\u2013158. Springer (2024)","DOI":"10.1007\/978-3-031-73007-8_9"},{"key":"14_CR10","doi-asserted-by":"crossref","unstructured":"Long, Y., Li, X., Cai, W., Dong, H.: Discuss before moving: visual language navigation via multi-expert discussions. In: 2024 IEEE International Conference on Robotics and Automation (ICRA), pp. 17380\u201317387. IEEE (2024)","DOI":"10.1109\/ICRA57147.2024.10611565"},{"key":"14_CR11","doi-asserted-by":"crossref","unstructured":"Mandi, Z., Jain, S., Song, S.: Roco: Dialectic multi-robot collaboration with large language models. In: 2024 IEEE International Conference on Robotics and Automation (ICRA), pp. 286\u2013299. IEEE (2024)","DOI":"10.1109\/ICRA57147.2024.10610855"},{"key":"14_CR12","doi-asserted-by":"crossref","unstructured":"Mutegeki, R., Han, D.S.: A cnn-lstm approach to human activity recognition. In: 2020 International Conference on Artificial Intelligence in Information and Communication (ICAIIC), pp. 362\u2013366. IEEE (2020)","DOI":"10.1109\/ICAIIC48513.2020.9065078"},{"key":"14_CR13","doi-asserted-by":"crossref","unstructured":"Patel, D., Eghbalzadeh, H., Kamra, N., Iuzzolino, M.L., Jain, U., Desai, R.: Pretrained language models as visual planners for human assistance. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15302\u201315314 (2023)","DOI":"10.1109\/ICCV51070.2023.01404"},{"key":"14_CR14","unstructured":"Psychogyios, D., et\u00a0al.: Sar-rarp50: segmentation of surgical instrumentation and action recognition on robot-assisted radical prostatectomy challenge. arXiv preprint arXiv:2401.00496 (2023)"},{"key":"14_CR15","unstructured":"Xie, J., et al.: Revealing the barriers of language agents in planning. arXiv preprint arXiv:2410.12409 (2024)"},{"key":"14_CR16","doi-asserted-by":"crossref","unstructured":"Zhang, C., et\u00a0al.: Proagent: building proactive cooperative agents with large language models. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 17591\u201317599 (2024)","DOI":"10.1609\/aaai.v38i16.29710"},{"key":"14_CR17","doi-asserted-by":"crossref","unstructured":"Zhang, J., et al.: Leveraging surgical activity grammar for primary intention prediction in laparoscopy procedures. arXiv preprint arXiv:2409.19579 (2024)","DOI":"10.1109\/ICRA55743.2025.11127338"},{"key":"14_CR18","unstructured":"Zhao, Q., et al.: Antgpt: can large language models help long-term action anticipation from videos? arXiv preprint arXiv:2307.16368 (2023)"},{"key":"14_CR19","doi-asserted-by":"crossref","unstructured":"Zhao, Z., Fang, F., Yang, X., Xu, Q., Guan, C., Zhou, S.K.: See, predict, plan: Diffusion for procedure planning in robotic surgical videos. In: International Conference on Medical Image Computing and Computer-Assisted Intervention. pp. 553\u2013563. Springer (2024)","DOI":"10.1007\/978-3-031-72089-5_52"},{"key":"14_CR20","first-page":"28611","volume":"36","author":"Z Zhou","year":"2023","unstructured":"Zhou, Z., Alabi, O., Wei, M., Vercauteren, T., Shi, M.: Text promptable surgical instrument segmentation with vision-language models. Adv. Neural. Inf. Process. Syst. 36, 28611\u201328623 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."}],"container-title":["Lecture Notes in Computer Science","Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2025"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-05114-1_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,20]],"date-time":"2025-09-20T14:10:25Z","timestamp":1758377425000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-05114-1_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,21]]},"ISBN":["9783032051134","9783032051141"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-05114-1_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,21]]},"assertion":[{"value":"21 September 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"MICCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Medical Image Computing and Computer-Assisted Intervention","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miccai2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conferences.miccai.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}