{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T19:54:47Z","timestamp":1775073287416,"version":"3.50.1"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032080080","type":"print"},{"value":"9783032080097","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T00:00:00Z","timestamp":1760227200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T00:00:00Z","timestamp":1760227200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-08009-7_20","type":"book-chapter","created":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T04:37:14Z","timestamp":1760157434000},"page":"201-211","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Estimating 2D Keypoints of\u00a0Surgical Tools Using Vision-Language Models with\u00a0Low-Rank Adaptation"],"prefix":"10.1007","author":[{"given":"Krit","family":"Duangprom","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tryphon","family":"Lambrou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Binod","family":"Bhattarai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,12]]},"reference":[{"key":"20_CR1","doi-asserted-by":"crossref","unstructured":"Wu, Z., et al.: SurgPose: A Dataset for Articulated Robotic Surgical Tool Pose Estimation and Tracking. arXiv preprint arXiv:2502.11534 (2025)","DOI":"10.1109\/ICRA55743.2025.11127958"},{"key":"20_CR2","doi-asserted-by":"crossref","unstructured":"Hamza, H., Shabir, D., Aboumarzouk, O., Al-Ansari, A., Shaban, K., Navkar, N.V.: Automated skills assessment in open surgery: a scoping review. Eng. Appl. Artif. Intell. 153, 110893. Elsevier (2025)","DOI":"10.1016\/j.engappai.2025.110893"},{"key":"20_CR3","doi-asserted-by":"crossref","unstructured":"Bkheet, E., D\u2019Angelo, A.-L., Goldbraikh, A., Laufer, S.: Using hand pose estimation to automate open surgery training feedback. Int. J. Comput. Assist. Radiol. Surgery 18(7), 1279\u20131285. Springer (2023)","DOI":"10.1007\/s11548-023-02947-6"},{"key":"20_CR4","doi-asserted-by":"crossref","unstructured":"Hein, J., et al.: Next-generation surgical navigation: marker-less multi-view 6dof pose estimation of surgical instruments. Med. Image Anal. 103613. Elsevier (2025)","DOI":"10.1016\/j.media.2025.103613"},{"key":"20_CR5","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5693\u20135703 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"20_CR6","doi-asserted-by":"crossref","unstructured":"Spektor, R., Friedman, T., Or, I., Bolotin, G., Laufer, S.: Monocular Pose Estimation of Articulated Surgical Instruments in Open Surgery. arXiv preprint arXiv:2407.12138 (2024)","DOI":"10.1016\/j.media.2025.103618"},{"key":"20_CR7","doi-asserted-by":"crossref","unstructured":"Maji, D., Nagori, S., Mathew, M., Poddar, D.: YOLO-Pose: Enhancing YOLO for multi-person pose estimation using object keypoint similarity loss. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2637\u20132646 (2022)","DOI":"10.1109\/CVPRW56347.2022.00297"},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"Aboukhadra, A.T., Robertini, N., Malik, J., Elhayek, A., Reis, G., Stricker, D.: SurgeoNet: realtime 3D pose estimation of articulated surgical instruments from stereo images using a synthetically-trained network. In: DAGM German Conference on Pattern Recognition, pp. 199\u2013211. Springer (2024)","DOI":"10.1007\/978-3-031-85181-0_13"},{"key":"20_CR9","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"20_CR10","unstructured":"Bai, S., et al.: Qwen2.5-VL Technical Report. arXiv preprint arXiv:2502.13923 (2025)"},{"key":"20_CR11","doi-asserted-by":"crossref","unstructured":"Subramanian, S., Ng, E., M\u00fcller, L., Klein, D., Ginosar, S., Darrell, T.: Pose priors from language models. In: Proceedings of the Computer Vision and Pattern Recognition Conference, pp. 7125\u20137135 (2025)","DOI":"10.1109\/CVPR52734.2025.00668"},{"key":"20_CR12","doi-asserted-by":"crossref","unstructured":"Huang, J., Limberg, C., Arshad, S.M.N., Zhang, Q., Li, Q.: Combining VLM and LLM for enhanced semantic object perception in robotic handover tasks. In: 2024 WRC Symposium on Advanced Robotics and Automation (WRC SARA), pp. 135\u2013140. IEEE (2024)","DOI":"10.1109\/WRCSARA64167.2024.10685688"},{"key":"20_CR13","unstructured":"Gong, B., et al.: ZeroKey: Point-Level Reasoning and Zero-Shot 3D Keypoint Detection from Large Language Models. arXiv preprint arXiv:2412.06292 (2024)"},{"key":"20_CR14","doi-asserted-by":"crossref","unstructured":"Lin, X., et al.: CLIPose: category-level object pose estimation with pre-trained vision-language knowledge. IEEE Transactions on Circuits and Systems for Video Technology (2024)","DOI":"10.1109\/TCSVT.2024.3397997"},{"key":"20_CR15","unstructured":"Yang, J., et al.: KPT-LLM: Unveiling the Power of Large Language Model for Keypoint Comprehension. arXiv preprint arXiv:2411.01846 (2024)"},{"key":"20_CR16","unstructured":"Kim, J., Chung, H., Kim, B.H. CapE-LLM Support-Free Category-Agnostic Pose Estimation with Multimodal Large Language Models. arXiv preprint arXiv:2411.06869 (2024)"},{"key":"20_CR17","unstructured":"Hu, E.J., et al.: LoRA: low-rank adaptation of large language models. In: International Conference on Learning Representations (ICLR) (2022)"},{"key":"20_CR18","unstructured":"Wu, Z., et al.: DeepSeek-VL2: Mixture-of-Experts Vision-Language Models for Advanced Multimodal Understanding. arXiv preprint arXiv:2412.10302 (2024)"},{"key":"20_CR19","unstructured":"Jocher, G., Chaurasia, A., Qiu, J.Ultralytics YOLOv8, Version 8.0.0. (2023)"},{"key":"20_CR20","doi-asserted-by":"crossref","unstructured":"Zhao, Y., et al.: SWIFT: a scalable lightweight infrastructure for fine-tuning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a039, no.\u00a028, pp.\u00a029733\u201329735 (2025)","DOI":"10.1609\/aaai.v39i28.35383"},{"key":"20_CR21","unstructured":"Dosovitskiy, A., et al.:An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale.arXiv preprint arXiv:2010.11929 (2020)"},{"key":"20_CR22","doi-asserted-by":"crossref","unstructured":"Wang, R., Ktistakis, S., Zhang, S., Meboldt, M., Lohmeyer, Q.: POV-surgery: a dataset for egocentric hand and tool pose estimation during surgical activities. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 440\u2013450. Springer (2023)","DOI":"10.1007\/978-3-031-43996-4_42"},{"key":"20_CR23","unstructured":"Dong, H., Chharia, A., Gou, W., Vicente Carrasco, F., De la Torre, F.D.: HAMBA: single-view 3d hand reconstruction with graph-guided bi-scanning mamba. In: Advances in Neural Information Processing Systems, vol. 37, pp. 2127\u20132160 (2024)"},{"key":"20_CR24","doi-asserted-by":"crossref","unstructured":"Qi, H., Zhao, C., Salzmann, M., Mathis, A.: HOISDF: constraining 3D hand-object pose estimation with global signed distance fields. In: 2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10392\u201310402. IEEE (2024)","DOI":"10.1109\/CVPR52733.2024.00989"},{"key":"20_CR25","unstructured":"Shrestha, P., Amgain, S., Khanal, B., Linte, C. A., Bhattarai, B.: Medical vision language pretraining: a survey (2023). arXiv preprint arXiv:2312.06224"}],"container-title":["Lecture Notes in Computer Science","Data Engineering in Medical Imaging"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-08009-7_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T04:37:23Z","timestamp":1760157443000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-08009-7_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"ISBN":["9783032080080","9783032080097"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-08009-7_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,12]]},"assertion":[{"value":"12 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DEMI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"MICCAI Workshop on Data Engineering in Medical Imaging","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Daejeon","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"demi2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}