{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T04:51:45Z","timestamp":1774500705191,"version":"3.50.1"},"reference-count":50,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Research Grants Council of Hong Kong Special Administrative Region, China","award":["T45-401\/22-N"],"award-info":[{"award-number":["T45-401\/22-N"]}]},{"name":"Research Grants Council of Hong Kong Special Administrative Region, China","award":["C4036-22G"],"award-info":[{"award-number":["C4036-22G"]}]},{"DOI":"10.13039\/501100006730","name":"Ministry of Education Tier 1 grant, NUS, Singapore","doi-asserted-by":"publisher","award":["24-1250-P0001"],"award-info":[{"award-number":["24-1250-P0001"]}],"id":[{"id":"10.13039\/501100006730","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Med. Imaging"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1109\/tmi.2025.3590457","type":"journal-article","created":{"date-parts":[[2025,7,18]],"date-time":"2025-07-18T17:45:44Z","timestamp":1752860744000},"page":"5278-5289","source":"Crossref","is-referenced-by-count":4,"title":["Instrument-Tissue-Guided Surgical Action Triplet Detection via Textual-Temporal Trail Exploration"],"prefix":"10.1109","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2630-2838","authenticated-orcid":false,"given":"Jialun","family":"Pei","sequence":"first","affiliation":[{"name":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong, SAR, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2278-4356","authenticated-orcid":false,"given":"Jiaan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Department of Biomedical Engineering, National University of Singapore, Queenstown, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8414-9870","authenticated-orcid":false,"given":"Guanyi","family":"Qin","sequence":"additional","affiliation":[{"name":"Department of Biomedical Engineering, National University of Singapore, Queenstown, Singapore"}]},{"given":"Kai","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of General Surgery, Nanfang Hospital, Division of Hepatobiliopancreatic Surgery, The First School of Clinical Medicine, Southern Medical University, Guangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3775-3877","authenticated-orcid":false,"given":"Yueming","family":"Jin","sequence":"additional","affiliation":[{"name":"Department of Biomedical Engineering, National University of Singapore, Queenstown, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3055-5034","authenticated-orcid":false,"given":"Pheng-Ann","family":"Heng","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering and the Institute of Medical Intelligence and XR, The Chinese University of Hong Kong, Hong Kong, SAR, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/s41551-017-0132-7"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2019.101572"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2024.3444279"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2016.2593957"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59716-0_35"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2023.3299518"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16449-1_38"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-43996-4_43"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2023.102888"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2022.102433"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-43996-4_48"},{"key":"ref12","first-page":"1","article-title":"Deformable DETR: Deformable transformers for end-to-end object detection","volume-title":"Proc. ICLR","author":"Zhu"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-43996-4_61"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3191838"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611898"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3148454"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72120-5_64"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/s11548-024-03147-6"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2023.3345736"},{"key":"ref20","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"139","author":"Radford"},{"key":"ref21","first-page":"34892","article-title":"Visual instruction tuning","volume-title":"Proc. NeurIPS","author":"Liu"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01949"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02251"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"ref27","article-title":"Mamba: Linear-time sequence modeling with selective state spaces","author":"Gu","year":"2023","journal-title":"arXiv:2312.00752"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/tcsvt.2025.3563411"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-023-42451-8"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15711-0_50"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s11548-016-1371-x"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2014.2340473"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2017.2787657"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00369"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2021.3069471"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59716-0_33"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-87202-1_57"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/s11548-018-1882-8"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i3.32332"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"ref41","first-page":"62429","article-title":"Vision mamba: Efficient visual representation learning with bidirectional state space model","volume-title":"Proc. ICML","author":"Zhu"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73347-5_14"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00075"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2018.00081"},{"key":"ref45","article-title":"Data splits and metrics for method benchmarking on surgical action triplet datasets","author":"Innocent Nwoye","year":"2022","journal-title":"arXiv:2204.05235"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2023.102803"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.256"},{"key":"ref50","first-page":"28541","article-title":"LLaVA-med: Training a large language-and-vision assistant for biomedicine in one day","volume-title":"Proc. NeurIPS","author":"Li"}],"container-title":["IEEE Transactions on Medical Imaging"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/42\/11279972\/11084985.pdf?arnumber=11084985","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T18:40:43Z","timestamp":1765219243000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11084985\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12]]},"references-count":50,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tmi.2025.3590457","relation":{},"ISSN":["0278-0062","1558-254X"],"issn-type":[{"value":"0278-0062","type":"print"},{"value":"1558-254X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12]]}}}