{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T08:04:08Z","timestamp":1772611448431,"version":"3.50.1"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,10]]},"DOI":"10.1109\/robio64047.2024.10907318","type":"proceedings-article","created":{"date-parts":[[2025,3,7]],"date-time":"2025-03-07T18:33:40Z","timestamp":1741372420000},"page":"1752-1757","source":"Crossref","is-referenced-by-count":1,"title":["MT-ViT: Multi-Task Video Transformer for Surgical Skill Assessment from Streaming Long-term Video"],"prefix":"10.1109","author":[{"given":"Jiaxin","family":"Guo","sequence":"first","affiliation":[{"name":"CUHK T Stone Robotics Institute, The Chinese University of Hong Kong,Hong Kong,China"}]},{"given":"Shaocong","family":"Han","sequence":"additional","affiliation":[{"name":"The Second Affliated Hospital of Zhengzhou University,Zhengzhou,China"}]},{"given":"Yun-Hui","family":"Liu","sequence":"additional","affiliation":[{"name":"CUHK T Stone Robotics Institute, The Chinese University of Hong Kong,Hong Kong,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3068852"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/S0002-9610(05)80843-8"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1067\/mog.2002.123481"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-021-88175-x"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1046\/j.1365-2168.1997.02502.x"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2021.3101646"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-bioeng-071516-044435"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2102.05095"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/s11548-019-01995-1"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/s11548-020-02269-x"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00940"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s11548-018-1860-1"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59716-0_64"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-32254-0_53"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2023.3242466"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/DICTA51227.2020.9363408"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-43996-4_4"},{"key":"ref18","article-title":"Joint surgical gesture and task classification with multi-task and multimodal learning","author":"Sarikaya","year":"2018","journal-title":"arXiv preprint"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197301"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/s11548-021-02388-z"},{"key":"ref21","volume-title":"Not all patches are what you need: Expediting vision transformers via token reorganizations","author":"Liang"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00986"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_14"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16449-1_39"},{"issue":"3","key":"ref27","article-title":"Jhu-isi gesture and skill assessment working set (jigsaws): A surgical activity dataset for human motion modeling","volume-title":"MICCAI workshop","volume":"3","author":"Gao","year":"2014"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2023.102770"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16449-1_46"},{"key":"ref30","article-title":"Pytorch: An imperative style, high-performance deep learning library","volume":"32","author":"Paszke","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2019.00161"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/tmi.2016.2593957"}],"event":{"name":"2024 IEEE International Conference on Robotics and Biomimetics (ROBIO)","location":"Bangkok, Thailand","start":{"date-parts":[[2024,12,10]]},"end":{"date-parts":[[2024,12,14]]}},"container-title":["2024 IEEE International Conference on Robotics and Biomimetics (ROBIO)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10906779\/10907273\/10907318.pdf?arnumber=10907318","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,8]],"date-time":"2025-03-08T08:15:10Z","timestamp":1741421710000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10907318\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,10]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/robio64047.2024.10907318","relation":{},"subject":[],"published":{"date-parts":[[2024,12,10]]}}}