{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T18:21:29Z","timestamp":1776968489943,"version":"3.51.4"},"reference-count":25,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2021,3,24]],"date-time":"2021-03-24T00:00:00Z","timestamp":1616544000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,3,24]],"date-time":"2021-03-24T00:00:00Z","timestamp":1616544000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J CARS"],"published-print":{"date-parts":[[2021,5]]},"DOI":"10.1007\/s11548-021-02343-y","type":"journal-article","created":{"date-parts":[[2021,3,24]],"date-time":"2021-03-24T04:46:52Z","timestamp":1616561212000},"page":"779-787","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Cross-modal self-supervised representation learning for gesture and skill recognition in robotic surgery"],"prefix":"10.1007","volume":"16","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7306-8140","authenticated-orcid":false,"given":"Jie Ying","family":"Wu","sequence":"first","affiliation":[]},{"given":"Aniruddha","family":"Tamhane","sequence":"additional","affiliation":[]},{"given":"Peter","family":"Kazanzides","sequence":"additional","affiliation":[]},{"given":"Mathias","family":"Unberath","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,3,24]]},"reference":[{"issue":"9","key":"2343_CR1","doi-asserted-by":"publisher","first-page":"2025","DOI":"10.1109\/TBME.2016.2647680","volume":"64","author":"N Ahmidi","year":"2017","unstructured":"Ahmidi N, Tao L, Sefati S, Gao Y, Lea C, Haro BB, Zappella L, Khudanpur S, Vidal R, Hager GD (2017) A dataset and benchmarks for segmentation and recognition of gestures in robotic surgery. IEEE Trans Biomed Eng 64(9):2025\u20132041","journal-title":"IEEE Trans Biomed Eng"},{"key":"2343_CR2","doi-asserted-by":"crossref","unstructured":"Arandjelovic R, Zisserman A (2018) Objects that sound. In: Proceedings of the European conference on computer vision, pp. 435\u2013451","DOI":"10.1007\/978-3-030-01246-5_27"},{"key":"2343_CR3","doi-asserted-by":"crossref","unstructured":"Chen T, Guestrin C (2016) Xgboost: a scalable tree boosting system. In: Proceedings of the 22nd ACM SIGKDD international conference on knowledge discovery and data mining, pp. 785\u2013794","DOI":"10.1145\/2939672.2939785"},{"key":"2343_CR4","doi-asserted-by":"crossref","unstructured":"DiPietro R, Hager GD (2018) Unsupervised learning for surgical motion by learning to predict the future. In: International conference on medical image computing and computer-assisted intervention, pp. 281\u2013288. Springer","DOI":"10.1007\/978-3-030-00937-3_33"},{"key":"2343_CR5","doi-asserted-by":"crossref","unstructured":"DiPietro R, Hager GD (2019) Automated surgical activity recognition with one labeled sequence. In: International conference on medical image computing and computer-assisted intervention, pp. 458\u2013466. Springer","DOI":"10.1007\/978-3-030-32254-0_51"},{"key":"2343_CR6","doi-asserted-by":"crossref","unstructured":"Farneb\u00e4ck G (2003) Two-frame motion estimation based on polynomial expansion. In: Scandinavian conference on image analysis, pp. 363\u2013370. Springer","DOI":"10.1007\/3-540-45103-X_50"},{"issue":"7","key":"2343_CR7","doi-asserted-by":"publisher","first-page":"1217","DOI":"10.1007\/s11548-019-01995-1","volume":"14","author":"I Funke","year":"2019","unstructured":"Funke I, Mees ST, Weitz J, Speidel S (2019) Video-based surgical skill assessment using 3D convolutional neural networks. Int J Comput Assist Radiol Surg 14(7):1217\u20131225","journal-title":"Int J Comput Assist Radiol Surg"},{"key":"2343_CR8","unstructured":"Gao Y, Vedula SS, Reiley CE, Ahmidi N, Varadarajan B, Lin HC, Tao L, Zappella L, B\u00e9jar B, Yuh DD, Chen CCG, Vidal R, Khudanpur S, Hager GD (2014) JHU-ISI gesture and skill assessment working set (jigsaws): a surgical activity dataset for human motion modeling. In: MICCAI workshop: M2CAI, vol. 3, p. 3"},{"key":"2343_CR9","unstructured":"Guthart GS, Salisbury JK (2000) The intuitive$$^{TM}$$ telesurgery system: overview and application. In: IEEE international conference on robotics and automation, vol. 1, pp. 618\u2013621"},{"key":"2343_CR10","doi-asserted-by":"crossref","unstructured":"Jing L, Tian Y (2020) Self-supervised visual feature learning with deep neural networks: a survey. IEEE Trans Pattern Anal Mach Intell","DOI":"10.1109\/TPAMI.2020.2992393"},{"key":"2343_CR11","doi-asserted-by":"crossref","unstructured":"Kazanzides P, Chen Z, Deguet A, Fischer GS, Taylor RH, DiMaio SP (2014) An open-source research kit for the da vinci$$^{\\textregistered }$$ surgical system. In: IEEE international conference on robotics and automation, pp. 6434\u20136439","DOI":"10.1109\/ICRA.2014.6907809"},{"key":"2343_CR12","doi-asserted-by":"crossref","unstructured":"Long YH, Wu JY, Lu B, Jin YM, Unberath M, Liu YH, Heng PA, Dou Q (2020) Relational graph learning on visual and kinematics embeddings for accurate gesture recognition in robotic surgery","DOI":"10.1109\/ICRA48506.2021.9561028"},{"key":"2343_CR13","unstructured":"Mazomenos E, Watson D, Kotorov R, Stoyanov D (2018) Gesture classification in robotic surgery using recurrent neural networks with kinematic information. In: 8th Joint workshop on new technologies for computer\/robotic assisted surgery"},{"key":"2343_CR14","doi-asserted-by":"crossref","unstructured":"McInnes L, Healy J, Melville J (2018) Umap: uniform manifold approximation and projection for dimension reduction. arXiv preprint arXiv:1802.03426","DOI":"10.21105\/joss.00861"},{"key":"2343_CR15","doi-asserted-by":"crossref","unstructured":"Murali A, Garg A, Krishnan S, Pokorny FT, Abbeel P, Darrell T, Goldberg K (2016) TSC-DL: unsupervised trajectory segmentation of multi-modal surgical demonstrations with deep learning. In: IEEE international conference on robotics and automation, pp. 4150\u20134157","DOI":"10.1109\/ICRA.2016.7487607"},{"key":"2343_CR16","doi-asserted-by":"crossref","unstructured":"Qin Y, Feyzabadi S, Allan M, Burdick JW, Azizian M (2020) davincinet: joint prediction of motion and surgical state in robot-assisted surgery. arXiv preprint arXiv:2009.11937","DOI":"10.1109\/IROS45743.2020.9340723"},{"key":"2343_CR17","unstructured":"Sarikaya D, Jannin P (2019) Surgical gesture recognition with optical flow only. arXiv preprint arXiv:1904.01143"},{"key":"2343_CR18","unstructured":"Simonyan K, Zisserman A (2014) Two-stream convolutional networks for action recognition in videos. Adv Neural Inf Process Syst pp. 568\u2013576"},{"key":"2343_CR19","doi-asserted-by":"crossref","unstructured":"Tanwani AK, Sermanet P, Yan A, Anand R, Phielipp M, Goldberg K (2020) Motion2vec: semi-supervised representation learning from surgical videos. arXiv preprint arXiv:2006.00545","DOI":"10.1109\/ICRA40945.2020.9197324"},{"key":"2343_CR20","doi-asserted-by":"crossref","unstructured":"van Amsterdam B, Nakawala H, De Momi E, Stoyanov D (2019) Weakly supervised recognition of surgical gestures. In: IEEE international conference on robotics and automation, pp. 9565\u20139571","DOI":"10.1109\/ICRA.2019.8793696"},{"issue":"12","key":"2343_CR21","doi-asserted-by":"publisher","first-page":"1959","DOI":"10.1007\/s11548-018-1860-1","volume":"13","author":"Z Wang","year":"2018","unstructured":"Wang Z, Fey AM (2018) Deep learning with convolutional neural network for objective skill evaluation in robot-assisted surgery. Int J Comput Assist Radiol Surg 13(12):1959\u20131970","journal-title":"Int J Comput Assist Radiol Surg"},{"issue":"2","key":"2343_CR22","doi-asserted-by":"crossref","first-page":"202","DOI":"10.1093\/ons\/opz129","volume":"18","author":"MY Weiss","year":"2020","unstructured":"Weiss MY, Melnyk R, Mix D, Ghazi A, Vates GE, Stone JJ (2020) Design and validation of a cervical laminectomy simulator using 3D printing and hydrogel phantoms. Oper Neurosurg 18(2):202\u2013208","journal-title":"Oper Neurosurg"},{"key":"2343_CR23","unstructured":"Wu JY, Kazanzides P, Unberath M (2020) Leveraging vision and kinematics data to improve realism of biomechanic soft tissue simulation for robotic surgery. Int J Comput Assist Radiol Surg pp. 1\u20138"},{"key":"2343_CR24","doi-asserted-by":"crossref","unstructured":"Zhang Y, Lu H (2018) Deep cross-modal projection learning for image-text matching. In: Proceedings of the European conference on computer vision, pp. 686\u2013701","DOI":"10.1007\/978-3-030-01246-5_42"},{"key":"2343_CR25","doi-asserted-by":"crossref","unstructured":"Zhen L, Hu P, Wang X, Peng D (2019) Deep supervised cross-modal retrieval. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 10394\u201310403","DOI":"10.1109\/CVPR.2019.01064"}],"container-title":["International Journal of Computer Assisted Radiology and Surgery"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11548-021-02343-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11548-021-02343-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11548-021-02343-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,22]],"date-time":"2022-12-22T08:06:30Z","timestamp":1671696390000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11548-021-02343-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,3,24]]},"references-count":25,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2021,5]]}},"alternative-id":["2343"],"URL":"https:\/\/doi.org\/10.1007\/s11548-021-02343-y","relation":{},"ISSN":["1861-6410","1861-6429"],"issn-type":[{"value":"1861-6410","type":"print"},{"value":"1861-6429","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,3,24]]},"assertion":[{"value":"19 January 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 March 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 March 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest. This article does not contain any studies with human participants or animals performed by any of the authors.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}