{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T03:02:45Z","timestamp":1742958165264,"version":"3.40.3"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031188138"},{"type":"electronic","value":"9783031188145"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-18814-5_2","type":"book-chapter","created":{"date-parts":[[2022,10,13]],"date-time":"2022-10-13T07:06:39Z","timestamp":1665644799000},"page":"11-23","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Visual Modalities Based Multimodal Fusion for\u00a0Surgical Phase Recognition"],"prefix":"10.1007","author":[{"given":"Bogyu","family":"Park","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hyeongyu","family":"Chi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bokyung","family":"Park","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiwon","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sunghyun","family":"Park","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Woo Jin","family":"Hyung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Min-Kook","family":"Choi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,10,12]]},"reference":[{"key":"2_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1007\/978-3-030-00937-3_31","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2018","author":"O Zisimopoulos","year":"2018","unstructured":"Zisimopoulos, O., et al.: DeepPhase: surgical phase recognition in CATARACTS Videos. In: Frangi, A.F., Schnabel, J.A., Davatzikos, C., Alberola-L\u00f3pez, C., Fichtinger, G. (eds.) MICCAI 2018. LNCS, vol. 11073, pp. 265\u2013272. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-00937-3_31"},{"issue":"3","key":"2_CR2","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1007\/s11548-008-0223-8","volume":"3","author":"U Klank","year":"2008","unstructured":"Klank, U., Padoy, N., Feussner, H., Navab, N.: Automatic feature generation in endoscopic images. Int. J. Comput. Assist. Radiol. Surg. 3(3), 331\u2013339 (2008). https:\/\/doi.org\/10.1007\/s11548-008-0223-8","journal-title":"Int. J. Comput. Assist. Radiol. Surg."},{"key":"2_CR3","unstructured":"Hong, S., et al.: Rethinking generalization performance of surgical phase recognition with expert-generated annotations. arXiv preprint. arXiv:2110.11626 (2021)"},{"issue":"3","key":"2_CR4","doi-asserted-by":"publisher","first-page":"632","DOI":"10.1016\/j.media.2010.10.001","volume":"16","author":"N Padoy","year":"2012","unstructured":"Padoy, N., Blum, T., Ahmadi, S.-A., Feussner, H., Berger, M.-O., Navab, N.: Statistical modeling and recognition of surgical workflow. Med. Image Anal. 16(3), 632\u2013641 (2012)","journal-title":"Med. Image Anal."},{"issue":"1","key":"2_CR5","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1109\/TMI.2016.2593957","volume":"36","author":"AP Twinanda","year":"2016","unstructured":"Twinanda, A.P., Shehata, S., Mutter, D., Marescaux, J., De Mathelin, M., Padoy, N.: Endonet: a deep architecture for recognition tasks on laparoscopic videos. IEEE Trans. Med. Imaging 36(1), 86\u201397 (2016)","journal-title":"IEEE Trans. Med. Imaging"},{"key":"2_CR6","doi-asserted-by":"publisher","first-page":"101572","DOI":"10.1016\/j.media.2019.101572","volume":"59","author":"Y Jin","year":"2020","unstructured":"Jin, Y.: Multi-task recurrent convolutional network with correlation loss for surgical video analysis. Med. Image Anal. 59, 101572 (2020)","journal-title":"Med. Image Anal."},{"issue":"4","key":"2_CR7","doi-asserted-by":"publisher","first-page":"673","DOI":"10.1007\/s11548-019-02108-8","volume":"15","author":"G Lecuyer","year":"2020","unstructured":"Lecuyer, G., Ragot, M., Martin, N., Launay, L., Jannin, P.: Assisted phase and step annotation for surgical videos. Int. J. Comput. Assist. Radiol. Surg. 15(4), 673\u2013680 (2020). https:\/\/doi.org\/10.1007\/s11548-019-02108-8","journal-title":"Int. J. Comput. Assist. Radiol. Surg."},{"issue":"6","key":"2_CR8","doi-asserted-by":"publisher","first-page":"1081","DOI":"10.1007\/s11548-016-1371-x","volume":"11","author":"O Dergachyova","year":"2016","unstructured":"Dergachyova, O., Bouget, D., Huaulm\u00e9, A., Morandi, X., Jannin, P.: Automatic data-driven real-time segmentation and recognition of surgical workflow. Int. J. Comput. Assist. Radiol. Surg. 11(6), 1081\u20131089 (2016). https:\/\/doi.org\/10.1007\/s11548-016-1371-x","journal-title":"Int. J. Comput. Assist. Radiol. Surg."},{"issue":"2","key":"2_CR9","doi-asserted-by":"publisher","first-page":"553","DOI":"10.1007\/s00464-017-5878-1","volume":"32","author":"C Loukas","year":"2017","unstructured":"Loukas, C.: Video content analysis of surgical procedures. Surg. Endosc. 32(2), 553\u2013568 (2017). https:\/\/doi.org\/10.1007\/s00464-017-5878-1","journal-title":"Surg. Endosc."},{"key":"2_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"343","DOI":"10.1007\/978-3-030-59716-0_33","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2020","author":"T Czempiel","year":"2020","unstructured":"Czempiel, T., et al.: TeCNO: surgical phase recognition with multi-stage temporal convolutional networks. In: Martel, A.L., et al. (eds.) MICCAI 2020. LNCS, vol. 12263, pp. 343\u2013352. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-59716-0_33"},{"issue":"9","key":"2_CR11","doi-asserted-by":"publisher","first-page":"691","DOI":"10.1038\/s41551-017-0132-7","volume":"1","author":"L Maier-Hein","year":"2017","unstructured":"Maier-Hein, L., et al.: Surgical data science for next-generation interventions. Nat. Biomed. Eng. 1(9), 691\u2013696 (2017)","journal-title":"Nat. Biomed. Eng."},{"key":"2_CR12","unstructured":"Gao, Y., et al.: Jhu-isi gesture and skill assessment working set (jigsaws): a surgical activity dataset for human motion modeling. In: MICCAI Workshop: M2cai, vol. 3 (2014)"},{"key":"2_CR13","doi-asserted-by":"publisher","first-page":"106452","DOI":"10.1016\/j.cmpb.2021.106452","volume":"212","author":"A Huaulm\u00e9","year":"2021","unstructured":"Huaulm\u00e9, A., et al.: Micro-surgical anastomose workflow recognition challenge report. Comput. Methods Programs Biomed. 212, 106452 (2021)","journal-title":"Comput. Methods Programs Biomed."},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Huaulm\u00e9, A., et al.: Peg transfer workflow recognition challenge report: does multi-modal data improve recognition? arXiv preprint. arXiv:2202.05821 (2022)","DOI":"10.2139\/ssrn.4088403"},{"issue":"3","key":"2_CR15","doi-asserted-by":"publisher","first-page":"e201664","DOI":"10.1001\/jamanetworkopen.2020.1664","volume":"3","author":"S Khalid","year":"2020","unstructured":"Khalid, S., Goldenberg, M., Grantcharov, T., Taati, B., Rudzicz, F.: Evaluation of deep learning models for identifying surgical actions and measuring performance. JAMA Netw. Open 3(3), e201664\u2013e201664 (2020)","journal-title":"JAMA Netw. Open"},{"issue":"7","key":"2_CR16","doi-asserted-by":"publisher","first-page":"1217","DOI":"10.1007\/s11548-019-01995-1","volume":"14","author":"I Funke","year":"2019","unstructured":"Funke, I., Mees, S.T., Weitz, J., Speidel, S.: Video-based surgical skill assessment using 3D convolutional neural networks. Int. J. Comput. Assist. Radiol. Surg. 14(7), 1217\u20131225 (2019). https:\/\/doi.org\/10.1007\/s11548-019-01995-1","journal-title":"Int. J. Comput. Assist. Radiol. Surg."},{"issue":"1","key":"2_CR17","doi-asserted-by":"publisher","first-page":"296","DOI":"10.1016\/j.juro.2017.07.081","volume":"199","author":"AJ Hung","year":"2018","unstructured":"Hung, A.J., Chen, J., Jarc, A., Hatcher, D., Djaladat, H., Gill, I.S.: Development and validation of objective performance metrics for robot-assisted radical prostatectomy: a pilot study. J. Urol. 199(1), 296\u2013304 (2018)","journal-title":"J. Urol."},{"issue":"6","key":"2_CR18","doi-asserted-by":"publisher","first-page":"1964","DOI":"10.3390\/jcm9061964","volume":"9","author":"D Lee","year":"2020","unstructured":"Lee, D., Yu, H.W., Kwon, H., Kong, H.J., Lee, K.E., Kim, H.C.: Evaluation of surgical skills during robotic surgery by deep learning-based multiple surgical instrument tracking in training and actual operations. J. Clin. Med. 9(6), 1964 (2020)","journal-title":"J. Clin. Med."},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Liu, D., et al.: Towards unified surgical skill assessment. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9522\u20139531 (2021)","DOI":"10.1109\/CVPR46437.2021.00940"},{"issue":"15","key":"2_CR20","doi-asserted-by":"publisher","first-page":"1434","DOI":"10.1056\/NEJMsa1300625","volume":"369","author":"JD Birkmeyer","year":"2013","unstructured":"Birkmeyer, J.D., et al.: Surgical skill and complication rates after bariatric surgery. N. Engl. J. Med. 369(15), 1434\u20131442 (2013)","journal-title":"N. Engl. J. Med."},{"issue":"3","key":"2_CR21","doi-asserted-by":"publisher","first-page":"1029","DOI":"10.1007\/s00464-012-2513-z","volume":"27","author":"I Oropesa","year":"2013","unstructured":"Oropesa, I., et al.: Eva: laparoscopic instrument tracking based on endoscopic video analysis for psychomotor skills assessment. Surg. Endosc. 27(3), 1029\u20131039 (2013). https:\/\/doi.org\/10.1007\/s00464-012-2513-z","journal-title":"Surg. Endosc."},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Rezatofighi, H., Tsoi, N., Gwak, J., Sadeghian, A., Reid, I., Savarese, S.: Generalized intersection over union: a metric and a loss for bounding box regression. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 658\u2013666 (2019)","DOI":"10.1109\/CVPR.2019.00075"},{"key":"2_CR23","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Wang, P., Liu, W., Li, J., Ye, R., Ren, D.: Distance-iou loss: Faster and better learning for bounding box regression. In: Proceedings of the AAAI Conference on Artificial Intelligence 34, 12993\u201313000 (2020)","DOI":"10.1609\/aaai.v34i07.6999"},{"key":"2_CR24","doi-asserted-by":"crossref","unstructured":"Chen, X., He, K.: Exploring simple siamese representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15750\u201315758 (2021)","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"2_CR25","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: Slowfast networks for video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6202\u20136211 (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"issue":"11","key":"2_CR26","doi-asserted-by":"publisher","first-page":"2673","DOI":"10.1109\/78.650093","volume":"45","author":"M Schuster","year":"1997","unstructured":"Schuster, M., Paliwal, K.K.: Bidirectional recurrent neural networks. IEEE Trans. Sig. Process. 45(11), 2673\u20132681 (1997)","journal-title":"IEEE Trans. Sig. Process."},{"key":"2_CR27","doi-asserted-by":"crossref","unstructured":"Xiao, T., Liu, Y., Zhou, B., Jiang, Y., Sun, J.: Unified perceptual parsing for scene understanding. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 418\u2013434 (2018)","DOI":"10.1007\/978-3-030-01228-1_26"},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2_CR29","doi-asserted-by":"crossref","unstructured":"Cui, Y., Jia, M., Lin, T.Y., Song, Y., Belongie, S.: Class-balanced loss based on effective number of samples. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9268\u20139277 (2019)","DOI":"10.1109\/CVPR.2019.00949"},{"key":"2_CR30","doi-asserted-by":"crossref","unstructured":"Chen, L.C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with atrous separable convolution for semantic image segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 801\u2013818 (2018)","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"2_CR31","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"173","DOI":"10.1007\/978-3-030-58539-6_11","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Y Yuan","year":"2020","unstructured":"Yuan, Y., Chen, X., Wang, J.: Object-contextual representations for semantic segmentation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12351, pp. 173\u2013190. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58539-6_11"},{"key":"2_CR32","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5693\u20135703 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"2_CR33","unstructured":"MMSegmentation Contributors. MMSegmentation: Openmmlab semantic segmentation toolbox and benchmark. https:\/\/github.com\/open-mmlab\/mmsegmentation (2020)"}],"container-title":["Lecture Notes in Computer Science","Multiscale Multimodal Medical Imaging"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-18814-5_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,13]],"date-time":"2022-10-13T07:07:39Z","timestamp":1665644859000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-18814-5_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031188138","9783031188145"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-18814-5_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"12 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMMI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Multiscale Multimodal Medical Imaging","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Singapore","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmmi2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/mmmi2022.github.io\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"18","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"12","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"67% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2,5","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1,67","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}