{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T14:09:16Z","timestamp":1780582156308,"version":"3.54.1"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,3,21]],"date-time":"2024-03-21T00:00:00Z","timestamp":1710979200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,21]],"date-time":"2024-03-21T00:00:00Z","timestamp":1710979200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J CARS"],"DOI":"10.1007\/s11548-024-03095-1","type":"journal-article","created":{"date-parts":[[2024,3,21]],"date-time":"2024-03-21T09:01:31Z","timestamp":1711011691000},"page":"871-880","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["SF-TMN: SlowFast temporal modeling network for surgical phase recognition"],"prefix":"10.1007","volume":"19","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1906-2116","authenticated-orcid":false,"given":"Bokai","family":"Zhang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mohammad Hasan","family":"Sarhan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bharti","family":"Goel","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Svetlana","family":"Petculescu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Amer","family":"Ghanem","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,3,21]]},"reference":[{"issue":"8","key":"3095_CR1","doi-asserted-by":"publisher","first-page":"3285","DOI":"10.1007\/s00464-020-07628-y","volume":"34","author":"LS Feldman","year":"2020","unstructured":"Feldman LS, Pryor AD, Gardner AK, Dunkin BJ, Schultz L, Awad MM, Ritter EM (2020) Sages video-based assessment (vba) program: a vision for life-long learning for surgeons. Surg Endosc 34(8):3285\u20133288","journal-title":"Surg Endosc"},{"issue":"1","key":"3095_CR2","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1109\/TMI.2016.2593957","volume":"36","author":"AP Twinanda","year":"2016","unstructured":"Twinanda AP, Shehata S, Mutter D, Marescaux J, De Mathelin M, Padoy N (2016) Endonet: a deep architecture for recognition tasks on laparoscopic videos. IEEE Trans Med Imaging 36(1):86\u201397","journal-title":"IEEE Trans Med Imaging"},{"issue":"12","key":"3095_CR3","first-page":"2193","volume":"17","author":"Y Jin","year":"2022","unstructured":"Jin Y, Long Y, Gao X, Stoyanov D, Dou Q, Heng P-A (2022) Trans-svnet: hybrid embedding aggregation transformer for surgical workflow analysis. IJCARS 17(12):2193\u20132202","journal-title":"IJCARS"},{"issue":"4","key":"3095_CR4","first-page":"349","volume":"10","author":"B Zhang","year":"2022","unstructured":"Zhang B, Abbing J, Ghanem A, Fer D, Barker J, Abukhalil R, Goel VK, Milletar\u00ec F (2022) Towards accurate surgical workflow recognition with convolutional networks and transformers. CMBBE: Imag Visual 10(4):349\u2013356","journal-title":"CMBBE: Imag Visual"},{"issue":"5","key":"3095_CR5","doi-asserted-by":"publisher","first-page":"1114","DOI":"10.1109\/TMI.2017.2787657","volume":"37","author":"Y Jin","year":"2017","unstructured":"Jin Y, Dou Q, Chen H, Yu L, Qin J, Fu C-W, Heng P-A (2017) Sv-rcnet: workflow recognition from surgical videos using recurrent convolutional network. IEEE Trans Med Imaging 37(5):1114\u20131126","journal-title":"IEEE Trans Med Imaging"},{"issue":"11","key":"3095_CR6","first-page":"2029","volume":"16","author":"B Zhang","year":"2021","unstructured":"Zhang B, Ghanem A, Simes A, Choi H, Yoo A (2021) Surgical workflow recognition with 3dcnn for sleeve gastrectomy. IJCARS 16(11):2029\u20132036","journal-title":"IJCARS"},{"key":"3095_CR7","doi-asserted-by":"crossref","unstructured":"Czempiel T, Paschali M, Keicher M, Simson W, Feussner H, Kim ST, Navab N (2020) Tecno: Surgical phase recognition with multi-stage temporal convolutional networks. In: MICCAI. Springer, pp 343\u2013352","DOI":"10.1007\/978-3-030-59716-0_33"},{"key":"3095_CR8","first-page":"1","volume":"1","author":"D Fer","year":"2023","unstructured":"Fer D, Zhang B, Abukhalil R, Goel V, Goel B, Barker J, Kalesan B, Barragan I, Gaddis ML, Kilroy PG (2023) An artificial intelligence model that automatically labels roux-en-y gastric bypasses, a comparison to trained surgeon annotators. Surg Endosc 1:1\u20138","journal-title":"Surg Endosc"},{"key":"3095_CR9","unstructured":"Zhang B, Ghanem A, Simes A, Choi H, Yoo A, Min A (2021) Swnet: surgical workflow recognition with deep convolutional network. In: MIDL. PMLR, pp 855\u2013869"},{"issue":"11","key":"3095_CR10","doi-asserted-by":"publisher","first-page":"3309","DOI":"10.1109\/TMI.2022.3182995","volume":"41","author":"X Ding","year":"2022","unstructured":"Ding X, Li X (2022) Exploring segment-level semantics for online phase recognition from surgical videos. IEEE Trans Med Imaging 41(11):3309\u20133319","journal-title":"IEEE Trans Med Imaging"},{"key":"3095_CR11","first-page":"1","volume":"1","author":"B Zhang","year":"2022","unstructured":"Zhang B, Goel B, Sarhan MH, Goel VK, Abukhalil R, Kalesan B, Stottler N, Petculescu S (2022) Surgical workflow recognition with temporal convolution and transformer for action segmentation. IJCARS 1:1\u201310","journal-title":"IJCARS"},{"key":"3095_CR12","first-page":"1","volume":"1","author":"B Zhang","year":"2022","unstructured":"Zhang B, Sturgeon D, Shankar AR, Goel VK, Barker J, Ghanem A, Lee P, Milecky M, Stottler N, Petculescu S (2022) Surgical instrument recognition for instrument usage documentation and surgical video library indexing. CMBBE Imag Visual 1:1\u20139","journal-title":"CMBBE Imag Visual"},{"key":"3095_CR13","doi-asserted-by":"crossref","unstructured":"Feichtenhofer C, Fan H, Malik J, He K (2019) Slowfast networks for video recognition. In: ICCV, pp 6202\u20136211","DOI":"10.1109\/ICCV.2019.00630"},{"key":"3095_CR14","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: CVPR, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"3095_CR15","doi-asserted-by":"crossref","unstructured":"Farha YA, Gall J (2019) Ms-tcn: Multi-stage temporal convolutional network for action segmentation. In: CVPR, pp 3575\u20133584. https:\/\/github.com\/yabufarha\/ms-tcn","DOI":"10.1109\/CVPR.2019.00369"},{"key":"3095_CR16","unstructured":"Yi F, Wen H, Jiang T (2021) Asformer: transformer for action segmentation. In: BMVC, p 236 . https:\/\/github.com\/ChinaYi\/ASFormer"},{"key":"3095_CR17","unstructured":"He Z, Mottaghi A, Sharghi A, Jamal MA, Mohareri O (2022) An empirical study on activity recognition in long surgical videos. In: Machine learning for health. PMLR, pp 356\u2013372"},{"key":"3095_CR18","doi-asserted-by":"crossref","unstructured":"Schoeffmann K, Taschwer M, Sarny S, M\u00fcnzer B, Primus MJ, Putzgruber D (2018) Cataract-101: video dataset of 101 cataract surgeries. In: Proceedings of the 9th ACM multimedia systems conference, pp 421\u2013425","DOI":"10.1145\/3204949.3208137"},{"key":"3095_CR19","doi-asserted-by":"crossref","unstructured":"Stein S, McKenna SJ (2013) Combining embedded accelerometers with computer vision for recognizing food preparation activities. In: Proceedings of the 2013 ACM international joint conference on pervasive and ubiquitous computing, pp 729\u2013738","DOI":"10.1145\/2493432.2493482"},{"key":"3095_CR20","doi-asserted-by":"crossref","unstructured":"Fathi A, Ren X, Rehg JM (2011) Learning to recognize objects in egocentric activities. In: CVPR 2011. IEEE, pp 3281\u20133288","DOI":"10.1109\/CVPR.2011.5995444"},{"key":"3095_CR21","doi-asserted-by":"crossref","unstructured":"Kuehne H, Arslan A, Serre T (2014) The language of actions: recovering the syntax and semantics of goal-directed human activities. In: CVPR, pp 780\u2013787","DOI":"10.1109\/CVPR.2014.105"},{"issue":"6","key":"3095_CR22","doi-asserted-by":"publisher","first-page":"1897","DOI":"10.1109\/TMI.2023.3242980","volume":"42","author":"X Ding","year":"2023","unstructured":"Ding X, Yan X, Wang Z, Zhao W, Zhuang J, Xu X, Li X (2023) Less is more: surgical phase recognition from timestamp supervision. IEEE Trans Med Imaging 42(6):1897\u20131910","journal-title":"IEEE Trans Med Imaging"},{"key":"3095_CR23","unstructured":"Yi F, Yang Y, Jiang T (2022) Not end-to-end: Explore multi-stage architecture for online surgical phase recognition. In: ACCV, pp 2613\u20132628"},{"key":"3095_CR24","doi-asserted-by":"crossref","unstructured":"Carreira J, Zisserman A (2017) Quo vadis, action recognition? A new model and the kinetics dataset. In: CVPR, pp 6299\u20136308","DOI":"10.1109\/CVPR.2017.502"},{"issue":"6","key":"3095_CR25","doi-asserted-by":"publisher","first-page":"6647","DOI":"10.1109\/TPAMI.2020.3021756","volume":"45","author":"S Li","year":"2023","unstructured":"Li S, Farha YA, Liu Y, Cheng M-M, Gall J (2023) Ms-tcn++: multi-stage temporal convolutional network for action segmentation. IEEE Trans Pattern Anal Mach Intell 45(6):6647\u20136658","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"3095_CR26","unstructured":"Funke I, Rivoir D, Speidel S (2023) Metrics matter in surgical phase recognition. arXiv preprint arXiv:2305.13961"},{"key":"3095_CR27","doi-asserted-by":"crossref","unstructured":"Lea C, Vidal R, Hager GD (2016) Learning convolutional action primitives for fine-grained action recognition. In: ICRA. IEEE, pp 1642\u20131649","DOI":"10.1109\/ICRA.2016.7487305"},{"key":"3095_CR28","doi-asserted-by":"crossref","unstructured":"Lea C, Flynn MD, Vidal R, Reiter A, Hager GD (2017) Temporal convolutional networks for action segmentation and detection. In: CVPR, pp 156\u2013165","DOI":"10.1109\/CVPR.2017.113"},{"key":"3095_CR29","doi-asserted-by":"crossref","unstructured":"Li M, Chen L, Duan Y, Hu Z, Feng J, Zhou J, Lu J (2022) Bridge-prompt: towards ordinal action understanding in instructional videos. In: CVPR, pp 19880\u201319889","DOI":"10.1109\/CVPR52688.2022.01926"},{"key":"3095_CR30","doi-asserted-by":"crossref","unstructured":"Ishihara K, Nakano G, Inoshita T (2022) Mcfm: mutual cross fusion module for intermediate fusion-based action segmentation. In: ICIP. IEEE, pp 1701\u20131705","DOI":"10.1109\/ICIP46576.2022.9897444"},{"key":"3095_CR31","doi-asserted-by":"crossref","unstructured":"Zhang Y, Bano S, Page A-S, Deprest J, Stoyanov D, Vasconcelos F (2022) Retrieval of surgical phase transitions using reinforcement learning. In: International conference on medical image computing and computer-assisted intervention. Springer, pp 497\u2013506","DOI":"10.1007\/978-3-031-16449-1_47"},{"key":"3095_CR32","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa F, Varoquaux G, Gramfort A, Michel V, Thirion B, Grisel O, Blondel M, Prettenhofer P, Weiss R, Dubourg V, Vanderplas J, Passos A, Cournapeau D, Brucher M, Perrot M, Duchesnay E (2011) Scikit-learn: machine learning in Python. J Mach Learn Res 12:2825\u20132830","journal-title":"J Mach Learn Res"},{"key":"3095_CR33","doi-asserted-by":"crossref","unstructured":"Behrmann N, Golestaneh SA, Kolter Z, Gall J, Noroozi M (2022) Unified fully and timestamp supervised temporal action segmentation via sequence to sequence translation. In: ECCV. Springer, pp 52\u201368","DOI":"10.1007\/978-3-031-19833-5_4"},{"key":"3095_CR34","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108764","volume":"129","author":"J Park","year":"2022","unstructured":"Park J, Kim D, Huh S, Jo S (2022) Maximization and restoration: action segmentation through dilation passing and temporal reconstruction. Pattern Recogn 129:108764","journal-title":"Pattern Recogn"},{"key":"3095_CR35","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2022.104567","volume":"128","author":"N Aziere","year":"2022","unstructured":"Aziere N, Todorovic S (2022) Multistage temporal convolution transformer for action segmentation. Image Vis Comput 128:104567","journal-title":"Image Vis Comput"},{"key":"3095_CR36","doi-asserted-by":"crossref","unstructured":"Chen M-H, Li B, Bao Y, AlRegib G, Kira Z (2020) Action segmentation with joint self-supervised temporal domain adaptation. In: CVPR, pp 9454\u20139463","DOI":"10.1109\/CVPR42600.2020.00947"},{"key":"3095_CR37","doi-asserted-by":"crossref","unstructured":"Wang Z, Gao Z, Wang L, Li Z, Wu G (2020) Boundary-aware cascade networks for temporal action segmentation. In: ECCV. Springer, pp 34\u201351","DOI":"10.1007\/978-3-030-58595-2_3"},{"key":"3095_CR38","doi-asserted-by":"crossref","unstructured":"Ahn H, Lee D (2021) Refining action segmentation with hierarchical video representations. In: ICCV, pp 16302\u201316310","DOI":"10.1109\/ICCV48922.2021.01599"},{"key":"3095_CR39","doi-asserted-by":"crossref","unstructured":"Ishikawa Y, Kasai S, Aoki Y, Kataoka H (2021) Alleviating over-segmentation errors by detecting action boundaries. In: WACV, pp 2322\u20132331","DOI":"10.1109\/WACV48630.2021.00237"},{"key":"3095_CR40","doi-asserted-by":"crossref","unstructured":"Chen L, Li M, Duan Y, Zhou J, Lu J (2022) Uncertainty-aware representation learning for action segmentation. In: IJCAI, vol 2, p 6","DOI":"10.24963\/ijcai.2022\/115"},{"key":"3095_CR41","first-page":"1","volume":"1","author":"Z Du","year":"2022","unstructured":"Du Z, Wang Q (2022) Dilated transformer with feature aggregation module for action segmentation. Neural Process Lett 1:1\u201317","journal-title":"Neural Process Lett"}],"container-title":["International Journal of Computer Assisted Radiology and Surgery"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11548-024-03095-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11548-024-03095-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11548-024-03095-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,16]],"date-time":"2024-05-16T13:16:55Z","timestamp":1715865415000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11548-024-03095-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,21]]},"references-count":41,"journal-issue":{"issue":"5","published-online":{"date-parts":[[2024,5]]}},"alternative-id":["3095"],"URL":"https:\/\/doi.org\/10.1007\/s11548-024-03095-1","relation":{},"ISSN":["1861-6429"],"issn-type":[{"value":"1861-6429","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,3,21]]},"assertion":[{"value":"14 June 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 February 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 March 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"For this type of study, formal consent is not required.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"This article does not contain patient data.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed consent"}}]}}