{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T12:07:22Z","timestamp":1778760442354,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T00:00:00Z","timestamp":1776902400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"NIH \/ NIGMS via NM-INBRE","award":["P20GM103451"],"award-info":[{"award-number":["P20GM103451"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,23]]},"DOI":"10.1145\/3746467.3801528","type":"proceedings-article","created":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T11:06:32Z","timestamp":1778756792000},"page":"93-102","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Unified Generative Multimodal Modeling of Surgical Gestures and Phases via Transfer Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-1209-8344","authenticated-orcid":false,"given":"Hemanth Reddy","family":"Madduri","sequence":"first","affiliation":[{"name":"Computer Science, New Mexico Institute of Mining and Technology, Socorro, NM, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-8935-0720","authenticated-orcid":false,"given":"Xin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Texas Tech University, Lubbock, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6727-5867","authenticated-orcid":false,"given":"Jun","family":"Zheng","sequence":"additional","affiliation":[{"name":"New Mexico Institute of Mining and Technology, Socorro, NM, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8926-1941","authenticated-orcid":false,"given":"Huixin","family":"Zhan","sequence":"additional","affiliation":[{"name":"New Mexico Institute of Mining and Technology, Socorro, NM, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,5,14]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBME.2016.2647680"},{"key":"e_1_3_2_1_2_1","unstructured":"Guillaume Alain and Yoshua Bengio. 2018. Understanding Intermediate Layers Using Linear Classifier Probes. arXiv:1610.01644 https:\/\/arxiv.org\/abs\/1610.01644"},{"key":"e_1_3_2_1_3_1","volume-title":"Navigating the Synthetic Realm: Harnessing Diffusion-Based Models for Laparoscopic Text-to-Image Generation","author":"Allmendinger Simeon","unstructured":"Simeon Allmendinger, Patrick Hemmer, Moritz Queisner, Igor Sauer, Leopold M\u00fcller, Johannes Jakubik, Michael V\u00f6ssing, and Niklas K\u00fchl. 2024. Navigating the Synthetic Realm: Harnessing Diffusion-Based Models for Laparoscopic Text-to-Image Generation. Springer Nature Switzerland, Cham, Switzerland, 31\u201346."},{"key":"e_1_3_2_1_4_1","volume-title":"2025 International Symposium on Medical Robotics (ISMR). IEEE","author":"Atoum Jumanh","year":"2025","unstructured":"Jumanh Atoum, Garrison L.H. Johnston, Nabil Simaan, and Jie Ying Wu. 2025. Multi-Modal Gesture Recognition from Video and Surgical Tool Pose Information via Motion Invariants. In 2025 International Symposium on Medical Robotics (ISMR). IEEE, Atlanta, GA, USA, 150\u2013156."},{"key":"e_1_3_2_1_5_1","volume-title":"Structured Denoising Diffusion Models in Discrete State-Spaces. In Proceedings of the 35th International Conference on Neural Information Processing Systems (NIPS '21","volume":"1376","author":"Austin Jacob","unstructured":"Jacob Austin, Daniel D. Johnson, Jonathan Ho, Daniel Tarlow, and Rianne van den Berg. 2021. Structured Denoising Diffusion Models in Discrete State-Spaces. In Proceedings of the 35th International Conference on Neural Information Processing Systems (NIPS '21, Vol. 34). Curran Associates Inc., Online, Article 1376, 13 pages."},{"key":"e_1_3_2_1_6_1","volume-title":"Chinedu Innocent Nwoye, and Nicolas Padoy","author":"Bhat Aditya","year":"2025","unstructured":"Aditya Bhat, Rupak Bose, Chinedu Innocent Nwoye, and Nicolas Padoy. 2025. SimGen: A Diffusion-Based Framework for Simultaneous Surgical Image and Segmentation Mask Generation. arXiv:2501.09008 https:\/\/arxiv.org\/abs\/2501.09008"},{"key":"e_1_3_2_1_7_1","volume-title":"Medical Image Computing and Computer Assisted Intervention - MICCAI","author":"Biagini Diego","year":"2025","unstructured":"Diego Biagini, Nassir Navab, and Azade Farshad. 2026. HieraSurg: Hierarchy-Aware Diffusion Model for Surgical Video Generation. In Medical Image Computing and Computer Assisted Intervention - MICCAI 2025. Springer Nature Switzerland, Daejeon, South Korea, 310\u2013319."},{"key":"e_1_3_2_1_8_1","unstructured":"Joseph Cho Samuel Schmidgall Cyril Zakka Mrudang Mathur Dhamanpreet Kaur Rohan Shad and William Hiesinger. 2024. SurGen: Text-Guided Diffusion Model for Surgical Video Generation. arXiv:2408.14028 https:\/\/arxiv.org\/abs\/2408.14028"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","first-page":"3110","DOI":"10.1007\/s10278-025-01416-7","article-title":"Multi-class Classification of Retinal Eye Diseases from Ophthalmoscopy Images Using Transfer Learning-Based Vision Transformers","volume":"38","author":"Cutur Elif Setenay","year":"2025","unstructured":"Elif Setenay Cutur and Neslihan Gokmen Inan. 2025. Multi-class Classification of Retinal Eye Diseases from Ophthalmoscopy Images Using Transfer Learning-Based Vision Transformers. Journal of Imaging Informatics in Medicine 38, 5 (2025), 3110\u20133124.","journal-title":"Journal of Imaging Informatics in Medicine"},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-Training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers). Association for Computational Linguistics, Minneapolis, Minnesota, 4171\u20134186."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","first-page":"1217","DOI":"10.1007\/s11548-019-01995-1","article-title":"Video-Based Surgical Skill Assessment Using 3D Convolutional Neural Networks","volume":"14","author":"Funke Isabel","year":"2019","unstructured":"Isabel Funke, S\u00f6ren Torge Mees, J\u00fcrgen Weitz, and Stefanie Speidel. 2019. Video-Based Surgical Skill Assessment Using 3D Convolutional Neural Networks. International Journal of Computer Assisted Radiology and Surgery 14, 7 (2019), 1217\u20131225.","journal-title":"International Journal of Computer Assisted Radiology and Surgery"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","first-page":"S55","DOI":"10.1097\/00001888-199601000-00043","article-title":"Global Rating Scales in Residency Education","volume":"71","author":"Gray Jean D","year":"1996","unstructured":"Jean D Gray. 1996. Global Rating Scales in Residency Education. Academic Medicine 71, 1 (1996), S55\u201363.","journal-title":"Academic Medicine"},{"key":"e_1_3_2_1_13_1","volume-title":"Deep Residual Learning for Image Recognition. In 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). IEEE","author":"He Kaiming","year":"2016","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2016. Deep Residual Learning for Image Recognition. In 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, Las Vegas, NV, USA, 770\u2013778."},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the 35th International Conference on Neural Information Processing Systems (NIPS '21","volume":"953","author":"Hoogeboom Emiel","year":"2021","unstructured":"Emiel Hoogeboom, Didrik Nielsen, Priyank Jaini, Patrick Forr\u00e9, and Max Welling. 2021. Argmax Flows and Multinomial Diffusion: Learning Categorical Distributions. In Proceedings of the 35th International Conference on Neural Information Processing Systems (NIPS '21, Vol. 34). Curran Associates Inc., Online, Article 953, 12 pages."},{"key":"e_1_3_2_1_15_1","volume-title":"Deep Generative Models","author":"Iliash Ivan","unstructured":"Ivan Iliash, Simeon Allmendinger, Felix Meissen, Niklas K\u00fchl, and Daniel R\u00fcckert. 2025. Interactive Generation of Laparoscopic Videos with Diffusion Models. In Deep Generative Models. Springer Nature Switzerland, Marrakesh, Morocco, 109\u2013118."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41551-023-01010-8"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","first-page":"187","DOI":"10.1007\/s10120-023-01450-w","article-title":"Automatic Surgical Phase Recognition-Based Skill Assessment in Laparoscopic Distal Gastrectomy Using Multicenter Videos","volume":"27","author":"Komatsu Masaru","year":"2024","unstructured":"Masaru Komatsu, Daichi Kitaguchi, Masahiro Yura, Nobuyoshi Takeshita, Mitsumasa Yoshida, Masayuki Yamaguchi, Hibiki Kondo, Takahiro Kinoshita, and Masaaki Ito. 2024. Automatic Surgical Phase Recognition-Based Skill Assessment in Laparoscopic Distal Gastrectomy Using Multicenter Videos. Gastric Cancer 27, 1 (2024), 187\u2013196.","journal-title":"Gastric Cancer"},{"key":"e_1_3_2_1_18_1","volume-title":"Advances in Computer Graphics","author":"Ma Le","unstructured":"Le Ma, Hangyeol Kang, Nadia Magnenat-Thalmann, and Katarzyna Wac. 2025. TransSG: A Spatial-Temporal Transformer for Surgical Gesture Recognition. In Advances in Computer Graphics. Springer Nature Switzerland, Geneva, Switzerland, 151\u2013165."},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the Fourth Conference on Medical Imaging with Deep Learning (Proceedings of Machine Learning Research","volume":"544","author":"Neimark Daniel","year":"2021","unstructured":"Daniel Neimark, Omri Bar, Maya Zohar, Gregory D. Hager, and Dotan Asselmann. 2021. \"Train One, Classify One, Teach One\" - Cross-Surgery Transfer Learning for Surgical Step Recognition. In Proceedings of the Fourth Conference on Medical Imaging with Deep Learning (Proceedings of Machine Learning Research, Vol. 143). PMLR, L\u00fcbeck, Germany, 532\u2013544."},{"key":"e_1_3_2_1_20_1","volume-title":"C","author":"Nwoye Chinedu Innocent","year":"2025","unstructured":"Chinedu Innocent Nwoye, Rupak Bose, Kareem Elgohary, Lorenzo Arboit, Giorgio Carlino, Jo\u00ebl L. Lavanchy, Pietro Mascagni, and Nicolas Padoy. 2025. Surgical Text-to-Image Generation. Pattern Recognition Letters 190, C (2025), 73\u201380."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2009.191"},{"key":"e_1_3_2_1_22_1","volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas K\u00f6pf, Edward Yang, Zach DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. Curran Associates Inc., Red Hook, NY, USA, 8026\u20138037."},{"key":"e_1_3_2_1_23_1","volume-title":"Recognition and Prediction of Surgical Gestures and Trajectories Using Transformer Models in Robot-Assisted Surgery. In 2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","author":"Shi Chang","unstructured":"Chang Shi, Yi Zheng, and Ann Majewicz Fey. 2022. Recognition and Prediction of Surgical Gestures and Trajectories Using Transformer Models in Robot-Assisted Surgery. In 2022 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS). IEEE, Kyoto, Japan, 8017\u20138024."},{"key":"e_1_3_2_1_24_1","volume-title":"Muddit: Liberating Generation Beyond Text-to-Image with a Unified Discrete Diffusion Model. arXiv:2505.23606 https:\/\/arxiv.org\/abs\/2505.23606","author":"Shi Qingyu","year":"2025","unstructured":"Qingyu Shi, Jinbin Bai, Zhuoran Zhao, Wenhao Chai, Kaidong Yu, Jianzong Wu, Shuangyong Song, Yunhai Tong, Xiangtai Li, Xuelong Li, and Shuicheng Yan. 2025. Muddit: Liberating Generation Beyond Text-to-Image with a Unified Discrete Diffusion Model. arXiv:2505.23606 https:\/\/arxiv.org\/abs\/2505.23606"},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the 32nd International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"2265","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep Unsupervised Learning Using Nonequilibrium Thermodynamics. In Proceedings of the 32nd International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 37). PMLR, Lille, France, 2256\u20132265."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2016.2535302"},{"key":"e_1_3_2_1_27_1","first-page":"3","article-title":"Multitask Learning 1997\u20132024: Part I Fundamentals","volume":"7","author":"Yu Jun","year":"2025","unstructured":"Jun Yu, Xiaokang Liu, Chongliang Luo, Rong Zhou, Yixin Liu, Jie Hu, Jianmin Chen, Ke Zhang, Dazheng Zhang, Yishan Shen, Eashan Adhikarla, Yutong Dai, Kai Zhang, Zhaoming Kong, Wenxuan Ye, Yilong Yin, Vinod Namboodiri, Brian Davison, Jason Moore, and Yong Chen. 2025. Multitask Learning 1997\u20132024: Part I Fundamentals. Harvard Data Science Review 7, 3 (jul 31 2025). https:\/\/hdsr.mitpress.mit.edu\/pub\/7fcc3jhv.","journal-title":"Harvard Data Science Review"},{"key":"e_1_3_2_1_28_1","volume-title":"Moore","author":"Zhan Huixin","year":"2026","unstructured":"Huixin Zhan and Jason H. Moore. 2026. Agentic Surgical AI: Surgeon Style Fingerprinting and Privacy Risk Quantification via Discrete Diffusion in a Vision-Language-Action Framework. In AI for Clinical Applications. Springer Nature Switzerland, Daejeon, South Korea, 13\u201322."}],"event":{"name":"ACMSE 2026: 2026 ACM Southeast Conference","location":"Troy University Troy AL USA","acronym":"ACMSE 2026"},"container-title":["Proceedings of the 2026 ACM Southeast Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746467.3801528","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T11:07:54Z","timestamp":1778756874000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746467.3801528"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,23]]},"references-count":28,"alternative-id":["10.1145\/3746467.3801528","10.1145\/3746467"],"URL":"https:\/\/doi.org\/10.1145\/3746467.3801528","relation":{},"subject":[],"published":{"date-parts":[[2026,4,23]]},"assertion":[{"value":"2026-05-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}