{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,23]],"date-time":"2025-10-23T21:05:54Z","timestamp":1761253554174,"version":"3.40.3"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030781903"},{"type":"electronic","value":"9783030781910"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-78191-0_41","type":"book-chapter","created":{"date-parts":[[2021,6,20]],"date-time":"2021-06-20T06:02:29Z","timestamp":1624168949000},"page":"533-544","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Future Frame Prediction for Robot-Assisted Surgery"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6517-7630","authenticated-orcid":false,"given":"Xiaojie","family":"Gao","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3775-3877","authenticated-orcid":false,"given":"Yueming","family":"Jin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9399-3475","authenticated-orcid":false,"given":"Zixu","family":"Zhao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3416-9950","authenticated-orcid":false,"given":"Qi","family":"Dou","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3055-5034","authenticated-orcid":false,"given":"Pheng-Ann","family":"Heng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,6,14]]},"reference":[{"issue":"9","key":"41_CR1","doi-asserted-by":"publisher","first-page":"2025","DOI":"10.1109\/TBME.2016.2647680","volume":"64","author":"N Ahmidi","year":"2017","unstructured":"Ahmidi, N., et al.: A dataset and benchmarks for segmentation and recognition of gestures in robotic surgery. IEEE. Trans. Biomed. Eng. 64(9), 2025\u20132041 (2017)","journal-title":"IEEE. Trans. Biomed. Eng."},{"key":"41_CR2","unstructured":"Babaeizadeh, M., Finn, C., Erhan, D., Campbell, R.H., Levine, S.: Stochastic variational video prediction. In: ICLR (2018)"},{"key":"41_CR3","unstructured":"Bhatia, B., Oates, T., Xiao, Y., Hu, P.: Real-time identification of operating room state from video. In: AAAI (2007)"},{"issue":"1","key":"41_CR4","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1016\/j.ijmedinf.2006.01.003","volume":"76","author":"N Bricon-Souf","year":"2007","unstructured":"Bricon-Souf, N., Newman, C.R.: Context awareness in health care: a review. Int. J. Med. Inform. 76(1), 2\u201312 (2007)","journal-title":"Int. J. Med. Inform."},{"key":"41_CR5","doi-asserted-by":"crossref","unstructured":"Colleoni, E., Moccia, S., Du, X., De Momi, E., Stoyanov, D.: Deep learning based robotic tool detection and articulation estimation with spatio-temporal layers. RA-L (2019)","DOI":"10.1109\/LRA.2019.2917163"},{"key":"41_CR6","unstructured":"Denton, E., Birodkar, V.: Unsupervised learning of disentangled representations from video. In: NurIPS (2017)"},{"key":"41_CR7","unstructured":"Denton, E., Fergus, R.: Stochastic video generation with a learned prior. In: ICML (2018)"},{"key":"41_CR8","unstructured":"Finn, C., Goodfellow, I., Levine, S.: Unsupervised learning for physical interaction through video prediction. In: NurIPS (2016)"},{"issue":"4","key":"41_CR9","doi-asserted-by":"publisher","first-page":"396","DOI":"10.1002\/rcs.1468","volume":"9","author":"C Freschi","year":"2013","unstructured":"Freschi, C., Ferrari, V., Melfi, F., Ferrari, M., Mosca, F., Cuschieri, A.: Technical review of the da Vinci surgical telemanipulator. Int. J. Med. Robot. 9(4), 396\u2013406 (2013)","journal-title":"Int. J. Med. Robot."},{"key":"41_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"467","DOI":"10.1007\/978-3-030-32254-0_52","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2019","author":"I Funke","year":"2019","unstructured":"Funke, I., Bodenstedt, S., Oehme, F., von Bechtolsheim, F., Weitz, J., Speidel, S.: Using 3D convolutional neural networks to learn spatiotemporal features for automatic surgical gesture recognition in video. In: Shen, D., et al. (eds.) MICCAI 2019. LNCS, vol. 11768, pp. 467\u2013475. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-32254-0_52"},{"key":"41_CR11","doi-asserted-by":"crossref","unstructured":"Gao, X., Jin, Y., Dou, Q., Heng, P.A.: Automatic gesture recognition in robot-assisted surgery with reinforcement learning and tree search. In: ICRA (2020)","DOI":"10.1109\/ICRA40945.2020.9196674"},{"key":"41_CR12","unstructured":"Gao, Y., et al.: JHU-ISI gesture and skill assessment working set (JIGSAWS): a surgical activity dataset for human motion modeling. In: MICCAI Workshop: M2CAI (2014)"},{"key":"41_CR13","doi-asserted-by":"crossref","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Computation (1997)","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"41_CR14","doi-asserted-by":"crossref","unstructured":"Islam, M., Atputharuban, D.A., Ramesh, R., Ren, H.: Real-time instrument segmentation in robotic surgery using auxiliary supervised deep adversarial learning. RA-L (2019)","DOI":"10.1109\/LRA.2019.2900854"},{"key":"41_CR15","doi-asserted-by":"crossref","unstructured":"Jin, B., et al.: Exploring spatial-temporal multi-frequency analysis for high-fidelity and temporal-consistency video prediction. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00461"},{"key":"41_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"440","DOI":"10.1007\/978-3-030-32254-0_49","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2019","author":"Y Jin","year":"2019","unstructured":"Jin, Y., Cheng, K., Dou, Q., Heng, P.-A.: Incorporating temporal prior from motion flow for instrument segmentation in minimally invasive surgery video. In: Shen, D., et al. (eds.) MICCAI 2019. LNCS, vol. 11768, pp. 440\u2013448. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-32254-0_49"},{"key":"41_CR17","doi-asserted-by":"crossref","unstructured":"Jin, Y., et al.: SV-RCNet: workflow recognition from surgical videos using recurrent convolutional network. IEEE Trans. Med, Imaging 37, 1114\u20131126 (2017)","DOI":"10.1109\/TMI.2017.2787657"},{"key":"41_CR18","doi-asserted-by":"crossref","unstructured":"Jin, Y., et al.: Multi-task recurrent convolutional network with correlation loss for surgical video analysis. Med. Image Anal. 59, 101572 (2020)","DOI":"10.1016\/j.media.2019.101572"},{"key":"41_CR19","unstructured":"Kalchbrenner, N., et al.: Video pixel networks. In: ICML (2017)"},{"key":"41_CR20","doi-asserted-by":"crossref","unstructured":"Kim, Y., Nam, S., Cho, I., Kim, S.J.: Unsupervised keypoint learning for guiding class-conditional video prediction. In: NurIPS (2019)","DOI":"10.1186\/s13640-019-0478-8"},{"key":"41_CR21","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"41_CR22","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. In: ICLR (2014)"},{"key":"41_CR23","unstructured":"Kumar, M., et al.: VideoFlow: A Conditional Flow-Based Model for Stochastic Video Generation. In: ICLR (2020)"},{"key":"41_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1007\/978-3-030-00937-3_29","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2018","author":"D Liu","year":"2018","unstructured":"Liu, D., Jiang, T.: Deep reinforcement learning for surgical gesture segmentation and classification. In: Frangi, A.F., Schnabel, J.A., Davatzikos, C., Alberola-L\u00f3pez, C., Fichtinger, G. (eds.) MICCAI 2018. LNCS, vol. 11073, pp. 247\u2013255. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-00937-3_29"},{"key":"41_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"667","DOI":"10.1007\/978-3-030-00937-3_76","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2018","author":"F Milletari","year":"2018","unstructured":"Milletari, F., Rieke, N., Baust, M., Esposito, M., Navab, N.: CFCM: segmentation via coarse to fine context memory. In: Frangi, A.F., Schnabel, J.A., Davatzikos, C., Alberola-L\u00f3pez, C., Fichtinger, G. (eds.) MICCAI 2018. LNCS, vol. 11073, pp. 667\u2013674. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-00937-3_76"},{"key":"41_CR26","unstructured":"Minderer, M., Sun, C., Villegas, R., Cole, F., Murphy, K.P., Lee, H.: Unsupervised learning of object structure and dynamics from videos. In: NurIPS (2019)"},{"key":"41_CR27","unstructured":"Oh, J., Guo, X., Lee, H., Lewis, R.L., Singh, S.: Action-conditional video prediction using deep networks in Atari games. In: NurIPS (2015)"},{"key":"41_CR28","unstructured":"Shi, X., Chen, Z., Wang, H., Yeung, D.Y., Wong, W.K., Woo, W.C.: Convolutional lstm network: a machine learning approach for precipitation nowcasting. In: NurIPS (2015)"},{"key":"41_CR29","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. In: ICLR (2015)"},{"key":"41_CR30","unstructured":"Srivastava, N., Mansimov, E., Salakhudinov, R.: Unsupervised learning of video representations using LSTMs. In: ICML (2015)"},{"key":"41_CR31","doi-asserted-by":"crossref","unstructured":"Tanwani, A.K., Sermanet, P., Yan, A., Anand, R., Phielipp, M., Goldberg, K.: Motion2Vec: semi-Supervised Representation Learning from Surgical Videos. In: ICRA (2020)","DOI":"10.1109\/ICRA40945.2020.9197324"},{"key":"41_CR32","doi-asserted-by":"crossref","unstructured":"Tulyakov, S., Liu, M.Y., Yang, X., Kautz, J.: MoCoGAN: decomposing motion and content for video generation. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00165"},{"key":"41_CR33","volume-title":"EndoNet: a deep architecture for recognition tasks on laparoscopic videos","author":"AP Twinanda","year":"2016","unstructured":"Twinanda, A.P., Shehata, S., Mutter, D., Marescaux, J., De Mathelin, M., Padoy, N.: EndoNet: a deep architecture for recognition tasks on laparoscopic videos. IEEE Trans. Med, Imaging (2016)"},{"key":"41_CR34","unstructured":"Villegas, R., Pathak, A., Kannan, H., Erhan, D., Le, Q.V., Lee, H.: High fidelity video prediction with large stochastic recurrent neural networks. In: NurIPS (2019)"},{"key":"41_CR35","unstructured":"Villegas, R., Yang, J., Hong, S., Lin, X., Lee, H.: Decomposing motion and content for natural video sequence prediction. In: ICLR (2017)"},{"key":"41_CR36","unstructured":"Villegas, R., Yang, J., Zou, Y., Sohn, S., Lin, X., Lee, H.: Learning to generate long-term future via hierarchical prediction. In: ICML (2017)"}],"container-title":["Lecture Notes in Computer Science","Information Processing in Medical Imaging"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-78191-0_41","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,31]],"date-time":"2022-12-31T20:15:52Z","timestamp":1672517752000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-78191-0_41"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030781903","9783030781910"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-78191-0_41","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"14 June 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IPMI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Information Processing in Medical Imaging","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 June 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 June 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ipmi2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ipmi2021.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"200","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"59","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"30% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}