{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T22:42:33Z","timestamp":1759444953492,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030609450"},{"type":"electronic","value":"9783030609467"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-60946-7_6","type":"book-chapter","created":{"date-parts":[[2020,10,3]],"date-time":"2020-10-03T16:02:29Z","timestamp":1601740949000},"page":"54-63","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning Representations of Endoscopic Videos to Detect Tool Presence Without Supervision"],"prefix":"10.1007","author":[{"given":"David Z.","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Masaru","family":"Ishii","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Russell H.","family":"Taylor","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gregory D.","family":"Hager","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ayushi","family":"Sinha","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,10,1]]},"reference":[{"key":"6_CR1","doi-asserted-by":"crossref","unstructured":"Attia, M., Hossny, M., Nahavandi, S., Asadi, H.: Surgical tool segmentation using a hybrid deep CNN-RNN auto encoder-decoder. In: 2017 IEEE International Conference on Systems, Man, and Cybernetics (SMC), pp. 3373\u20133378, October 2017","DOI":"10.1109\/SMC.2017.8123151"},{"key":"6_CR2","doi-asserted-by":"crossref","unstructured":"Cho, K., et al.: Learning phrase representations using RNN encoder-decoder for statistical machine translation. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1724\u20131734 (2014)","DOI":"10.3115\/v1\/D14-1179"},{"key":"6_CR3","doi-asserted-by":"publisher","unstructured":"Deng, J., Dong, W., Socher, R., Li, L., Kai, L., Li, F.-F.: Imagenet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255, June 2009. https:\/\/doi.org\/10.1109\/CVPR.2009.5206848","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"6_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"281","DOI":"10.1007\/978-3-030-00937-3_33","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2018","author":"R DiPietro","year":"2018","unstructured":"DiPietro, R., Hager, G.D.: Unsupervised learning for surgical motion by learning to predict the future. In: Frangi, A.F., Schnabel, J.A., Davatzikos, C., Alberola-L\u00f3pez, C., Fichtinger, G. (eds.) MICCAI 2018. LNCS, vol. 11073, pp. 281\u2013288. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-00937-3_33"},{"key":"6_CR5","doi-asserted-by":"crossref","unstructured":"DiPietro, R., et al.: Recognizing surgical activities with recurrent neural networks. In: Medical Image Computing & Computer-Assisted Intervention, pp. 551\u2013558 (2016)","DOI":"10.1007\/978-3-319-46720-7_64"},{"key":"6_CR6","unstructured":"Ephrat, M.: Acute sinusitis in HD (2013). www.youtube.com\/watch?v=6niL7Poc_qQ"},{"key":"6_CR7","doi-asserted-by":"crossref","unstructured":"Garc\u00eda-Peraza-Herrera, L.C., et al.: Real-time segmentation of non-rigid surgical tools based on deep learning and tracking. In: Computer-Assisted and Robotic Endoscopy (CARE), pp. 84\u201395 (2017)","DOI":"10.1007\/978-3-319-54057-3_8"},{"key":"6_CR8","doi-asserted-by":"publisher","first-page":"2451","DOI":"10.1162\/089976600300015015","volume":"12","author":"FA Gers","year":"2000","unstructured":"Gers, F.A., Schmidhuber, J., Cummins, F.A.: Learning to forget: continual prediction with LSTM. Neural Comput. 12, 2451\u20132471 (2000)","journal-title":"Neural Comput."},{"issue":"8","key":"6_CR9","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"issue":"1","key":"6_CR10","first-page":"1593","volume":"15","author":"MD Hoffman","year":"2014","unstructured":"Hoffman, M.D., Gelman, A.: The No-U-turn sampler: adaptively setting path lengths in hamiltonian monte carlo. J. Mach. Learn. Res. 15(1), 1593\u20131623 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"6_CR11","doi-asserted-by":"crossref","unstructured":"Jin, A., Yeung, S., Jopling, J., Krause, J., Azagury, D., Milstein, A., Fei-Fei, L.: Tool detection and operative skill assessment in surgical videos using region-based convolutional neural networks. In: IEEE Winter Conference on Applications of Computer Vision (2018)","DOI":"10.1109\/WACV.2018.00081"},{"key":"6_CR12","unstructured":"Karen Simonyan, A.Z.: Very deep convolutional networks for large-scale image recognition. ArXiv abs\/1409.1556 (2014)"},{"key":"6_CR13","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. arXiv:1412.6980 (2014)"},{"key":"6_CR14","unstructured":"Kingma, D.P., Welling, M.: Auto-Encoding Variational Bayes. arXiv:1312.6114 (2013)"},{"key":"6_CR15","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: Imagenet classification with deep convolutional neural networks. In: Pereira, F., Burges, C.J.C., Bottou, L., Weinberger, K.Q. (eds.) Advances in Neural Information Processing Systems, vol. 25, pp. 1097\u20131105. Curran Associates, Inc. (2012). http:\/\/papers.nips.cc\/paper\/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf"},{"key":"6_CR16","doi-asserted-by":"crossref","unstructured":"Lea, C., Vidal, R., Hager, G.D.: Learning convolutional action primitives for fine-grained action recognition. In: 2016 IEEE International Conference on Robotics and Automation (ICRA), pp. 1642\u20131649, May 2016","DOI":"10.1109\/ICRA.2016.7487305"},{"key":"6_CR17","doi-asserted-by":"crossref","unstructured":"Liu, X., et al.: Self-supervised learning for dense depth estimation in monocular endoscopy. In: Computer Assisted Robotic Endoscopy (CARE), pp. 128\u2013138 (2018)","DOI":"10.1007\/978-3-030-01201-4_15"},{"issue":"9","key":"6_CR18","doi-asserted-by":"publisher","first-page":"1435","DOI":"10.1007\/s11548-015-1238-6","volume":"10","author":"A Malpani","year":"2015","unstructured":"Malpani, A., Vedula, S.S., Chen, C.C.G., Hager, G.D.: A study of crowdsourced segment-level surgical skill assessment using pairwise rankings. Int. J. Comput. Assisted Radiol. Surg. 10(9), 1435\u20131447 (2015). https:\/\/doi.org\/10.1007\/s11548-015-1238-6","journal-title":"Int. J. Comput. Assisted Radiol. Surg."},{"key":"6_CR19","unstructured":"Murphy, K.P.: Machine Learning: A Probabilistic Perspective. MIT Press, Cambridge (2012)"},{"key":"6_CR20","unstructured":"Pakhomov, D., Premachandran, V., Allan, M., Azizian, M., Navab, N.: Deep Residual Learning for Instrument Segmentation in Robotic Surgery. arXiv:1703.08580 (2017)"},{"key":"6_CR21","unstructured":"Paszke, A., et al.: Automatic differentiation in pytorch. In: NIPS-W (2017)"},{"key":"6_CR22","unstructured":"Raju, A., Wang, S., Huang, J.: M2cai surgical tool detection challenge report (2016)"},{"key":"6_CR23","unstructured":"Sahu, M., Mukhopadhyay, A., Szengel, A., Zachow, S.: Tool and phase recognition using contextual CNN features. ArXiv abs\/1610.08854 (2016)"},{"key":"6_CR24","doi-asserted-by":"crossref","unstructured":"Shvets, A.A., Rakhlin, A., Kalinin, A.A., Iglovikov, V.I.: Automatic instrument segmentation in robot-assisted surgery using deep learning. In: 17th IEEE International Conference on Machine Learning and Applications (ICMLA), pp. 624\u2013628 (2018)","DOI":"10.1109\/ICMLA.2018.00100"},{"key":"6_CR25","unstructured":"Srivastava, N., Mansimov, E., Salakhutdinov, R.: Unsupervised learning of video representations using LSTMS. In: Proceedings 32nd International Conference on International Conference on Machine Learning. ICML 2015, vol. 37, pp. 843\u2013852. JMLR.org (2015)"},{"key":"6_CR26","doi-asserted-by":"crossref","unstructured":"Stan Development Team: PyStan: the Python interface to Stan, Version 2.17.1.0. (2018). http:\/\/mc-stan.org","DOI":"10.2478\/msd-2018-0003"},{"key":"6_CR27","doi-asserted-by":"crossref","unstructured":"Szegedy, C., et al.: Going deeper with convolutions. In: Computer Vision and Pattern Recognition (CVPR) (2015). http:\/\/arxiv.org\/abs\/1409.4842","DOI":"10.1109\/CVPR.2015.7298594"},{"issue":"7","key":"6_CR28","doi-asserted-by":"publisher","first-page":"2253","DOI":"10.1007\/s00464-013-2973-9","volume":"27","author":"C Tsui","year":"2013","unstructured":"Tsui, C., Klein, R., Garabrant, M.: Minimally invasive surgery: national trends in adoption and future directions for hospital strategy. Surgical Endoscopy 27(7), 2253\u20132257 (2013)","journal-title":"Surgical Endoscopy"},{"key":"6_CR29","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1109\/TMI.2016.2593957","volume":"36","author":"AP Twinanda","year":"2016","unstructured":"Twinanda, A.P., Shehata, S., Mutter, D., Marescaux, J., de Mathelin, M., Padoy, N.: Endonet: a deep architecture for recognition tasks on laparoscopic videos. IEEE Trans. Med. Imag. 36, 86\u201397 (2016)","journal-title":"IEEE Trans. Med. Imag."},{"key":"6_CR30","unstructured":"Zhao, S., Song, J., Ermon, S.: InfoVAE: Information Maximizing Variational Autoencoders. arXiv:1706.02262 (2017)"},{"key":"6_CR31","unstructured":"Zhu, M.: Recall, precision and average precision. In: Department of Statistics and Actuarial Science, University of Waterloo, Waterloo 2, p. 30 (2004)"}],"container-title":["Lecture Notes in Computer Science","Multimodal Learning for Clinical Decision Support and Clinical Image-Based Procedures"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-60946-7_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T22:05:23Z","timestamp":1759442723000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-60946-7_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030609450","9783030609467"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-60946-7_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"1 October 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CLIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Workshop on Clinical Image-Based Procedures","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lima","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Peru","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"clip2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/miccai-clip.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}