{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,31]],"date-time":"2026-07-31T22:36:52Z","timestamp":1785537412659,"version":"3.56.0"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031733758","type":"print"},{"value":"9783031733765","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,9]],"date-time":"2024-10-09T00:00:00Z","timestamp":1728432000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,9]],"date-time":"2024-10-09T00:00:00Z","timestamp":1728432000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73376-5_3","type":"book-chapter","created":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T10:12:01Z","timestamp":1728382321000},"page":"26-36","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Classification of\u00a0Endoscopy and\u00a0Video Capsule Images Using CNN-Transformer 
Model"],"prefix":"10.1007","author":[{"given":"Aliza","family":"Subedi","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Smriti","family":"Regmi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nisha","family":"Regmi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bhumi","family":"Bhusal","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ulas","family":"Bagci","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Debesh","family":"Jha","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,10,9]]},"reference":[{"key":"3_CR1","unstructured":"Abadi, M., et\u00a0al.: Tensorflow: large-scale machine learning on heterogeneous distributed systems. arXiv preprint arXiv:1603.04467 (2016)"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Afriyie, Y., A.\u00a0Weyori, B., A.\u00a0Opoku, A.: Gastrointestinal tract disease recognition based on denoising capsule network. Cogent Engineering 9(1), 2142072 (2022)","DOI":"10.1080\/23311916.2022.2142072"},{"key":"3_CR3","doi-asserted-by":"crossref","unstructured":"Ahmed, A.: Classification of gastrointestinal images based on transfer learning and denoising convolutional neural networks. In: Proceedings of International Conference on Data Science and Applications: ICDSA 2021, vol. 1, pp. 631\u2013639 (2022)","DOI":"10.1007\/978-981-16-5120-5_48"},{"key":"3_CR4","doi-asserted-by":"crossref","unstructured":"Alom, M.Z., Hasan, M., Yakopcic, C., Taha, T.M., Asari, V.K.: Recurrent residual convolutional neural network based on u-net (r2u-net) for medical image segmentation. 
arXiv preprint arXiv:1802.06955 (2018)","DOI":"10.1109\/NAECON.2018.8556686"},{"key":"3_CR5","unstructured":"Chang, Y.Y., et al.: Deep learning-based endoscopic anatomy classification: an accelerated approach for data preparation and model validation. Surgical Endoscopy, pp. 1\u201311 (2021)"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Chollet, F.: Xception: Deep learning with depthwise separable convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1251\u20131258 (2017)","DOI":"10.1109\/CVPR.2017.195"},{"issue":"15","key":"3_CR7","doi-asserted-by":"publisher","first-page":"3783","DOI":"10.3390\/cancers15153783","volume":"15","author":"CK Chou","year":"2023","unstructured":"Chou, C.K., Nguyen, H.T., Wang, Y.K., Chen, T.H., Wu, I.C., Huang, C.W., Wang, H.C.: Preparing well for esophageal endoscopic detection using a hybrid model and transfer learning. Cancers 15(15), 3783 (2023)","journal-title":"Cancers"},{"key":"3_CR8","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"3_CR9","doi-asserted-by":"crossref","unstructured":"Gamage, C., Wijesinghe, I., Chitraranjan, C., Perera, I.: Gi-net: anomalies classification in gastrointestinal tract through endoscopic imagery with deep learning. In: 2019 Moratuwa Engineering Research Conference (MERCon), pp. 66\u201371 (2019)","DOI":"10.1109\/MERCon.2019.8818929"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 
770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"3_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"630","DOI":"10.1007\/978-3-319-46493-0_38","volume-title":"Computer Vision \u2013 ECCV 2016","author":"K He","year":"2016","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Identity mappings in deep residual networks. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 630\u2013645. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_38"},{"key":"3_CR12","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Van Der\u00a0Maaten, L., Weinberger, K.Q.: Densely connected convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4700\u20134708 (2017)","DOI":"10.1109\/CVPR.2017.243"},{"key":"3_CR13","doi-asserted-by":"crossref","unstructured":"Jha, D., et\u00a0al.: Gastrovision: A multi-class endoscopy image dataset for computer aided gastrointestinal disease detection. arXiv preprint arXiv:2307.08140 (2023)","DOI":"10.1007\/978-3-031-47679-2_10"},{"key":"3_CR14","doi-asserted-by":"crossref","unstructured":"Li, Q., Cai, W., Wang, X., Zhou, Y., Feng, D.D., Chen, M.: Medical image classification with convolutional neural network. In: 2014 13th International Conference on Control Automation Robotics & Vision (ICARCV), pp. 844\u2013848 (2014)","DOI":"10.1109\/ICARCV.2014.7064414"},{"key":"3_CR15","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"3_CR16","doi-asserted-by":"publisher","unstructured":"Lopez-Tiro, F., et al.: Boosting kidney stone identification in endoscopic images using two-step transfer learning. 
In: Mexican International Conference on Artificial Intelligence, pp. 131\u2013141. Springer (2023). https:\/\/doi.org\/10.1007\/978-3-031-47640-2_11","DOI":"10.1007\/978-3-031-47640-2_11"},{"key":"3_CR17","unstructured":"Matsoukas, C., Haslum, J.F., S\u00f6derberg, M., Smith, K.: Is it time to replace cnns with transformers for medical images? arXiv preprint arXiv:2108.09038 (2021)"},{"key":"3_CR18","doi-asserted-by":"crossref","unstructured":"Papageorgiou, C.P., Oren, M., Poggio, T.: A general framework for object detection. In: Sixth International Conference on Computer Vision (IEEE Cat. No. 98CH36271), pp. 555\u2013562 (1998)","DOI":"10.1109\/ICCV.1998.710772"},{"key":"3_CR19","doi-asserted-by":"crossref","unstructured":"Pogorelov, K., et al.: Kvasir: a multi-class image dataset for computer aided gastrointestinal disease detection. In: Proceedings of the 8th ACM on Multimedia Systems Conference, pp. 164\u2013169 (2017)","DOI":"10.1145\/3083187.3083212"},{"key":"3_CR20","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., Huang, Z., Karpathy, A., Khosla, A., Bernstein, M., et al.: Imagenet large scale visual recognition challenge. Int. J. Comput. Vision 115, 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vision"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.C.: Mobilenetv2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"issue":"1","key":"3_CR22","doi-asserted-by":"publisher","first-page":"12","DOI":"10.3322\/caac.21820","volume":"74","author":"RL Siegel","year":"2024","unstructured":"Siegel, R.L., Giaquinto, A.N., Jemal, A.: Cancer statistics, 2024. CA Cancer J. Clin. 
74(1), 12\u201349 (2024)","journal-title":"CA Cancer J. Clin."},{"key":"3_CR23","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"issue":"1","key":"3_CR24","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1038\/s41597-021-00920-z","volume":"8","author":"PH Smedsrud","year":"2021","unstructured":"Smedsrud, P.H., et al.: Kvasir-capsule, a video capsule endoscopy dataset. Sci. Data 8(1), 142 (2021)","journal-title":"Sci. Data"},{"key":"3_CR25","doi-asserted-by":"crossref","unstructured":"Srivastava, A., Tomar, N.K., Bagci, U., Jha, D.: Video capsule endoscopy classification using focal modulation guided convolutional neural network. In: Proceedings of the IEEE 35th International Symposium on Computer-Based Medical Systems (CBMS), pp. 323\u2013328 (2022)","DOI":"10.1109\/CBMS55023.2022.00064"},{"key":"3_CR26","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z.: Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2818\u20132826 (2016)","DOI":"10.1109\/CVPR.2016.308"},{"key":"3_CR27","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2023.106723","volume":"157","author":"S Tang","year":"2023","unstructured":"Tang, S., Yu, X., Cheang, C.F., Liang, Y., Zhao, P., Yu, H.H., Choi, I.C.: Transformer-based multi-task learning for classification and segmentation of gastrointestinal tract endoscopic images. Comput. Biol. Med. 157, 106723 (2023)","journal-title":"Comput. Biol. Med."},{"key":"3_CR28","unstructured":"Thambawita, V., Jha, D., Riegler, M., Halvorsen, P., Hammer, H.L., Johansen, H.D., Johansen, D.: The medico-task 2018: Disease detection in the gastrointestinal tract using global features and deep learning. 
In: Proceedings of the Medico 2018 (2018)"},{"key":"3_CR29","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers & distillation through attention. In: Proceedings of the International Conference on Machine Learning, pp. 10347\u201310357 (2021)"},{"issue":"6","key":"3_CR30","doi-asserted-by":"publisher","first-page":"1445","DOI":"10.1007\/s10278-022-00666-z","volume":"35","author":"M Usman","year":"2022","unstructured":"Usman, M., Zia, T., Tariq, A.: Analyzing transfer learning of vision transformers for interpreting chest radiography. J. Digit. Imaging 35(6), 1445\u20131462 (2022)","journal-title":"J. Digit. Imaging"}],"container-title":["Lecture Notes in Computer Science","Cancer Prevention, Detection, and Intervention"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73376-5_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T07:05:10Z","timestamp":1732863910000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73376-5_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,9]]},"ISBN":["9783031733758","9783031733765"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73376-5_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,9]]},"assertion":[{"value":"9 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CaPTion","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"MICCAI Workshop on Cancer Prevention through Early 
Detection","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Marrakesh","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Morocco","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"caption2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/caption-workshop.github.io\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}