{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:35:58Z","timestamp":1778081758209,"version":"3.51.4"},"reference-count":297,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T00:00:00Z","timestamp":1767744000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T00:00:00Z","timestamp":1767744000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach. Intell. Res."],"published-print":{"date-parts":[[2026,2]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:p>\n                    Colonoscopy is currently one of the most sensitive screening methods for colorectal cancer. This study investigates the frontiers of intelligent colonoscopy techniques and their prospective implications for multimodal medical applications. With this goal, we begin by assessing the current data-centric and model-centric landscapes through four tasks for colonoscopic scene perception, including classification, detection, segmentation, and vision-language understanding. Our assessment reveals domain-specific challenges and underscores the need for further multimodal research in colonoscopy. To address these gaps, we establish three foundational initiatives: a large-scale multimodal instruction tuning dataset ColonINST, a colonoscopy-designed multimodal language model ColonGPT, and a multimodal benchmark. 
To facilitate continuous advancements in this rapidly evolving field, we provide a public website for the latest updates:\n                    <jats:ext-link xmlns:xlink=\"http:\/\/www.w3.org\/1999\/xlink\" xlink:href=\"https:\/\/github.com\/ai4colonoscopy\/IntelliScope\" ext-link-type=\"uri\">https:\/\/github.com\/ai4colonoscopy\/IntelliScope<\/jats:ext-link>\n                    .\n                  <\/jats:p>","DOI":"10.1007\/s11633-025-1597-6","type":"journal-article","created":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T10:53:50Z","timestamp":1767783230000},"page":"70-114","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Frontiers in Intelligent Colonoscopy"],"prefix":"10.1007","volume":"23","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7092-2877","authenticated-orcid":false,"given":"Ge-Peng","family":"Ji","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5792-127X","authenticated-orcid":false,"given":"Jingyi","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6431-1110","authenticated-orcid":false,"given":"Peng","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9343-9535","authenticated-orcid":false,"given":"Nick","family":"Barnes","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4263-3143","authenticated-orcid":false,"given":"Fahad 
Shahbaz","family":"Khan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9502-1749","authenticated-orcid":false,"given":"Salman","family":"Khan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5245-7518","authenticated-orcid":false,"given":"Deng-Ping","family":"Fan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,7]]},"reference":[{"issue":"10449","key":"1597_CR1","doi-asserted-by":"publisher","first-page":"294","DOI":"10.1016\/S0140-6736(24)00360-X","volume":"404","author":"C Eng","year":"2024","unstructured":"C. Eng, T. Yoshino, E. Ru\u00edz-Garc\u00eda, N. Mostafa, C. G. Cann, B. O\u2019Brian, A. Benny, R. O. Perez, C. Cremolini. Colorectal cancer. The Lancet, vol. 404, no. 10449, pp. 294\u2013310, 2024. DOI: https:\/\/doi.org\/10.1016\/S0140-6736(24)00360-X.","journal-title":"The Lancet"},{"issue":"1","key":"1597_CR2","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1053\/j.gastro.2022.03.007","volume":"163","author":"M B Wallace","year":"2022","unstructured":"M. B. Wallace, P. Sharma, P. Bhandari, J. East, G. Antonelli, R. Lorenzetti, M. Vieth, I. Speranza, M. Spadaccini, M. Desai, F. J. Lukens, G. Babameto, D. Batista, D. Singh, W. Palmer, F. Ramirez, R. Palmer, T. Lunsford, K. Ruff, E. Bird-Liebermann, V. Ciofoaia, S. Arndtz, D. Cangemi, K. Puddick, G. Derfus, A. S. Johal, M. Barawi, L. Longo, L. Moro, A. Repici, C. Hassan. Impact of artificial intelligence on miss rate of colorectal neoplasia. Gastroenterology, vol. 163, no. 1, pp. 295\u2013304, 2022. 
DOI: https:\/\/doi.org\/10.1053\/j.gastro.2022.03.007.","journal-title":"Gastroenterology"},{"key":"1597_CR3","doi-asserted-by":"publisher","first-page":"248","DOI":"10.1109\/CVPR.2009.5206848","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"J Deng","year":"2009","unstructured":"J. Deng, W. Dong, R. Socher, L. J. Li, K. Li, L. Fei-Fei. ImageNet: A large-scale hierarchical image database. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Miami, USA, pp. 248\u2013255, 2009. DOI: https:\/\/doi.org\/10.1109\/CVPR.2009.5206848."},{"key":"1597_CR4","volume-title":"GPT-4 technical report","author":"OpenAI","year":"2023","unstructured":"OpenAI. GPT-4 technical report, [Online], Available: https:\/\/arxiv.org\/abs\/2303.08774, 2023."},{"key":"1597_CR5","doi-asserted-by":"publisher","first-page":"11941","DOI":"10.1109\/ICCV51070.2023.01100","volume-title":"Proceedings of IEEE\/CVF International Conference on Computer Vision","author":"X Zhai","year":"2023","unstructured":"X. Zhai, B. Mustafa, A. Kolesnikov, L. Beyer. Sigmoid loss for language image pre-training. In Proceedings of IEEE\/CVF International Conference on Computer Vision, Paris, France, pp. 11941\u201311952, 2023. DOI: https:\/\/doi.org\/10.1109\/ICCV51070.2023.01100."},{"key":"1597_CR6","volume-title":"Textbooks are all you need II: phi-1.5 technical report","author":"Y Li","year":"2023","unstructured":"Y. Li, S. Bubeck, R. Eldan, A. Del Giorno, S. Gunasekar, Y. T. Lee. Textbooks are all you need II: phi-1.5 technical report, [Online], Available: https:\/\/arxiv.org\/abs\/2309.05463, 2023."},{"key":"1597_CR7","volume-title":"MiniGPT-v2: large language model as a unified interface for vision-language multi-task learning","author":"J Chen","year":"2023","unstructured":"J. Chen, D. Zhu, X. Shen, X. Li, Z. Liu, P. Zhang, R. Krishnamoorthi, V. Chandra, Y. Xiong, M. Elhoseiny. 
MiniGPT-v2: large language model as a unified interface for vision-language multi-task learning, [Online], Available: https:\/\/arxiv.org\/abs\/2310.09478, 2023."},{"key":"1597_CR8","doi-asserted-by":"publisher","first-page":"26286","DOI":"10.1109\/CVPR52733.2024.02484","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"H Liu","year":"2024","unstructured":"H. Liu, C. Li, Y. Li, Y. J. Lee. Improved baselines with visual instruction tuning. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Seattle, USA, pp. 26286\u201326296, 2024. DOI: https:\/\/doi.org\/10.1109\/CVPR52733.2024.02484."},{"key":"1597_CR9","volume-title":"Efficient multimodal learning from data-centric perspective","author":"M He","year":"2024","unstructured":"M. He, Y. Liu, B. Wu, J. Yuan, Y. Wang, T. Huang, B. Zhao. Efficient multimodal learning from data-centric perspective, [Online], Available: https:\/\/arxiv.org\/abs\/2402.11530, 2024."},{"key":"1597_CR10","doi-asserted-by":"publisher","unstructured":"V. B. S. Prasath. Polyp detection and segmentation from video capsule endoscopy: A review. Journal of Imaging, vol. 3, no. 1, Article number 1, 2017. DOI: https:\/\/doi.org\/10.3390\/jimaging3010001.","DOI":"10.3390\/jimaging3010001"},{"key":"1597_CR11","doi-asserted-by":"publisher","first-page":"233","DOI":"10.2316\/P.2017.852-031","volume-title":"Proceedings of the 13th IASTED International Conference on Biomedical Engineering","author":"B Taha","year":"2017","unstructured":"B. Taha, N. Werghi, J. Dias. Automatic polyp detection in endoscopy videos: A survey. In Proceedings of the 13th IASTED International Conference on Biomedical Engineering, Innsbruck, Austria, pp. 233\u2013240, 2017. DOI: https:\/\/doi.org\/10.2316\/P.2017.852-031."},{"key":"1597_CR12","doi-asserted-by":"publisher","unstructured":"L. F. S\u00e1nchez-Peralta, L. Bote-Curiel, A. Pic\u00f3n, F. M. S\u00e1nchez-Margallo, J. B. Pagador. 
Deep learning to find colorectal polyps in colonoscopy: A systematic literature review. Artificial Intelligence in Medicine, vol. 108, Article number 101923, 2020. DOI: https:\/\/doi.org\/10.1016\/j.artmed.2020.101923.","DOI":"10.1016\/j.artmed.2020.101923"},{"key":"1597_CR13","doi-asserted-by":"publisher","unstructured":"I. Pacal, D. Karaboga, A. Basturk, B. Akay, U. Nalbantoglu. A comprehensive review of deep learning in colon cancer. Computers in Biology and Medicine, vol. 126, Article number 104003, 2020. DOI: https:\/\/doi.org\/10.1016\/j.compbiomed.2020.104003.","DOI":"10.1016\/j.compbiomed.2020.104003"},{"issue":"1","key":"1597_CR14","doi-asserted-by":"publisher","first-page":"1323","DOI":"10.1007\/s11042-016-4219-z","volume":"77","author":"B M\u00fcnzer","year":"2018","unstructured":"B. M\u00fcnzer, K. Schoeffmann, L. B\u00f6sz\u00f6rmenyi. Content-based processing and analysis of endoscopic images and videos: A survey. Multimedia Tools and Applications, vol. 77, no. 1, pp. 1323\u20131362, 2018. DOI: https:\/\/doi.org\/10.1007\/s11042-016-4219-z.","journal-title":"Multimedia Tools and Applications"},{"issue":"47","key":"1597_CR15","doi-asserted-by":"publisher","first-page":"8103","DOI":"10.3748\/wjg.v27.i47.8103","volume":"27","author":"M Taghiakbari","year":"2021","unstructured":"M. Taghiakbari, Y. Mori, D. von Renteln. Artificial intelligence-assisted colonoscopy: A review of current state of practice and research. World Journal of Gastroenterology, vol. 27, no. 47, pp. 8103\u20138122, 2021. DOI: https:\/\/doi.org\/10.3748\/wjg.v27.i47.8103.","journal-title":"World Journal of Gastroenterology"},{"issue":"7","key":"1597_CR16","doi-asserted-by":"publisher","first-page":"3360","DOI":"10.1109\/JBHI.2023.3270724","volume":"27","author":"G Yue","year":"2023","unstructured":"G. Yue, G. Zhuo, S. Li, T. Zhou, J. Du, W. Yan, J. Hou, W. Liu, T. Wang. Benchmarking polyp segmentation methods in narrow-band imaging colonoscopy images. 
IEEE Journal of Biomedical and Health Informatics, vol. 27, no. 7, pp. 3360\u20133371, 2023. DOI: https:\/\/doi.org\/10.1109\/JBHI.2023.3270724.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"key":"1597_CR17","volume-title":"Colorectal polyp segmentation in the deep learning era: A comprehensive survey","author":"Z Wu","year":"2024","unstructured":"Z. Wu, F. Lv, C. Chen, A. Hao, S. Li. Colorectal polyp segmentation in the deep learning era: A comprehensive survey, [Online], Available: https:\/\/arxiv.org\/abs\/2401.11734, 2024."},{"key":"1597_CR18","doi-asserted-by":"publisher","unstructured":"J. Mei, T. Zhou, K. Huang, Y. Zhang, Y. Zhou, Y. Wu, H. Fu. A survey on deep learning for polyp segmentation: Techniques, challenges and future trends. Visual Intelligence, vol. 3, no. 1, Article number 1, 2025. DOI: https:\/\/doi.org\/10.1007\/s44267-024-00071-w.","DOI":"10.1007\/s44267-024-00071-w"},{"issue":"8","key":"1597_CR19","doi-asserted-by":"publisher","first-page":"1061","DOI":"10.1136\/gut.2005.086371","volume":"55","author":"M V Sivak Jr.","year":"2006","unstructured":"M. V.Jr. Sivak. Gastrointestinal endoscopy: Past and future. Gut, vol. 55, no. 8, pp. 1061\u20131064, 2006. DOI: https:\/\/doi.org\/10.1136\/gut.2005.086371.","journal-title":"Gut"},{"key":"1597_CR20","doi-asserted-by":"publisher","unstructured":"T. M. Berzin, E. J. Topol. Adding artificial intelligence to gastrointestinal endoscopy. The Lancet, vol. 395, no. 10223, Article number 485, 2020. DOI: https:\/\/doi.org\/10.1016\/S0140-6736(20)30294-4.","DOI":"10.1016\/S0140-6736(20)30294-4"},{"issue":"6785","key":"1597_CR21","doi-asserted-by":"publisher","first-page":"417","DOI":"10.1038\/35013140","volume":"405","author":"G Iddan","year":"2000","unstructured":"G. Iddan, G. Meron, A. Glukhovsky, P. Swain. Wireless capsule endoscopy. Nature, vol. 405, no. 6785, pp. 417\u2013417, 2000. 
DOI: https:\/\/doi.org\/10.1038\/35013140.","journal-title":"Nature"},{"issue":"1","key":"1597_CR22","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1109\/TMI.2016.2593957","volume":"36","author":"A P Twinanda","year":"2017","unstructured":"A. P. Twinanda, S. Shehata, D. Mutter, J. Marescaux, M. de Mathelin, N. Padoy. EndoNet: A deep architecture for recognition tasks on laparoscopic videos. IEEE Transactions on Medical Imaging, vol. 36, no. 1, pp. 86\u201397, 2017. DOI: https:\/\/doi.org\/10.1109\/TMI.2016.2593957.","journal-title":"IEEE Transactions on Medical Imaging"},{"key":"1597_CR23","doi-asserted-by":"publisher","unstructured":"S. Shao, Z. Pei, W. Chen, W. Zhu, X. Wu, D. Sun, B. Zhang. Self-supervised monocular depth and ego-motion estimation in endoscopy: Appearance flow to the rescue. Medical Image Analysis, vol. 77, Article number 102338, 2022. DOI: https:\/\/doi.org\/10.1016\/j.media.2021.102338.","DOI":"10.1016\/j.media.2021.102338"},{"issue":"6","key":"1597_CR24","doi-asserted-by":"publisher","first-page":"531","DOI":"10.1007\/s11633-022-1371-y","volume":"19","author":"G P Ji","year":"2022","unstructured":"G. P. Ji, G. Xiao, Y. C. Chou, D. P. Fan, K. Zhao, G. Chen, L. Van Gool. Video polyp segmentation: A deep learning perspective. Machine Intelligence Research, vol. 19, no. 6, pp. 531\u2013549, 2022. DOI: https:\/\/doi.org\/10.1007\/s11633-022-1371-y.","journal-title":"Machine Intelligence Research"},{"key":"1597_CR25","doi-asserted-by":"publisher","unstructured":"J. C. A. Cer\u00f3n, G. O. Ruiz, L. Chang, S. Ali. Real-time instance segmentation of surgical instruments using attention and multi-scale feature fusion. Medical Image Analysis, vol. 81, Article number 102569, 2022. 
DOI: https:\/\/doi.org\/10.1016\/j.media.2022.102569.","DOI":"10.1016\/j.media.2022.102569"},{"key":"1597_CR26","doi-asserted-by":"publisher","first-page":"3364","DOI":"10.1109\/ICCVW54120.2021.00376","volume-title":"Proceedings of IEEE\/CVF International Conference on Computer Vision Workshops","author":"Y Blau","year":"2021","unstructured":"Y. Blau, D. Freedman, V. Dashinsky, R. Goldenberg, E. Rivlin. Unsupervised 3D shape coverage estimation with applications to colonoscopy. In Proceedings of IEEE\/CVF International Conference on Computer Vision Workshops, Montreal, Canada, pp. 3364\u20133374, 2021. DOI: https:\/\/doi.org\/10.1109\/ICCVW54120.2021.00376."},{"key":"1597_CR27","doi-asserted-by":"publisher","first-page":"559","DOI":"10.1007\/978-3-030-78191-0_43","volume-title":"Proceedings of the 27th International Conference on Information Processing in Medical Imaging","author":"Y Zhang","year":"2021","unstructured":"Y. Zhang, S. Wang, R. Ma, S. K. McGill, J. G. Rosenman, S. M. Pizer. Lighting enhancement aids reconstruction of colonoscopic surfaces. In Proceedings of the 27th International Conference on Information Processing in Medical Imaging, pp. 559\u2013570, 2021. DOI: https:\/\/doi.org\/10.1007\/978-3-030-78191-0_43."},{"key":"1597_CR28","doi-asserted-by":"publisher","unstructured":"D. P. Fan, G. P. Ji, P. Xu, M. M. Cheng, C. Sakaridis, L. Van Gool. Advances in deep concealed scene understanding. Visual Intelligence, vol. 1, no. 1, Article number 16, 2023. DOI: https:\/\/doi.org\/10.1007\/s44267-023-00019-6.","DOI":"10.1007\/s44267-023-00019-6"},{"issue":"9","key":"1597_CR29","doi-asserted-by":"publisher","first-page":"3166","DOI":"10.1016\/j.patcog.2012.03.002","volume":"45","author":"J Bernal","year":"2012","unstructured":"J. Bernal, J. S\u00e1nchez, F. Vilari\u00f1o. Towards automatic polyp detection with a polyp appearance model. Pattern Recognition, vol. 45, no. 9, pp. 3166\u20133182, 2012. 
DOI: https:\/\/doi.org\/10.1016\/j.patcog.2012.03.002.","journal-title":"Pattern Recognition"},{"issue":"2","key":"1597_CR30","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1007\/s11548-013-0926-3","volume":"9","author":"J Silva","year":"2014","unstructured":"J. Silva, A. Histace, O. Romain, X. Dray, B. Granado. Toward embedded detection of polyps in WCE images for early diagnosis of colorectal cancer. International Journal of Computer Assisted Radiology and Surgery, vol. 9, no. 2, pp. 283\u2013293, 2014. DOI: https:\/\/doi.org\/10.1007\/s11548-013-0926-3.","journal-title":"International Journal of Computer Assisted Radiology and Surgery"},{"key":"1597_CR31","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1016\/j.compmedimag.2015.02.007","volume":"43","author":"J Bernal","year":"2015","unstructured":"J. Bernal, F. J. S\u00e1nchez, G. Fern\u00e1ndez-Esparrach, D. Gil, C. Rodr\u00edguez, F. Vilari\u00f1o. WM-DOVA maps for accurate polyp highlighting in colonoscopy: Validation vs. saliency maps from physicians. Computerized Medical Imaging and Graphics, vol. 43, pp. 99\u2013111, 2015. DOI: https:\/\/doi.org\/10.1016\/j.compmedimag.2015.02.007.","journal-title":"Computerized Medical Imaging and Graphics"},{"issue":"2","key":"1597_CR32","doi-asserted-by":"publisher","first-page":"630","DOI":"10.1109\/TMI.2015.2487997","volume":"35","author":"N Tajbakhsh","year":"2016","unstructured":"N. Tajbakhsh, S. R. Gurudu, J. M. Liang. Automated polyp detection in colonoscopy videos using shape and context information. IEEE Transactions on Medical Imaging, vol. 35, no. 2, pp. 630\u2013644, 2016. DOI: https:\/\/doi.org\/10.1109\/TMI.2015.2487997.","journal-title":"IEEE Transactions on Medical Imaging"},{"key":"1597_CR33","doi-asserted-by":"publisher","first-page":"144","DOI":"10.1016\/j.media.2015.10.003","volume":"30","author":"M Ye","year":"2016","unstructured":"M. Ye, S. Giannarou, A. Meining, G. Z. Yang. 
Online tracking and retargeting with applications to optical biopsy in gastrointestinal endoscopic examinations. Medical Image Analysis, vol. 30, pp. 144\u2013157, 2016. DOI: https:\/\/doi.org\/10.1016\/j.media.2015.10.003.","journal-title":"Medical Image Analysis"},{"key":"1597_CR34","doi-asserted-by":"publisher","first-page":"4650","DOI":"10.1109\/IJCNN.2016.7727810","volume-title":"Proceedings of International Joint Conference on Neural Networks","author":"F Deeba","year":"2016","unstructured":"F. Deeba, F. M. Bui, K. A. Wahid. Automated Grow-Cut for segmentation of endoscopic images. In Proceedings of International Joint Conference on Neural Networks, Vancouver, Canada, pp. 4650\u20134657, 2016. DOI: https:\/\/doi.org\/10.1109\/IJCNN.2016.7727810."},{"issue":"1","key":"1597_CR35","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1109\/JBHI.2016.2635662","volume":"21","author":"R Zhang","year":"2017","unstructured":"R. Zhang, Y. Zheng, T. W. C. Mak, R. Yu, S. H. Wong, J. Y. Lau, C. C. Y. Poon. Automatic detection and classification of colorectal polyps by transferring low-level CNN features from nonmedical domain. IEEE Journal of Biomedical and Health Informatics, vol. 21, no. 1, pp. 41\u201347, 2017. DOI: https:\/\/doi.org\/10.1109\/JBHI.2016.2635662.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"issue":"9","key":"1597_CR36","doi-asserted-by":"publisher","first-page":"2051","DOI":"10.1109\/TMI.2016.2547947","volume":"35","author":"P Mesejo","year":"2016","unstructured":"P. Mesejo, D. Pizarro, A. Abergel, O. Rouquette, S. Beorchia, L. Poincloux, A. Bartoli. Computer-aided classification of gastrointestinal lesions in regular colonoscopy. IEEE Transactions on Medical Imaging, vol. 35, no. 9, pp. 2051\u20132063, 2016. 
DOI: https:\/\/doi.org\/10.1109\/TMI.2016.2547947.","journal-title":"IEEE Transactions on Medical Imaging"},{"key":"1597_CR37","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1007\/978-3-319-67543-5_3","volume-title":"Proceedings of the 4th International Workshop on Computer Assisted and Robotic Endoscopy and Clinical Image-Based Procedures","author":"Q Angermann","year":"2017","unstructured":"Q. Angermann, J. Bernal, C. S\u00e1nchez-Montes, M. Hammami, G. Fern\u00e1ndez-Esparrach, X. Dray, O. Romain, F. J. S\u00e1nchez, A. Histace. Towards real-time polyp detection in colonoscopy videos: Adapting still frame-based methodologies for video sequences analysis. In Proceedings of the 4th International Workshop on Computer Assisted and Robotic Endoscopy and Clinical Image-Based Procedures, Qu\u00e9bec City, Canada, pp. 29\u201341, 2017. DOI: https:\/\/doi.org\/10.1007\/978-3-319-67543-5_3."},{"key":"1597_CR38","doi-asserted-by":"publisher","first-page":"164","DOI":"10.1145\/3083187.3083212","volume-title":"Proceedings of the 8th ACM on Multimedia Systems Conference","author":"K Pogorelov","year":"2017","unstructured":"K. Pogorelov, K. R. Randel, C. Griwodz, S. L. Eskeland, T. de Lange, D. Johansen, C. Spampinato, D. T. Dang-Nguyen, M. Lux, P. T. Schmidt, M. Riegler, P. Halvorsen. KVASIR: A multi-class image dataset for computer aided gastrointestinal disease detection. In Proceedings of the 8th ACM on Multimedia Systems Conference, Taipei, China, pp. 164\u2013169, 2017. DOI: https:\/\/doi.org\/10.1145\/3083187.3083212."},{"key":"1597_CR39","doi-asserted-by":"publisher","first-page":"170","DOI":"10.1145\/3083187.3083216","volume-title":"Proceedings of the 8th ACM on Multimedia Systems Conference","author":"K Pogorelov","year":"2017","unstructured":"K. Pogorelov, K. R. Randel, T. de Lange, S. L. Eskeland, C. Griwodz, D. Johansen, C. Spampinato, M. Taschwer, M. Lux, P. T. Schmidt, M. Riegler, P. Halvorsen. Nerthus: A bowel preparation quality video dataset. 
In Proceedings of the 8th ACM on Multimedia Systems Conference, Taipei, China, pp. 170\u2013174, 2017. DOI: https:\/\/doi.org\/10.1145\/3083187.3083216."},{"key":"1597_CR40","doi-asserted-by":"publisher","unstructured":"D. V\u00e1zquez, J. Bernal, F. J. S\u00e1nchez, G. Fern\u00e1ndez-Esparrach, A. M. L\u00f3pez, A. Romero, M. Drozdzal, A. Courville. A benchmark for endoluminal scene segmentation of colonoscopy images. Journal of Healthcare Engineering, vol. 2017, no. 1, Article number 4037190, 2017. DOI: https:\/\/doi.org\/10.1155\/2017\/4037190.","DOI":"10.1155\/2017\/4037190"},{"issue":"6","key":"1597_CR41","doi-asserted-by":"publisher","first-page":"E477","DOI":"10.1055\/s-0043-105488","volume":"5","author":"A Koulaouzidis","year":"2017","unstructured":"A. Koulaouzidis, D. K. Iakovidis, D. E. Yung, E. Rondonotti, U. Kopylov, J. N. Plevris, E. Toth, A. Eliakim, G. W. Johansson, W. Marlicz, G. Mavrogenis, A. Nemeth, H. Thorlacius, G. E. Tontini. KID project: An internet-based digital video atlas of capsule endoscopy for research purposes. Endoscopy International Open, vol. 5, no. 6, pp. E477\u2013E483, 2017. DOI: https:\/\/doi.org\/10.1055\/s-0043-105488.","journal-title":"Endoscopy International Open"},{"key":"1597_CR42","doi-asserted-by":"publisher","unstructured":"I. N. Figueiredo, L. Pinto, P. N. Figueiredo, R. Tsai. Unsupervised segmentation of colonic polyps in narrowband imaging data based on manifold representation of images and Wasserstein distance. Biomedical Signal Processing and Control, vol. 53, Article number 101577, 2019. DOI: https:\/\/doi.org\/10.1016\/j.bspc.2019.101577.","DOI":"10.1016\/j.bspc.2019.101577"},{"issue":"2","key":"1597_CR43","doi-asserted-by":"publisher","first-page":"E209","DOI":"10.1055\/a-0808-4456","volume":"7","author":"P N Figueiredo","year":"2019","unstructured":"P. N. Figueiredo, I. N. Figueiredo, L. Pinto, S. Kumar, Y. H. R. Tsai, A. V. Mamonov. 
Polyp detection with computer-aided diagnosis in white light colonoscopy: Comparison of three different methods. Endoscopy International Open, vol. 7, no. 2, pp. E209\u2013E215, 2019. DOI: https:\/\/doi.org\/10.1055\/a-0808-4456.","journal-title":"Endoscopy International Open"},{"key":"1597_CR44","doi-asserted-by":"publisher","first-page":"2578","DOI":"10.1145\/3343031.3356073","volume-title":"Proceedings of the 27th ACM International Conference on Multimedia","author":"T H Hoang","year":"2019","unstructured":"T. H. Hoang, H. D. Nguyen, V. A. Nguyen, T. A. Nguyen, V. T. Nguyen, M. T. Tran. Enhancing endoscopic image classification with symptom localization and data augmentation. In Proceedings of the 27th ACM International Conference on Multimedia, Nice, France, pp. 2578\u20132582, 2019. DOI: https:\/\/doi.org\/10.1145\/3343031.3356073."},{"key":"1597_CR45","doi-asserted-by":"publisher","unstructured":"M. Cho, J. H. Kim, K. S. Hong, J. S. Kim, H. J. Kong, S. Kim. Identification of cecum time-location in a colonoscopy video by deep learning analysis of colonoscope movement. PeerJ, vol. 7, Article number e7256, 2019. DOI: https:\/\/doi.org\/10.7717\/peerj.7256.","DOI":"10.7717\/peerj.7256"},{"key":"1597_CR46","volume-title":"Endoscopy artifact detection (EAD 2019) challenge dataset","author":"S Ali","year":"2019","unstructured":"S. Ali, F. Zhou, C. Daul, B. Braden, A. Bailey, S. Realdon, J. E. East, G. Wagni\u00e9res, V. Loschenov, E. Grisan, W. Blondel, J. Rittscher. Endoscopy artifact detection (EAD 2019) challenge dataset, [Online], Available: https:\/\/arxiv.org\/abs\/1905.03209, 2019."},{"key":"1597_CR47","doi-asserted-by":"publisher","DOI":"10.1109\/ISBI45749.2020.9098406","volume-title":"Proceedings of the 17th International Symposium on Biomedical Imaging","author":"Y Liu","year":"2020","unstructured":"Y. Liu, Y. Tian, G. Maicas, L. Z. C. T. Pu, R. Singh, J. W. Verjans, G. Carneiro. Photoshopping colonoscopy video frames. 
In Proceedings of the 17th International Symposium on Biomedical Imaging, Iowa City, USA, 2020. DOI: https:\/\/doi.org\/10.1109\/ISBI45749.2020.9098406."},{"key":"1597_CR48","doi-asserted-by":"publisher","first-page":"451","DOI":"10.1007\/978-3-030-37734-2_37","volume-title":"Proceedings of the 26th International Conference on MultiMedia Modeling","author":"D Jha","year":"2020","unstructured":"D. Jha, P. H. Smedsrud, M. A. Riegler, P. Halvorsen, T. de Lange, D. Johansen, H. D. Johansen. Kvasir-SEG: A segmented polyp dataset. In Proceedings of the 26th International Conference on MultiMedia Modeling, Daejeon, Republic of Korea, pp. 451\u2013462, 2020. DOI: https:\/\/doi.org\/10.1007\/978-3-030-37734-2_37."},{"key":"1597_CR49","doi-asserted-by":"publisher","unstructured":"L. F. S\u00e1nchez-Peralta, J. B. Pagador, A. Pic\u00f3n, \u00c1. J. Calder\u00f3n, F. Polo, N. Andraka, R. Bilbao, B. Glover, C. L. Saratxaga, F. M. S\u00e1nchez-Margallo. PICCOLO white-light and narrow-band imaging colonoscopic dataset: A performance comparative of models and datasets. Applied Sciences, vol. 10, no. 23, Article number 8501, 2020. DOI: https:\/\/doi.org\/10.3390\/app10238501.","DOI":"10.3390\/app10238501"},{"key":"1597_CR50","volume-title":"Endoscopy disease detection challenge 2020","author":"S Ali","year":"2020","unstructured":"S. Ali, N. Ghatwary, B. Braden, D. Lamarque, A. Bailey, S. Realdon, R. Cannizzaro, J. Rittscher, C. Daul, J. E. East. Endoscopy disease detection challenge 2020, [Online], Available: https:\/\/arxiv.org\/abs\/2003.03376, 2020."},{"issue":"3","key":"1597_CR51","doi-asserted-by":"publisher","first-page":"E415","DOI":"10.1055\/a-1035-9088","volume":"8","author":"R Leenhardt","year":"2020","unstructured":"R. Leenhardt, C. Li, J. P. Le Mouel, G. Rahmi, J. C. Saurin, F. Cholet, A. Boureille, X. Amiot, M. Delvaux, C. Duburque, C. Leandri, R. G\u00e9rard, S. Lecleire, F. Mesli, I. Nion-Larmurier, O. Romain, S. Sacher-Huvelin, C. Simon-Shane, G. Vanbiervliet, P. 
Marteau, A. Histace, X. Dray. CAD-CAP: A 25 000-image database serving the development of artificial intelligence for capsule endoscopy. Endoscopy International Open, vol. 8, no. 3, pp. E415\u2013E420, 2020. DOI: https:\/\/doi.org\/10.1055\/a-1035-9088.","journal-title":"Endoscopy International Open"},{"key":"1597_CR52","doi-asserted-by":"publisher","unstructured":"C. C. Y. Poon, Y. Jiang, R. Zhang, W. W. Y. Lo, M. S. H. Cheung, R. Yu, Y. Zheng, J. C. T. Wong, Q. Liu, S. H. Wong, T. W. C. Mak, J. Y. W. Lau. AI-doscopist: A real-time deep-learning-based algorithm for localising polyps in colonoscopy videos with edge computing devices. npj Digital Medicine, vol. 3, no. 1, Article number 73, 2020. DOI: https:\/\/doi.org\/10.1038\/s41746-020-0281-z.","DOI":"10.1038\/s41746-020-0281-z"},{"key":"1597_CR53","doi-asserted-by":"publisher","unstructured":"H. Borgli, V. Thambawita, P. H. Smedsrud, S. Hicks, D. Jha, S. L. Eskeland, K. R. Randel, K. Pogorelov, M. Lux, D. T. D. Nguyen, D. Johansen, C. Griwodz, H. K. Stensland, E. Garcia-Ceja, P. T. Schmidt, H. L. Hammer, M. A. Riegler, P. Halvorsen, T. de Lange. HyperKvasir, a comprehensive multi-class image and video dataset for gastrointestinal endoscopy. Scientific Data, vol. 7, no. 1, Article number 283, 2020. DOI: https:\/\/doi.org\/10.1038\/s41597-020-00622-y.","DOI":"10.1038\/s41597-020-00622-y"},{"issue":"4","key":"1597_CR54","doi-asserted-by":"publisher","first-page":"1134","DOI":"10.1109\/TMI.2020.3046843","volume":"40","author":"X Guo","year":"2021","unstructured":"X. Guo, C. Yang, Y. Liu, Y. Yuan. Learn to threshold: ThresholdNet with confidence-guided manifold mixup for polyp segmentation. IEEE Transactions on Medical Imaging, vol. 40, no. 4, pp. 1134\u20131146, 2021. DOI: https:\/\/doi.org\/10.1109\/TMI.2020.3046843.","journal-title":"IEEE Transactions on Medical Imaging"},{"key":"1597_CR55","doi-asserted-by":"publisher","unstructured":"S. Ali, M. Dmitrieva, N. Ghatwary, S. Bano, G. Polat, A. Temizel, A. 
Krenzer, A. Hekalo, Y. B. Guo, B. Matuszewski, M. Gridach, I. Voiculescu, V. Yoganand, A. Chavan, A. Raj, N. T. Nguyen, D. Q. Tran, L. D. Huynh, N. Boutry, S. Rezvy, H. Chen, Y. H. Choi, A. Subramanian, V. Balasubramanian, X. W. Gao, H. Hu, Y. Liao, D. Stoyanov, C. Daul, S. Realdon, R. Cannizzaro, D. Lamarque, T. Tran-Nguyen, A. Bailey, B. Braden, J. E. East, J. Rittscher. Deep learning for detection and segmentation of artefact and disease instances in gastrointestinal endoscopy. Medical Image Analysis, vol. 70, Article number 102002, 2021. DOI: https:\/\/doi.org\/10.1016\/j.media.2021.102002.","DOI":"10.1016\/j.media.2021.102002"},{"key":"1597_CR56","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/978-3-030-90436-4_2","volume-title":"Proceedings of the 16th International Symposium on Advances in Visual Computing","author":"P Ngoc Lan","year":"2021","unstructured":"P. Ngoc Lan, N. S. An, D. V. Hang, D. V. Long, T. Q. Trung, N. T. Thuy, D. V. Sang. NeoUNet: Towards accurate colon polyp segmentation and neoplasm detection. In Proceedings of the 16th International Symposium on Advances in Visual Computing, pp. 15\u201328, 2021. DOI: https:\/\/doi.org\/10.1007\/978-3-030-90436-4_2."},{"key":"1597_CR57","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1007\/978-3-030-87234-2_3","volume-title":"Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Q Wang","year":"2021","unstructured":"Q. Wang, H. Che, W. Ding, L. Xiang, G. Li, Z. Li, S. Cui. Colorectal polyp classification from white-light colonoscopy images via domain alignment. In Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention, Strasbourg, France, pp. 24\u201332, 2021. 
DOI: https:\/\/doi.org\/10.1007\/978-3-030-87234-2_3."},{"key":"1597_CR58","doi-asserted-by":"publisher","first-page":"387","DOI":"10.1007\/978-3-030-87240-3_37","volume-title":"Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Y Ma","year":"2021","unstructured":"Y. Ma, X. Chen, K. Cheng, Y. Li, B. Sun. LDPolypVideo benchmark: A large-scale colonoscopy video dataset of diverse polyps. In Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention, Strasbourg, France, pp. 387\u2013396, 2021. DOI: https:\/\/doi.org\/10.1007\/978-3-030-87240-3_37."},{"key":"1597_CR59","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/978-3-030-87199-4_29","volume-title":"Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"N Celik","year":"2021","unstructured":"N. Celik, S. Ali, S. Gupta, B. Braden, J. Rittscher. EndoUDA: A modality independent segmentation approach for endoscopy imaging. In Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention, Strasbourg, France, pp. 303\u2013312, 2021. DOI: https:\/\/doi.org\/10.1007\/978-3-030-87199-4_29."},{"key":"1597_CR60","doi-asserted-by":"publisher","first-page":"218","DOI":"10.1007\/978-3-030-67835-7_19","volume-title":"Proceedings of the 27th International Conference on MultiMedia Modeling","author":"D Jha","year":"2021","unstructured":"D. Jha, S. Ali, K. Emanuelsen, S. A. Hicks, V. Thambawita, E. Garcia-Ceja, M. A. Riegler, T. de Lange, P. T. Schmidt, H. D. Johansen, D. Johansen, P. Halvorsen. Kvasir-instrument: Diagnostic and therapeutic tool segmentation dataset in gastrointestinal endoscopy. In Proceedings of the 27th International Conference on MultiMedia Modeling, Prague, Czech Republic, pp. 218\u2013229, 2021. 
DOI: https:\/\/doi.org\/10.1007\/978-3-030-67835-7_19."},{"key":"1597_CR61","doi-asserted-by":"publisher","unstructured":"W. Wang, J. Tian, C. Zhang, Y. Luo, X. Wang, J. Li. An improved deep learning approach and its applications on colonic polyp images detection. BMC Medical Imaging, vol. 20, no. 1, Article number 83, 2020. DOI: https:\/\/doi.org\/10.1186\/s12880-020-00482-3.","DOI":"10.1186\/s12880-020-00482-3"},{"issue":"7","key":"1597_CR62","doi-asserted-by":"publisher","first-page":"E1136","DOI":"10.1055\/a-1468-3964","volume":"9","author":"A de Maissin","year":"2021","unstructured":"A. de Maissin, R. Vall\u00e9e, M. Flamant, M. Fondain-Bossiere, C. Le Berre, A. Coutrot, N. Normand, H. Mouch\u00e9re, S. Coudol, C. Trang, A. Bourreille. Multi-expert annotation of Crohn\u2019s disease images of the small bowel for automatic detection using a convolutional recurrent attention neural network. Endoscopy International Open, vol. 9, no. 7, pp. E1136\u2013E1144, 2021. DOI: https:\/\/doi.org\/10.1055\/a-1468-3964.","journal-title":"Endoscopy International Open"},{"key":"1597_CR63","doi-asserted-by":"publisher","unstructured":"Z. Kong, M. He, Q. Luo, X. Huang, P. Wei, Y. Cheng, L. Chen, Y. Liang, Y. Lu, X. Li, J. Chen. Multi-task classification and segmentation for explicable capsule endoscopy diagnostics. Frontiers in Molecular Biosciences, vol. 8, Article number 614277, 2021. DOI: https:\/\/doi.org\/10.3389\/fmolb.2021.614277.","DOI":"10.3389\/fmolb.2021.614277"},{"issue":"4","key":"1597_CR64","doi-asserted-by":"publisher","first-page":"960","DOI":"10.1016\/j.gie.2020.07.060","volume":"93","author":"M Misawa","year":"2021","unstructured":"M. Misawa, S. E. Kudo, Y. Mori, K. Hotta, K. Ohtsuka, T. Matsuda, S. Saito, T. Kudo, T. Baba, F. Ishida, H. Itoh, M. Oda, K. Mori. Development of a computer-aided detection system for colonoscopy and a publicly accessible large colonoscopy video database (with video). Gastrointestinal Endoscopy, vol. 93, no. 4, pp. 
960\u2013967, 2021. DOI: https:\/\/doi.org\/10.1016\/j.gie.2020.07.060.","journal-title":"Gastrointestinal Endoscopy"},{"issue":"6","key":"1597_CR65","doi-asserted-by":"publisher","first-page":"2029","DOI":"10.1109\/JBHI.2021.3049304","volume":"25","author":"D Jha","year":"2021","unstructured":"D. Jha, P. H. Smedsrud, D. Johansen, T. de Lange, H. D. Johansen, P. Halvorsen, M. A. Riegler. A comprehensive study on colorectal polyp segmentation with ResUNet++, conditional random field and test-time augmentation. IEEE Journal of Biomedical and Health Informatics, vol. 25, no. 6, pp. 2029\u20132040, 2021. DOI: https:\/\/doi.org\/10.1109\/JBHI.2021.3049304.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"key":"1597_CR66","doi-asserted-by":"publisher","unstructured":"P. H. Smedsrud, V. Thambawita, S. A. Hicks, H. Gjestang, O. O. Nedrejord, E. N\u00e6ss, H. Borgli, D. Jha, T. J. D. Berstad, S. L. Eskeland, M. Lux, H. Espeland, A. Petlund, D. T. D. Nguyen, E. Garcia-Ceja, D. Johansen, P. T. Schmidt, E. Toth, H. L. Hammer, T. de Lange, M. A. Riegler, P. Halvorsen. Kvasir-Capsule, a video capsule endoscopy dataset. Scientific Data, vol. 8, no. 1, Article number 142, 2021. DOI: https:\/\/doi.org\/10.1038\/s41597-021-00920-z.","DOI":"10.1038\/s41597-021-00920-z"},{"key":"1597_CR67","doi-asserted-by":"publisher","unstructured":"K. Li, M. I. Fathan, K. Patel, T. Zhang, C. Zhong, A. Bansal, A. Rastogi, J. S. Wang, G. Wang. Colonoscopy polyp detection and classification: Dataset creation and comparative evaluations. PLoS One, vol. 16, no. 8, Article number e0255809, 2021. DOI: https:\/\/doi.org\/10.1371\/journal.pone.0255809.","DOI":"10.1371\/journal.pone.0255809"},{"key":"1597_CR68","volume-title":"ERS: A novel comprehensive endoscopy image dataset for machine learning, compliant with the MST 3.0 specification","author":"J Cychnerski","year":"2022","unstructured":"J. Cychnerski, T. Dziubich, A. Brzeski. 
ERS: A novel comprehensive endoscopy image dataset for machine learning, compliant with the MST 3.0 specification, [Online], Available: https:\/\/arxiv.org\/abs\/2201.08746, 2022."},{"key":"1597_CR69","doi-asserted-by":"publisher","first-page":"88","DOI":"10.1007\/978-3-031-16437-8_9","volume-title":"Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Y Tian","year":"2022","unstructured":"Y. Tian, G. Pang, F. Liu, Y. Liu, C. Wang, Y. Chen, J. Verjans, G. Carneiro. Contrastive transformer-based multiple instance learning for weakly supervised polyp frame detection. In Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention, Singapore, pp. 88\u201398, 2022. DOI: https:\/\/doi.org\/10.1007\/978-3-031-16437-8_9."},{"key":"1597_CR70","doi-asserted-by":"publisher","unstructured":"F. J. P. Montalbo. Diagnosing gastrointestinal diseases from endoscopy images through a multi-fused CNN with auxiliary layers, alpha dropouts, and a fusion residual block. Biomedical Signal Processing and Control, vol. 76, Article number 103683, 2022. DOI: https:\/\/doi.org\/10.1016\/j.bspc.2022.103683.","DOI":"10.1016\/j.bspc.2022.103683"},{"key":"1597_CR71","volume-title":"Endoscopic computer vision challenges 2.0","author":"S Ali","year":"2025","unstructured":"S. Ali, N. Ghatwary. Endoscopic computer vision challenges 2.0, [Online], Available: https:\/\/endocv2022.grand-challenge.org\/, 2025."},{"key":"1597_CR72","doi-asserted-by":"publisher","unstructured":"V. Thambawita, P. Salehi, S. A. Sheshkal, S. A. Hicks, H. L. Hammer, S. Parasa, T. de Lange, P. Halvorsen, M. A. Riegler. SinGAN-Seg: Synthetic training data generation for medical image segmentation. PLoS One, vol. 17, no. 5, Article number e0267976, 2022. 
DOI: https:\/\/doi.org\/10.1371\/journal.pone.0267976.","DOI":"10.1371\/journal.pone.0267976"},{"issue":"11","key":"1597_CR73","doi-asserted-by":"publisher","first-page":"1397","DOI":"10.1080\/00365521.2022.2085059","volume":"57","author":"D Fitting","year":"2022","unstructured":"D. Fitting, A. Krenzer, J. Troya, M. Banck, B. Sudarevic, M. Brand, W. B\u00f6ck, W. G. Zoller, T. R\u00f6sch, F. Puppe, A. Meining, A. Hann. A video based benchmark data set (ENDOTEST) to evaluate computer-aided polyp detection systems. Scandinavian Journal of Gastroenterology, vol. 57, no. 11, pp. 1397\u20131403, 2022. DOI: https:\/\/doi.org\/10.1080\/00365521.2022.2085059.","journal-title":"Scandinavian Journal of Gastroenterology"},{"key":"1597_CR74","volume-title":"Proceedings of CLEF: Conference and Labs of the Evaluation Forum","author":"S Hicks","year":"2023","unstructured":"S. Hicks, A. Stor\u00e5s, P. Halvorsen, T. de Lange, M. Riegler, V. Thambawita. Overview of ImageCLEFmedical 2023-medical visual question answering for gastrointestinal tract. In Proceedings of CLEF: Conference and Labs of the Evaluation Forum, Thessaloniki, Greece, 2023."},{"key":"1597_CR75","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1007\/978-3-031-47679-2_10","volume-title":"Proceedings of the 1st International Workshop on Machine Learning for Multimodal Healthcare Data","author":"D Jha","year":"2023","unstructured":"D. Jha, V. Sharma, N. Dasu, N. K. Tomar, S. Hicks, M. K. Bhuyan, P. K. Das, M. A. Riegler, P. Halvorsen, U. Bagci, T. de Lange. GastroVision: A multi-class endoscopy image dataset for computer aided gastrointestinal disease detection. In Proceedings of the 1st International Workshop on Machine Learning for Multimodal Healthcare Data, Honolulu, USA, pp. 125\u2013140, 2023. 
DOI: https:\/\/doi.org\/10.1007\/978-3-031-47679-2_10."},{"key":"1597_CR76","doi-asserted-by":"publisher","first-page":"4355","DOI":"10.1109\/CVPRW59228.2023.00458","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops","author":"G Ren","year":"2023","unstructured":"G. Ren, M. Lazarou, J. Yuan, T. Stathaki. Towards automated polyp segmentation using weakly- and semi-supervised learning and deformable transformers. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, Vancouver, Canada, pp. 4355\u20134364, 2023. DOI: https:\/\/doi.org\/10.1109\/CVPRW59228.2023.00458."},{"issue":"9","key":"1597_CR77","doi-asserted-by":"publisher","first-page":"1431","DOI":"10.1093\/ibd\/izac226","volume":"29","author":"G Polat","year":"2023","unstructured":"G. Polat, H. T. Kani, I. Ergenc, Y. Ozen Alahdab, A. Temizel, O. Atug. Improving the computer-aided estimation of ulcerative colitis severity according to mayo endoscopic score by using regression-based deep learning. Inflammatory Bowel Diseases, vol. 29, no. 9, pp. 1431\u20131439, 2023. DOI: https:\/\/doi.org\/10.1093\/ibd\/izac226.","journal-title":"Inflammatory Bowel Diseases"},{"key":"1597_CR78","doi-asserted-by":"publisher","unstructured":"S. Ali, D. Jha, N. Ghatwary, S. Realdon, R. Cannizzaro, O. E. Salem, D. Lamarque, C. Daul, M. A. Riegler, K. V. Anonsen, A. Petlund, P. Halvorsen, J. Rittscher, T. de Lange, J. E. East. A multi-centre polyp detection and segmentation dataset for generalisability assessment. Scientific Data, vol. 10, no. 1, Article number 75, 2023. DOI: https:\/\/doi.org\/10.1038\/s41597-023-01981-y.","DOI":"10.1038\/s41597-023-01981-y"},{"key":"1597_CR79","doi-asserted-by":"publisher","unstructured":"D. Wang, X. Wang, L. Wang, M. Li, Q. Da, X. Liu, X. Gao, J. Shen, J. He, T. Shen, Q. Duan, J. Zhao, K. Li, Y. Qiao, S. Zhang. 
A real-world dataset and benchmark for foundation model adaptation in medical image classification. Scientific Data, vol. 10, no. 1, Article number 574, 2023. DOI: https:\/\/doi.org\/10.1038\/s41597-023-02460-0.","DOI":"10.1038\/s41597-023-02460-0"},{"key":"1597_CR80","doi-asserted-by":"publisher","DOI":"10.17632\/8pbbjf274w.1","volume-title":"Gastrointestinal bleeding WCE images dataset","author":"H Khan","year":"2025","unstructured":"H. Khan, H. Malik. Gastrointestinal bleeding WCE images dataset, [Online], Available: https:\/\/doi.org\/10.17632\/8pbbjf274w.1, 2025."},{"key":"1597_CR81","doi-asserted-by":"publisher","unstructured":"C. Biffi, G. Antonelli, S. Bernhofer, C. Hassan, D. Hirata, M. Iwatate, A. Maieron, P. Salvagnini, A. Cherubini. REAL-Colon: A dataset for developing real-world AI applications in colonoscopy. Scientific Data, vol. 11, no. 1, Article number 539, 2024. DOI: https:\/\/doi.org\/10.1038\/s41597-024-03359-0.","DOI":"10.1038\/s41597-024-03359-0"},{"issue":"12","key":"1597_CR82","doi-asserted-by":"publisher","first-page":"4105","DOI":"10.1109\/TMI.2024.3411933","volume":"43","author":"Z Xu","year":"2024","unstructured":"Z. Xu, J. Rittscher, S. Ali. SSL-CPCD: Self-supervised learning with composite pretext-class discrimination for improved generalisability in endoscopic image analysis. IEEE Transactions on Medical Imaging, vol. 43, no. 12, pp. 4105\u20134119, 2024. DOI: https:\/\/doi.org\/10.1109\/TMI.2024.3411933.","journal-title":"IEEE Transactions on Medical Imaging"},{"key":"1597_CR83","doi-asserted-by":"publisher","DOI":"10.1145\/3689096.3689458","volume-title":"Proceedings of the 1st International Workshop on Vision-Language Models for Biomedical Applications","author":"S Gautam","year":"2024","unstructured":"S. Gautam, A. Stor\u00e5s, C. Midoglu, S. A. Hicks, V. Thambawita, P. Halvorsen, M. A. Riegler. Kvasir-VQA: A text-image pair GI tract dataset. 
In Proceedings of the 1st International Workshop on Vision-Language Models for Biomedical Applications, Melbourne, Australia, 2024. DOI: https:\/\/doi.org\/10.1145\/3689096.3689458."},{"key":"1597_CR84","volume-title":"Capsule vision 2024 challenge: Multi-class abnormality classification for video capsule endoscopy","author":"P Handa","year":"2024","unstructured":"P. Handa, A. Mahbod, F. Schwarzhans, R. Woitek, N. Goel, D. Chhabra, S. Jha, M. Dhir, D. Gunjan, J. Kakarla, B. Raman. Capsule vision 2024 challenge: Multi-class abnormality classification for video capsule endoscopy, [Online], Available: https:\/\/arxiv.org\/abs\/2408.04940, 2024."},{"key":"1597_CR85","volume-title":"COLON: The largest COlonoscopy LONg sequence public database","author":"L Ruiz","year":"2024","unstructured":"L. Ruiz, F. Sierra-Jerez, J. Ruiz, F. Mart\u00ednez. COLON: The largest COlonoscopy LONg sequence public database, [Online], Available: https:\/\/arxiv.org\/abs\/2403.00663, 2024."},{"key":"1597_CR86","volume-title":"WCEBleedGen: A wireless capsule endoscopy dataset and its benchmarking for automatic bleeding classification, detection, and segmentation","author":"P Handa","year":"2024","unstructured":"P. Handa, M. Dhir, A. Mahbod, F. Schwarzhans, R. Woitek, N. Goel, D. Gunjan. WCEBleedGen: A wireless capsule endoscopy dataset and its benchmarking for automatic bleeding classification, detection, and segmentation, [Online], Available: https:\/\/arxiv.org\/abs\/2408.12466, 2024."},{"key":"1597_CR87","volume-title":"PolypDB: A curated multi-center dataset for development of AI algorithms in colonoscopy","author":"D Jha","year":"2024","unstructured":"D. Jha, N. K. Tomar, V. Sharma, Q. H. Trinh, K. Biswas, H. Pan, R. K. Jha, G. Durak, A. Hann, J. Varkey, H. V. Dao, L. Van Dao, B. P. Nguyen, N. Papachrysos, B. Rieders, P. T. Schmidt, E. Geissler, T. Berzin, P. Halvorsen, M. A. Riegler, T. de Lange, U. Bagci. 
PolypDB: A curated multi-center dataset for development of AI algorithms in colonoscopy, [Online], Available: https:\/\/arxiv.org\/abs\/2409.00045, 2024."},{"issue":"suppl_1","key":"1597_CR88","doi-asserted-by":"publisher","first-page":"S45","DOI":"10.1111\/j.1753-4887.2012.00505.x","volume":"70","author":"W M de Vos","year":"2012","unstructured":"W. M. de Vos, E. A. J. de Vos. Role of the intestinal microbiome in health and disease: From correlation to causation. Nutrition Reviews, vol. 70, no. suppl_1, pp. S45\u2013S56, 2012. DOI: https:\/\/doi.org\/10.1111\/j.1753-4887.2012.00505.x.","journal-title":"Nutrition Reviews"},{"key":"1597_CR89","doi-asserted-by":"publisher","unstructured":"Y. Li, P. Agarwal. A pathway-based view of human diseases and disease relationships. PLoS One, vol. 4, no. 2, Article number e4346, 2009. DOI: https:\/\/doi.org\/10.1371\/journal.pone.0004346.","DOI":"10.1371\/journal.pone.0004346"},{"issue":"11","key":"1597_CR90","first-page":"661","volume":"98","author":"B Veauthier","year":"2018","unstructured":"B. Veauthier, J. R. Hornecker. Crohn\u2019s disease: Diagnosis and management. American Family Physician, vol. 98, no. 11, pp. 661\u2013669, 2018.","journal-title":"American Family Physician"},{"issue":"7","key":"1597_CR91","doi-asserted-by":"publisher","first-page":"1837","DOI":"10.1007\/s11263-022-01622-8","volume":"130","author":"L Yang","year":"2022","unstructured":"L. Yang, H. Jiang, Q. Song, J. Guo. A survey on long-tailed visual recognition. International Journal of Computer Vision, vol. 130, no. 7, pp. 1837\u20131872, 2022. DOI: https:\/\/doi.org\/10.1007\/s11263-022-01622-8.","journal-title":"International Journal of Computer Vision"},{"issue":"7","key":"1597_CR92","doi-asserted-by":"publisher","first-page":"5092","DOI":"10.1109\/TPAMI.2024.3361862","volume":"46","author":"J Wu","year":"2024","unstructured":"J. Wu, X. Li, S. Xu, H. Yuan, H. Ding, Y. Yang, X. Li, J. Zhang, Y. Tong, X. Jiang, B. Ghanem, D. Tao. 
Towards open vocabulary learning: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 46, no. 7, pp. 5092\u20135113, 2024. DOI: https:\/\/doi.org\/10.1109\/TPAMI.2024.3361862.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"7972","key":"1597_CR93","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1038\/s41586-023-06291-2","volume":"620","author":"K Singhal","year":"2023","unstructured":"K. Singhal, S. Azizi, T. Tu, S. S. Mahdavi, J. Wei, H. W. Chung, N. Scales, A. Tanwani, H. Cole-Lewis, S. Pfohl, P. Payne, M. Seneviratne, P. Gamble, C. Kelly, A. Babiker, N. Sch\u00e4rli, A. Chowdhery, P. Mansfield, D. Demner-Fushman, B. A. Y. Arcas, D. Webster, G. S. Corrado, Y. Matias, K. Chou, J. Gottweis, N. Tomasev, Y. Liu, A. Rajkomar, J. Barral, C. Semturs, A. Karthikesalingam, V. Natarajan. Large language models encode clinical knowledge. Nature, vol. 620, no. 7972, pp. 172\u2013180, 2023. DOI: https:\/\/doi.org\/10.1038\/s41586-023-06291-2.","journal-title":"Nature"},{"key":"1597_CR94","doi-asserted-by":"publisher","first-page":"1195","DOI":"10.1109\/CVPR46437.2021.00125","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"J Zhang","year":"2021","unstructured":"J. Zhang, Y. Xie, Y. Xia, C. Shen. DoDNet: Learning to segment multi-organ and tumors from multiple partially labeled datasets. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Nashville, USA, pp. 1195\u20131204, 2021. DOI: https:\/\/doi.org\/10.1109\/CVPR46437.2021.00125."},{"key":"1597_CR95","doi-asserted-by":"publisher","unstructured":"D. Karimi, H. Dou, S. K. Warfield, A. Gholipour. Deep learning with noisy labels: Exploring techniques and remedies in medical image analysis. Medical Image Analysis, vol. 65, Article number 101759, 2020. 
DOI: https:\/\/doi.org\/10.1016\/j.media.2020.101759.","DOI":"10.1016\/j.media.2020.101759"},{"key":"1597_CR96","first-page":"I-593","volume-title":"Proceedings of the 31st International Conference on International Conference on Machine Learning","author":"H F Yu","year":"2014","unstructured":"H. F. Yu, P. Jain, P. Kar, I. Dhillon. Large-scale multilabel learning with missing labels. In Proceedings of the 31st International Conference on International Conference on Machine Learning, Beijing, China, pp. I-593\u2013I-601, 2014."},{"key":"1597_CR97","doi-asserted-by":"publisher","first-page":"675","DOI":"10.1145\/2647868.2654889","volume-title":"Proceedings of the 22nd ACM International Conference on Multimedia","author":"Y Jia","year":"2014","unstructured":"Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Girshick, S. Guadarrama, T. Darrell. Caffe: Convolutional architecture for fast feature embedding. In Proceedings of the 22nd ACM International Conference on Multimedia, Orlando, USA, pp. 675\u2013678, 2014. DOI: https:\/\/doi.org\/10.1145\/2647868.2654889."},{"key":"1597_CR98","doi-asserted-by":"publisher","first-page":"2261","DOI":"10.1109\/CVPR.2017.243","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"G Huang","year":"2017","unstructured":"G. Huang, Z. Liu, L. Van Der Maaten, K. Q. Weinberger. Densely connected convolutional networks. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Honolulu, USA, pp. 2261\u20132269, 2017. DOI: https:\/\/doi.org\/10.1109\/CVPR.2017.243."},{"key":"1597_CR99","doi-asserted-by":"publisher","first-page":"770","DOI":"10.1109\/CVPR.2016.90","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"K He","year":"2016","unstructured":"K. He, X. Zhang, S. Ren, J. Sun. Deep residual learning for image recognition. 
In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Las Vegas, USA, pp. 770\u2013778, 2016. DOI: https:\/\/doi.org\/10.1109\/CVPR.2016.90."},{"key":"1597_CR100","volume-title":"Proceedings of the 9th International Conference on Learning Representations","author":"A Dosovitskiy","year":"2021","unstructured":"A. Dosovitskiy, L. Beyer, A. Kolesnikov, D. Weissenborn, X. Zhai, T. Unterthiner, M. Dehghani, M. Minderer, G. Heigold, S. Gelly, J. Uszkoreit, N. Houlsby. An image is worth 16\u00d716 words: Transformers for image recognition at scale. In Proceedings of the 9th International Conference on Learning Representations, 2021."},{"key":"1597_CR101","doi-asserted-by":"publisher","first-page":"4510","DOI":"10.1109\/CVPR.2018.00474","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"M Sandler","year":"2018","unstructured":"M. Sandler, A. Howard, M. Zhu, A. Zhmoginov, L. C. Chen. MobileNetV2: Inverted residuals and linear bottlenecks. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Salt Lake City, USA, pp. 4510\u20134520, 2018. DOI: https:\/\/doi.org\/10.1109\/CVPR.2018.00474."},{"key":"1597_CR102","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-030-01234-2_1","volume-title":"Proceedings of the 15th European Conference on Computer Vision","author":"S Woo","year":"2018","unstructured":"S. Woo, J. Park, J. Y. Lee, I. S. Kweon. CBAM: Convolutional block attention module. In Proceedings of the 15th European Conference on Computer Vision, Munich, Germany, pp. 3\u201319, 2018. DOI: https:\/\/doi.org\/10.1007\/978-3-030-01234-2_1."},{"key":"1597_CR103","doi-asserted-by":"publisher","first-page":"4489","DOI":"10.1109\/ICCV.2015.510","volume-title":"Proceedings of IEEE International Conference on Computer Vision","author":"D Tran","year":"2015","unstructured":"D. Tran, L. Bourdev, R. Fergus, L. Torresani, M. Paluri. 
Learning spatiotemporal features with 3D convolutional networks. In Proceedings of IEEE International Conference on Computer Vision, Santiago, Chile, pp. 4489\u20134497, 2015. DOI: https:\/\/doi.org\/10.1109\/ICCV.2015.510."},{"key":"1597_CR104","doi-asserted-by":"publisher","first-page":"2818","DOI":"10.1109\/CVPR.2016.308","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"C Szegedy","year":"2016","unstructured":"C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, Z. Wojna. Rethinking the inception architecture for computer vision. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Las Vegas, USA, pp. 2818\u20132826, 2016. DOI: https:\/\/doi.org\/10.1109\/CVPR.2016.308."},{"key":"1597_CR105","doi-asserted-by":"publisher","first-page":"4724","DOI":"10.1109\/CVPR.2017.502","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"J Carreira","year":"2017","unstructured":"J. Carreira, A. Zisserman. Quo vadis, action recognition? A new model and the kinetics dataset. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Honolulu, USA, pp. 4724\u20134733, 2017. DOI: https:\/\/doi.org\/10.1109\/CVPR.2017.502."},{"key":"1597_CR106","doi-asserted-by":"publisher","first-page":"620","DOI":"10.1007\/978-3-030-00934-2_69","volume-title":"Proceedings of the 21st International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Y Yuan","year":"2018","unstructured":"Y. Yuan, W. Qin, B. Ibragimov, B. Han, L. Xing. RIIS-DenseNet: Rotation-invariant and image similarity constrained densely connected convolutional network for polyp detection. In Proceedings of the 21st International Conference on Medical Image Computing and Computer Assisted Intervention, Granada, Spain, pp. 620\u2013628, 2018. 
DOI: https:\/\/doi.org\/10.1007\/978-3-030-00934-2_69."},{"key":"1597_CR107","doi-asserted-by":"publisher","first-page":"274","DOI":"10.1007\/978-3-030-59725-2_27","volume-title":"Proceedings of the 23rd International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Y Tian","year":"2020","unstructured":"Y. Tian, G. Maicas, L. Z. C. T. Pu, R. Singh, J. W. Verjans, G. Carneiro. Few-shot anomaly detection for polyp frames from colonoscopy. In Proceedings of the 23rd International Conference on Medical Image Computing and Computer Assisted Intervention, Lima, Peru, pp. 274\u2013284, 2020. DOI: https:\/\/doi.org\/10.1007\/978-3-030-59725-2_27."},{"key":"1597_CR108","doi-asserted-by":"publisher","first-page":"742","DOI":"10.1007\/978-3-030-59716-0_71","volume-title":"Proceedings of the 23rd International Conference on Medical Image Computing and Computer Assisted Intervention","author":"H Gammulle","year":"2020","unstructured":"H. Gammulle, S. Denman, S. Sridharan, C. Fookes. Two-stream deep feature modelling for automated video endoscopy data analysis. In Proceedings of the 23rd International Conference on Medical Image Computing and Computer Assisted Intervention, Lima, Peru, pp. 742\u2013751, 2020. DOI: https:\/\/doi.org\/10.1007\/978-3-030-59716-0_71."},{"key":"1597_CR109","doi-asserted-by":"publisher","unstructured":"G. Carneiro, L. Z. C. T. Pu, R. Singh, A. Burt. Deep learning uncertainty and confidence calibration for the five-class polyp classification from colonoscopy. Medical Image Analysis, vol. 62, Article number 101653, 2020. DOI: https:\/\/doi.org\/10.1016\/j.media.2020.101653.","DOI":"10.1016\/j.media.2020.101653"},{"key":"1597_CR110","doi-asserted-by":"publisher","unstructured":"X. Guo, Y. Yuan. Semi-supervised WCE image classification with adaptive aggregated attention. Medical Image Analysis, vol. 64, Article number 101733, 2020. 
DOI: https:\/\/doi.org\/10.1016\/j.media.2020.101733.","DOI":"10.1016\/j.media.2020.101733"},{"key":"1597_CR111","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1007\/978-3-031-16437-8_14","volume-title":"Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"W Ma","year":"2022","unstructured":"W. Ma, Y. Zhu, R. Zhang, J. Yang, Y. Hu, Z. Li, L. Xiang. Toward clinically assisted colorectal polyp recognition via structured cross-modal representation consistency. In Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention, Singapore, pp. 141\u2013150, 2022. DOI: https:\/\/doi.org\/10.1007\/978-3-031-16437-8_14."},{"key":"1597_CR112","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1007\/978-3-031-16437-8_8","volume-title":"Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"K N Wang","year":"2022","unstructured":"K. N. Wang, Y. He, S. Zhuang, J. Miao, X. He, P. Zhou, G. Yang, G. Q. Zhou, S. Li. FFCNet: Fourier transform-based frequency learning and complex convolutional network for colon disease classification. In Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention, Singapore, pp. 78\u201387, 2022. DOI: https:\/\/doi.org\/10.1007\/978-3-031-16437-8_8."},{"key":"1597_CR113","doi-asserted-by":"publisher","unstructured":"K. N. Wang, S. Zhuang, Q. Y. Ran, P. Zhou, J. Hua, G. Q. Zhou, X. He. DLGNet: A dual-branch lesion-aware network with the supervised Gaussian mixture model for colon lesions classification in colonoscopy images. Medical Image Analysis, vol. 87, Article number 102832, 2023. DOI: https:\/\/doi.org\/10.1016\/j.media.2023.102832.","DOI":"10.1016\/j.media.2023.102832"},{"key":"1597_CR114","doi-asserted-by":"publisher","unstructured":"G. Yue, P. Wei, Y. Liu, Y. Luo, J. Du, T. Wang. 
Automated endoscopic image classification via deep neural network with class imbalance loss. IEEE Transactions on Instrumentation and Measurement, vol. 72, Article number 5010611, 2023. DOI: https:\/\/doi.org\/10.1109\/TIM.2023.3264047.","DOI":"10.1109\/TIM.2023.3264047"},{"key":"1597_CR115","doi-asserted-by":"publisher","unstructured":"Y. Luo, X. Guo, L. Liu, Y. Yuan. Dynamic attribute-guided few-shot open-set network for medical image diagnosis. Expert Systems with Applications, vol. 251, Article number 124098, 2024. DOI: https:\/\/doi.org\/10.1016\/j.eswa.2024.124098.","DOI":"10.1016\/j.eswa.2024.124098"},{"key":"1597_CR116","doi-asserted-by":"publisher","first-page":"611","DOI":"10.1007\/978-3-030-00934-2_68","volume-title":"Proceedings of the 21st International Conference on Medical Image Computing and Computer Assisted Intervention","author":"H Itoh","year":"2018","unstructured":"H. Itoh, H. R. Roth, L. Lu, M. Oda, M. Misawa, Y. Mori, S. E. Kudo, K. Mori. Towards automated colonoscopy diagnosis: Binary polyp size estimation via unsupervised depth learning. In Proceedings of the 21st International Conference on Medical Image Computing and Computer Assisted Intervention, Granada, Spain, pp. 611\u2013619, 2018. DOI: https:\/\/doi.org\/10.1007\/978-3-030-00934-2_68."},{"issue":"8","key":"1597_CR117","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"S. Hochreiter, J. Schmidhuber. Long short-term memory. Neural Computation, vol. 9, no. 8, pp. 1735\u20131780, 1997. DOI: https:\/\/doi.org\/10.1162\/neco.1997.9.8.1735.","journal-title":"Neural Computation"},{"issue":"1","key":"1597_CR118","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1136\/gutjnl-2017-314547","volume":"68","author":"M F Byrne","year":"2019","unstructured":"M. F. Byrne, N. Chapados, F. Soudan, C. Oertel, M. L. P\u00e9rez, R. Kelly, N. Iqbal, F. Chandelier, D. K. Rex. 
Real-time differentiation of adenomatous and hyperplastic diminutive colorectal polyps during analysis of unaltered videos of standard colonoscopy using a deep learning model. Gut, vol. 68, no. 1, pp. 94\u2013100, 2019. DOI: https:\/\/doi.org\/10.1136\/gutjnl-2017-314547.","journal-title":"Gut"},{"key":"1597_CR119","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1007\/978-3-031-21083-9_3","volume-title":"Proceedings of the 1st MICCAI Workshop on Imaging Systems for GI Endoscopy, and Graphs in Biomedical Image Analysis","author":"A Tamhane","year":"2022","unstructured":"A. Tamhane, T. Mida, E. Posner, M. Bouhnik. Colonoscopy landmark detection using vision transformers. In Proceedings of the 1st MICCAI Workshop on Imaging Systems for GI Endoscopy, and Graphs in Biomedical Image Analysis, Singapore, pp. 24\u201334, 2022. DOI: https:\/\/doi.org\/10.1007\/978-3-031-21083-9_3."},{"key":"1597_CR120","volume-title":"Proceedings of the 41st International Conference on Machine Learning","author":"L Zhu","year":"2024","unstructured":"L. Zhu, B. Liao, Q. Zhang, X. Wang, W. Liu, X. Wang. Vision mamba: Efficient visual representation learning with bidirectional state space model. In Proceedings of the 41st International Conference on Machine Learning, Vienna, Austria, Article number 2584, 2024."},{"key":"1597_CR121","first-page":"8748","volume-title":"Proceedings of the 38th International Conference on Machine Learning","author":"A Radford","year":"2021","unstructured":"A. Radford, J. W. Kim, C. Hallacy, A. Ramesh, G. Goh, S. Agarwal, G. Sastry, A. Askell, P. Mishkin, J. Clark, G. Krueger, I. Sutskever. Learning transferable visual models from natural language supervision. In Proceedings of the 38th International Conference on Machine Learning, pp. 8748\u20138763, 2021."},{"key":"1597_CR122","doi-asserted-by":"publisher","unstructured":"X. Yang, Q. Wei, C. Zhang, K. Zhou, L. Kong, W. Jiang. Colon polyp detection and segmentation based on improved MRCNN. 
IEEE Transactions on Instrumentation and Measurement, vol. 70, Article number 4501710, 2021. DOI: https:\/\/doi.org\/10.1109\/TIM.2020.3038011.","DOI":"10.1109\/TIM.2020.3038011"},{"key":"1597_CR123","doi-asserted-by":"publisher","unstructured":"X. Liu, X. Guo, Y. Liu, Y. Yuan. Consolidated domain adaptive detection and localization framework for cross-device colonoscopic images. Medical Image Analysis, vol. 71, Article number 102052, 2021. DOI: https:\/\/doi.org\/10.1016\/j.media.2021.102052.","DOI":"10.1016\/j.media.2021.102052"},{"key":"1597_CR124","doi-asserted-by":"publisher","unstructured":"H. A. Qadir, Y. Shin, J. Solhusvik, J. Bergsland, L. Aabakken, I. Balasingham. Toward real-time polyp detection using fully CNNs for 2D Gaussian shapes prediction. Medical Image Analysis, vol. 68, Article number 101897, 2021. DOI: https:\/\/doi.org\/10.1016\/j.media.2020.101897.","DOI":"10.1016\/j.media.2020.101897"},{"key":"1597_CR125","doi-asserted-by":"publisher","unstructured":"I. Pacal, A. Karaman, D. Karaboga, B. Akay, A. Basturk, U. Nalbantoglu, S. Coskun. An efficient real-time colonic polyp detection with yolo algorithms trained by using negative samples and large datasets. Computers in Biology and Medicine, vol. 141, Article number 105031, 2022. DOI: https:\/\/doi.org\/10.1016\/j.compbiomed.2021.105031.","DOI":"10.1016\/j.compbiomed.2021.105031"},{"issue":"7","key":"1597_CR126","doi-asserted-by":"publisher","first-page":"1897","DOI":"10.1109\/TMI.2022.3150435","volume":"41","author":"X Liu","year":"2022","unstructured":"X. Liu, Y. Yuan. A source-free domain adaptive polyp detection framework with style diversification flow. IEEE Transactions on Medical Imaging, vol. 41, no. 7, pp. 1897\u20131908, 2022. DOI: https:\/\/doi.org\/10.1109\/TMI.2022.3150435.","journal-title":"IEEE Transactions on Medical Imaging"},{"key":"1597_CR127","doi-asserted-by":"publisher","unstructured":"R. Gong, S. He, T. Tian, J. Chen, Y. Hao, C. Qiao. 
FRCNN-AA-CIF: An automatic detection model of colon polyps based on attention awareness and context information fusion. Computers in Biology and Medicine, vol. 158, Article number 106787, 2023. DOI: https:\/\/doi.org\/10.1016\/j.compbiomed.2023.106787.","DOI":"10.1016\/j.compbiomed.2023.106787"},{"key":"1597_CR128","doi-asserted-by":"publisher","DOI":"10.1117\/12.2653048","volume-title":"Proceedings of SPIE 12465, Medical Imaging Computer-Aided Diagnosis","author":"M R Haugland","year":"2023","unstructured":"M. R. Haugland, H. A. Qadir, I. Balasingham. Deep learning for improved polyp detection from synthetic narrow-band imaging. In Proceedings of SPIE 12465, Medical Imaging Computer-Aided Diagnosis, San Diego, USA, Article number 124651N, 2023. DOI: https:\/\/doi.org\/10.1117\/12.2653048."},{"key":"1597_CR129","doi-asserted-by":"publisher","first-page":"7051","DOI":"10.1109\/TMM.2022.3217388","volume":"25","author":"W Li","year":"2023","unstructured":"W. Li, X. Liu, Y. Yuan. SCAN++: Enhanced semantic conditioned adaptation for domain adaptive object detection. IEEE Transactions on Multimedia, vol. 25, pp. 7051\u20137061, 2023. DOI: https:\/\/doi.org\/10.1109\/TMM.2022.3217388.","journal-title":"IEEE Transactions on Multimedia"},{"issue":"6","key":"1597_CR130","doi-asserted-by":"publisher","first-page":"7287","DOI":"10.1109\/TNNLS.2023.3272389","volume":"35","author":"X Liu","year":"2024","unstructured":"X. Liu, W. Li, Y. Yuan. Decoupled unbiased teacher for source-free domain adaptive medical object detection. IEEE Transactions on Neural Networks and Learning Systems, vol. 35, no. 6, pp. 7287\u20137298, 2024. 
DOI: https:\/\/doi.org\/10.1109\/TNNLS.2023.3272389.","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"1597_CR131","doi-asserted-by":"publisher","first-page":"327","DOI":"10.1007\/978-3-319-19992-4_25","volume-title":"Proceedings of the 24th International Conference on Information Processing in Medical Imaging","author":"N Tajbakhsh","year":"2015","unstructured":"N. Tajbakhsh, S. R. Gurudu, J. Liang. A comprehensive computer-aided polyp detection system for colonoscopy videos. In Proceedings of the 24th International Conference on Information Processing in Medical Imaging, Isle of Skye, UK, pp. 327\u2013338, 2015. DOI: https:\/\/doi.org\/10.1007\/978-3-319-19992-4_25."},{"issue":"1","key":"1597_CR132","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1109\/JBHI.2016.2637004","volume":"21","author":"L Yu","year":"2017","unstructured":"L. Yu, H. Chen, Q. Dou, J. Qin, P. A. Heng. Integrating online and offline three-dimensional deep learning for automated polyp detection in colonoscopy videos. IEEE Journal of Biomedical and Health Informatics, vol. 21, no. 1, pp. 65\u201375, 2017. DOI: https:\/\/doi.org\/10.1109\/JBHI.2016.2637004.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"key":"1597_CR133","doi-asserted-by":"publisher","first-page":"3929","DOI":"10.1109\/ICPR.2018.8545174","volume-title":"Proceedings of the 24th International Conference on Pattern Recognition","author":"X Mo","year":"2018","unstructured":"X. Mo, K. Tao, Q. Wang, G. Wang. An efficient approach for polyps detection in endoscopic videos based on faster R-CNN. In Proceedings of the 24th International Conference on Pattern Recognition, Beijing, China, pp. 3929\u20133934, 2018. DOI: https:\/\/doi.org\/10.1109\/ICPR.2018.8545174."},{"issue":"1","key":"1597_CR134","doi-asserted-by":"publisher","first-page":"180","DOI":"10.1109\/JBHI.2019.2907434","volume":"24","author":"H A Qadir","year":"2020","unstructured":"H. A. Qadir, I. 
Balasingham, J. Solhusvik, J. Bergsland, L. Aabakken, Y. Shin. Improving automatic polyp detection using CNN by exploiting temporal dependency in colonoscopy video. IEEE Journal of Biomedical and Health Informatics, vol. 24, no. 1, pp. 180\u2013193, 2020. DOI: https:\/\/doi.org\/10.1109\/JBHI.2019.2907434.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"key":"1597_CR135","doi-asserted-by":"publisher","first-page":"722","DOI":"10.1007\/978-3-030-59716-0_69","volume-title":"Proceedings of the 23rd International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Z Zhang","year":"2020","unstructured":"Z. Zhang, H. Shang, H. Zheng, X. Wang, J. Wang, Z. Sun, J. Huang, J. Yao. Asynchronous in parallel detection and tracking (AIPDT): Real-time robust polyp detection. In Proceedings of the 23rd International Conference on Medical Image Computing and Computer Assisted Intervention, Lima, Peru, pp. 722\u2013731, 2020. DOI: https:\/\/doi.org\/10.1007\/978-3-030-59716-0_69."},{"key":"1597_CR136","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1007\/978-3-030-87240-3_29","volume-title":"Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"L Wu","year":"2021","unstructured":"L. Wu, Z. Hu, Y. Ji, P. Luo, S. Zhang. Multi-frame collaboration for effective endoscopic video polyp detection via spatial-temporal feature transformation. In Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention, Strasbourg, France, pp. 302\u2013312, 2021. DOI: https:\/\/doi.org\/10.1007\/978-3-030-87240-3_29."},{"key":"1597_CR137","doi-asserted-by":"publisher","unstructured":"T. Yu, N. Lin, X. Zhang, Y. Pan, H. Hu, W. Zheng, J. Liu, W. Hu, H. Duan, J. Si. An end-to-end tracking method for polyp detectors in colonoscopy videos. Artificial Intelligence in Medicine, vol. 131, Article number 102363, 2022. 
DOI: https:\/\/doi.org\/10.1016\/j.artmed.2022.102363.","DOI":"10.1016\/j.artmed.2022.102363"},{"issue":"6","key":"1597_CR138","doi-asserted-by":"publisher","first-page":"7780","DOI":"10.1109\/TII.2022.3208364","volume":"19","author":"D Wang","year":"2023","unstructured":"D. Wang, X. Wang, S. Wang, Y. Yin. Explainable multitask Shapley explanation networks for real-time polyp diagnosis in videos. IEEE Transactions on Industrial Informatics, vol. 19, no. 6, pp. 7780\u20137789, 2023. DOI: https:\/\/doi.org\/10.1109\/TII.2022.3208364.","journal-title":"IEEE Transactions on Industrial Informatics"},{"key":"1597_CR139","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1007\/978-3-031-43904-9_5","volume-title":"Proceedings of the 26th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Y Jiang","year":"2023","unstructured":"Y. Jiang, Z. Zhang, R. Zhang, G. Li, S. Cui, Z. Li. YONA: You only need one adjacent reference-frame for accurate and fast video polyp detection. In Proceedings of the 26th International Conference on Medical Image Computing and Computer Assisted Intervention, Vancouver, Canada, pp. 44\u201354, 2023. DOI: https:\/\/doi.org\/10.1007\/978-3-031-43904-9_5."},{"key":"1597_CR140","doi-asserted-by":"publisher","first-page":"590","DOI":"10.1007\/978-3-031-43904-9_57","volume-title":"Proceedings of the 26th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Y Intrator","year":"2023","unstructured":"Y. Intrator, N. Aizenberg, A. Livne, E. Rivlin, R. Goldenberg. Self-supervised polyp re-identification in colonoscopy. In Proceedings of the 26th International Conference on Medical Image Computing and Computer Assisted Intervention, Vancouver, Canada, pp. 590\u2013600, 2023. 
DOI: https:\/\/doi.org\/10.1007\/978-3-031-43904-9_57."},{"key":"1597_CR141","doi-asserted-by":"publisher","first-page":"944","DOI":"10.1109\/BIBM62325.2024.10822332","volume-title":"Proceedings of IEEE International Conference on Bioinformatics and Biomedicine","author":"Y Jiang","year":"2024","unstructured":"Y. Jiang, Z. Zhang, J. Wei, C. M. Feng, G. Li, X. Wan, S. Cui, Z. Li. Let video teaches you more: Video-to-image knowledge distillation using detection transformer for medical video lesion detection. In Proceedings of IEEE International Conference on Bioinformatics and Biomedicine, Lisbon, Portugal, pp. 944\u2013949, 2024. DOI: https:\/\/doi.org\/10.1109\/BIBM62325.2024.10822332."},{"key":"1597_CR142","volume-title":"YOLOv4: Optimal speed and accuracy of object detection","author":"A Bochkovskiy","year":"2020","unstructured":"A. Bochkovskiy, C. Y. Wang, H. Y. M. Liao. YOLOv4: Optimal speed and accuracy of object detection, [Online], Available: https:\/\/arxiv.org\/abs\/2004.10934, 2020."},{"key":"1597_CR143","volume-title":"YOLOv3: An incremental improvement","author":"J Redmon","year":"2018","unstructured":"J. Redmon, A. Farhadi. YOLOv3: An incremental improvement, [Online], Available: https:\/\/arxiv.org\/abs\/1804.02767, 2018."},{"key":"1597_CR144","doi-asserted-by":"publisher","first-page":"10778","DOI":"10.1109\/CVPR42600.2020.01079","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"M Tan","year":"2020","unstructured":"M. Tan, R. Pang, Q. V. Le. EfficientDet: Scalable and efficient object detection. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Seattle, USA, pp. 10778\u201310787, 2020. 
DOI: https:\/\/doi.org\/10.1109\/CVPR42600.2020.01079."},{"key":"1597_CR145","doi-asserted-by":"publisher","first-page":"4277","DOI":"10.1109\/CVPR.2019.00441","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"B Li","year":"2019","unstructured":"B. Li, W. Wu, Q. Wang, F. Zhang, J. Xing, J. Yan. SiamRPN++: Evolution of Siamese visual tracking with very deep networks. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Long Beach, USA, pp. 4277\u20134286, 2019. DOI: https:\/\/doi.org\/10.1109\/CVPR.2019.00441."},{"key":"1597_CR146","volume-title":"Proceedings of the 3rd International Conference on Learning Representations","author":"K Simonyan","year":"2015","unstructured":"K. Simonyan, A. Zisserman. Very deep convolutional networks for large-scale image recognition. In Proceedings of the 3rd International Conference on Learning Representations, San Diego, USA, 2015."},{"key":"1597_CR147","doi-asserted-by":"publisher","first-page":"630","DOI":"10.1007\/978-3-319-46493-0_38","volume-title":"Proceedings of the 14th European Conference on Computer Vision","author":"K He","year":"2016","unstructured":"K. He, X. Zhang, S. Ren, J. Sun. Identity mappings in deep residual networks. In Proceedings of the 14th European Conference on Computer Vision, Amsterdam, The Netherlands, pp. 630\u2013645, 2016. DOI: https:\/\/doi.org\/10.1007\/978-3-319-46493-0_38."},{"key":"1597_CR148","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1007\/978-3-031-16452-1_30","volume-title":"Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"X Liu","year":"2022","unstructured":"X. Liu, W. Li, Y. Yuan. Intervention & interaction federated abnormality detection with noisy clients. In Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention, Singapore, pp. 309\u2013319, 2022. 
DOI: https:\/\/doi.org\/10.1007\/978-3-031-16452-1_30."},{"key":"1597_CR149","doi-asserted-by":"publisher","unstructured":"X. Pan, Y. Mu, C. Ma, Q. He. TFCNet: A texture-aware and fine-grained feature compensated polyp detection network. Computers in Biology and Medicine, vol. 171, Article number 108144, 2024. DOI: https:\/\/doi.org\/10.1016\/j.compbiomed.2024.108144.","DOI":"10.1016\/j.compbiomed.2024.108144"},{"key":"1597_CR150","first-page":"91","volume-title":"Proceedings of the 29th International Conference on Neural Information Processing Systems","author":"S Ren","year":"2015","unstructured":"S. Ren, K. He, R. Girshick, J. Sun. Faster R-CNN: Towards real-time object detection with region proposal networks. In Proceedings of the 29th International Conference on Neural Information Processing Systems, Montreal, Canada, pp. 91\u201399, 2015."},{"key":"1597_CR151","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-3-319-46448-0_2","volume-title":"Proceedings of the 14th European Conference on Computer Vision","author":"W Liu","year":"2016","unstructured":"W. Liu, D. Anguelov, D. Erhan, C. Szegedy, S. Reed, C. Y. Fu, A. C. Berg. SSD: Single shot MultiBox detector. In Proceedings of the 14th European Conference on Computer Vision, Amsterdam, The Netherlands, pp. 21\u201337, 2016. DOI: https:\/\/doi.org\/10.1007\/978-3-319-46448-0_2."},{"key":"1597_CR152","doi-asserted-by":"publisher","first-page":"2921","DOI":"10.1109\/CVPR.2016.319","volume-title":"Proceedings of IEEE Conference on Computer Vision and Pattern Recognition","author":"B Zhou","year":"2016","unstructured":"B. Zhou, A. Khosla, A. Lapedriza, A. Oliva, A. Torralba. Learning deep features for discriminative localization. In Proceedings of IEEE Conference on Computer Vision and Pattern Recognition, Las Vegas, USA, pp. 2921\u20132929, 2016. 
DOI: https:\/\/doi.org\/10.1109\/CVPR.2016.319."},{"key":"1597_CR153","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Proceedings of the 16th European Conference on Computer Vision","author":"N Carion","year":"2020","unstructured":"N. Carion, F. Massa, G. Synnaeve, N. Usunier, A. Kirillov, S. Zagoruyko. End-to-end object detection with transformers. In Proceedings of the 16th European Conference on Computer Vision, Glasgow, UK, pp. 213\u2013229, 2020. DOI: https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13."},{"key":"1597_CR154","doi-asserted-by":"publisher","first-page":"19773","DOI":"10.1109\/ICCV51070.2023.01816","volume-title":"Proceedings of IEEE\/CVF International Conference on Computer Vision","author":"S Chen","year":"2023","unstructured":"S. Chen, P. Sun, Y. Song, P. Luo. DiffusionDet: Diffusion model for object detection. In Proceedings of IEEE\/CVF International Conference on Computer Vision, Paris, France, pp. 19773\u201319786, 2023. DOI: https:\/\/doi.org\/10.1109\/ICCV51070.2023.01816."},{"issue":"6","key":"1597_CR155","doi-asserted-by":"publisher","first-page":"1231","DOI":"10.1109\/TMI.2017.2664042","volume":"36","author":"J Bernal","year":"2017","unstructured":"J. Bernal, N. Tajkbaksh, F. J. S\u00e1nchez, B. J. Matuszewski, H. Chen, L. Yu, Q. Angermann, O. Romain, B. Rustad, I. Balasingham, K. Pogorelov, S. Choi, Q. Debard, L. Maier-Hein, S. Speidel, D. Stoyanov, P. Brandao, H. C\u00f3rdova, C. S\u00e1nchez-Montes, S. R. Gurudu, G. Fern\u00e1ndez-Esparrach, X. Dray, J. Liang, A. Histace. Comparative validation of polyp detection methods in video colonoscopy: Results from the MICCAI 2015 endoscopic vision challenge. IEEE Transactions on Medical Imaging, vol. 36, no. 6, pp. 1231\u20131249, 2017. 
DOI: https:\/\/doi.org\/10.1109\/TMI.2017.2664042.","journal-title":"IEEE Transactions on Medical Imaging"},{"issue":"5","key":"1597_CR156","doi-asserted-by":"publisher","first-page":"749","DOI":"10.1109\/LGRS.2018.2802944","volume":"15","author":"Z Zhang","year":"2018","unstructured":"Z. Zhang, Q. Liu, Y. Wang. Road extraction by deep residual U-Net. IEEE Geoscience and Remote Sensing Letters, vol. 15, no. 5, pp. 749\u2013753, 2018. DOI: https:\/\/doi.org\/10.1109\/LGRS.2018.2802944.","journal-title":"IEEE Geoscience and Remote Sensing Letters"},{"issue":"2","key":"1597_CR157","doi-asserted-by":"publisher","first-page":"652","DOI":"10.1109\/TPAMI.2019.2938758","volume":"43","author":"S H Gao","year":"2021","unstructured":"S. H. Gao, M. M. Cheng, K. Zhao, X. Y. Zhang, M. H. Yang, P. Torr. Res2Net: A new multi-scale backbone architecture. IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 43, no. 2, pp. 652\u2013662, 2021. DOI: https:\/\/doi.org\/10.1109\/TPAMI.2019.2938758.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1597_CR158","first-page":"10347","volume-title":"Proceedings of the 38th International Conference on Machine Learning","author":"H Touvron","year":"2021","unstructured":"H. Touvron, M. Cord, M. Douze, F. Massa, A. Sablayrolles, H. J\u00e9gou. Training data-efficient image transformers & distillation through attention. In Proceedings of the 38th International Conference on Machine Learning, pp. 10347\u201310357, 2021."},{"key":"1597_CR159","first-page":"6105","volume-title":"Proceedings of the 36th International Conference on Machine Learning","author":"M Tan","year":"2019","unstructured":"M. Tan, Q. V. Le. EfficientNet: Rethinking model scaling for convolutional neural networks. In Proceedings of the 36th International Conference on Machine Learning, Long Beach, USA, pp. 
6105\u20136114, 2019."},{"key":"1597_CR160","doi-asserted-by":"publisher","first-page":"833","DOI":"10.1007\/978-3-030-01234-2_49","volume-title":"Proceedings of the 15th European Conference on Computer Vision","author":"L C Chen","year":"2018","unstructured":"L. C. Chen, Y. Zhu, G. Papandreou, F. Schroff, H. Adam. Encoder-decoder with atrous separable convolution for semantic image segmentation. In Proceedings of the 15th European Conference on Computer Vision, Munich, Germany, pp. 833\u2013851, 2018. DOI: https:\/\/doi.org\/10.1007\/978-3-030-01234-2_49."},{"issue":"3","key":"1597_CR161","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1007\/s41095-022-0274-8","volume":"8","author":"W Wang","year":"2022","unstructured":"W. Wang, E. Xie, X. Li, D. P. Fan, K. Song, D. Liang, T. Lu, P. Luo, L. Shao. PVT v2: Improved baselines with pyramid vision transformer. Computational Visual Media, vol. 8, no. 3, pp. 415\u2013424, 2022. DOI: https:\/\/doi.org\/10.1007\/s41095-022-0274-8.","journal-title":"Computational Visual Media"},{"key":"1597_CR162","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1109\/ICCV48922.2021.00009","volume-title":"Proceedings of IEEE\/CVF International Conference on Computer Vision","author":"H Wu","year":"2021","unstructured":"H. Wu, B. Xiao, N. Codella, M. Liu, X. Dai, L. Yuan, L. Zhang. CvT: Introducing convolutions to vision transformers. In Proceedings of IEEE\/CVF International Conference on Computer Vision, Montreal, Canada, pp. 22\u201331, 2021. DOI: https:\/\/doi.org\/10.1109\/ICCV48922.2021.00009."},{"key":"1597_CR163","volume-title":"Proceedings of the 35th International Conference on Neural Information Processing Systems","author":"E Xie","year":"2021","unstructured":"E. Xie, W. Wang, Z. Yu, A. Anandkumar, J. M. Alvarez, P. Luo. SegFormer: Simple and efficient design for semantic segmentation with transformers. 
In Proceedings of the 35th International Conference on Neural Information Processing Systems, 2021."},{"key":"1597_CR164","volume-title":"Proceedings of the 10th International Conference on Learning Representations","author":"S Chen","year":"2022","unstructured":"S. Chen, E. Xie, C. Ge, R. Chen, D. Liang, P. Luo. CycleMLP: An MLP-like architecture for dense prediction. In Proceedings of the 10th International Conference on Learning Representations, 2022."},{"key":"1597_CR165","doi-asserted-by":"publisher","first-page":"8819","DOI":"10.1109\/CVPR46437.2021.00871","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"L Chen","year":"2021","unstructured":"L. Chen, T. Yang, X. Zhang, W. Zhang, J. Sun. Points as queries: Weakly semi-supervised object detection by points. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Nashville, USA, pp. 8819\u20138828, 2021. DOI: https:\/\/doi.org\/10.1109\/CVPR46437.2021.00871."},{"key":"1597_CR166","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"M H Guo","year":"2022","unstructured":"M. H. Guo, C. Z. Lu, Q. Hou, Z. N. Liu, M. M. Cheng, S. M. Hu. SegNeXt: Rethinking convolutional attention design for semantic segmentation. In Proceedings of the 36th International Conference on Neural Information Processing Systems, New Orleans, USA, Article number 84, 2022."},{"key":"1597_CR167","doi-asserted-by":"publisher","first-page":"9992","DOI":"10.1109\/ICCV48922.2021.00986","volume-title":"Proceedings of IEEE\/CVF International Conference on Computer Vision","author":"Z Liu","year":"2021","unstructured":"Z. Liu, Y. Lin, Y. Cao, H. Hu, Y. Wei, Z. Zhang, S. Lin, B. Guo. Swin transformer: Hierarchical vision transformer using shifted windows. In Proceedings of IEEE\/CVF International Conference on Computer Vision, Montreal, Canada, pp. 9992\u201310002, 2021. 
DOI: https:\/\/doi.org\/10.1109\/ICCV48922.2021.00986."},{"key":"1597_CR168","doi-asserted-by":"publisher","first-page":"3992","DOI":"10.1109\/ICCV51070.2023.00371","volume-title":"Proceedings of IEEE\/CVF International Conference on Computer Vision","author":"A Kirillov","year":"2023","unstructured":"A. Kirillov, E. Mintun, N. Ravi, H. Mao, C. Rolland, L. Gustafson, T. Xiao, S. Whitehead, A. C. Berg, W. Y. Lo, P. Doll\u00e1r, R. Girshick. Segment anything. In Proceedings of IEEE\/CVF International Conference on Computer Vision, Paris, France, pp. 3992\u20134003, 2023. DOI: https:\/\/doi.org\/10.1109\/ICCV51070.2023.00371."},{"key":"1597_CR169","volume-title":"Proceedings of the 13th International Conference on Learning Representations","author":"N Ravi","year":"2025","unstructured":"N. Ravi, V. Gabeur, Y. T. Hu, R. Hu, C. Ryali, T. Ma, H. Khedr, R. R\u00e4dle, C. Rolland, L. Gustafson, E. Mintun, J. Pan, K. V. Alwala, N. Carion, C. Y. Wu, R. Girshick, P. Doll\u00e1r, C. Feichtenhofer. SAM 2: Segment anything in images and videos. In Proceedings of the 13th International Conference on Learning Representations, Singapore, 2025."},{"issue":"4","key":"1597_CR170","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L C Chen","year":"2018","unstructured":"L. C. Chen, G. Papandreou, I. Kokkinos, K. Murphy, A. L. Yuille. DeepLab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs. IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 40, no. 4, pp. 834\u2013848, 2018. DOI: https:\/\/doi.org\/10.1109\/TPAMI.2017.2699184.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"10","key":"1597_CR171","doi-asserted-by":"publisher","first-page":"3349","DOI":"10.1109\/TPAMI.2020.2983686","volume":"43","author":"J Wang","year":"2021","unstructured":"J. Wang, K. Sun, T. Cheng, B. Jiang, C. Deng, Y. Zhao, D. Liu, Y. Mu, M. 
Tan, X. Wang, W. Liu, B. Xiao. Deep high-resolution representation learning for visual recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 43, no. 10, pp. 3349\u20133364, 2021. DOI: https:\/\/doi.org\/10.1109\/TPAMI.2020.2983686.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1597_CR172","doi-asserted-by":"publisher","first-page":"11966","DOI":"10.1109\/CVPR52688.2022.01167","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Z Liu","year":"2022","unstructured":"Z. Liu, H. Mao, C. Y. Wu, C. Feichtenhofer, T. Darrell, S. Xie. A ConvNet for the 2020s. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, New Orleans, USA, pp. 11966\u201311976, 2022. DOI: https:\/\/doi.org\/10.1109\/CVPR52688.2022.01167."},{"key":"1597_CR173","doi-asserted-by":"publisher","first-page":"1280","DOI":"10.1109\/CVPR52688.2022.00135","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"B Cheng","year":"2022","unstructured":"B. Cheng, I. Misra, A. G. Schwing, A. Kirillov, R. Girdhar. Masked-attention mask transformer for universal image segmentation. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, New Orleans, USA, pp. 1280\u20131289, 2022. DOI: https:\/\/doi.org\/10.1109\/CVPR52688.2022.00135."},{"issue":"4","key":"1597_CR174","doi-asserted-by":"publisher","first-page":"1250","DOI":"10.1109\/JBHI.2017.2734329","volume":"22","author":"Y Yuan","year":"2018","unstructured":"Y. Yuan, D. Li, M. Q. H. Meng. Automatic polyp detection via a novel unified bottom-up and top-down saliency approach. IEEE Journal of Biomedical and Health Informatics, vol. 22, no. 4, pp. 1250\u20131260, 2018. 
DOI: https:\/\/doi.org\/10.1109\/JBHI.2017.2734329.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"key":"1597_CR175","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1007\/978-3-030-32239-7_34","volume-title":"Proceedings of the 22nd International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Y Fang","year":"2019","unstructured":"Y. Fang, C. Chen, Y. Yuan, K. Y. Tong. Selective feature aggregation network with area-boundary constraints for polyp segmentation. In Proceedings of the 22nd International Conference on Medical Image Computing and Computer Assisted Intervention, Shenzhen, China, pp. 302\u2013310, 2019. DOI: https:\/\/doi.org\/10.1007\/978-3-030-32239-7_34."},{"key":"1597_CR176","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1109\/ISM46123.2019.00049","volume-title":"Proceedings of IEEE International Symposium on Multimedia","author":"D Jha","year":"2019","unstructured":"D. Jha, P. H. Smedsrud, M. A. Riegler, D. Johansen, T. De Lange, P. Halvorsen, H. D. Johansen. ResUNet++: An advanced architecture for medical image segmentation. In Proceedings of IEEE International Symposium on Multimedia, San Diego, USA, pp. 225\u20132255, 2019. DOI: https:\/\/doi.org\/10.1109\/ISM46123.2019.00049."},{"key":"1597_CR177","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1007\/978-3-030-59725-2_25","volume-title":"Proceedings of the 23rd International Conference on Medical Image Computing and Computer Assisted Intervention","author":"R Zhang","year":"2020","unstructured":"R. Zhang, G. Li, Z. Li, S. Cui, D. Qian, Y. Yu. Adaptive context selection for polyp segmentation. In Proceedings of the 23rd International Conference on Medical Image Computing and Computer Assisted Intervention, Lima, Peru, pp. 253\u2013262, 2020. 
DOI: https:\/\/doi.org\/10.1007\/978-3-030-59725-2_25."},{"key":"1597_CR178","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1007\/978-3-030-59725-2_26","volume-title":"Proceedings of the 23rd International Conference on Medical Image Computing and Computer Assisted Intervention","author":"D P Fan","year":"2020","unstructured":"D. P. Fan, G. P. Ji, T. Zhou, G. Chen, H. Fu, J. Shen, L. Shao. PraNet: Parallel reverse attention network for polyp segmentation. In Proceedings of the 23rd International Conference on Medical Image Computing and Computer Assisted Intervention, Lima, Peru, pp. 263\u2013273, 2020. DOI: https:\/\/doi.org\/10.1007\/978-3-030-59725-2_26."},{"key":"1597_CR179","doi-asserted-by":"publisher","unstructured":"K. Wickstr\u00f8m, M. Kampffmeyer, R. Jenssen. Uncertainty and interpretability in convolutional neural networks for semantic segmentation of colorectal polyps. Medical Image Analysis, vol. 60, Article number 101619, 2020. DOI: https:\/\/doi.org\/10.1016\/j.media.2019.101619.","DOI":"10.1016\/j.media.2019.101619"},{"key":"1597_CR180","doi-asserted-by":"publisher","first-page":"2916","DOI":"10.1609\/aaai.v35i4.16398","volume-title":"Proceedings of the 35th AAAI Conference on Artificial Intelligence","author":"H Wu","year":"2021","unstructured":"H. Wu, J. Zhong, W. Wang, Z. Wen, J. Qin. Precise yet efficient semantic calibration and refinement in ConvNets for real-time polyp segmentation from colonoscopy videos. In Proceedings of the 35th AAAI Conference on Artificial Intelligence, pp. 2916\u20132924, 2021. DOI: https:\/\/doi.org\/10.1609\/aaai.v35i4.16398."},{"key":"1597_CR181","volume-title":"Proceedings of the 32nd British Machine Vision Conference","author":"Y Meng","year":"2021","unstructured":"Y. Meng, H. Zhang, D. Gao, Y. Zhao, X. Yang, X. Qian, X. Huang, Y. Zheng. BI-GCN: Boundary-aware input-dependent graph convolution network for biomedical image segmentation. 
In Proceedings of the 32nd British Machine Vision Conference, 2021."},{"key":"1597_CR182","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00347","volume-title":"Proceedings of IEEE\/CVF International Conference on Computer Vision","author":"H Wu","year":"2021","unstructured":"H. Wu, G. Chen, Z. Wen, J. Qin. Collaborative and adversarial learning of focused and dispersive representations for semi-supervised polyp segmentation. In Proceedings of IEEE\/CVF International Conference on Computer Vision, Montreal, Canada, 2021. DOI: https:\/\/doi.org\/10.1109\/ICCV48922.2021.00347."},{"key":"1597_CR183","doi-asserted-by":"publisher","first-page":"633","DOI":"10.1007\/978-3-030-87193-2_60","volume-title":"Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"T C Nguyen","year":"2021","unstructured":"T. C. Nguyen, T. P. Nguyen, G. H. Diep, A. H. Tran-Dinh, T. V. Nguyen, M. T. Tran. CCBANet: Cascading context and balancing attention for polyp segmentation. In Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention, Strasbourg, France, pp. 633\u2013643, 2021. DOI: https:\/\/doi.org\/10.1007\/978-3-030-87193-2_60."},{"key":"1597_CR184","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1007\/978-3-030-87240-3_13","volume-title":"Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Y Tian","year":"2021","unstructured":"Y. Tian, G. Pang, F. Liu, Y. Chen, S. H. Shin, J. W. Verjans, R. Singh, G. Carneiro. Constrained contrastive distribution learning for unsupervised anomaly detection and localisation in medical images. In Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention, Strasbourg, France, pp. 128\u2013140, 2021. 
DOI: https:\/\/doi.org\/10.1007\/978-3-030-87240-3_13."},{"key":"1597_CR185","doi-asserted-by":"publisher","first-page":"559","DOI":"10.1007\/978-3-030-87193-2_53","volume-title":"Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Y Shen","year":"2021","unstructured":"Y. Shen, X. Jia, M. Q. H. Meng. HRENet: A hard region enhancement network for polyp segmentation. In Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention, Strasbourg, France, pp. 559\u2013568, 2021. DOI: https:\/\/doi.org\/10.1007\/978-3-030-87193-2_53."},{"key":"1597_CR186","doi-asserted-by":"publisher","first-page":"720","DOI":"10.1007\/978-3-030-87193-2_68","volume-title":"Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"M Cheng","year":"2021","unstructured":"M. Cheng, Z. Kong, G. Song, Y. Tian, Y. Liang, J. Chen. Learnable oriented-derivative network for polyp segmentation. In Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention, Strasbourg, France, pp. 720\u2013730, 2021. DOI: https:\/\/doi.org\/10.1007\/978-3-030-87193-2_68."},{"key":"1597_CR187","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1007\/978-3-030-87193-2_12","volume-title":"Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"X Zhao","year":"2021","unstructured":"X. Zhao, L. Zhang, H. Lu. Automatic polyp segmentation via multi-scale subtraction network. In Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention, Strasbourg, France, pp. 120\u2013130, 2021. 
DOI: https:\/\/doi.org\/10.1007\/978-3-030-87193-2_12."},{"key":"1597_CR188","doi-asserted-by":"publisher","first-page":"699","DOI":"10.1007\/978-3-030-87193-2_66","volume-title":"Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"J Wei","year":"2021","unstructured":"J. Wei, Y. Hu, R. Zhang, Z. Li, S. K. Zhou, S. Cui. Shallow attention network for polyp segmentation. In Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention, Strasbourg, France, pp. 699\u2013708, 2021. DOI: https:\/\/doi.org\/10.1007\/978-3-030-87193-2_66."},{"key":"1597_CR189","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1007\/978-3-030-87193-2_2","volume-title":"Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Y Zhang","year":"2021","unstructured":"Y. Zhang, H. Liu, Q. Hu. TransFuse: Fusing transformers and CNNs for medical image segmentation. In Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention, Strasbourg, France, pp. 14\u201324, 2021. DOI: https:\/\/doi.org\/10.1007\/978-3-030-87193-2_2."},{"key":"1597_CR190","doi-asserted-by":"publisher","first-page":"2167","DOI":"10.1145\/3474085.3475375","volume-title":"Proceedings of the 29th ACM International Conference on Multimedia","author":"T Kim","year":"2021","unstructured":"T. Kim, H. Lee, D. Kim. UACANet: Uncertainty augmented context attention for polyp segmentation. In Proceedings of the 29th ACM International Conference on Multimedia, pp. 2167\u20132175, 2021. DOI: https:\/\/doi.org\/10.1145\/3474085.3475375."},{"issue":"10","key":"1597_CR191","doi-asserted-by":"publisher","first-page":"3886","DOI":"10.1109\/JBHI.2021.3077271","volume":"25","author":"C Yang","year":"2021","unstructured":"C. Yang, X. Guo, M. Zhu, B. Ibragimov, Y. Yuan. 
Mutual-prototype adaptation for cross-domain polyp segmentation. IEEE Journal of Biomedical and Health Informatics, vol. 25, no. 10, pp. 3886\u20133897, 2021. DOI: https:\/\/doi.org\/10.1109\/JBHI.2021.3077271.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"key":"1597_CR192","doi-asserted-by":"publisher","unstructured":"X. Guo, C. Yang, Y. Yuan. Dynamic-weighting hierarchical segmentation network for medical images. Medical Image Analysis, vol. 73, Article number 102196, 2021. DOI: https:\/\/doi.org\/10.1016\/j.media.2021.102196.","DOI":"10.1016\/j.media.2021.102196"},{"key":"1597_CR193","doi-asserted-by":"publisher","first-page":"877","DOI":"10.24963\/ijcai.2022\/123","volume-title":"Proceedings of the 31st International Joint Conference on Artificial Intelligence","author":"X Du","year":"2022","unstructured":"X. Du, X. Xu, K. Ma. ICGNet: Integration context-based reverse-contour guidance network for polyp segmentation. In Proceedings of the 31st International Joint Conference on Artificial Intelligence, Vienna, Austria, pp. 877\u2013883, 2022. DOI: https:\/\/doi.org\/10.24963\/ijcai.2022\/123."},{"key":"1597_CR194","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1007\/978-3-031-16437-8_7","volume-title":"Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"J Wei","year":"2022","unstructured":"J. Wei, Y. Hu, G. Li, S. Cui, S. Kevin Zhou, Z. Li. Box-Polyp: Boost generalized polyp segmentation using extra coarse bounding box annotations. In Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention, Singapore, pp. 67\u201377, 2022. 
DOI: https:\/\/doi.org\/10.1007\/978-3-031-16437-8_7."},{"key":"1597_CR195","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/978-3-031-16437-8_10","volume-title":"Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"R Zhang","year":"2022","unstructured":"R. Zhang, P. Lai, X. Wan, D. J. Fan, F. Gao, X. J. Wu, G. Li. Lesion-aware dynamic kernel for polyp segmentation. In Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention, Singapore, pp. 99\u2013109, 2022. DOI: https:\/\/doi.org\/10.1007\/978-3-031-16437-8_10."},{"key":"1597_CR196","doi-asserted-by":"publisher","first-page":"629","DOI":"10.1007\/978-3-031-16440-8_60","volume-title":"Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"L Cai","year":"2022","unstructured":"L. Cai, M. Wu, L. Chen, W. Bai, M. Yang, S. Lyu, Q. Zhao. Using guided self-attention with local information for polyp segmentation. In Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention, Singapore, pp. 629\u2013638, 2022. DOI: https:\/\/doi.org\/10.1007\/978-3-031-16440-8_60."},{"key":"1597_CR197","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1007\/978-3-031-16437-8_11","volume-title":"Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"J Wang","year":"2022","unstructured":"J. Wang, Q. Huang, F. Tang, J. Meng, J. Su, S. Song. Stepwise feature fusion: Local guides global. In Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention, Singapore, pp. 110\u2013120, 2022. 
DOI: https:\/\/doi.org\/10.1007\/978-3-031-16437-8_11."},{"key":"1597_CR198","doi-asserted-by":"publisher","first-page":"590","DOI":"10.1007\/978-3-031-16440-8_57","volume-title":"Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Y Shen","year":"2022","unstructured":"Y. Shen, Y. Lu, X. Jia, F. Bai, M. Q. H. Meng. Task-relevant feature replenishment for cross-centre polyp segmentation. In Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention, Singapore, pp. 590\u2013608, 2022. DOI: https:\/\/doi.org\/10.1007\/978-3-031-16440-8_57."},{"issue":"7","key":"1597_CR199","doi-asserted-by":"publisher","first-page":"2995","DOI":"10.1109\/JBHI.2022.3147686","volume":"26","author":"D Wang","year":"2022","unstructured":"D. Wang, S. Chen, X. Sun, Q. Chen, Y. Cao, B. Liu, X. Liu. AFP-mask: Anchor-free polyp instance segmentation in colonoscopy. IEEE Journal of Biomedical and Health Informatics, vol. 26, no. 7, pp. 2995\u20133006, 2022. DOI: https:\/\/doi.org\/10.1109\/JBHI.2022.3147686.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"issue":"8","key":"1597_CR200","doi-asserted-by":"publisher","first-page":"4090","DOI":"10.1109\/JBHI.2022.3173948","volume":"26","author":"G Yue","year":"2022","unstructured":"G. Yue, W. Han, B. Jiang, T. Zhou, R. Cong, T. Wang. Boundary constraint network with cross layer feature integration for polyp segmentation. IEEE Journal of Biomedical and Health Informatics, vol. 26, no. 8, pp. 4090\u20134099, 2022. DOI: https:\/\/doi.org\/10.1109\/JBHI.2022.3173948.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"key":"1597_CR201","doi-asserted-by":"publisher","unstructured":"Y. Lin, J. Wu, G. Xiao, J. Guo, G. Chen, J. Ma. BSCA-Net: Bit slicing context attention network for polyp segmentation. Pattern Recognition, vol. 132, Article number 108917, 2022. 
DOI: https:\/\/doi.org\/10.1016\/j.patcog.2022.108917.","DOI":"10.1016\/j.patcog.2022.108917"},{"issue":"1","key":"1597_CR202","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/TCSVT.2022.3197643","volume":"33","author":"J H Shi","year":"2023","unstructured":"J. H. Shi, Q. Zhang, Y. H. Tang, Z. Q. Zhang. Polypmixer: An efficient context-aware MLP-based paradigm for polyp segmentation. IEEE Transactions on Circuits and Systems for Video Technology, vol. 33, no. 1, pp. 30\u201342, 2023. DOI: https:\/\/doi.org\/10.1109\/TCSVT.2022.3197643.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"1597_CR203","doi-asserted-by":"publisher","first-page":"2812","DOI":"10.1609\/aaai.v37i3.25382","volume-title":"Proceedings of the 37th AAAI Conference on Artificial Intelligence","author":"H Wu","year":"2023","unstructured":"H. Wu, W. Xie, J. Lin, X. Guo. ACL-Net: Semi-supervised polyp segmentation via affinity contrastive learning. In Proceedings of the 37th AAAI Conference on Artificial Intelligence, Washington DC, USA, pp. 2812\u20132820, 2023. DOI: https:\/\/doi.org\/10.1609\/aaai.v37i3.25382."},{"key":"1597_CR204","doi-asserted-by":"publisher","first-page":"757","DOI":"10.1007\/978-3-031-43898-1_72","volume-title":"Proceedings of the 26th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"J Wei","year":"2023","unstructured":"J. Wei, Y. Hu, S. Cui, S. K. Zhou, Z. Li. WeakPolyp: You only look bounding box for polyp segmentation. In Proceedings of the 26th International Conference on Medical Image Computing and Computer Assisted Intervention, Vancouver, Canada, pp. 757\u2013766, 2023. 
DOI: https:\/\/doi.org\/10.1007\/978-3-031-43898-1_72."},{"key":"1597_CR205","doi-asserted-by":"publisher","first-page":"572","DOI":"10.1007\/978-3-031-43990-2_54","volume-title":"Proceedings of the 26th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"T Ling","year":"2023","unstructured":"T. Ling, C. Wu, H. Yu, T. Cai, D. Wang, Y. Zhou, M. Chen, K. Ding. Probabilistic modeling ensemble vision transformer improves complex polyp segmentation. In Proceedings of the 26th International Conference on Medical Image Computing and Computer Assisted Intervention, Vancouver, Canada, pp. 572\u2013581, 2023. DOI: https:\/\/doi.org\/10.1007\/978-3-031-43990-2_54."},{"key":"1597_CR206","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1007\/978-3-031-43907-0_4","volume-title":"Proceedings of the 26th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"A Wang","year":"2023","unstructured":"A. Wang, M. Xu, Y. Zhang, M. Islam, H. Ren. S2ME: Spatial-spectral mutual teaching and ensemble learning for scribble-supervised polyp segmentation. In Proceedings of the 26th International Conference on Medical Image Computing and Computer Assisted Intervention, Vancouver, Canada, pp. 35\u201345, 2023. DOI: https:\/\/doi.org\/10.1007\/978-3-031-43907-0_4."},{"key":"1597_CR207","doi-asserted-by":"publisher","first-page":"632","DOI":"10.1007\/978-3-031-43904-9_61","volume-title":"Proceedings of the 26th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Y Su","year":"2023","unstructured":"Y. Su, Y. Shen, J. Ye, J. He, J. Cheng. Revisiting feature propagation and aggregation in polyp segmentation. In Proceedings of the 26th International Conference on Medical Image Computing and Computer Assisted Intervention, Vancouver, Canada, pp. 632\u2013641, 2023. 
DOI: https:\/\/doi.org\/10.1007\/978-3-031-43904-9_61."},{"key":"1597_CR208","volume-title":"Polyp-PVT: Polyp segmentation with pyramid vision transformers","author":"B Dong","year":"2021","unstructured":"B. Dong, W. Wang, D. P. Fan, J. Li, H. Fu, L. Shao. Polyp-PVT: Polyp segmentation with pyramid vision transformers, [Online], Available: https:\/\/arxiv.org\/abs\/2108.06932, 2021."},{"key":"1597_CR209","doi-asserted-by":"publisher","first-page":"250","DOI":"10.1007\/978-3-031-34048-2_20","volume-title":"Proceedings of the 28th International Conference on Information Processing in Medical Imaging","author":"J Wang","year":"2023","unstructured":"J. Wang, C. Chen. Unsupervised adaptation of polyp segmentation models via coarse-to-fine self-supervision. In Proceedings of the 28th International Conference on Information Processing in Medical Imaging, San Carlos de Bariloche, Argentina, pp. 250\u2013262, 2023. DOI: https:\/\/doi.org\/10.1007\/978-3-031-34048-2_20."},{"issue":"7","key":"1597_CR210","doi-asserted-by":"publisher","first-page":"3420","DOI":"10.1109\/JBHI.2023.3272168","volume":"27","author":"Q Jin","year":"2023","unstructured":"Q. Jin, H. Hou, G. Zhang, Z. Li. FEGNet: A feedback enhancement gate network for automatic polyp segmentation. IEEE Journal of Biomedical and Health Informatics, vol. 27, no. 7, pp. 3420\u20133430, 2023. DOI: https:\/\/doi.org\/10.1109\/JBHI.2023.3272168.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"issue":"2","key":"1597_CR211","doi-asserted-by":"publisher","first-page":"992","DOI":"10.1109\/JBHI.2022.3222390","volume":"27","author":"J Du","year":"2023","unstructured":"J. Du, K. Guan, P. Liu, Y. Li, T. Wang. Boundary-sensitive loss function with location constraint for hard region segmentation. IEEE Journal of Biomedical and Health Informatics, vol. 27, no. 2, pp. 992\u20131003, 2023. 
DOI: https:\/\/doi.org\/10.1109\/JBHI.2022.3222390.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"key":"1597_CR212","doi-asserted-by":"publisher","unstructured":"Y. Shi, H. Wang, H. Ji, H. Liu, Y. Li, N. He, D. Wei, Y. Huang, Q. Dai, J. Wu, X. Chen, Y. Zheng, H. Yu. A deep weakly semi-supervised framework for endoscopic lesion segmentation. Medical Image Analysis, vol. 90, Article number 102973, 2023. DOI: https:\/\/doi.org\/10.1016\/j.media.2023.102973.","DOI":"10.1016\/j.media.2023.102973"},{"issue":"1","key":"1597_CR213","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1007\/s11633-022-1365-9","volume":"20","author":"G P Ji","year":"2023","unstructured":"G. P. Ji, D. P. Fan, Y. C. Chou, D. Dai, A. Liniger, L. Van Gool. Deep gradient learning for efficient camouflaged object detection. Machine Intelligence Research, vol. 20, no. 1, pp. 92\u2013108, 2023. DOI: https:\/\/doi.org\/10.1007\/s11633-022-1365-9.","journal-title":"Machine Intelligence Research"},{"key":"1597_CR214","doi-asserted-by":"publisher","unstructured":"T. Zhou, Y. Zhou, K. He, C. Gong, J. Yang, H. Fu, D. Shen. Cross-level feature aggregation network for polyp segmentation. Pattern Recognition, vol. 140, Article number 109555, 2023. DOI: https:\/\/doi.org\/10.1016\/j.patcog.2023.109555.","DOI":"10.1016\/j.patcog.2023.109555"},{"issue":"12","key":"1597_CR215","doi-asserted-by":"publisher","first-page":"3987","DOI":"10.1109\/TMI.2023.3320151","volume":"42","author":"S Jain","year":"2023","unstructured":"S. Jain, R. Atale, A. Gupta, U. Mishra, A. Seal, A. Ojha, J. Jaworek-Korjakowska, O. Krejcar. CoInNet: A convolution-involution network with a novel statistical attention for automatic polyp segmentation. IEEE Transactions on Medical Imaging, vol. 42, no. 12, pp. 3987\u20134000, 2023. 
DOI: https:\/\/doi.org\/10.1109\/TMI.2023.3320151.","journal-title":"IEEE Transactions on Medical Imaging"},{"issue":"11","key":"1597_CR216","doi-asserted-by":"publisher","first-page":"9375","DOI":"10.1109\/TNNLS.2022.3159394","volume":"34","author":"N K Tomar","year":"2023","unstructured":"N. K. Tomar, D. Jha, M. A. Riegler, H. D. Johansen, D. Johansen, J. Rittscher, P. Halvorsen, S. Ali. FANet: A feedback attention network for improved biomedical image segmentation. IEEE Transactions on Neural Networks and Learning Systems, vol. 34, no. 11, pp. 9375\u20139388, 2023. DOI: https:\/\/doi.org\/10.1109\/TNNLS.2022.3159394.","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"issue":"3","key":"1597_CR217","doi-asserted-by":"publisher","first-page":"437","DOI":"10.1007\/s11633-025-1552-6","volume":"22","author":"H Shao","year":"2025","unstructured":"H. Shao, Q. Zeng, Q. Hou, J. Yang. MCANet: Medical image segmentation with multi-scale cross-axis attention. Machine Intelligence Research, vol. 22, no. 3, pp. 437\u2013451, 2025. DOI: https:\/\/doi.org\/10.1007\/s11633-025-1552-6.","journal-title":"Machine Intelligence Research"},{"key":"1597_CR218","doi-asserted-by":"publisher","first-page":"4731","DOI":"10.1609\/aaai.v38i5.28274","volume-title":"Proceedings of the 38th AAAI Conference on Artificial Intelligence","author":"H Shao","year":"2024","unstructured":"H. Shao, Y. Zhang, Q. Hou. Polyper: Boundary sensitive polyp segmentation. In Proceedings of the 38th AAAI Conference on Artificial Intelligence, Vancouver, Canada, pp. 4731\u20134739, 2024. DOI: https:\/\/doi.org\/10.1609\/aaai.v38i5.28274."},{"key":"1597_CR219","doi-asserted-by":"publisher","first-page":"11769","DOI":"10.1109\/CVPR52733.2024.01118","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"M M Rahman","year":"2024","unstructured":"M. M. Rahman, M. Munir, R. Marculescu. 
EMCAD: Efficient multi-scale convolutional attention decoding for medical image segmentation. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Seattle, USA, pp. 11769\u201311779, 2024. DOI: https:\/\/doi.org\/10.1109\/CVPR52733.2024.01118."},{"key":"1597_CR220","doi-asserted-by":"publisher","first-page":"3616","DOI":"10.1109\/CV-PRW63382.2024.00365","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops","author":"R Sch\u00f6on","year":"2024","unstructured":"R. Sch\u00f6on, J. Lorenz, K. Ludwig, R. Lienhart. Adapting the segment anything model during usage in novel situations. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, Seattle, USA, pp. 3616\u20133626, 2024. DOI: https:\/\/doi.org\/10.1109\/CV-PRW63382.2024.00365."},{"key":"1597_CR221","volume-title":"Proceedings of the 41st International Conference on Machine Learning","author":"L Xie","year":"2024","unstructured":"L. Xie, M. Lin, T. Luan, C. Li, Y. Fang, Q. Shen, Z. Wu. MH-pFLID: Model heterogeneous personalized federated learning via injection and distillation for medical data analysis. In Proceedings of the 41st International Conference on Machine Learning, Vienna, Austria, Article number 2244, 2024."},{"key":"1597_CR222","doi-asserted-by":"publisher","first-page":"118","DOI":"10.1007\/978-3-031-72114-4_12","volume-title":"Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"H Li","year":"2024","unstructured":"H. Li, D. Zhang, J. Yao, L. Han, Z. Li, J. Han. ASPS: Augmented segment anything model for polyp segmentation. In Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention, Marrakesh, Morocco, pp. 118\u2013128, 2024. 
DOI: https:\/\/doi.org\/10.1007\/978-3-031-72114-4_12."},{"key":"1597_CR223","doi-asserted-by":"publisher","first-page":"510","DOI":"10.1007\/978-3-031-72111-3_48","volume-title":"Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Z Xu","year":"2024","unstructured":"Z. Xu, F. Tang, Z. Chen, Z. Zhou, W. Wu, Y. Yang, Y. Liang, J. Jiang, X. Cai, J. Su. Polyp-Mamba: Polyp Segmentation with Visual Mamba. In Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention, Marrakesh, Morocco, pp. 510\u2013521, 2024. DOI: https:\/\/doi.org\/10.1007\/978-3-031-72111-3_48."},{"key":"1597_CR224","volume-title":"Proceedings of the 38th International Conference on Neural Information Processing Systems","author":"Y Liu","year":"2024","unstructured":"Y. Liu, Y. Tian, Y. Zhao, H. Yu, L. Xie, Y. Wang, Q. Ye, J. Jiao, Y. Liu. VMamba: Visual state space model. In Proceedings of the 38th International Conference on Neural Information Processing Systems, Vancouver, Canada, Article number 3273, 2024."},{"key":"1597_CR225","doi-asserted-by":"publisher","first-page":"544","DOI":"10.1007\/978-3-031-72111-3_51","volume-title":"Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"J Chai","year":"2024","unstructured":"J. Chai, Z. Luo, J. Gao, L. Dai, Y. Lai, S. Li. QueryNet: A unified framework for accurate polyp segmentation and detection. In Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention, Marrakesh, Morocco, pp. 544\u2013554, 2024. 
DOI: https:\/\/doi.org\/10.1007\/978-3-031-72111-3_51."},{"key":"1597_CR226","doi-asserted-by":"publisher","first-page":"446","DOI":"10.1007\/978-3-031-72104-5_43","volume-title":"Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"W Wang","year":"2024","unstructured":"W. Wang, H. Sun, X. Wang. LSSNet: A method for colon polyp segmentation based on local feature supplementation and shallow feature supplementation. In Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention, Marrakesh, Morocco, pp. 446\u2013456, 2024. DOI: https:\/\/doi.org\/10.1007\/978-3-031-72104-5_43."},{"key":"1597_CR227","doi-asserted-by":"publisher","first-page":"4938","DOI":"10.1145\/3664647.3681033","volume-title":"Proceedings of the 32nd ACM International Conference on Multimedia","author":"X Zhou","year":"2024","unstructured":"X. Zhou, T. Chen. BSBP-RWKV: Background suppression with boundary preservation for efficient medical image segmentation. In Proceedings of the 32nd ACM International Conference on Multimedia, Melbourne, Australia, pp. 4938\u20134946, 2024. DOI: https:\/\/doi.org\/10.1145\/3664647.3681033."},{"key":"1597_CR228","doi-asserted-by":"publisher","first-page":"14048","DOI":"10.18653\/v1\/2023.findings-emnlp.936","volume-title":"Proceedings of Findings of the Association for Computational Linguistics: EMNLP","author":"B Peng","year":"2023","unstructured":"B. Peng, E. Alcaide, Q. Anthony, A. Albalak, S. Arcadinho, S. Biderman, H. Cao, X. Cheng, M. Chung, L. Derczynski, X. Du, M. Grella, K. K. Gv, X. He, H. Hou, P. Kazienko, J. Kocon, J. Kong, B. Koptyra, H. Lau, J. Lin, K. S. I. Mantri, F. Mom, A. Saito, G. Song, X. Tang, J. S. Wind, S. Wozniak, Z. Zhang, Q. Zhou, J. Zhu, R. J. Zhu. RWKV: Reinventing RNNs for the transformer era. In Proceedings of Findings of the Association for Computational Linguistics: EMNLP, Singapore, Singapore, pp. 
14048\u201314077, 2023. DOI: https:\/\/doi.org\/10.18653\/v1\/2023.findings-emnlp.936."},{"key":"1597_CR229","doi-asserted-by":"publisher","unstructured":"C. Wang, L. Wang, N. Wang, X. Wei, T. Feng, M. Wu, Q. Yao, R. Zhang. CFATransUnet: Channel-wise cross fusion attention and transformer for 2D medical image segmentation. Computers in Biology and Medicine, vol. 168, Article number 107803, 2024. DOI: https:\/\/doi.org\/10.1016\/j.compbiomed.2023.107803.","DOI":"10.1016\/j.compbiomed.2023.107803"},{"key":"1597_CR230","doi-asserted-by":"publisher","unstructured":"X. Jia, Y. Shen, J. Yang, R. Song, W. Zhang, M. Q. H. Meng, J. C. Liao, L. Xing. PolypMixNet: Enhancing semi-supervised polyp segmentation with polyp-aware augmentation. Computers in Biology and Medicine, vol. 170, Article number 108006, 2024. DOI: https:\/\/doi.org\/10.1016\/j.compbiomed.2024.108006.","DOI":"10.1016\/j.compbiomed.2024.108006"},{"issue":"4","key":"1597_CR231","doi-asserted-by":"publisher","first-page":"2138","DOI":"10.1109\/JBHI.2024.3363910","volume":"28","author":"Z Zhang","year":"2024","unstructured":"Z. Zhang, Y. Li, B. S. Shin. Generalizable polyp segmentation via randomized global illumination augmentation. IEEE Journal of Biomedical and Health Informatics, vol. 28, no. 4, pp. 2138\u20132151, 2024. DOI: https:\/\/doi.org\/10.1109\/JBHI.2024.3363910.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"issue":"3","key":"1597_CR232","doi-asserted-by":"publisher","first-page":"1228","DOI":"10.1109\/JBHI.2023.3273728","volume":"28","author":"M Wang","year":"2024","unstructured":"M. Wang, X. An, Z. Pei, N. Li, L. Zhang, G. Liu, D. Ming. An efficient multi-task synergetic network for polyp segmentation and classification. IEEE Journal of Biomedical and Health Informatics, vol. 28, no. 3, pp. 1228\u20131239, 2024. 
DOI: https:\/\/doi.org\/10.1109\/JBHI.2023.3273728.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"issue":"7","key":"1597_CR233","doi-asserted-by":"publisher","first-page":"4072","DOI":"10.1109\/JBHI.2023.3344716","volume":"28","author":"L Yang","year":"2024","unstructured":"L. Yang, Y. Gu, G. Bian, Y. Liu. MSDE-Net: A multi-scale dual-encoding network for surgical instrument segmentation. IEEE Journal of Biomedical and Health Informatics, vol. 28, no. 7, pp. 4072\u20134083, 2024. DOI: https:\/\/doi.org\/10.1109\/JBHI.2023.3344716.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"issue":"4","key":"1597_CR234","doi-asserted-by":"publisher","first-page":"631","DOI":"10.1007\/s11633-023-1472-2","volume":"21","author":"G P Ji","year":"2024","unstructured":"G. P. Ji, J. Zhang, D. Campbell, H. Xiong, N. Barnes. Rethinking polyp segmentation from an out-of-distribution perspective. Machine Intelligence Research, vol. 21, no. 4, pp. 631\u2013639, 2024. DOI: https:\/\/doi.org\/10.1007\/s11633-023-1472-2.","journal-title":"Machine Intelligence Research"},{"key":"1597_CR235","doi-asserted-by":"publisher","unstructured":"J. Ma, Y. He, F. Li, L. Han, C. You, B. Wang. Segment anything in medical images. Nature Communications, vol. 15, no. 1, Article number 654, 2024. DOI: https:\/\/doi.org\/10.1038\/s41467-024-44824-z.","DOI":"10.1038\/s41467-024-44824-z"},{"issue":"7","key":"1597_CR236","doi-asserted-by":"publisher","first-page":"5414","DOI":"10.1109\/TCSVT.2023.3348598","volume":"34","author":"Z Liu","year":"2024","unstructured":"Z. Liu, S. Zheng, X. Sun, Z. Zhu, Y. Zhao, X. Yang, Y. Zhao. The devil is in the boundary: Boundary-enhanced polyp segmentation. IEEE Transactions on Circuits and Systems for Video Technology, vol. 34, no. 7, pp. 5414\u20135423, 2024. 
DOI: https:\/\/doi.org\/10.1109\/TCSVT.2023.3348598.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"issue":"9","key":"1597_CR237","doi-asserted-by":"publisher","first-page":"3563","DOI":"10.1109\/TMI.2024.3443262","volume":"44","author":"Z Lu","year":"2025","unstructured":"Z. Lu, Y. Zhang, Y. Zhou, Y. Wu, T. Zhou. Domain-interactive contrastive learning and prototype-guided self-training for cross-domain polyp segmentation. IEEE Transactions on Medical Imaging, vol. 44, no. 9, pp. 3563\u20133573, 2025. DOI: https:\/\/doi.org\/10.1109\/TMI.2024.3443262.","journal-title":"IEEE Transactions on Medical Imaging"},{"issue":"1","key":"1597_CR238","doi-asserted-by":"publisher","first-page":"310","DOI":"10.1109\/TMI.2024.3440311","volume":"44","author":"J Gao","year":"2025","unstructured":"J. Gao, Q. Lao, Q. Kang, P. Liu, C. Du, K. Li, L. Zhang. Boosting your context by dual similarity checkup for in-context learning medical image segmentation. IEEE Transactions on Medical Imaging, vol. 44, no. 1, pp. 310\u2013319, 2025. DOI: https:\/\/doi.org\/10.1109\/TMI.2024.3440311.","journal-title":"IEEE Transactions on Medical Imaging"},{"issue":"10","key":"1597_CR239","doi-asserted-by":"publisher","first-page":"7446","DOI":"10.1109\/JBHI.2025.3564381","volume":"29","author":"C Fan","year":"2025","unstructured":"C. Fan, H. Yu, Y. Huang, L. Wang, Z. Yang, X. Jia. SliceMamba with neural architecture search for medical image segmentation. IEEE Journal of Biomedical and Health Informatics, vol. 29, no. 10, pp. 7446\u20137458, 2025. DOI: https:\/\/doi.org\/10.1109\/JBHI.2025.3564381.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"key":"1597_CR240","volume-title":"ProMamba: Prompt-mamba for polyp segmentation","author":"J Xie","year":"2024","unstructured":"J. Xie, R. Liao, Z. Zhang, S. Yi, Y. Zhu, G. Luo. 
ProMamba: Prompt-mamba for polyp segmentation, [Online], Available: https:\/\/arxiv.org\/abs\/2403.13660, 2024."},{"key":"1597_CR241","volume-title":"SAM2-UNet: Segment anything 2 makes strong encoder for natural and medical image segmentation","author":"X Xiong","year":"2024","unstructured":"X. Xiong, Z. Wu, S. Tan, W. Li, F. Tang, Y. Chen, S. Li, J. Ma, G. Li. SAM2-UNet: Segment anything 2 makes strong encoder for natural and medical image segmentation, [Online], Available: https:\/\/arxiv.org\/abs\/2408.08870, 2024."},{"key":"1597_CR242","doi-asserted-by":"publisher","first-page":"4652","DOI":"10.1609\/aaai.v39i5.32491","volume-title":"Proceedings of the 39th AAAI Conference on Artificial Intelligence","author":"C Li","year":"2025","unstructured":"C. Li, X. Liu, W. Li, C. Wang, H. Liu, Y. Liu, Z. Chen, Y. Yuan. U-KAN makes strong backbone for medical image segmentation and generation. In Proceedings of the 39th AAAI Conference on Artificial Intelligence, Philadelphia, USA, pp. 4652\u20134660, 2025. DOI: https:\/\/doi.org\/10.1609\/aaai.v39i5.32491."},{"key":"1597_CR243","volume-title":"Proceedings of the 13th International Conference on Learning Representations","author":"Z Liu","year":"2025","unstructured":"Z. Liu, Y. Wang, S. Vaidya, F. Ruehle, J. Halverson, M. Solja\u010di\u0107, T. Y. Hou, M. Tegmark. KAN: Kolmogorov-Arnold networks. In Proceedings of the 13th International Conference on Learning Representations, Singapore, 2025."},{"key":"1597_CR244","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1007\/978-3-030-59725-2_29","volume-title":"Proceedings of the 23rd International Conference on Medical Image Computing and Computer Assisted Intervention","author":"J G B Puyal","year":"2020","unstructured":"J. G. B. Puyal, K. K. Bhatia, P. Brandao, O. F. Ahmad, D. Toth, R. Kader, L. Lovat, P. Mountney, D. Stoyanov. Endoscopic polyp segmentation using a hybrid 2D\/3D CNN. 
In Proceedings of the 23rd International Conference on Medical Image Computing and Computer Assisted Intervention, Lima, Peru, pp. 295\u2013305, 2020. DOI: https:\/\/doi.org\/10.1007\/978-3-030-59725-2_29."},{"key":"1597_CR245","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1007\/978-3-030-87193-2_14","volume-title":"Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"G P Ji","year":"2021","unstructured":"G. P. Ji, Y. C. Chou, D. P. Fan, G. Chen, H. Fu, D. Jha, L. Shao. Progressively normalized self-attention network for video polyp segmentation. In Proceedings of the 24th International Conference on Medical Image Computing and Computer Assisted Intervention, Strasbourg, France, pp. 142\u2013152, 2021. DOI: https:\/\/doi.org\/10.1007\/978-3-030-87193-2_14."},{"key":"1597_CR246","doi-asserted-by":"publisher","first-page":"456","DOI":"10.1007\/978-3-031-16440-8_44","volume-title":"Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"X Zhao","year":"2022","unstructured":"X. Zhao, Z. Wu, S. Tan, D. J. Fan, Z. Li, X. Wan, G. Li. Semi-supervised spatial temporal attention network for video polyp segmentation. In Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention, Singapore, pp. 456\u2013466, 2022. DOI: https:\/\/doi.org\/10.1007\/978-3-031-16440-8_44."},{"key":"1597_CR247","doi-asserted-by":"publisher","first-page":"1109","DOI":"10.24963\/ijcai.2022\/155","volume-title":"Proceedings of the 31st International Joint Conference on Artificial Intelligence","author":"X Li","year":"2022","unstructured":"X. Li, J. Xu, Y. Zhang, R. Feng, R. W. Zhao, T. Zhang, X. Lu, S. Gao. TCCNet: Temporally consistent context-free network for semi-supervised video polyp segmentation. 
In Proceedings of the 31st International Joint Conference on Artificial Intelligence, Vienna, Austria, pp. 1109\u20131115, 2022. DOI: https:\/\/doi.org\/10.24963\/ijcai.2022\/155."},{"key":"1597_CR248","doi-asserted-by":"publisher","unstructured":"J. G. B. Puyal, P. Brandao, O. F. Ahmad, K. K. Bhatia, D. Toth, R. Kader, L. Lovat, P. Mountney, D. Stoyanov. Polyp detection on video colonoscopy using a hybrid 2D\/3D CNN. Medical Image Analysis, vol. 82, Article number 102625, 2022. DOI: https:\/\/doi.org\/10.1016\/j.media.2022.102625.","DOI":"10.1016\/j.media.2022.102625"},{"key":"1597_CR249","doi-asserted-by":"publisher","first-page":"1744","DOI":"10.1609\/aaai.v38i2.27942","volume-title":"Proceedings of the 38st AAAI Conference on Artificial Intelligence","author":"Z Fang","year":"2024","unstructured":"Z. Fang, X. Guo, J. Lin, H. Wu, J. Qin. An embedding-unleashing video polyp segmentation framework via region linking and scale alignment. In Proceedings of the 38st AAAI Conference on Artificial Intelligence, Vancouver, Canada, pp. 1744\u20131752, 2024. DOI: https:\/\/doi.org\/10.1609\/aaai.v38i2.27942."},{"key":"1597_CR250","doi-asserted-by":"publisher","first-page":"667","DOI":"10.1007\/978-3-031-72083-3_62","volume-title":"Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"H Xu","year":"2024","unstructured":"H. Xu, Y. Yang, A. I. Aviles-Rivero, G. Yang, J. Qin, L. Zhu. LGRNet: Local-global reciprocal network for uterine fibroid segmentation in ultrasound videos. In Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention, Marrakesh, Morocco, pp. 667\u2013677, 2024. 
DOI: https:\/\/doi.org\/10.1007\/978-3-031-72083-3_62."},{"key":"1597_CR251","doi-asserted-by":"publisher","first-page":"531","DOI":"10.1007\/978-3-031-72089-5_50","volume-title":"Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Q Hu","year":"2024","unstructured":"Q. Hu, Z. Yi, Y. Zhou, F. Peng, M. Liu, Q. Li, Z. Wang. SALI: Short-term alignment and long-term interaction network for colonoscopy video polyp segmentation. In Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention, Marrakesh, Morocco, pp. 531\u2013541, 2024. DOI: https:\/\/doi.org\/10.1007\/978-3-031-72089-5_50."},{"key":"1597_CR252","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1007\/978-3-031-72089-5_16","volume-title":"Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Y Lu","year":"2024","unstructured":"Y. Lu, Y. Yang, Z. Xing, Q. Wang, L. Zhu. Diff-VPS: Video polyp segmentation via a multi-task diffusion network with adversarial temporal reasoning. In Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention, Marrakesh, Morocco, pp. 165\u2013175, 2024. DOI: https:\/\/doi.org\/10.1007\/978-3-031-72089-5_16."},{"key":"1597_CR253","doi-asserted-by":"publisher","unstructured":"L. Wan, Z. Chen, Y. Xiao, J. Zhao, W. Feng, H. Fu. Iterative feedback-based models for image and video polyp segmentation. Computers in Biology and Medicine, vol. 177, Article number 108569, 2024. DOI: https:\/\/doi.org\/10.1016\/j.compbiomed.2024.108569.","DOI":"10.1016\/j.compbiomed.2024.108569"},{"issue":"2","key":"1597_CR254","doi-asserted-by":"publisher","first-page":"318","DOI":"10.1007\/s11633-023-1380-5","volume":"21","author":"Y C Chou","year":"2024","unstructured":"Y. C. Chou, B. Li, D. P. Fan, A. Yuille, Z. Zhou. 
Acquiring weak annotations for tumor localization in temporal and volumetric data. Machine Intelligence Research, vol. 21, no. 2, pp. 318\u2013330, 2024. DOI: https:\/\/doi.org\/10.1007\/s11633-023-1380-5.","journal-title":"Machine Intelligence Research"},{"key":"1597_CR255","volume-title":"SSTFB: Leveraging self-supervised pretext learning and temporal self-attention with feature branching for real-time video polyp segmentation","author":"Z Xu","year":"2024","unstructured":"Z. Xu, J. Rittscher, S. Ali. SSTFB: Leveraging self-supervised pretext learning and temporal self-attention with feature branching for real-time video polyp segmentation, [Online], Available: https:\/\/arxiv.org\/abs\/2406.10200, 2024."},{"key":"1597_CR256","volume-title":"Vivim: A video vision mamba for medical video segmentation","author":"Y Yang","year":"2024","unstructured":"Y. Yang, Z. Xing, L. Zhu. Vivim: A video vision mamba for medical video segmentation, [Online], Available: https:\/\/arxiv.org\/abs\/2401.14168, 2024."},{"key":"1597_CR257","volume-title":"MAST: Video polyp segmentation with a mixture-attention Siamese transformer","author":"G Chen","year":"2024","unstructured":"G. Chen, J. Yang, X. Pu, G. P. Ji, H. Xiong, Y. Pan, H. Cui, Y. Xia. MAST: Video polyp segmentation with a mixture-attention Siamese transformer, [Online], Available: https:\/\/arxiv.org\/abs\/2401.12439, 2024."},{"issue":"9","key":"1597_CR258","doi-asserted-by":"publisher","first-page":"2622","DOI":"10.1007\/s11263-021-01490-8","volume":"129","author":"M M Cheng","year":"2021","unstructured":"M. M. Cheng, D. P. Fan. Structure-measure: A new way to evaluate foreground maps. International Journal of Computer Vision, vol. 129, no. 9, pp. 2622\u20132638, 2021. 
DOI: https:\/\/doi.org\/10.1007\/s11263-021-01490-8.","journal-title":"International Journal of Computer Vision"},{"issue":"6","key":"1597_CR259","doi-asserted-by":"publisher","first-page":"1856","DOI":"10.1109\/TMI.2019.2959609","volume":"39","author":"Z Zhou","year":"2020","unstructured":"Z. Zhou, M. M. R. Siddiquee, N. Tajbakhsh, J. Liang. UNet++: Redesigning skip connections to exploit multiscale features in image segmentation. IEEE Transactions on Medical Imaging, vol. 39, no. 6, pp. 1856\u20131867, 2020. DOI: https:\/\/doi.org\/10.1109\/TMI.2019.2959609.","journal-title":"IEEE Transactions on Medical Imaging"},{"key":"1597_CR260","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Proceedings of the 18th International Conference on Medical Image Computing and Computer-Assisted Intervention","author":"O Ronneberger","year":"2015","unstructured":"O. Ronneberger, P. Fischer, T. Brox. U-Net: Convolutional networks for biomedical image segmentation. In Proceedings of the 18th International Conference on Medical Image Computing and Computer-Assisted Intervention, Munich, Germany, pp. 234\u2013241, 2015. DOI: https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28."},{"key":"1597_CR261","volume-title":"Scaling laws for neural language models","author":"J Kaplan","year":"2020","unstructured":"J. Kaplan, S. McCandlish, T. Henighan, T. B. Brown, B. Chess, R. Child, S. Gray, A. Radford, J. Wu, D. Amodei. Scaling laws for neural language models, [Online], Available: https:\/\/arxiv.org\/abs\/2001.08361, 2020."},{"key":"1597_CR262","doi-asserted-by":"publisher","first-page":"11147","DOI":"10.1109\/CVPR52733.2024.01060","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Q Chen","year":"2024","unstructured":"Q. Chen, X. Chen, H. Song, Z. Xiong, A. Yuille, C. Wei, Z. Zhou. Towards generalizable tumor synthesis. 
In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Seattle, USA, pp. 11147\u201311158, 2024. DOI: https:\/\/doi.org\/10.1109\/CVPR52733.2024.01060."},{"key":"1597_CR263","volume-title":"Proceedings of the 38th Annual Conference on Neural Information Processing Systems","author":"K Tian","year":"2024","unstructured":"K. Tian, Y. Jiang, Z. Yuan, B. Peng, L. Wang. Visual autoregressive modeling: Scalable image generation via next-scale prediction. In Proceedings of the 38th Annual Conference on Neural Information Processing Systems, Vancouver, Canada, 2024."},{"key":"1597_CR264","doi-asserted-by":"publisher","first-page":"481","DOI":"10.1007\/978-3-031-73235-5_27","volume-title":"Proceedings of the 18th European Conference on Computer Vision","author":"M Hu","year":"2024","unstructured":"M. Hu, P. Xia, L. Wang, S. Yan, F. Tang, Z. Xu, Y. Luo, K. Song, J. Leitner, X. Cheng, J. Cheng, C. Liu, K. Zhou, Z. Ge. OphNet: A large-scale video benchmark for ophthalmic surgical workflow understanding. In Proceedings of the 18th European Conference on Computer Vision, Milan, Italy, pp. 481\u2013500, 2024. DOI: https:\/\/doi.org\/10.1007\/978-3-031-73235-5_27."},{"key":"1597_CR265","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1007\/978-3-031-16437-8_15","volume-title":"Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"N K Tomar","year":"2022","unstructured":"N. K. Tomar, D. Jha, U. Bagci, S. Ali. TGANet: Text-guided attention for improved polyp segmentation. In Proceedings of the 25th International Conference on Medical Image Computing and Computer Assisted Intervention, Singapore, pp. 151\u2013160, 2022. DOI: https:\/\/doi.org\/10.1007\/978-3-031-16437-8_15."},{"key":"1597_CR266","doi-asserted-by":"publisher","unstructured":"Y. Zhao, J. Li, L. Ren, Z. Chen. DTAN: Diffusion-based text attention network for medical image segmentation. 
Computers in Biology and Medicine, vol. 168, Article number 107728, 2024. DOI: https:\/\/doi.org\/10.1016\/j.compbiomed.2023.107728.","DOI":"10.1016\/j.compbiomed.2023.107728"},{"key":"1597_CR267","doi-asserted-by":"publisher","unstructured":"Y. Zhao, J. Li, Z. Hua. TACT: Text attention based CNN-transformer network for polyp segmentation. International Journal of Imaging Systems and Technology, vol. 34, no. 2, Article number e22997, 2024. DOI: https:\/\/doi.org\/10.1002\/ima.22997.","DOI":"10.1002\/ima.22997"},{"key":"1597_CR268","volume-title":"Proceedings of the 11th International Conference on Learning Representations","author":"Z Qin","year":"2023","unstructured":"Z. Qin, H. Yi, Q. Lao, K. Li. Medical image understanding with pretrained vision language models: A comprehensive study. In Proceedings of the 11th International Conference on Learning Representations, Kigali, Rwanda, 2023."},{"key":"1597_CR269","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1007\/978-3-031-43904-9_28","volume-title":"Proceedings of the 26th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"M Guo","year":"2023","unstructured":"M. Guo, H. Yi, Z. Qin, H. Wang, A. Men, Q. Lao. Multiple prompt fusion for zero-shot lesion detection using vision-language models. In Proceedings of the 26th International Conference on Medical Image Computing and Computer Assisted Intervention, Vancouver, Canada, pp. 283\u2013292, 2023. DOI: https:\/\/doi.org\/10.1007\/978-3-031-43904-9_28."},{"key":"1597_CR270","doi-asserted-by":"publisher","first-page":"10955","DOI":"10.1109\/CVPR52688.2022.01069","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"L H Li","year":"2022","unstructured":"L. H. Li, P. Zhang, H. Zhang, J. Yang, C. Li, Y. Zhong, L. Wang, L. Yuan, L. Zhang, J. N. Hwang, K. W. Chang, J. Gao. Grounded language-image pre-training. 
In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, New Orleans, USA, pp. 10955\u201310965, 2022. DOI: https:\/\/doi.org\/10.1109\/CVPR52688.2022.01069."},{"key":"1597_CR271","volume-title":"Knowledge extraction and distillation from large-scale image-text colonoscopy records leveraging large language and vision models","author":"S Wang","year":"2023","unstructured":"S. Wang, Y. Zhu, X. Luo, Z. Yang, Y. Zhang, P. Fu, M. Wang, Z. Song, Q. Li, P. Zhou, Y. Guo. Knowledge extraction and distillation from large-scale image-text colonoscopy records leveraging large language and vision models, [Online], Available: https:\/\/arxiv.org\/abs\/2310.11173, 2023."},{"key":"1597_CR272","volume-title":"Polyp-SAM++: Can a text guided SAM perform better for polyp segmentation?","author":"R Biswas","year":"2023","unstructured":"R. Biswas. Polyp-SAM++: Can a text guided SAM perform better for polyp segmentation? [Online], Available: https:\/\/arxiv.org\/abs\/2308.06623, 2023."},{"key":"1597_CR273","doi-asserted-by":"publisher","first-page":"711","DOI":"10.1007\/978-3-031-72120-5_66","volume-title":"Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention","author":"Y Zhao","year":"2024","unstructured":"Y. Zhao, Y. Zhou, Y. Zhang, Y. Wu, T. Zhou. Text-Polyp: Point-supervised polyp segmentation with text cues. In Proceedings of the 27th International Conference on Medical Image Computing and Computer Assisted Intervention, Marrakesh, Morocco, pp. 711\u2013722, 2024. DOI: https:\/\/doi.org\/10.1007\/978-3-031-72120-5_66."},{"key":"1597_CR274","first-page":"1744","volume-title":"Proceedings of Working Notes of the Conference and Labs of the Evaluation Forum","author":"S Wang","year":"2023","unstructured":"S. Wang, W. Zhou, Y. Yang, H. Huang, Z. Ye, T. Zhang, D. Yang. Adapting pre-trained visual and language models for medical image question answering. 
In Proceedings of Working Notes of the Conference and Labs of the Evaluation Forum, Thessaloniki, Greece, pp. 1744\u20131753, 2023."},{"key":"1597_CR275","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"J Li","year":"2023","unstructured":"J. Li, D. Li, S. Savarese, S. Hoi. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In Proceedings of the 40th International Conference on Machine Learning, Honolulu, USA, Article number 814, 2023."},{"issue":"9","key":"1597_CR276","doi-asserted-by":"publisher","first-page":"2307","DOI":"10.1038\/s41591-023-02504-3","volume":"29","author":"Z Huang","year":"2023","unstructured":"Z. Huang, F. Bianchi, M. Yuksekgonul, T. J. Montine, J. Zou. A visual-language foundation model for pathology image analysis using medical twitter. Nature Medicine, vol. 29, no. 9, pp. 2307\u20132316, 2023. DOI: https:\/\/doi.org\/10.1038\/s41591-023-02504-3.","journal-title":"Nature Medicine"},{"key":"1597_CR277","volume-title":"Galactica: A large language model for science","author":"R Taylor","year":"2022","unstructured":"R. Taylor, M. Kardas, G. Cucurull, T. Scialom, A. Hartshorn, E. Saravia, A. Poulton, V. Kerkez, R. Stojnic. Galactica: A large language model for science, [Online], Available: https:\/\/arxiv.org\/abs\/2211.09085, 2022."},{"key":"1597_CR278","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"H Liu","year":"2023","unstructured":"H. Liu, C. Li, Q. Wu, Y. J. Lee. Visual instruction tuning. In Proceedings of the 37th International Conference on Neural Information Processing Systems, New Orleans, USA, 2023."},{"key":"1597_CR279","unstructured":"C. Li, Y. Ge, D. Li, Y. Shan. Vision-language instruction tuning: A review and analysis. Transactions on Machine Learning Research, vol. 
2024, 2023."},{"issue":"11","key":"1597_CR280","doi-asserted-by":"publisher","first-page":"930","DOI":"10.1016\/j.tics.2020.08.005","volume":"24","author":"G Lupyan","year":"2020","unstructured":"G. Lupyan, R. A. Rahman, L. Boroditsky, A. Clark. Effects of language on visual perception. Trends in Cognitive Sciences, vol. 24, no. 11, pp. 930\u2013944, 2020. DOI: https:\/\/doi.org\/10.1016\/j.tics.2020.08.005.","journal-title":"Trends in Cognitive Sciences"},{"key":"1597_CR281","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"C Li","year":"2024","unstructured":"C. Li, C. Wong, S. Zhang, N. Usuyama, H. Liu, J. Yang, T. Naumann, H. Poon, J. Gao. LLaVA-med: Training a large language-and-vision assistant for bio-medicine in one day. In Proceedings of the 37th International Conference on Neural Information Processing Systems, New Orleans, USA, Article number 1240, 2024."},{"key":"1597_CR282","volume-title":"Proceedings of the 8th International Conference on Learning Representations","author":"M A Islam","year":"2020","unstructured":"M. A. Islam, S. Jia, N. D. B. Bruce. How much position information do convolutional neural networks encode? In Proceedings of the 8th International Conference on Learning Representations, Addis Ababa, Ethiopia, 2020."},{"key":"1597_CR283","volume-title":"Proceedings of the 10th International Conference on Learning Representations","author":"E Hu","year":"2022","unstructured":"E. Hu, Y. Shen, P. Wallis, Z. Allen-Zhu, Y. Li, S. Wang, L. Wang, W. Chen. LoRA: Low-rank adaptation of large language models. In Proceedings of the 10th International Conference on Learning Representations, 2022."},{"key":"1597_CR284","volume-title":"Mini-Gemini: Mining the potential of multi-modality vision language models","author":"Y Li","year":"2024","unstructured":"Y. Li, Y. Zhang, C. Wang, Z. Zhong, Y. Chen, R. Chu, S. Liu, J. Jia. 
Mini-Gemini: Mining the potential of multi-modality vision language models, [Online], Available: https:\/\/arxiv.org\/abs\/2403.18814, 2024."},{"key":"1597_CR285","volume-title":"MobileVLM: A fast, reproducible and strong vision language assistant for mobile devices","author":"X Chu","year":"2023","unstructured":"X. Chu, L. Qiao, X. Lin, S. Xu, Y. Yang, Y. Hu, F. Wei, X. Zhang, B. Zhang, X. Wei, C. Shen. MobileVLM: A fast, reproducible and strong vision language assistant for mobile devices, [Online], Available: https:\/\/arxiv.org\/abs\/2312.16886, 2023."},{"key":"1597_CR286","doi-asserted-by":"publisher","first-page":"16133","DOI":"10.1109\/CVPR52729.2023.01548","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"S Woo","year":"2023","unstructured":"S. Woo, S. Debnath, R. Hu, X. Chen, Z. Liu, I. S. Kweon, S. Xie. ConvNeXt V2: Co-designing and scaling convnets with masked autoencoders. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Vancouver, Canada, pp. 16133\u201316142, 2023. DOI: https:\/\/doi.org\/10.1109\/CVPR52729.2023.01548."},{"key":"1597_CR287","doi-asserted-by":"publisher","first-page":"15979","DOI":"10.1109\/CVPR52688.2022.01553","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"K He","year":"2022","unstructured":"K. He, X. Chen, S. Xie, Y. Li, P. Doll\u00e1r, R. Girshick. Masked autoencoders are scalable vision learners. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, New Orleans, USA, pp. 15979\u201315988, 2022. DOI: https:\/\/doi.org\/10.1109\/CVPR52688.2022.01553."},{"key":"1597_CR288","unstructured":"M. Oquab, T. Darcet, T. Moutakanni, H. V. Vo, M. Szafraniec, V. Khalidov, P. Fernandez, D. Haziza, F. Massa, A. El-Nouby, M. Assran, N. Ballas, W. Galuba, R. Howes, P. Y. Huang, S. W. Li, I. Misra, M. Rabbat, V. Sharma, G. Synnaeve, H. Xu, H. Jegou, J. Mairal, P. 
Labatut, A. Joulin, P. Bojanowski. DINOv2: Learning robust visual features without supervision. Transactions on Machine Learning Research, to be published."},{"key":"1597_CR289","first-page":"49523","volume-title":"Proceedings of the 41st International Conference on Machine Learning","author":"P Villalobos","year":"2024","unstructured":"P. Villalobos, A. Ho, J. Sevilla, T. Besiroglu, L. Heim, M. Hobbhahn. Position: Will we run out of data? Limits of LLM scaling based on human-generated data. In Proceedings of the 41st International Conference on Machine Learning, Vienna, Austria, pp. 49523\u201349544, 2024."},{"key":"1597_CR290","doi-asserted-by":"publisher","first-page":"4818","DOI":"10.1109\/CVPR52733.2024.00461","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"B Xiao","year":"2024","unstructured":"B. Xiao, H. Wu, W. Xu, X. Dai, H. Hu, Y. Lu, M. Zeng, C. Liu, L. Yuan. Florence-2: Advancing a unified representation for a variety of vision tasks. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Seattle, USA, pp. 4818\u20134829, 2024. DOI: https:\/\/doi.org\/10.1109\/CVPR52733.2024.00461."},{"key":"1597_CR291","unstructured":"D. Jiang, X. He, H. Zeng, C. Wei, M. Ku, Q. Liu, W. Chen. Mantis: Interleaved multi-image instruction tuning. Transactions on Machine Learning Research, to be published."},{"key":"1597_CR292","volume-title":"An agentic system for rare disease diagnosis with traceable reasoning","author":"W Zhao","year":"2025","unstructured":"W. Zhao, C. Wu, Y. Fan, X. Zhang, P. Qiu, Y. Sun, X. Zhou, Y. Wang, Y. Zhang, Y. Yu, K. Sun, W. Xie. 
An agentic system for rare disease diagnosis with traceable reasoning, [Online], Available: https:\/\/arxiv.org\/abs\/2506.20430, 2025."},{"key":"1597_CR293","doi-asserted-by":"publisher","first-page":"773","DOI":"10.1109\/ICCVW60793.2023.00085","volume-title":"Proceedings of IEEE\/CVF International Conference on Computer Vision Workshops","author":"J B Haurum","year":"2023","unstructured":"J. B. Haurum, S. Escalera, G. W. Taylor, T. B. Moeslund. Which tokens to use? Investigating token reduction in vision transformers. In Proceedings of IEEE\/CVF International Conference on Computer Vision Workshops, Paris, France, pp. 773\u2013783, 2023. DOI: https:\/\/doi.org\/10.1109\/ICCVW60793.2023.00085."},{"key":"1597_CR294","volume-title":"Proceedings of the 38th Annual Conference on Neural Information Processing Systems","author":"H Diao","year":"2024","unstructured":"H. Diao, Y. Cui, X. Li, Y. Wang, H. Lu, X. Wang. Unveiling encoder-free vision-language models. In Proceedings of the 38th Annual Conference on Neural Information Processing Systems, Vancouver, Canada, 2024."},{"key":"1597_CR295","doi-asserted-by":"publisher","first-page":"24185","DOI":"10.1109\/CVPR52733.2024.02283","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Z Chen","year":"2024","unstructured":"Z. Chen, J. Wu, W. Wang, W. Su, G. Chen, S. Xing, M. Zhong, Q. Zhang, X. Zhu, L. Lu, B. Li, P. Luo, T. Lu, Y. Qiao, J. Dai. InternVL: Scaling up vision foundation models and aligning for generic visual-linguistic tasks. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Seattle, USA, pp. 24185\u201324198, 2024. DOI: https:\/\/doi.org\/10.1109\/CVPR52733.2024.02283."},{"key":"1597_CR296","volume-title":"Proceedings of the 41st International Conference on Machine Learning","author":"G Bachmann","year":"2024","unstructured":"G. Bachmann, V. Nagarajan. The pitfalls of next-token prediction. 
In Proceedings of the 41st International Conference on Machine Learning, Vienna, Austria, 2024."},{"key":"1597_CR297","doi-asserted-by":"publisher","first-page":"13405","DOI":"10.1109\/CVPR52733.2024.01273","volume-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"X Huang","year":"2024","unstructured":"X. Huang, J. Wang, Y. Tang, Z. Zhang, H. Hu, J. Lu, L. Wang, Z. Liu. Segment and caption anything. In Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Seattle, USA, pp. 13405\u201313417, 2024. DOI: https:\/\/doi.org\/10.1109\/CVPR52733.2024.01273."}],"container-title":["Machine Intelligence Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-025-1597-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11633-025-1597-6","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-025-1597-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T15:04:22Z","timestamp":1770044662000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11633-025-1597-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,7]]},"references-count":297,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,2]]}},"alternative-id":["1597"],"URL":"https:\/\/doi.org\/10.1007\/s11633-025-1597-6","relation":{},"ISSN":["2731-538X","2731-5398"],"issn-type":[{"value":"2731-538X","type":"print"},{"value":"2731-5398","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,7]]},"assertion":[{"value":"22 March 
2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 September 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Deng-Ping Fan is an associate editor of\n                      Machine Intelligence Research\n                      and was not involved in the editorial review, or the decision to publish this article. All authors declare that there are no other competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations of conflict of interest"}}]}}