{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,10]],"date-time":"2026-07-10T09:02:26Z","timestamp":1783674146434,"version":"3.55.0"},"reference-count":577,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T00:00:00Z","timestamp":1760659200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T00:00:00Z","timestamp":1760659200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62201468"],"award-info":[{"award-number":["62201468"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"TCL Science and Technology Innovation Fund","award":["D5140240118"],"award-info":[{"award-number":["D5140240118"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Artif Intell Rev"],"DOI":"10.1007\/s10462-025-11368-7","type":"journal-article","created":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T02:49:49Z","timestamp":1760669389000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":24,"title":["A survey on deep learning 
fundamentals"],"prefix":"10.1007","volume":"58","author":[{"given":"Chunwei","family":"Tian","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tongtong","family":"Cheng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhe","family":"Peng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wangmeng","family":"Zuo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yonglin","family":"Tian","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Qingfu","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fei-Yue","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"David","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,10,17]]},"reference":[{"key":"11368_CR1","unstructured":"Abadi M, Agarwal A, Barham P, Brevdo E, Chen Z, Citro C, Corrado GS, Davis A, Dean J, Devin M (2016) Tensorflow: Large-scale machine learning on heterogeneous distributed systems. arXiv preprint arXiv:1603.04467"},{"key":"11368_CR2","doi-asserted-by":"crossref","unstructured":"Abdelhamed A, Lin S, Brown MS (2018) A high-quality denoising dataset for smartphone cameras. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1692\u20131700","DOI":"10.1109\/CVPR.2018.00182"},{"issue":"11","key":"11368_CR3","doi-asserted-by":"publisher","first-page":"4311","DOI":"10.1109\/TSP.2006.881199","volume":"54","author":"M Aharon","year":"2006","unstructured":"Aharon M, Elad M, Bruckstein A (2006) K-svd: An algorithm for designing overcomplete dictionaries for sparse representation. 
IEEE Trans Signal Process 54(11):4311\u20134322","journal-title":"IEEE Trans Signal Process"},{"issue":"1","key":"11368_CR4","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1016\/S0306-4573(02)00021-3","volume":"39","author":"A Aizawa","year":"2003","unstructured":"Aizawa A (2003) An information-theoretic perspective of tf-idf measures. Inf Process Manag 39(1):45\u201365","journal-title":"Inf Process Manag"},{"key":"11368_CR5","first-page":"23716","volume":"35","author":"J-B Alayrac","year":"2022","unstructured":"Alayrac J-B, Donahue J, Luc P, Miech A, Barr I, Hasson Y, Lenc K, Mensch A, Millican K, Reynolds M et al (2022) Flamingo: a visual language model for few-shot learning. Adv Neural Inf Process Syst 35:23716\u201323736","journal-title":"Adv Neural Inf Process Syst"},{"key":"11368_CR6","doi-asserted-by":"crossref","unstructured":"Antol S, Agrawal A, Lu J, Mitchell M, Batra D, Zitnick CL, Parikh D (2015) Vqa: Visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2425\u20132433","DOI":"10.1109\/ICCV.2015.279"},{"key":"11368_CR7","unstructured":"Anwar S, Huynh CP, Porikli F (2017) Chaining identity mapping modules for image denoising. arXiv preprint arXiv:1712.02933"},{"key":"11368_CR8","doi-asserted-by":"crossref","unstructured":"Anwar S, Barnes N (2019) Real image denoising with feature attention In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3155\u20133164","DOI":"10.1109\/ICCV.2019.00325"},{"key":"11368_CR9","unstructured":"Aouayeb M, Hamidouche W, Soladie C, Kpalma K, Seguier R (2021) Learning vision transformer with squeeze and excitation for facial expression recognition. arXiv preprint arXiv:2107.03107"},{"key":"11368_CR10","unstructured":"Arjomand\u00a0Bigdeli S, Zwicker M, Favaro P, Jin M (2017) Deep mean-shift priors for image restoration. 
In: Advances in Neural Information Processing Systems 30"},{"key":"11368_CR11","doi-asserted-by":"crossref","unstructured":"Armeni I, Sener O, Zamir AR, Jiang H, Brilakis I, Fischer M, Savarese S (2016) 3d semantic parsing of large-scale indoor spaces. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1534\u20131543","DOI":"10.1109\/CVPR.2016.170"},{"key":"11368_CR12","unstructured":"Ba JL, Kiros JR, Hinton GE (2018) Layer normalization. arXiv preprint arXiv:1607.06450"},{"key":"11368_CR13","unstructured":"Bahdanau D, Cho K, Bengio Y (2016) Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473"},{"key":"11368_CR14","doi-asserted-by":"crossref","unstructured":"Bao S, He H, Wang F, Wu H, Wang H, Wu W, Guo Z, Liu Z, Xu X (2020) Plato-2: Towards building an open-domain chatbot via curriculum learning. arXiv preprint arXiv:2006.16779","DOI":"10.18653\/v1\/2021.findings-acl.222"},{"key":"11368_CR15","doi-asserted-by":"crossref","unstructured":"Bar-Tal O, Chefer H, Tov O, Herrmann C, Paiss R, Zada S, Ephrat A, Hur J, Liu G, Raj A, et al. (2024) Lumiere: A space-time diffusion model for video generation. In: SIGGRAPH Asia 2024 Conference Papers, pp. 1\u201311","DOI":"10.1145\/3680528.3687614"},{"key":"11368_CR16","unstructured":"Bastien F, Lamblin P, Pascanu R, Bergstra J, Goodfellow I, Bergeron A, Bouchard N, Warde-Farley D, Bengio Y (2012) Theano: new features and speed improvements. arXiv preprint arXiv:1211.5590"},{"key":"11368_CR17","unstructured":"Beal J, Kim E, Tzeng E, Park DH, Kislyuk, D (2020) Toward transformer-based object detection. arXiv:2012.09958"},{"key":"11368_CR18","doi-asserted-by":"crossref","unstructured":"Behley J, Garbade M, Milioto A, Quenzel J, Behnke S, Stachniss C, Gall J (2019) Semantickitti: A dataset for semantic scene understanding of lidar sequences. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 
9297\u20139307","DOI":"10.1109\/ICCV.2019.00939"},{"issue":"2","key":"11368_CR19","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1145\/1345448.1345465","volume":"9","author":"RM Bell","year":"2007","unstructured":"Bell RM, Koren Y (2007) Lessons from the Netflix prize challenge. ACM SIGKDD Explor Newsl 9(2):75\u201379","journal-title":"ACM SIGKDD Explor Newsl"},{"key":"11368_CR20","first-page":"22614","volume":"34","author":"I Bello","year":"2021","unstructured":"Bello I, Fedus W, Du X, Cubuk ED, Srinivas A, Lin T-Y, Shlens J, Zoph B (2021) Revisiting resnets: Improved training and scaling strategies. Adv Neural Inf Process Syst 34:22614\u201322627","journal-title":"Adv Neural Inf Process Syst"},{"issue":"2","key":"11368_CR21","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1109\/72.279181","volume":"5","author":"Y Bengio","year":"1994","unstructured":"Bengio Y, Simard P, Frasconi P (1994) Learning long-term dependencies with gradient descent is difficult. IEEE Trans Neural Netw 5(2):157\u2013166","journal-title":"IEEE Trans Neural Netw"},{"key":"11368_CR22","unstructured":"Bertasius G, Wang H, Torresani L (2021) Is space-time attention all you need for video understanding? In: ICML, vol. 2, p. 4"},{"key":"11368_CR23","doi-asserted-by":"crossref","unstructured":"Bevilacqua M, Roumy A, Guillemot C, Alberi-Morel ML (2012) Low-complexity single-image super-resolution based on nonnegative neighbor embedding","DOI":"10.5244\/C.26.135"},{"key":"11368_CR24","unstructured":"Bochkovskiy A, Wang C-Y, Liao H-YM (2020) Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934"},{"key":"11368_CR25","unstructured":"Brock A, Lim T, Ritchie JM, Weston N (2016) Neural photo editing with introspective adversarial networks. 
arXiv preprint arXiv:1609.07093"},{"issue":"4","key":"11368_CR26","first-page":"467","volume":"18","author":"PF Brown","year":"1992","unstructured":"Brown PF, Della Pietra VJ, Desouza PV, Lai JC, Mercer RL (1992) Class-based n-gram models of natural language. Comput Linguist 18(4):467\u2013480","journal-title":"Comput Linguist"},{"key":"11368_CR27","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown T, Mann B, Ryder N, Subbiah M, Kaplan JD, Dhariwal P, Neelakantan A, Shyam P, Sastry G, Askell A et al (2020) Language models are few-shot learners. Adv Neural Inf Process Syst 33:1877\u20131901","journal-title":"Adv Neural Inf Process Syst"},{"key":"11368_CR28","doi-asserted-by":"crossref","unstructured":"Buades A, Coll B, Morel J-M (2005) A non-local algorithm for image denoising. In: 2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR\u201905), vol. 2, pp. 60\u201365 (2005). IEEE","DOI":"10.1109\/CVPR.2005.38"},{"key":"11368_CR30","doi-asserted-by":"crossref","unstructured":"Burger HC, Schuler CJ, Harmeling S (2012) Image denoising: Can plain neural networks compete with bm3d? In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 2392\u20132399. IEEE","DOI":"10.1109\/CVPR.2012.6247952"},{"key":"11368_CR32","doi-asserted-by":"crossref","unstructured":"Byun J, Cha S, Moon T (2021) Fbi-denoiser: Fast blind image denoiser for poisson-gaussian noise. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5768\u20135777","DOI":"10.1109\/CVPR46437.2021.00571"},{"key":"11368_CR33","doi-asserted-by":"crossref","unstructured":"Cai G, Cai Y (2020) Hierarchy spatial-temporal transformer for action recognition in short videos. In: FSDM, pp. 760\u2013774","DOI":"10.3233\/FAIA200754"},{"key":"11368_CR34","doi-asserted-by":"crossref","unstructured":"Cai Z, Vasconcelos N (2018) Cascade r-cnn: Delving into high quality object detection. 
In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6154\u20136162","DOI":"10.1109\/CVPR.2018.00644"},{"key":"11368_CR35","unstructured":"Cai C, Wang D, Wang Y (2021) Graph coarsening with neural networks. arXiv preprint arXiv:2102.01350"},{"key":"11368_CR36","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: European Conference on Computer Vision, pp. 213\u2013229. Springer","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"11368_CR37","first-page":"255","volume-title":"Advances in manufacturing technology","author":"P Carvalho","year":"2022","unstructured":"Carvalho P, Durupt A, Grandvalet Y (2022) A survey of machine learning approaches for visual inspection on the dagm dataset. Advances in manufacturing technology, vol XXXV. IOS Press, Amsterdam, pp 255\u2013260"},{"key":"11368_CR38","unstructured":"Chang AX, Funkhouser T, Guibas L, Hanrahan P, Huang Q, Li Z, Savarese S, Savva M, Song S, Su H, et al. (2015) Shapenet: An information-rich 3d model repository. arXiv preprint arXiv:1512.03012"},{"key":"11368_CR39","unstructured":"Chang AX, Funkhouser T, Guibas L, Hanrahan P, Huang Q, Li Z, Savarese S, Savva M, Song S, Su H (2015) Shapenet: An information-rich 3d model repository. arXiv preprint arXiv:1512.03012"},{"key":"11368_CR40","doi-asserted-by":"crossref","unstructured":"Chan C, Ginosar S, Zhou T, Efros AA (2019) Everybody dance now. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5933\u20135942","DOI":"10.1109\/ICCV.2019.00603"},{"key":"11368_CR41","doi-asserted-by":"crossref","unstructured":"Chen Y-J, Tsai C-Y, Xu X, Shi Y, Ho T-Y, Huang M, Yuan H, Zhuang J (2021) Ct image denoising with encoder-decoder based graph convolutional networks. In: 2021 IEEE 18th International Symposium on Biomedical Imaging (ISBI), pp. 400\u2013404. 
IEEE","DOI":"10.1109\/ISBI48211.2021.9433900"},{"key":"11368_CR42","doi-asserted-by":"crossref","unstructured":"Chen X, Girshick R, He K, Doll\u00e1r P (2019) Tensormask: A foundation for dense object segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2061\u20132069","DOI":"10.1109\/ICCV.2019.00215"},{"key":"11368_CR43","unstructured":"Chen L-C, Papandreou G, Schroff F, Adam H (2017) Rethinking atrous convolution for semantic image segmentation. arXiv preprint arXiv:1706.05587"},{"key":"11368_CR44","unstructured":"Chen X, Wang J, Hebert M (2020) Panonet: Real-time panoptic segmentation through position-sensitive feature embedding. arXiv preprint arXiv:2008.00192"},{"key":"11368_CR45","doi-asserted-by":"crossref","unstructured":"Chen L-C, Zhu Y, Papandreou G, Schroff F, Adam H (2018) Encoder-decoder with atrous separable convolution for semantic image segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 801\u2013818","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"11368_CR46","unstructured":"Chen Y (2015) Convolutional neural network for sentence classification. Master\u2019s thesis, University of Waterloo"},{"key":"11368_CR47","doi-asserted-by":"crossref","unstructured":"Chen H, Wang Y, Guo T, Xu C, Deng Y, Liu Z, Ma S, Xu C, Xu C, Gao W (2021) Pre-trained image processing transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12299\u201312310","DOI":"10.1109\/CVPR46437.2021.01212"},{"issue":"10","key":"11368_CR48","doi-asserted-by":"publisher","first-page":"1252","DOI":"10.1016\/j.patrec.2007.02.009","volume":"28","author":"H Chen","year":"2007","unstructured":"Chen H, Bhanu B (2007) 3d free-form object recognition in range images using local surface patches. 
Pattern Recogn Lett 28(10):1252\u20131262","journal-title":"Pattern Recogn Lett"},{"issue":"6","key":"11368_CR49","doi-asserted-by":"publisher","first-page":"1256","DOI":"10.1109\/TPAMI.2016.2596743","volume":"39","author":"Y Chen","year":"2016","unstructured":"Chen Y, Pock T (2016) Trainable nonlinear reaction diffusion: A flexible framework for fast and effective image restoration. IEEE Trans Pattern Anal Mach Intell 39(6):1256\u20131272","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"1","key":"11368_CR50","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1109\/TAFFC.2016.2593719","volume":"9","author":"J Chen","year":"2016","unstructured":"Chen J, Chen Z, Chi Z, Fu H (2016) Facial expression recognition in video with multiple feature fusion. IEEE Trans Affect Comput 9(1):38\u201350","journal-title":"IEEE Trans Affect Comput"},{"issue":"4","key":"11368_CR51","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L-C Chen","year":"2017","unstructured":"Chen L-C, Papandreou G, Kokkinos I, Murphy K, Yuille AL (2017) Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. IEEE Trans Pattern Anal Mach Intell 40(4):834\u2013848","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"4","key":"11368_CR52","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"L-C Chen","year":"2017","unstructured":"Chen L-C, Papandreou G, Kokkinos I, Murphy K, Yuille AL (2017) Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. IEEE Trans Pattern Anal Mach Intell 40(4):834\u2013848","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11368_CR53","doi-asserted-by":"crossref","unstructured":"Chen J, Chen J, Chao H, Yang M (2018) Image blind denoising with generative adversarial network based noise modeling. 
In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3155\u20133164","DOI":"10.1109\/CVPR.2018.00333"},{"key":"11368_CR54","doi-asserted-by":"crossref","unstructured":"Chen J, Chen J, Chao H, Yang M (2018) Image blind denoising with generative adversarial network based noise modeling. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3155\u20133164","DOI":"10.1109\/CVPR.2018.00333"},{"key":"11368_CR55","doi-asserted-by":"crossref","unstructured":"Cheng B, Collins MD, Zhu Y, Liu T, Huang TS, Adam H, Chen L-C (2020) Panoptic-deeplab: A simple, strong, and fast baseline for bottom-up panoptic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12475\u201312485","DOI":"10.1109\/CVPR42600.2020.01249"},{"key":"11368_CR56","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TIM.2020.2991573","volume":"70","author":"X Cheng","year":"2020","unstructured":"Cheng X, Yu J (2020) Retinanet with difference channel attention and adaptively spatial feature fusion for steel surface defect detection. IEEE Trans Instrum Meas 70:1\u201311","journal-title":"IEEE Trans Instrum Meas"},{"key":"11368_CR57","doi-asserted-by":"crossref","unstructured":"Chen S, Sun P, Song Y, Luo P (2023) Diffusiondet: Diffusion model for object detection In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 19830\u201319843","DOI":"10.1109\/ICCV51070.2023.01816"},{"key":"11368_CR58","doi-asserted-by":"crossref","unstructured":"Chen Z, Sun K, Zhou Z, Lin X, Sun X, Cao L, Ji R (2024) Diffusionface: Towards a comprehensive dataset for diffusion-based face forgery analysis. 
arXiv preprint arXiv:2403.18471","DOI":"10.2139\/ssrn.5245709"},{"key":"11368_CR59","unstructured":"Chetlur S, Woolley C, Vandermersch P, Cohen J, Tran J, Catanzaro B, Shelhamer E (2014) cudnn: Efficient primitives for deep learning arXiv preprint arXiv:1410.0759"},{"key":"11368_CR60","doi-asserted-by":"crossref","unstructured":"Cho S-J, Ji S-W, Hong J-P, Jung S-W, Ko S-J (2021) Rethinking coarse-to-fine approach in single image deblurring. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4641\u20134650","DOI":"10.1109\/ICCV48922.2021.00460"},{"key":"11368_CR61","doi-asserted-by":"crossref","unstructured":"Choy CB, Xu D, Gwak J, Chen K, Savarese S (2016) 3d-r2n2: A unified approach for single and multi-view 3d object reconstruction. In: Computer vision\u2013ECCV 2016: 14th European Conference, Amsterdam, the Netherlands, October 11\u201314, 2016, Proceedings, Part VIII 14, pp. 628\u2013644. Springer","DOI":"10.1007\/978-3-319-46484-8_38"},{"key":"11368_CR62","unstructured":"Ciresan DC, Meier U, Masci J, Gambardella LM, Schmidhuber J (2011) Flexible, high performance convolutional neural networks for image classification. In: Twenty-second International Joint Conference on Artificial Intelligence . Citeseer"},{"key":"11368_CR63","unstructured":"Clauset A (2011) A brief primer on probability distributions. In: Santa Fe Institute"},{"key":"11368_CR64","doi-asserted-by":"crossref","unstructured":"Cordts M, Omran M, Ramos S, Rehfeld T, Enzweiler M, Benenson R, Franke U, Roth S, Schiele B (2016) The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3213\u20133223","DOI":"10.1109\/CVPR.2016.350"},{"issue":"1","key":"11368_CR65","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1109\/TIT.1967.1053964","volume":"13","author":"T Cover","year":"1967","unstructured":"Cover T, Hart P (1967) Nearest neighbor pattern classification. 
IEEE Trans Inf Theory 13(1):21\u201327","journal-title":"IEEE Trans Inf Theory"},{"issue":"9","key":"11368_CR66","doi-asserted-by":"publisher","first-page":"10850","DOI":"10.1109\/TPAMI.2023.3261988","volume":"45","author":"F-A Croitoru","year":"2023","unstructured":"Croitoru F-A, Hondru V, Ionescu RT, Shah M (2023) Diffusion models in vision: A survey. IEEE Trans Pattern Anal Mach Intell 45(9):10850\u201310869","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11368_CR67","first-page":"1","volume":"70","author":"L Cui","year":"2021","unstructured":"Cui L, Jiang X, Xu M, Li W, Lv P, Zhou B (2021) Sddnet: A fast and accurate network for surface defect detection. IEEE Trans Instrum Meas 70:1\u201313","journal-title":"IEEE Trans Instrum Meas"},{"issue":"8","key":"11368_CR68","doi-asserted-by":"publisher","first-page":"2080","DOI":"10.1109\/TIP.2007.901238","volume":"16","author":"K Dabov","year":"2007","unstructured":"Dabov K, Foi A, Katkovnik V, Egiazarian K (2007) Image denoising by sparse 3-d transform-domain collaborative filtering. IEEE Trans Image Process 16(8):2080\u20132095","journal-title":"IEEE Trans Image Process"},{"key":"11368_CR69","doi-asserted-by":"crossref","unstructured":"Dai T, Cai J, Zhang Y, Xia S-T, Zhang L (2019) Second-order attention network for single image super-resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11065\u201311074","DOI":"10.1109\/CVPR.2019.01132"},{"key":"11368_CR70","unstructured":"Dai A, Chang AX, Savva M, Halber M, Funkhouser T, Nie\u00dfner M (2017) Scannet: Richly-annotated 3d reconstructions of indoor scenes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5828\u20135839"},{"key":"11368_CR71","doi-asserted-by":"crossref","unstructured":"Dai J, He K, Sun J (2016) Instance-aware semantic segmentation via multi-task network cascades. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 
3150\u20133158","DOI":"10.1109\/CVPR.2016.343"},{"key":"11368_CR72","unstructured":"Dai J, Li Y, He K, Sun J (2016) R-fcn: Object detection via region-based fully convolutional networks. In: Advances in neural information processing systems 29"},{"issue":"4","key":"11368_CR73","doi-asserted-by":"publisher","first-page":"1715","DOI":"10.1109\/TIP.2011.2176954","volume":"21","author":"A Danielyan","year":"2011","unstructured":"Danielyan A, Katkovnik V, Egiazarian K (2011) Bm3d frames and variational image deblurring. IEEE Trans Image Process 21(4):1715\u20131728","journal-title":"IEEE Trans Image Process"},{"key":"11368_CR74","unstructured":"DasGupta B, Schnitger G (1992) The power of approximating: a comparison of activation functions. In: Advances in neural information processing systems 5"},{"key":"11368_CR75","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1007\/s10479-005-5724-z","volume":"134","author":"P-T De Boer","year":"2005","unstructured":"De Boer P-T, Kroese DP, Mannor S, Rubinstein RY (2005) A tutorial on the cross-entropy method. Ann Oper Res 134:19\u201367","journal-title":"Ann Oper Res"},{"key":"11368_CR76","unstructured":"Denton EL, Chintala S, Fergus R (2015) Deep generative image models using a laplacian pyramid of adversarial networks. In: Advances in neural information processing systems 28"},{"issue":"1","key":"11368_CR77","first-page":"47","volume":"6","author":"B Devereux","year":"2004","unstructured":"Devereux B, Amable G, Posada CC (2004) An efficient image segmentation algorithm for landscape analysis. Int J Appl Earth Obs Geoinf 6(1):47\u201361","journal-title":"Int J Appl Earth Obs Geoinf"},{"key":"11368_CR78","doi-asserted-by":"crossref","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2019) Bert: Pre-training of deep bidirectional transformers for language understanding. 
In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (long and Short Papers), pp. 4171\u20134186","DOI":"10.18653\/v1\/N19-1423"},{"key":"11368_CR79","doi-asserted-by":"crossref","unstructured":"Dong C, Loy CC, Tang X (2016) Accelerating the super-resolution convolutional neural network. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part II 14, pp. 391\u2013407. Springer","DOI":"10.1007\/978-3-319-46475-6_25"},{"issue":"4","key":"11368_CR80","doi-asserted-by":"publisher","first-page":"1620","DOI":"10.1109\/TIP.2012.2235847","volume":"22","author":"W Dong","year":"2012","unstructured":"Dong W, Zhang L, Shi G, Li X (2012) Nonlocally centralized sparse representation for image restoration. IEEE Trans Image Process 22(4):1620\u20131630","journal-title":"IEEE Trans Image Process"},{"issue":"2","key":"11368_CR81","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1109\/TPAMI.2015.2439281","volume":"38","author":"C Dong","year":"2015","unstructured":"Dong C, Loy CC, He K, Tang X (2015) Image super-resolution using deep convolutional networks. IEEE Trans Pattern Anal Mach Intell 38(2):295\u2013307","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"12","key":"11368_CR82","doi-asserted-by":"publisher","first-page":"7448","DOI":"10.1109\/TII.2019.2958826","volume":"16","author":"H Dong","year":"2019","unstructured":"Dong H, Song K, He Y, Xu J, Yan Y, Meng Q (2019) Pga-net: Pyramid feature fusion and global context attention network for automated surface defect detection. 
IEEE Trans Ind Inf 16(12):7448\u20137458","journal-title":"IEEE Trans Ind Inf"},{"issue":"12","key":"11368_CR83","doi-asserted-by":"publisher","first-page":"9960","DOI":"10.1109\/TPAMI.2021.3138787","volume":"44","author":"J Dong","year":"2021","unstructured":"Dong J, Roth S, Schiele B (2021) Dwdn: Deep wiener deconvolution network for non-blind image deblurring. IEEE Trans Pattern Anal Mach Intell 44(12):9960\u20139976","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11368_CR84","doi-asserted-by":"crossref","unstructured":"Dong C, Deng Y, Loy CC, Tang X (2015) Compression artifacts reduction by a deep convolutional network. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 576\u2013584","DOI":"10.1109\/ICCV.2015.73"},{"key":"11368_CR85","doi-asserted-by":"crossref","unstructured":"Dong C, Loy CC, Tang X (2016) Accelerating the super-resolution convolutional neural network. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part II 14, pp. 391\u2013407. Springer","DOI":"10.1007\/978-3-319-46475-6_25"},{"key":"11368_CR86","unstructured":"Dosovitskiy A (2020) An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"issue":"2","key":"11368_CR87","doi-asserted-by":"publisher","DOI":"10.1016\/j.patter.2020.100019","volume":"1","author":"JA Dunnmon","year":"2020","unstructured":"Dunnmon JA, Ratner AJ, Saab K, Khandwala N, Markert M, Sagreiya H, Goldman R, Lee-Messer C, Lungren MP, Rubin DL et al (2020) Cross-modal data programming enables rapid medical machine learning. Patterns 1(2):100019","journal-title":"Patterns"},{"issue":"3","key":"11368_CR88","doi-asserted-by":"publisher","first-page":"361","DOI":"10.1016\/S0959-440X(96)80056-X","volume":"6","author":"SR Eddy","year":"1996","unstructured":"Eddy SR (1996) Hidden Markov models. 
Curr Opin Struct Biol 6(3):361\u2013365","journal-title":"Curr Opin Struct Biol"},{"key":"11368_CR89","doi-asserted-by":"crossref","unstructured":"Esser P, Sutter E, Ommer B (2018) A variational u-net for conditional appearance and shape generation In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8857\u20138866","DOI":"10.1109\/CVPR.2018.00923"},{"key":"11368_CR90","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham M, Van Gool L, Williams CK, Winn J, Zisserman A (2010) The pascal visual object classes (voc) challenge. Int J Comput Vis 88:303\u2013338","journal-title":"Int J Comput Vis"},{"key":"11368_CR91","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham M, Van Gool L, Williams CK, Winn J, Zisserman A (2010) The pascal visual object classes (voc) challenge. Int J Comput Vis 88:303\u2013338","journal-title":"Int J Comput Vis"},{"key":"11368_CR92","doi-asserted-by":"crossref","unstructured":"Faltings F, Galley M, Peng B, Brantley K, Cai W, Zhang Y, Gao J, Dolan B (2023) Interactive text generation. arXiv preprint arXiv:2303.00908","DOI":"10.18653\/v1\/2023.emnlp-main.270"},{"key":"11368_CR93","unstructured":"Fan Q, Panda R, et al. (2021) Can an image classifier suffice for action recognition? arXiv preprint arXiv:2106.14104"},{"key":"11368_CR94","first-page":"1","volume":"19","author":"H Fang","year":"2021","unstructured":"Fang H, Xia M, Zhou G, Chang Y, Yan L (2021) Infrared small uav target detection based on residual image prediction via global and local dilated residual networks. 
IEEE Geosci Remote Sens Lett 19:1\u20135","journal-title":"IEEE Geosci Remote Sens Lett"},{"issue":"6","key":"11368_CR95","doi-asserted-by":"publisher","first-page":"1624","DOI":"10.1109\/TCSS.2021.3132040","volume":"9","author":"K Fang","year":"2022","unstructured":"Fang K, Lu W, Zhou X, Xu J, Mao K (2022) A multitarget interested region extraction method for wrist x-ray images based on optimized alexnet and two-class combined model. IEEE Trans Comput Soc Syst 9(6):1624\u20131634. https:\/\/doi.org\/10.1109\/TCSS.2021.3132040","journal-title":"IEEE Trans Comput Soc Syst"},{"key":"11368_CR96","doi-asserted-by":"crossref","unstructured":"Fang H, Han B, Zhang S, Zhou S, Hu C, Ye W-M (2024) Data augmentation for object detection via controllable diffusion models. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1257\u20131266","DOI":"10.1109\/WACV57701.2024.00129"},{"key":"11368_CR97","doi-asserted-by":"publisher","first-page":"26756","DOI":"10.1109\/ACCESS.2022.3156598","volume":"10","author":"AP Fard","year":"2022","unstructured":"Fard AP, Mahoor MH (2022) Ad-corre: Adaptive correlation-based loss for facial expression recognition in the wild. IEEE Access 10:26756\u201326768","journal-title":"IEEE Access"},{"key":"11368_CR98","doi-asserted-by":"crossref","unstructured":"Farzaneh AH, Qi X (2021) Facial expression recognition in the wild via deep attentive center loss. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 2402\u20132411","DOI":"10.1109\/WACV48630.2021.00245"},{"key":"11368_CR99","doi-asserted-by":"crossref","unstructured":"Feichtenhofer C, Pinz A, Zisserman A (2016) Convolutional two-stream network fusion for video action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 
1933\u20131941","DOI":"10.1109\/CVPR.2016.213"},{"key":"11368_CR100","doi-asserted-by":"publisher","DOI":"10.1016\/j.infrared.2022.104470","volume":"127","author":"H F\u0131rat","year":"2022","unstructured":"F\u0131rat H, Asker ME, Bayindir M\u0130, Hanbay D (2022) Spatial-spectral classification of hyperspectral remote sensing images using 3d cnn based lenet-5 architecture. Infrared Phys Technol 127:104470","journal-title":"Infrared Phys Technol"},{"key":"11368_CR101","unstructured":"Franzen, R.: Kodak lossless true color image suite. source: http:\/\/r0k.us\/graphics\/kodak 4(2), 9 (1999)"},{"key":"11368_CR102","doi-asserted-by":"crossref","unstructured":"Freeman I, Roese-Koerner L, Kummert A (2018) Effnet: An efficient structure for convolutional neural networks. In: 2018 25th IEEE International Conference on Image Processing (icip), pp. 6\u201310. IEEE","DOI":"10.1109\/ICIP.2018.8451339"},{"key":"11368_CR103","unstructured":"Frome A, Corrado GS, Shlens J, Bengio S, Dean J, Ranzato M, Mikolov T (2013) Devise: A deep visual-semantic embedding model. In: Advances in neural information processing systems 26"},{"key":"11368_CR104","unstructured":"Fu C-Y, Liu W, Ranga A, Tyagi A, Berg AC (2017) Dssd: Deconvolutional single shot detector. arXiv preprint arXiv:1701.06659"},{"issue":"17","key":"11368_CR105","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1016\/j.ifacol.2018.08.059","volume":"51","author":"L Fu","year":"2018","unstructured":"Fu L, Feng Y, Majeed Y, Zhang X, Zhang J, Karkee M, Zhang Q (2018) Kiwifruit detection in field images using faster r-cnn with zfnet. IFAC-PapersOnLine 51(17):45\u201350","journal-title":"IFAC-PapersOnLine"},{"key":"11368_CR106","doi-asserted-by":"crossref","unstructured":"Gallagher AC, Chen T (2009) Understanding images of groups of people In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 256\u2013263 . 
IEEE","DOI":"10.1109\/CVPRW.2009.5206828"},{"key":"11368_CR107","doi-asserted-by":"crossref","unstructured":"Gao G, Wang Z, Li J, Li W, Yu Y, Zeng T (2022) Lightweight bimodal network for single-image super-resolution via symmetric cnn and recursive transformer. arXiv preprint arXiv:2204.13286 (2022)","DOI":"10.24963\/ijcai.2022\/128"},{"key":"11368_CR108","doi-asserted-by":"publisher","DOI":"10.1016\/j.compind.2022.103689","volume":"140","author":"L Gao","year":"2022","unstructured":"Gao L, Zhang J, Yang C, Zhou Y (2022) Cas-vswin transformer: A variant swin transformer for surface-defect detection. Comput Ind 140:103689","journal-title":"Comput Ind"},{"key":"11368_CR109","doi-asserted-by":"publisher","first-page":"22060","DOI":"10.1109\/ACCESS.2023.3252009","volume":"11","author":"R Garcia-Martin","year":"2023","unstructured":"Garcia-Martin R, Sanchez-Reillo R (2023) Vision transformers for vein biometric recognition. IEEE Access 11:22060\u201322080","journal-title":"IEEE Access"},{"issue":"14\u201315","key":"11368_CR110","doi-asserted-by":"publisher","first-page":"2627","DOI":"10.1016\/S1352-2310(97)00447-0","volume":"32","author":"MW Gardner","year":"1998","unstructured":"Gardner MW, Dorling S (1998) Artificial neural networks (the multilayer perceptron)\u2013a review of applications in the atmospheric sciences. Atmos Environ 32(14\u201315):2627\u20132636","journal-title":"Atmos Environ"},{"key":"11368_CR111","doi-asserted-by":"crossref","unstructured":"Garibi D, Patashnik O, Voynov A, Averbuch-Elor H, Cohen-Or D (2024) Renoise: Real image inversion through iterative noising. arXiv preprint arXiv:2403.14602","DOI":"10.1007\/978-3-031-72630-9_23"},{"issue":"2","key":"11368_CR112","doi-asserted-by":"publisher","first-page":"3216","DOI":"10.1109\/LRA.2021.3060405","volume":"6","author":"S Gasperini","year":"2021","unstructured":"Gasperini S, Mahani M-AN, Marcos-Ramiro A, Navab N, Tombari F (2021) Panoster: End-to-end panoptic segmentation of lidar point clouds. 
IEEE Robot Autom Lett 6(2):3216\u20133223","journal-title":"IEEE Robot Autom Lett"},{"issue":"11","key":"11368_CR113","doi-asserted-by":"publisher","first-page":"1231","DOI":"10.1177\/0278364913491297","volume":"32","author":"A Geiger","year":"2013","unstructured":"Geiger A, Lenz P, Stiller C, Urtasun R (2013) Vision meets robotics: The kitti dataset. Int J Robot Res 32(11):1231\u20131237","journal-title":"Int J Robot Res"},{"key":"11368_CR114","unstructured":"Ge Z, Liu S, Wang F, Li Z, Sun J (2021) Yolox: Exceeding yolo series in 2021. arXiv preprint arXiv:2107.08430"},{"key":"11368_CR115","doi-asserted-by":"crossref","unstructured":"Gholami, A., Kwon, K., Wu, B., Tai, Z., Yue, X., Jin, P., Zhao, S., Keutzer, K.: Squeezenext: Hardware-aware neural network design. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 1638\u20131647 (2018)","DOI":"10.1109\/CVPRW.2018.00215"},{"key":"11368_CR116","first-page":"1","volume-title":"Advances in hybridization of intelligent methods: Models, systems and applications","author":"P Giannopoulos","year":"2018","unstructured":"Giannopoulos P, Perikos I, Hatzilygeroudis I (2018) Deep learning approaches for facial emotion recognition: A case study on fer-2013. Advances in hybridization of intelligent methods: Models, systems and applications. Springer, Cham, pp 1\u201316"},{"key":"11368_CR117","doi-asserted-by":"crossref","unstructured":"Girshick R (2015) Fast r-cnn. arXiv preprint arXiv:1504.08083","DOI":"10.1109\/ICCV.2015.169"},{"key":"11368_CR118","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 
580\u2013587","DOI":"10.1109\/CVPR.2014.81"},{"key":"11368_CR119","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 580\u2013587","DOI":"10.1109\/CVPR.2014.81"},{"key":"11368_CR120","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 580\u2013587","DOI":"10.1109\/CVPR.2014.81"},{"key":"11368_CR121","unstructured":"Goodfellow I (2016) Nips 2016 tutorial: Generative adversarial networks. arXiv preprint arXiv:1701.00160"},{"key":"11368_CR122","doi-asserted-by":"crossref","unstructured":"Gould S, Fulton R, Koller D (2019) Decomposing a scene into geometric and semantically consistent regions. In: 2009 IEEE 12th International Conference on Computer Vision, pp. 1\u20138. IEEE","DOI":"10.1109\/ICCV.2009.5459211"},{"issue":"6","key":"11368_CR123","doi-asserted-by":"publisher","first-page":"1321","DOI":"10.1109\/TED.2007.896718","volume":"54","author":"RD Gow","year":"2007","unstructured":"Gow RD, Renshaw D, Findlater K, Grant L, McLeod SJ, Hart J, Nicol RL (2007) A comprehensive tool for modeling cmos image-sensor-noise performance. IEEE Trans Electron Devices 54(6):1321\u20131329","journal-title":"IEEE Trans Electron Devices"},{"key":"11368_CR124","doi-asserted-by":"crossref","unstructured":"Gowda SN, Yuan C (2019) Colornet: Investigating the importance of color spaces for image classification. In: Computer Vision\u2013ACCV 2018: 14th Asian Conference on Computer Vision, Perth, Australia, December 2\u20136, 2018, Revised Selected Papers, Part IV 14, pp. 581\u2013596. 
Springer","DOI":"10.1007\/978-3-030-20870-7_36"},{"key":"11368_CR125","unstructured":"Graham B (2014) Fractional max-pooling. arXiv preprint arXiv:1412.6071"},{"key":"11368_CR126","doi-asserted-by":"crossref","unstructured":"Graves A, Mohamed A-r, Hinton G (2013) Speech recognition with deep recurrent neural networks. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 6645\u20136649. IEEE","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"11368_CR127","doi-asserted-by":"crossref","unstructured":"Gu S, Zhang L, Zuo W, Feng X (2014) Weighted nuclear norm minimization with application to image denoising. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2862\u20132869","DOI":"10.1109\/CVPR.2014.366"},{"key":"11368_CR128","unstructured":"Gulrajani I, Ahmed F, Arjovsky M, Dumoulin V, Courville AC (2017) Improved training of wasserstein gans. In: Advances in neural information processing systems 30"},{"issue":"22","key":"11368_CR129","doi-asserted-by":"publisher","first-page":"2402","DOI":"10.1001\/jama.2016.17216","volume":"316","author":"V Gulshan","year":"2016","unstructured":"Gulshan V, Peng L, Coram M, Stumpe MC, Wu D, Narayanaswamy A, Venugopalan S, Widner K, Madams T, Cuadros J (2016) Development and validation of a deep learning algorithm for detection of diabetic retinopathy in retinal fundus photographs. JAMA 316(22):2402\u20132410","journal-title":"JAMA"},{"key":"11368_CR130","doi-asserted-by":"crossref","unstructured":"Guo C, Li C, Guo J, Loy CC, Hou J, Kwong S, Cong R (2020) Zero-reference deep curve estimation for low-light image enhancement. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1780\u20131789","DOI":"10.1109\/CVPR42600.2020.00185"},{"key":"11368_CR131","doi-asserted-by":"crossref","unstructured":"Guo S, Yan Z, Zhang K, Zuo W, Zhang L (2019) Toward convolutional blind denoising of real photographs. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1712\u20131722","DOI":"10.1109\/CVPR.2019.00181"},{"key":"11368_CR132","doi-asserted-by":"publisher","first-page":"31742","DOI":"10.1109\/ACCESS.2021.3061062","volume":"9","author":"J Gurrola-Ramos","year":"2021","unstructured":"Gurrola-Ramos J, Dalmau O, Alarc\u00f3n TE (2021) A residual dense u-net neural network for image denoising. IEEE Access 9:31742\u201331754","journal-title":"IEEE Access"},{"key":"11368_CR133","doi-asserted-by":"crossref","unstructured":"Hackel T, Savinov N, Ladicky L, Wegner JD, Schindler K, Pollefeys M (2017) Semantic3d. net: A new large-scale point cloud classification benchmark. arXiv preprint arXiv:1704.03847","DOI":"10.5194\/isprs-annals-IV-1-W1-91-2017"},{"key":"11368_CR134","doi-asserted-by":"crossref","unstructured":"Hadji I, Derpanis KG, Jepson AD (2021) Representation learning via global temporal alignment and cycle-consistency. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11068\u201311077","DOI":"10.1109\/CVPR46437.2021.01092"},{"issue":"3","key":"11368_CR135","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1007\/s13735-020-00195-x","volume":"9","author":"AM Hafiz","year":"2020","unstructured":"Hafiz AM, Bhat GM (2020) A survey on instance segmentation: state of the art. Int J Multimed Inf Retr 9(3):171\u2013189","journal-title":"Int J Multimed Inf Retr"},{"key":"11368_CR136","doi-asserted-by":"crossref","unstructured":"Hamdi A, Giancola S, Ghanem B (2021) Mvtn: Multi-view transformation network for 3d shape recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1\u201311","DOI":"10.1109\/ICCV48922.2021.00007"},{"key":"11368_CR137","doi-asserted-by":"crossref","unstructured":"Han X, Tang Y, Wang Z, Li X (2024) Mamba3d: Enhancing local features for 3d point cloud analysis via state space model. 
In: Proceedings of the 32nd ACM International Conference on Multimedia, pp. 4995\u20135004","DOI":"10.1145\/3664647.3681173"},{"issue":"4","key":"11368_CR138","doi-asserted-by":"publisher","first-page":"624","DOI":"10.1109\/TMSCS.2018.2865303","volume":"4","author":"B Han","year":"2018","unstructured":"Han B, Roy K (2018) Deltaframe-bp: An algorithm using frame difference for deep convolutional neural networks training and inference on video data. IEEE Trans Multi-Scale Comput Syst 4(4):624\u2013634","journal-title":"IEEE Trans Multi-Scale Comput Syst"},{"key":"11368_CR139","doi-asserted-by":"crossref","unstructured":"Haris M, Shakhnarovich G, Ukita N (2018) Deep back-projection networks for super-resolution. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1664\u20131673","DOI":"10.1109\/CVPR.2018.00179"},{"key":"11368_CR140","doi-asserted-by":"crossref","unstructured":"Hassanpour M, Malek H (2019) Document image classification using squeezenet convolutional neural network. In: 2019 5th Iranian Conference on Signal Processing and Intelligent Systems (ICSPIS), pp. 1\u20134. IEEE","DOI":"10.1109\/ICSPIS48872.2019.9066032"},{"key":"11368_CR141","doi-asserted-by":"crossref","unstructured":"He J, Deng Z, Qiao Y (2019a) Dynamic multi-scale filters for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3562\u20133572","DOI":"10.1109\/ICCV.2019.00366"},{"key":"11368_CR142","doi-asserted-by":"crossref","unstructured":"He J, Deng Z, Zhou L, Wang Y, Qiao Y (2019b) Adaptive pyramid context network for semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7519\u20137528","DOI":"10.1109\/CVPR.2019.00770"},{"key":"11368_CR143","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2017) Mask r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 
2961\u20132969","DOI":"10.1109\/ICCV.2017.322"},{"key":"11368_CR144","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"issue":"9","key":"11368_CR145","doi-asserted-by":"publisher","first-page":"1904","DOI":"10.1109\/TPAMI.2015.2389824","volume":"37","author":"K He","year":"2015","unstructured":"He K, Zhang X, Ren S, Sun J (2015) Spatial pyramid pooling in deep convolutional networks for visual recognition. IEEE Trans Pattern Anal Mach Intell 37(9):1904\u20131916","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"9","key":"11368_CR146","doi-asserted-by":"publisher","first-page":"1904","DOI":"10.1109\/TPAMI.2015.2389824","volume":"37","author":"K He","year":"2015","unstructured":"He K, Zhang X, Ren S, Sun J (2015) Spatial pyramid pooling in deep convolutional networks for visual recognition. IEEE Trans Pattern Anal Mach Intell 37(9):1904\u20131916","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"4","key":"11368_CR147","doi-asserted-by":"publisher","first-page":"1493","DOI":"10.1109\/TIM.2019.2915404","volume":"69","author":"Y He","year":"2019","unstructured":"He Y, Song K, Meng Q, Yan Y (2019) An end-to-end steel surface defect detection approach via fusing multiple hierarchical features. IEEE Trans Instrum Meas 69(4):1493\u20131504","journal-title":"IEEE Trans Instrum Meas"},{"key":"11368_CR148","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TGRS.2022.3230846","volume":"60","author":"X He","year":"2022","unstructured":"He X, Zhou Y, Zhao J, Zhang D, Yao R, Xue Y (2022) Swin transformer embedding unet for remote sensing image semantic segmentation. 
IEEE Trans Geosci Remote Sens 60:1\u201315","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"11368_CR149","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2017) Mask r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2961\u20132969","DOI":"10.1109\/ICCV.2017.322"},{"key":"11368_CR150","doi-asserted-by":"crossref","unstructured":"Herzig R, Ben-Avraham E, Mangalam K, Bar A, Chechik G, Rohrbach A, Darrell T, Globerson A (2022) Object-region video transformers. In: Proceedings of the Ieee\/cvf Conference on Computer Vision and Pattern Recognition, pp. 3148\u20133159","DOI":"10.1109\/CVPR52688.2022.00315"},{"key":"11368_CR151","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2015) Delving deep into rectifiers: Surpassing human-level performance on imagenet classification. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1026\u20131034","DOI":"10.1109\/ICCV.2015.123"},{"key":"11368_CR152","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"11368_CR153","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Identity mappings in deep residual networks. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part IV 14, pp. 630\u2013645 . Springer","DOI":"10.1007\/978-3-319-46493-0_38"},{"key":"11368_CR154","unstructured":"Hinton GE, Srivastava N, Krizhevsky A, Sutskever I, Salakhutdinov RR (2012) Improving neural networks by preventing co-adaptation of feature detectors. 
arXiv preprint arXiv:1207.0580"},{"issue":"7","key":"11368_CR155","doi-asserted-by":"publisher","first-page":"1527","DOI":"10.1162\/neco.2006.18.7.1527","volume":"18","author":"GE Hinton","year":"2006","unstructured":"Hinton GE, Osindero S, Teh Y-W (2006) A fast learning algorithm for deep belief nets. Neural Comput 18(7):1527\u20131554","journal-title":"Neural Comput"},{"issue":"1","key":"11368_CR156","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1016\/0893-6080(91)90032-Z","volume":"4","author":"Y Hirose","year":"1991","unstructured":"Hirose Y, Yamashita K, Hijiya S (1991) Back-propagation algorithm which varies the number of hidden units. Neural Netw 4(1):61\u201366","journal-title":"Neural Netw"},{"key":"11368_CR157","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho J, Jain A, Abbeel P (2020) Denoising diffusion probabilistic models. Adv Neural Inf Process Syst 33:6840\u20136851","journal-title":"Adv Neural Inf Process Syst"},{"issue":"7","key":"11368_CR158","doi-asserted-by":"publisher","first-page":"0298102","DOI":"10.1371\/journal.pone.0298102","volume":"19","author":"S Hong","year":"2024","unstructured":"Hong S, Wu J, Zhu L, Chen W (2024) Brain tumor classification in vit-b\/16 based on relative position encoding and residual mlp. PLoS ONE 19(7):0298102","journal-title":"PLoS ONE"},{"key":"11368_CR159","doi-asserted-by":"crossref","unstructured":"Hoppe H, DeRose T, Duchamp T, McDonald J, Stuetzle W (1992) Surface reconstruction from unorganized points. In: Proceedings of the 19th Annual Conference on Computer Graphics and Interactive Techniques, pp. 71\u201378","DOI":"10.1145\/133994.134011"},{"key":"11368_CR160","doi-asserted-by":"crossref","unstructured":"Hore A, Ziou D (2010) Image quality metrics: Psnr vs. ssim. In: 2010 20th International Conference on Pattern Recognition, pp. 2366\u20132369. 
IEEE","DOI":"10.1109\/ICPR.2010.579"},{"key":"11368_CR161","unstructured":"Howard AG, Zhu M, Chen B, Kalenichenko D, Wang W, Weyand T, Andreetto M, Adam H (2017) Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861"},{"key":"11368_CR162","unstructured":"Howard AG (2017) Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861"},{"key":"11368_CR163","doi-asserted-by":"crossref","unstructured":"Hu X, Yang K, Fei L, Wang K (2019) Acnet: Attention based network to exploit complementary features for rgbd semantic segmentation. In: 2019 IEEE International Conference on Image Processing (ICIP), pp. 1440\u20131444. IEEE","DOI":"10.1109\/ICIP.2019.8803025"},{"key":"11368_CR164","doi-asserted-by":"crossref","unstructured":"Hu Q, Yang B, Xie L, Rosa S, Guo Y, Wang Z, Trigoni N, Markham A (2020) Randla-net: Efficient semantic segmentation of large-scale point clouds. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11108\u201311117","DOI":"10.1109\/CVPR42600.2020.01112"},{"key":"11368_CR165","doi-asserted-by":"crossref","unstructured":"Hu L (2024) Animate anyone: Consistent and controllable image-to-video synthesis for character animation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8153\u20138163","DOI":"10.1109\/CVPR52733.2024.00779"},{"issue":"5","key":"11368_CR166","doi-asserted-by":"publisher","first-page":"1601","DOI":"10.1109\/JBHI.2020.3023462","volume":"25","author":"P Hu","year":"2020","unstructured":"Hu P, Li X, Tian Y, Tang T, Zhou T, Bai X, Zhu S, Liang T, Li J (2020) Automatic pancreas segmentation in ct images with distance-based saliency-aware denseaspp network. 
IEEE J Biomed Health Inform 25(5):1601\u20131611","journal-title":"IEEE J Biomed Health Inform"},{"key":"11368_CR167","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der\u00a0Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4700\u20134708","DOI":"10.1109\/CVPR.2017.243"},{"key":"11368_CR168","unstructured":"Huang K, Shi B, Li X, Li X, Huang S, Li Y (2022) Multi-modal sensor fusion for auto driving perception: A survey. arXiv preprint arXiv:2202.02703 (2022)"},{"key":"11368_CR169","doi-asserted-by":"crossref","unstructured":"Huang J-B, Singh A, Ahuja N (2015) Single image super-resolution from transformed self-exemplars. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5197\u20135206","DOI":"10.1109\/CVPR.2015.7299156"},{"key":"11368_CR170","doi-asserted-by":"crossref","unstructured":"Huang Z, Wang X, Huang L, Huang C, Wei Y, Liu W (2019) Ccnet: Criss-cross attention for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 603\u2013612","DOI":"10.1109\/ICCV.2019.00069"},{"key":"11368_CR171","unstructured":"Huang Z, Xu W, Yu K (2015) Bidirectional lstm-crf models for sequence tagging. arXiv preprint arXiv:1508.01991"},{"key":"11368_CR172","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der\u00a0Maaten L, Weinberger KQ (2017) Densely connected convolutional networks In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 
4700\u20134708","DOI":"10.1109\/CVPR.2017.243"},{"key":"11368_CR173","unstructured":"Hu J, Cao L, Lu Y, Zhang S, Wang Y, Li K, Huang F, Shao L, Ji R (2021) Istr: End-to-end instance segmentation with transformers arXiv preprint arXiv:2105.00637"},{"key":"11368_CR174","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2023.105534","volume":"87","author":"X Huo","year":"2024","unstructured":"Huo X, Sun G, Tian S, Wang Y, Yu L, Long J, Zhang W, Li A (2024) Hifuse: Hierarchical multi-scale feature fusion network for medical image classification. Biomed Signal Process Control 87:105534","journal-title":"Biomed Signal Process Control"},{"key":"11368_CR175","doi-asserted-by":"crossref","unstructured":"Hwang S, Oh SW, Kim SJ (2022) Single-shot path integrated panoptic segmentation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 3328\u20133337","DOI":"10.1109\/WACV51458.2022.00200"},{"key":"11368_CR176","unstructured":"Ioffe S (2015) Batch normalization: Accelerating deep network training by reducing internal covariate shift. arXiv preprint arXiv:1502.03167"},{"key":"11368_CR177","doi-asserted-by":"crossref","unstructured":"Isola P, Zhu J-Y, Zhou T, Efros AA (2017) Image-to-image translation with conditional adversarial networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1125\u20131134","DOI":"10.1109\/CVPR.2017.632"},{"key":"11368_CR178","doi-asserted-by":"crossref","unstructured":"Janocha K, Czarnecki WM (2017) On loss functions for deep neural networks in classification. arXiv preprint arXiv:1702.05659","DOI":"10.4467\/20838476SI.16.004.6185"},{"key":"11368_CR179","doi-asserted-by":"crossref","unstructured":"Jarrett K, Kavukcuoglu K, Ranzato M, LeCun Y (2009) What is the best multi-stage architecture for object recognition? In: 2009 IEEE 12th International Conference on Computer Vision, pp. 2146\u20132153 . 
IEEE","DOI":"10.1109\/ICCV.2009.5459469"},{"key":"11368_CR180","doi-asserted-by":"crossref","unstructured":"Jarrett K, Kavukcuoglu K, Ranzato M, LeCun Y (2009) What is the best multi-stage architecture for object recognition? In: 2009 IEEE 12th International Conference on Computer Vision, pp. 2146\u20132153. IEEE","DOI":"10.1109\/ICCV.2009.5459469"},{"issue":"6","key":"11368_CR181","doi-asserted-by":"publisher","first-page":"2676","DOI":"10.1109\/TNNLS.2020.3007534","volume":"32","author":"Y Ji","year":"2020","unstructured":"Ji Y, Zhang H, Jie Z, Ma L, Wu QJ (2020) Casnet: A cross-attention siamese network for video salient object detection. IEEE Trans Neural Netw Learn Syst 32(6):2676\u20132690","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"1","key":"11368_CR182","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s00530-023-01202-x","volume":"30","author":"M Jian","year":"2024","unstructured":"Jian M, Yu X, Zhang H, Yang C (2024) Swinct: feature enhancement based low-dose ct images denoising with swin transformer. Multimed Syst 30(1):1","journal-title":"Multimed Syst"},{"key":"11368_CR183","doi-asserted-by":"publisher","first-page":"4753","DOI":"10.1109\/TII.2023.3316184","volume":"20","author":"B Jiang","year":"2023","unstructured":"Jiang B, Lu Y, Zhang B, Lu G (2023) Agp-net: Adaptive graph prior network for image denoising. IEEE Trans Ind Inform 20:4753\u20134764","journal-title":"IEEE Trans Ind Inform"},{"key":"11368_CR184","doi-asserted-by":"publisher","first-page":"1066","DOI":"10.1016\/j.procs.2022.01.135","volume":"199","author":"P Jiang","year":"2022","unstructured":"Jiang P, Ergu D, Liu F, Cai Y, Ma B (2022) A review of yolo algorithm developments. 
Procedia Comput Sci 199:1066\u20131073","journal-title":"Procedia Comput Sci"},{"key":"11368_CR185","doi-asserted-by":"publisher","first-page":"7077","DOI":"10.1109\/TSMC.2023.3289873","volume":"53","author":"B Jiang","year":"2023","unstructured":"Jiang B, Lu Y, Chen X, Lu X, Lu G (2023) Graph attention in attention network for image denoising. IEEE Trans Syst Man Cybern Syst 53:7077\u20137088","journal-title":"IEEE Trans Syst Man Cybern Syst"},{"key":"11368_CR186","doi-asserted-by":"crossref","unstructured":"Jia Y, Shelhamer E, Donahue J, Karayev S, Long J, Girshick R, Guadarrama S, Darrell T (2014) Caffe: Convolutional architecture for fast feature embedding. In: Proceedings of the 22nd ACM International Conference on Multimedia, pp. 675\u2013678","DOI":"10.1145\/2647868.2654889"},{"key":"11368_CR187","doi-asserted-by":"crossref","unstructured":"Jin, M, Roth, S, Favaro, P (2017) Noise-blind image deblurring. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3510\u20133518","DOI":"10.1109\/CVPR.2017.408"},{"issue":"7","key":"11368_CR188","doi-asserted-by":"publisher","first-page":"965","DOI":"10.1109\/83.597272","volume":"6","author":"DJ Jobson","year":"1997","unstructured":"Jobson DJ, Rahman Z-U, Woodell GA (1997) A multiscale retinex for bridging the gap between color images and the human observation of scenes. IEEE Trans Image Process 6(7):965\u2013976","journal-title":"IEEE Trans Image Process"},{"issue":"5","key":"11368_CR189","doi-asserted-by":"publisher","first-page":"433","DOI":"10.1109\/34.765655","volume":"21","author":"AE Johnson","year":"1999","unstructured":"Johnson AE, Hebert M (1999) Using spin images for efficient object recognition in cluttered 3d scenes. IEEE Trans Pattern Anal Mach Intell 21(5):433\u2013449","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11368_CR190","doi-asserted-by":"crossref","unstructured":"Joshi N, Szeliski R, Kriegman DJ (2008) Psf estimation using sharp edge prediction. 
In: 2008 IEEE Conference on Computer Vision and Pattern Recognition, pp. 1\u20138. IEEE","DOI":"10.1109\/CVPR.2008.4587834"},{"issue":"2","key":"11368_CR191","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1109\/TRPMS.2018.2860788","volume":"3","author":"VS Kadimesetty","year":"2018","unstructured":"Kadimesetty VS, Gutta S, Ganapathy S, Yalavarthy PK (2018) Convolutional neural network-based robust denoising of low-dose computed tomography perfusion maps. IEEE Trans Radiat Plasma Med Sci 3(2):137\u2013152","journal-title":"IEEE Trans Radiat Plasma Med Sci"},{"key":"11368_CR192","doi-asserted-by":"crossref","unstructured":"Kang K, Park S, Park H, Kang D, Paik J (2023) Action recognition using multi-stream 2d cnn with deep learning-based temporal modality. In: 2023 IEEE International Conference on Consumer Electronics (ICCE), pp. 1\u20133. IEEE","DOI":"10.1109\/ICCE56470.2023.10043568"},{"issue":"2","key":"11368_CR193","doi-asserted-by":"publisher","first-page":"109","DOI":"10.1109\/TCI.2016.2532323","volume":"2","author":"A Kappeler","year":"2016","unstructured":"Kappeler A, Yoo S, Dai Q, Katsaggelos AK (2016) Video super-resolution with convolutional neural networks. IEEE Trans Comput Imaging 2(2):109\u2013122","journal-title":"IEEE Trans Comput Imaging"},{"key":"11368_CR194","doi-asserted-by":"crossref","unstructured":"Karpathy A, Fei-Fei L (2015) Deep visual-semantic alignments for generating image descriptions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3128\u20133137 (2015)","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"11368_CR195","unstructured":"Karras T (2017) Progressive growing of gans for improved quality, stability, and variation. arXiv preprint arXiv:1710.10196"},{"key":"11368_CR196","doi-asserted-by":"crossref","unstructured":"Kassani SH, Kassani PH, Khazaeinezhad R, Wesolowski MJ, Schneider KA, Deters R (2019) Diabetic retinopathy classification using a modified xception architecture. 
In: 2019 IEEE International Symposium on Signal Processing and Information Technology (ISSPIT), pp. 1\u20136. IEEE","DOI":"10.1109\/ISSPIT47144.2019.9001790"},{"key":"11368_CR197","unstructured":"Kazhdan M, Bolitho M, Hoppe H (20060 Poisson surface reconstruction. In: Proceedings of the Fourth Eurographics Symposium on Geometry Processing, vol. 7"},{"key":"11368_CR198","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1007\/978-1-4842-2766-4_7","volume-title":"Deep learning with python: a hands-on introduction","author":"N Ketkar","year":"2017","unstructured":"Ketkar N, Ketkar N (2017) Introduction to keras. Deep learning with python: a hands-on introduction. Apress, Berkeley, pp 97\u2013111"},{"issue":"5","key":"11368_CR199","doi-asserted-by":"publisher","first-page":"7757","DOI":"10.1007\/s11042-022-13569-6","volume":"82","author":"A Khmag","year":"2023","unstructured":"Khmag A (2023) Additive gaussian noise removal based on generative adversarial network model and semi-soft thresholding approach. Multimed Tools Appl 82(5):7757\u20137777","journal-title":"Multimed Tools Appl"},{"key":"11368_CR200","doi-asserted-by":"crossref","unstructured":"Kim K, Lee S, Cho S (2022) Mssnet: Multi-scale-stage network for single image deblurring. In: European Conference on Computer Vision, pp. 524\u2013539. Springer","DOI":"10.1007\/978-3-031-25063-7_32"},{"key":"11368_CR201","doi-asserted-by":"crossref","unstructured":"Kim D, Woo S, Lee J-Y, Kweon IS (2019) Deep video inpainting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5792\u20135801","DOI":"10.1109\/CVPR.2019.00594"},{"issue":"4","key":"11368_CR202","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3197517.3201283","volume":"37","author":"H Kim","year":"2018","unstructured":"Kim H, Garrido P, Tewari A, Xu W, Thies J, Niessner M, P\u00e9rez P, Richardt C, Zollh\u00f6fer M, Theobalt C (2018) Deep video portraits. 
ACM Trans Graph (TOG) 37(4):1\u201314","journal-title":"ACM Trans Graph (TOG)"},{"key":"11368_CR204","doi-asserted-by":"crossref","unstructured":"Kim J, Lee JK, Lee KM (2016a) Deeply-recursive convolutional network for image super-resolution. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1637\u20131645","DOI":"10.1109\/CVPR.2016.181"},{"key":"11368_CR203","doi-asserted-by":"crossref","unstructured":"Kim J, Lee JK, Lee KM (2016b) Accurate image super-resolution using very deep convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1646\u20131654","DOI":"10.1109\/CVPR.2016.182"},{"key":"11368_CR205","doi-asserted-by":"crossref","unstructured":"Kim K, Lee S, Cho S (2022) Mssnet: Multi-scale-stage network for single image deblurring. In: European Conference on Computer Vision, pp. 524\u2013539. Springer","DOI":"10.1007\/978-3-031-25063-7_32"},{"key":"11368_CR206","unstructured":"Kipf TN, Welling M (2016) Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907"},{"key":"11368_CR207","unstructured":"Kipf TN, Welling M (2016) Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907"},{"key":"11368_CR208","doi-asserted-by":"crossref","unstructured":"Kirillov A, Girshick R, He K, Doll\u00e1r P (2019) Panoptic feature pyramid networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6399\u20136408","DOI":"10.1109\/CVPR.2019.00656"},{"key":"11368_CR209","doi-asserted-by":"crossref","unstructured":"Kirillov A, He K, Girshick R, Rother C, Doll\u00e1r P (2019) Panoptic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
9404\u20139413","DOI":"10.1109\/CVPR.2019.00963"},{"key":"11368_CR210","doi-asserted-by":"crossref","unstructured":"Kitaev N, Klein D (2018) Constituency parsing with a self-attentive encoder. arXiv preprint arXiv:1805.01052","DOI":"10.18653\/v1\/P18-1249"},{"key":"11368_CR211","doi-asserted-by":"crossref","unstructured":"Kobashi S, Kuramoto K, Hata Y (2013) Interactive fuzzy connectedness image segmentation for neonatal brain mr image segmentation. In: 2013 IEEE International Conference on Systems, Man, and Cybernetics, pp. 1799\u20131804. IEEE","DOI":"10.1109\/SMC.2013.311"},{"key":"11368_CR212","doi-asserted-by":"crossref","unstructured":"Kong L, Dong J, Ge J, Li M, Pan J (2023) Efficient frequency domain-based transformers for high-quality image deblurring. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5886\u20135895","DOI":"10.1109\/CVPR52729.2023.00570"},{"key":"11368_CR213","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. In: Advances in neural information processing systems 25"},{"key":"11368_CR215","doi-asserted-by":"crossref","unstructured":"Kruse J, Rother C, Schmidt U (2017) Learning to push the limits of efficient fft-based image deconvolution. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4586\u20134594","DOI":"10.1109\/ICCV.2017.491"},{"key":"11368_CR216","doi-asserted-by":"crossref","unstructured":"Kupyn O, Budzan V, Mykhailych M, Mishkin D, Matas J (2018) Deblurgan: Blind motion deblurring using conditional adversarial networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8183\u20138192","DOI":"10.1109\/CVPR.2018.00854"},{"key":"11368_CR217","doi-asserted-by":"crossref","unstructured":"Kupyn O, Martyniuk T, Wu J, Wang Z (2019) Deblurgan-v2: Deblurring (orders-of-magnitude) faster and better. 
In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8878\u20138887","DOI":"10.1109\/ICCV.2019.00897"},{"key":"11368_CR218","doi-asserted-by":"crossref","unstructured":"Lai W-S, Huang J-B, Ahuja N, Yang M-H (2017) Deep laplacian pyramid networks for fast and accurate super-resolution. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 624\u2013632","DOI":"10.1109\/CVPR.2017.618"},{"issue":"11","key":"11368_CR219","doi-asserted-by":"publisher","first-page":"2599","DOI":"10.1109\/TPAMI.2018.2865304","volume":"41","author":"W-S Lai","year":"2018","unstructured":"Lai W-S, Huang J-B, Ahuja N, Yang M-H (2018) Fast and accurate image super-resolution with deep laplacian pyramid networks. IEEE Trans Pattern Anal Mach Intell 41(11):2599\u20132613","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11368_CR220","doi-asserted-by":"publisher","first-page":"2999","DOI":"10.1109\/TNNLS.2023.3340561","volume":"36","author":"Z Lai","year":"2024","unstructured":"Lai Z, Fu Y, Zhang J (2024) Hyperspectral image super resolution with real unaligned rgb guidance. IEEE Trans Neural Netw Learn Syst 36:2999\u20133011","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"1","key":"11368_CR221","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1364\/JOSA.61.000001","volume":"61","author":"EH Land","year":"1971","unstructured":"Land EH, McCann JJ (1971) Lightness and retinex theory. J Opt Soc Am 61(1):1\u201311","journal-title":"J Opt Soc Am"},{"key":"11368_CR222","doi-asserted-by":"crossref","unstructured":"Lan X, Roth S, Huttenlocher D, Black MJ (2006) Efficient belief propagation with learned higher-order markov random fields. In: Computer Vision\u2013ECCV 2006: 9th European Conference on Computer Vision, Graz, Austria, May 7\u201313, 2006. Proceedings, Part II 9, pp. 269\u2013282. 
Springer","DOI":"10.1007\/11744047_21"},{"key":"11368_CR223","doi-asserted-by":"crossref","unstructured":"Law H, Deng J (2018) Cornernet: Detecting objects as paired keypoints In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 734\u2013750","DOI":"10.1007\/978-3-030-01264-9_45"},{"issue":"1","key":"11368_CR224","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1109\/72.554195","volume":"8","author":"S Lawrence","year":"1997","unstructured":"Lawrence S, Giles CL, Tsoi AC, Back AD (1997) Face recognition: A convolutional neural-network approach. IEEE Trans Neural Netw 8(1):98\u2013113","journal-title":"IEEE Trans Neural Netw"},{"key":"11368_CR225","doi-asserted-by":"publisher","first-page":"1","DOI":"10.5201\/ipol.2015.125","volume":"5","author":"M Lebrun","year":"2015","unstructured":"Lebrun M, Colom M, Morel J-M (2015) The noise clinic: a blind image denoising algorithm. Image Process On Line 5:1\u201354","journal-title":"Image Process On Line"},{"key":"11368_CR226","unstructured":"LeCun Y, Touresky D, Hinton G, Sejnowski T (1988) A theoretical framework for back-propagation. In: Proceedings of the 1988 Connectionist Models Summer School, vol. 1, pp. 21\u201328"},{"issue":"4","key":"11368_CR227","doi-asserted-by":"publisher","first-page":"541","DOI":"10.1162\/neco.1989.1.4.541","volume":"1","author":"Y LeCun","year":"1989","unstructured":"LeCun Y, Boser B, Denker JS, Henderson D, Howard RE, Hubbard W, Jackel LD (1989) Backpropagation applied to handwritten zip code recognition. Neural Comput 1(4):541\u2013551","journal-title":"Neural Comput"},{"issue":"11","key":"11368_CR228","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun Y, Bottou L, Bengio Y, Haffner P (1998) Gradient-based learning applied to document recognition. 
Proc IEEE 86(11):2278\u20132324","journal-title":"Proc IEEE"},{"key":"11368_CR229","doi-asserted-by":"crossref","unstructured":"Ledig C, Theis L, Husz\u00e1r F, Caballero J, Cunningham A, Acosta A, Aitken A, Tejani A, Totz J, Wang Z, et al (2017) Photo-realistic single image super-resolution using a generative adversarial network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4681\u20134690","DOI":"10.1109\/CVPR.2017.19"},{"key":"11368_CR230","doi-asserted-by":"crossref","unstructured":"Ledig C, Theis L, Husz\u00e1r F, Caballero J, Cunningham A, Acosta A, Aitken A, Tejani A, Totz J, Wang Z, et al (2017) Photo-realistic single image super-resolution using a generative adversarial network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4681\u20134690","DOI":"10.1109\/CVPR.2017.19"},{"key":"11368_CR231","unstructured":"Lee C-Y, Gallagher PW, Tu Z (2016) Generalizing pooling functions in convolutional neural networks: Mixed, gated, and tree. In: Artificial Intelligence and Statistics, pp. 464\u2013472 . PMLR"},{"issue":"8","key":"11368_CR232","doi-asserted-by":"publisher","first-page":"1090","DOI":"10.1109\/5.533956","volume":"84","author":"D Lee","year":"1996","unstructured":"Lee D, Yannakakis M (1996) Principles and methods of testing finite state machines-a survey. Proc IEEE 84(8):1090\u20131123","journal-title":"Proc IEEE"},{"key":"11368_CR233","first-page":"23580","volume":"35","author":"S Lee","year":"2022","unstructured":"Lee S, Jeon M, Kim I, Xiong Y, Kim HJ (2022) Sagemix: Saliency-guided mixup for point clouds. Adv Neural Inf Process Syst 35:23580\u201323592","journal-title":"Adv Neural Inf Process Syst"},{"key":"11368_CR234","unstructured":"Lee J, Lee I, Kang J (2019) Self-attention graph pooling. In: International Conference on Machine Learning, pp. 3734\u20133743. 
pmlr"},{"key":"11368_CR235","unstructured":"Lehtinen J, Munkberg J, Hasselgren J, Laine S, Karras T, Aittala M, Aila T (2018) Noise2noise: Learning image restoration without clean data. arXiv preprint arXiv:1803.04189"},{"key":"11368_CR236","doi-asserted-by":"crossref","unstructured":"Li S, Deng W, Du J (2017) Reliable crowdsourcing and deep locality-preserving learning for expression recognition in the wild. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2852\u20132861","DOI":"10.1109\/CVPR.2017.277"},{"key":"11368_CR237","doi-asserted-by":"crossref","unstructured":"Li X, Jie Z, Wang W, Liu C, Yang J, Shen X, Lin Z, Chen Q, Yan S, Feng J (2017) Foveanet: Perspective-aware urban scene parsing. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 784\u2013792","DOI":"10.1109\/ICCV.2017.91"},{"key":"11368_CR238","doi-asserted-by":"crossref","unstructured":"Li J, Monroe W, Ritter A, Galley M, Gao J, Jurafsky D (2016) Deep reinforcement learning for dialogue generation. arXiv preprint arXiv:1606.01541","DOI":"10.18653\/v1\/D16-1127"},{"key":"11368_CR239","doi-asserted-by":"crossref","unstructured":"Li G, Muller M, Thabet A, Ghanem B (2019) Deepgcns: Can gcns go as deep as cnns? In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9267\u20139276","DOI":"10.1109\/ICCV.2019.00936"},{"key":"11368_CR240","volume":"1813","author":"S Li","year":"2021","unstructured":"Li S, Wang L, Li J, Yao Y (2021) Image classification algorithm based on improved alexnet. J Phys: Conf Ser 1813:012051","journal-title":"J Phys: Conf Ser"},{"key":"11368_CR241","unstructured":"Li H, Xiong P, An J, Wang L (2018) Pyramid attention network for semantic segmentation. arXiv preprint arXiv:1805.10180"},{"key":"11368_CR242","doi-asserted-by":"crossref","unstructured":"Li X, Zhong Z, Wu J, Yang Y, Lin Z, Liu H (2019) Expectation-maximization attention networks for semantic segmentation. 
In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9167\u20139176","DOI":"10.1109\/ICCV.2019.00926"},{"key":"11368_CR243","unstructured":"Li, J, Raventos A, Bhargava A, Tagawa T, Gaidon A (2018) Learning to fuse things and stuff. arXiv preprint arXiv:1812.01192"},{"key":"11368_CR244","doi-asserted-by":"publisher","first-page":"4353","DOI":"10.1007\/s00521-018-3354-z","volume":"31","author":"F Li","year":"2019","unstructured":"Li F, Ye Y, Tian Z, Zhang X (2019) Cpu versus gpu: which can perform matrix computation faster\u2013performance comparison for basic linear algebra subprograms. Neural Comput Appl 31:4353\u20134365","journal-title":"Neural Comput Appl"},{"issue":"1","key":"11368_CR245","doi-asserted-by":"publisher","first-page":"136","DOI":"10.3390\/s21010136","volume":"21","author":"F Li","year":"2020","unstructured":"Li F, Jin W, Fan C, Zou L, Chen Q, Li X, Jiang H, Liu Y (2020) Psanet: Pyramid splitting and aggregation network for 3d object detection in point cloud. Sensors 21(1):136","journal-title":"Sensors"},{"issue":"12","key":"11368_CR246","doi-asserted-by":"publisher","first-page":"6999","DOI":"10.1109\/TNNLS.2021.3084827","volume":"33","author":"Z Li","year":"2021","unstructured":"Li Z, Liu F, Yang W, Peng S, Zhou J (2021) A survey of convolutional neural networks: analysis, applications, and prospects. IEEE Trans Neural Netw Learn Syst 33(12):6999\u20137019","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"18","key":"11368_CR247","doi-asserted-by":"publisher","first-page":"9016","DOI":"10.3390\/app12189016","volume":"12","author":"Z Li","year":"2022","unstructured":"Li Z, Gu T, Li B, Xu W, He X, Hui X (2022) Convnext-based fine-grained image classification and bilinear attention mechanism model. 
Appl Sci 12(18):9016","journal-title":"Appl Sci"},{"issue":"5","key":"11368_CR248","doi-asserted-by":"publisher","first-page":"304","DOI":"10.3390\/drones7050304","volume":"7","author":"Y Li","year":"2023","unstructured":"Li Y, Fan Q, Huang H, Han Z, Gu Q (2023) A modified yolov8 detection network for uav aerial image recognition. Drones 7(5):304","journal-title":"Drones"},{"key":"11368_CR249","doi-asserted-by":"publisher","first-page":"7331","DOI":"10.1109\/TNNLS.2024.3386809","volume":"36","author":"M Li","year":"2024","unstructured":"Li M, Fu Y, Zhang T, Wen G (2024) Supervise-assisted self-supervised deep-learning method for hyperspectral image restoration. IEEE Trans Neural Netw Learn Syst 36:7331\u20137344","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"11368_CR250","doi-asserted-by":"crossref","unstructured":"Liang J, Cao J, Sun G, Zhang K, Van\u00a0Gool L, Timofte R (2021) Swinir: Image restoration using swin transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1833\u20131844","DOI":"10.1109\/ICCVW54120.2021.00210"},{"key":"11368_CR251","unstructured":"Liang D, Feng T, Zhou X, Zhang Y, Zou Z, Bai X (2024) Parameter-efficient fine-tuning in spectral domain for point cloud learning. arXiv preprint arXiv:2410.08114"},{"issue":"6","key":"11368_CR252","doi-asserted-by":"publisher","first-page":"2277","DOI":"10.1007\/s00371-022-02413-5","volume":"39","author":"X Liang","year":"2023","unstructured":"Liang X, Xu L, Zhang W, Zhang Y, Liu J, Liu Z (2023) A convolution-transformer dual branch network for head-pose and occlusion facial expression recognition. Vis Comput 39(6):2277\u20132290","journal-title":"Vis Comput"},{"key":"11368_CR253","doi-asserted-by":"publisher","first-page":"4652","DOI":"10.1109\/TNNLS.2024.3359810","volume":"36","author":"P Liang","year":"2024","unstructured":"Liang P, Jiang J, Liu X, Ma J (2024) Image deblurring by exploring in-depth properties of transformer. 
IEEE Trans Neural Netw Learn Syst 36:4652\u20134663","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"11368_CR254","doi-asserted-by":"crossref","unstructured":"Liang J, Cao J, Sun G, Zhang K, Van\u00a0Gool L, Timofte R (2021) Swinir: Image restoration using swin transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1833\u20131844","DOI":"10.1109\/ICCVW54120.2021.00210"},{"key":"11368_CR255","doi-asserted-by":"crossref","unstructured":"Li Y, Chen X, Zhu Z, Xie L, Huang G, Du D, Wang X (2019) Attention-guided unified network for panoptic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7026\u20137035","DOI":"10.1109\/CVPR.2019.00719"},{"key":"11368_CR256","doi-asserted-by":"crossref","unstructured":"Li S, Li W, Cook C, Zhu C, Gao Y (2018) Independently recurrent neural network (indrnn): Building a longer and deeper rnn. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5457\u20135466","DOI":"10.1109\/CVPR.2018.00572"},{"key":"11368_CR257","unstructured":"Li C, Li L, Jiang H, Weng K, Geng Y, Li L, Ke Z, Li Q, Cheng M, Nie W (2022) Yolov6: A single-stage object detection framework for industrial applications arXiv preprint arXiv:2209.02976"},{"key":"11368_CR258","doi-asserted-by":"crossref","unstructured":"Lim B, Son S, Kim H, Nah S, Mu\u00a0Lee K (2017) Enhanced deep residual networks for single image super-resolution. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 136\u2013144","DOI":"10.1109\/CVPRW.2017.151"},{"key":"11368_CR259","unstructured":"Lin M (2013) Network in network. arXiv preprint arXiv:1312.4400"},{"key":"11368_CR260","doi-asserted-by":"crossref","unstructured":"Lin T (2017) Focal loss for dense object detection. 
arXiv preprint arXiv:1708.02002","DOI":"10.1109\/ICCV.2017.324"},{"key":"11368_CR261","unstructured":"Lin B, Ge Y, Cheng X, Li Z, Zhu B, Wang S, He X, Ye Y, Yuan S, Chen L, et al (2024) Open-sora plan: Open-source large video generation model. arXiv preprint arXiv:2412.00131"},{"key":"11368_CR262","doi-asserted-by":"crossref","unstructured":"Lin K, Li TH, Liu S, Li G (2019) Real photographs denoising with noise domain adaptation and attentive generative adversarial network. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops","DOI":"10.1109\/CVPRW.2019.00221"},{"key":"11368_CR263","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: Common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6\u201312, 2014, Proceedings, Part V 13, pp. 740\u2013755. Springer","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"11368_CR264","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: Common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6\u201312, 2014, Proceedings, Part V 13, pp. 740\u2013755. Springer","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"11368_CR265","doi-asserted-by":"crossref","unstructured":"Lin G, Milan A, Shen C, Reid I (2017) Refinenet: Multi-path refinement networks for high-resolution semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 
1925\u20131934","DOI":"10.1109\/CVPR.2017.549"},{"issue":"2","key":"11368_CR266","doi-asserted-by":"publisher","first-page":"951","DOI":"10.1109\/TITS.2019.2961679","volume":"22","author":"C-T Lin","year":"2020","unstructured":"Lin C-T, Huang S-W, Wu Y-Y, Lai S-H (2020) Gan-based day-to-night image style transfer for nighttime vehicle detection. IEEE Trans Intell Transp Syst 22(2):951\u2013963","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"11368_CR267","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1016\/j.aiopen.2022.10.001","volume":"3","author":"T Lin","year":"2022","unstructured":"Lin T, Wang Y, Liu X, Qiu X (2022) A survey of transformers. AI Open 3:111\u2013132","journal-title":"AI Open"},{"key":"11368_CR268","unstructured":"Lin M, Chen Q, Yan S (2013) Network in network. arXiv preprint arXiv:1312.4400"},{"key":"11368_CR269","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1016\/j.media.2017.07.005","volume":"42","author":"G Litjens","year":"2017","unstructured":"Litjens G, Kooi T, Bejnordi BE, Setio AAA, Ciompi F, Ghafoorian M, Van Der Laak JA, Van Ginneken B, S\u00e1nchez CI (2017) A survey on deep learning in medical image analysis. Med Image Anal 42:60\u201388","journal-title":"Med Image Anal"},{"key":"11368_CR270","doi-asserted-by":"crossref","unstructured":"Li Y, Tofighi M, Geng J, Monga V, Eldar YC (2019) Deep algorithm unrolling for blind image deblurring. arXiv preprint arXiv:1902.03493","DOI":"10.1109\/ICASSP.2019.8682542"},{"key":"11368_CR271","doi-asserted-by":"crossref","unstructured":"Liu Y, Fan B, Xiang S, Pan C (2019) Relation-shape convolutional neural network for point cloud analysis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
8895\u20138904","DOI":"10.1109\/CVPR.2019.00910"},{"key":"11368_CR272","doi-asserted-by":"crossref","unstructured":"Liu K, Gao Z, Lin F, Chen BM (2020) Fg-net: Fast large-scale lidar point clouds understanding network leveraging correlated feature mining and geometric-aware modelling. arXiv preprint arXiv:2012.09439","DOI":"10.1109\/ICRA48506.2021.9561496"},{"key":"11368_CR273","doi-asserted-by":"crossref","unstructured":"Liu Z, Ning J, Cao Y, Wei Y, Zhang Z, Lin S, Hu H (2022) Video swin transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3202\u20133211","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"11368_CR274","unstructured":"Liu Y, Ott M, Goyal N, Du J, Joshi M, Chen D, Levy O, Lewis, M, Zettlemoyer L, Stoyanov V (2019) Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692"},{"key":"11368_CR275","unstructured":"Liu D, Wen B, Fan Y, Loy CC, Huang TS (2017) Non-local recurrent network for image restoration. In: Advances in neural information processing systems 31 (2018)"},{"key":"11368_CR276","doi-asserted-by":"crossref","unstructured":"Liu L, Yang L, Chen Y, Zhang X, Hu L, Deng F (2019) Facial expression recognition based on ssvm algorithm and multi-source texture feature fusion using keca. In: Recent Developments in Intelligent Computing, Communication and Devices: Proceedings of ICCD 2017, pp. 659\u2013666. Springer","DOI":"10.1007\/978-981-10-8944-2_76"},{"key":"11368_CR277","doi-asserted-by":"crossref","unstructured":"Liu C, Yuen J, Torralba A (2009) Nonparametric scene parsing: Label transfer via dense scene alignment. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 1972\u20131979. IEEE","DOI":"10.1109\/CVPR.2009.5206536"},{"key":"11368_CR278","doi-asserted-by":"crossref","unstructured":"Liu P, Zhang H, Zhang K, Lin L, Zuo W (2018) Multi-level wavelet-cnn for image restoration. 
In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 773\u2013782","DOI":"10.1109\/CVPRW.2018.00121"},{"key":"11368_CR279","doi-asserted-by":"crossref","unstructured":"Liu P, Zhang H, Zhang K, Lin L, Zuo W (2018) Multi-level wavelet-cnn for image restoration. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 773\u2013782","DOI":"10.1109\/CVPRW.2018.00121"},{"key":"11368_CR280","doi-asserted-by":"crossref","unstructured":"Liu J, Sun Y, Xu X, Kamilov US (2019) Image restoration using total variation regularized deep image prior. In: ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7715\u20137719. IEEE","DOI":"10.1109\/ICASSP.2019.8682856"},{"key":"11368_CR281","doi-asserted-by":"crossref","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu C-Y, Berg AC (2016) Ssd: Single shot multibox detector. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part I 14, pp. 21\u201337 . Springer","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"11368_CR282","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo, B (2021) Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"11368_CR283","doi-asserted-by":"crossref","unstructured":"Liu S, Qi L, Qin H, Shi J, Jia J (2018) Path aggregation network for instance segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8759\u20138768","DOI":"10.1109\/CVPR.2018.00913"},{"key":"11368_CR284","doi-asserted-by":"crossref","unstructured":"Liu H, Wan Z, Huang W, Song Y, Han X, Liao J (2021) Pd-gan: Probabilistic diverse gan for image inpainting. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9371\u20139381","DOI":"10.1109\/CVPR46437.2021.00925"},{"key":"11368_CR285","doi-asserted-by":"crossref","unstructured":"Liu W, Wen Y, Yu Z, Li M, Raj B, Song L (2017) Sphereface: Deep hypersphere embedding for face recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 212\u2013220","DOI":"10.1109\/CVPR.2017.713"},{"key":"11368_CR286","unstructured":"Liu W, Wen Y, Yu Z, Yang M (2016) Large-margin softmax loss for convolutional neural networks. arXiv preprint arXiv:1612.02295"},{"key":"11368_CR287","doi-asserted-by":"crossref","unstructured":"Liu J, Zhang W, Tang Y, Tang J, Wu G (2020) Residual feature aggregation network for image super-resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2359\u20132368","DOI":"10.1109\/CVPR42600.2020.00243"},{"key":"11368_CR288","unstructured":"Li K, Wang Y, Gao P, Song G, Liu Y, Li H, Qiao Y (2022) Uniformer: Unified transformer for efficient spatiotemporal representation learning. arXiv preprint arXiv:2201.04676"},{"key":"11368_CR289","doi-asserted-by":"crossref","unstructured":"Li Z, Wang W, Xie E, Yu Z, Anandkuma, A, Alvarez JM, Luo P, Lu T (2022) Panoptic segformer: Delving deeper into panoptic segmentation with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1280\u20131289","DOI":"10.1109\/CVPR52688.2022.00134"},{"key":"11368_CR290","doi-asserted-by":"crossref","unstructured":"Li Z, Yang J, Liu Z, Yang X, Jeon G, Wu W (2019) Feedback network for image super-resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3867\u20133876","DOI":"10.1109\/CVPR.2019.00399"},{"key":"11368_CR291","doi-asserted-by":"crossref","unstructured":"Long J, Shelhamer E, Darrell T (2015) Fully convolutional networks for semantic segmentation. 
In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3431\u20133440","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"11368_CR292","doi-asserted-by":"crossref","unstructured":"Long J, Shelhamer E, Darrell T (2015) Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3431\u20133440","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"11368_CR293","doi-asserted-by":"crossref","unstructured":"Long J, Shelhamer E, Darrell T (2015) Fully convolutional networks for semantic segmentation In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3431\u20133440","DOI":"10.1109\/CVPR.2015.7298965"},{"issue":"5828","key":"11368_CR294","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1038\/293133a0","volume":"293","author":"HC Longuet-Higgins","year":"1981","unstructured":"Longuet-Higgins HC (1981) A computer algorithm for reconstructing a scene from two projections. Nature 293(5828):133\u2013135","journal-title":"Nature"},{"key":"11368_CR295","doi-asserted-by":"crossref","unstructured":"Lu H, Fei N, Huo Y, Gao Y, Lu Z, Wen J-R (2022) Cots: Collaborative two-stream vision-language pre-training model for cross-modal retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15692\u201315701","DOI":"10.1109\/CVPR52688.2022.01524"},{"key":"11368_CR296","doi-asserted-by":"crossref","unstructured":"Lu Z, Li J, Liu H, Huang C, Zhang L, Zeng T (2022) Transformer for single image super-resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
457\u2013466","DOI":"10.1109\/CVPRW56347.2022.00061"},{"issue":"3","key":"11368_CR297","doi-asserted-by":"publisher","first-page":"3938","DOI":"10.1109\/TNNLS.2022.3201448","volume":"35","author":"T Lu","year":"2022","unstructured":"Lu T, Wang Y, Zhang Y, Jiang J, Wang Z, Xiong Z (2022) Rethinking prior-guided face super-resolution: A new paradigm with facial component prior. IEEE Trans Neural Netw Learn Syst 35(3):3938\u20133952","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"11368_CR298","doi-asserted-by":"crossref","unstructured":"Luccioni AS, Viviano JD (2021) What\u2019s in the box? a preliminary analysis of undesirable content in the common crawl corpus. arXiv preprint arXiv:2105.02732","DOI":"10.18653\/v1\/2021.acl-short.24"},{"key":"11368_CR299","doi-asserted-by":"crossref","unstructured":"Luc P, Neverova N, Couprie C, Verbeek J, LeCun Y (2017) Predicting deeper into the future of semantic segmentation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 648\u2013657","DOI":"10.1109\/ICCV.2017.77"},{"key":"11368_CR300","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2020.106478","volume":"95","author":"Q Lyu","year":"2020","unstructured":"Lyu Q, Guo M, Pei Z (2020) Degan: Mixed noise removal via generative adversarial networks. Appl Soft Comput 95:106478","journal-title":"Appl Soft Comput"},{"key":"11368_CR301","doi-asserted-by":"publisher","first-page":"3157","DOI":"10.1109\/TMM.2021.3094058","volume":"24","author":"J Ma","year":"2021","unstructured":"Ma J, Peng C, Tian X, Jiang J (2021) Dbdnet: A deep boosting strategy for image denoising. 
IEEE Trans Multimed 24:3157\u20133168","journal-title":"IEEE Trans Multimed"},{"key":"11368_CR302","doi-asserted-by":"publisher","first-page":"4039","DOI":"10.1109\/TII.2023.3313635","volume":"20","author":"J Ma","year":"2023","unstructured":"Ma J, Xiong G, Xu J, Chen X (2023) Cvtnet: A cross-view transformer network for lidar-based place recognition in autonomous driving environments. IEEE Trans Ind Inform 20:4039\u20134048","journal-title":"IEEE Trans Ind Inform"},{"issue":"1","key":"11368_CR303","doi-asserted-by":"publisher","first-page":"654","DOI":"10.1038\/s41467-024-44824-z","volume":"15","author":"J Ma","year":"2024","unstructured":"Ma J, He Y, Li F, Han L, You C, Wang B (2024) Segment anything in medical images. Nat Commun 15(1):654","journal-title":"Nat Commun"},{"key":"11368_CR304","unstructured":"Mansimov E, Parisotto E, Ba JL, Salakhutdinov R (2015) Generating images from captions with attention. arXiv preprint arXiv:1511.02793"},{"key":"11368_CR305","doi-asserted-by":"crossref","unstructured":"Mansour Y, Heckel R (2023) Zero-shot noise2noise: Efficient image denoising without any data. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14018\u201314027","DOI":"10.1109\/CVPR52729.2023.01347"},{"key":"11368_CR306","unstructured":"Mao X, Shen C, Yang Y-B (2016) Image restoration using very deep convolutional encoder-decoder networks with symmetric skip connections. In: Advances in neural information processing systems 29"},{"key":"11368_CR307","doi-asserted-by":"crossref","unstructured":"Martin D, Fowlkes C, Tal D, Malik J (2001) A database of human segmented natural images and its application to evaluating segmentation algorithms and measuring ecological statistics. In: Proceedings Eighth IEEE International Conference on Computer Vision. ICCV 2001, vol. 2, pp. 416\u2013423. 
IEEE","DOI":"10.1109\/ICCV.2001.937655"},{"issue":"1","key":"11368_CR308","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3390\/sym11010001","volume":"11","author":"M Mateen","year":"2018","unstructured":"Mateen M, Wen J, Nasrullah, Song S, Huang Z (2018) Fundus image classification using vgg-19 architecture with pca and svd. Symmetry 11(1):1","journal-title":"Symmetry"},{"key":"11368_CR309","doi-asserted-by":"publisher","first-page":"21811","DOI":"10.1007\/s11042-016-4020-z","volume":"76","author":"Y Matsui","year":"2017","unstructured":"Matsui Y, Ito K, Aramaki Y, Fujimoto A, Ogawa T, Yamasaki T, Aizawa K (2017) Sketch-based manga retrieval using manga109 dataset. Multimed Tools Appl 76:21811\u201321838","journal-title":"Multimed Tools Appl"},{"issue":"12","key":"11368_CR310","doi-asserted-by":"publisher","first-page":"3207","DOI":"10.1007\/s11263-023-01843-5","volume":"131","author":"Y Mei","year":"2023","unstructured":"Mei Y, Fan Y, Zhang Y, Yu J, Zhou Y, Liu D, Fu Y, Huang TS, Shi H (2023) Pyramid attention network for image restoration. Int J Comput Vis 131(12):3207\u20133225","journal-title":"Int J Comput Vis"},{"key":"11368_CR311","doi-asserted-by":"crossref","unstructured":"Mescheder L, Oechsle M, Niemeyer M, Nowozin S, Geiger A (2019) Occupancy networks: Learning 3d reconstruction in function space. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4460\u20134470","DOI":"10.1109\/CVPR.2019.00459"},{"key":"11368_CR312","unstructured":"Mikolov T, Chen K, Corrado G, Dean J (2013) Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781"},{"issue":"1","key":"11368_CR313","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall B, Srinivasan PP, Tancik M, Barron JT, Ramamoorthi R, Ng R (2021) Nerf: Representing scenes as neural radiance fields for view synthesis. 
Commun ACM 65(1):99\u2013106","journal-title":"Commun ACM"},{"key":"11368_CR314","unstructured":"Milton MAA (2019) Automated skin lesion classification using ensemble of deep neural networks in isic 2018: Skin lesion analysis towards melanoma detection challenge. arXiv preprint arXiv:1901.10802"},{"issue":"9","key":"11368_CR315","doi-asserted-by":"publisher","first-page":"3046","DOI":"10.3390\/s21093046","volume":"21","author":"S Minaee","year":"2021","unstructured":"Minaee S, Minaei M, Abdolrashidi A (2021) Deep-emotion: Facial expression recognition using attentional convolutional network. Sensors 21(9):3046","journal-title":"Sensors"},{"key":"11368_CR316","doi-asserted-by":"crossref","unstructured":"Miranda E, Aryuni M, Irwansyah E (2016) A survey of medical image classification techniques. In: 2016 International Conference on Information Management and Technology (ICIMTech), pp. 56\u201361. IEEE","DOI":"10.1109\/ICIMTech.2016.7930302"},{"key":"11368_CR317","unstructured":"Mnih V, Heess N, Graves A (2014) Recurrent models of visual attention. In: Advances in neural information processing systems 27"},{"key":"11368_CR318","doi-asserted-by":"crossref","unstructured":"Mohammadshahi A, Henderson J (2019) Graph-to-graph transformer for transition-based dependency parsing. arXiv preprint arXiv:1911.03561","DOI":"10.18653\/v1\/2020.findings-emnlp.294"},{"issue":"5","key":"11368_CR319","doi-asserted-by":"publisher","first-page":"1551","DOI":"10.1007\/s11263-021-01445-z","volume":"129","author":"R Mohan","year":"2021","unstructured":"Mohan R, Valada A (2021) Efficientps: Efficient panoptic segmentation. Int J Comput Vis 129(5):1551\u20131579","journal-title":"Int J Comput Vis"},{"key":"11368_CR320","doi-asserted-by":"crossref","unstructured":"Mottaghi R, Chen X, Liu X, Cho N-G, Lee S-W, Fidler S, Urtasun R, Yuille A (2014) The role of context for object detection and semantic segmentation in the wild. 
In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 891\u2013898","DOI":"10.1109\/CVPR.2014.119"},{"key":"11368_CR321","doi-asserted-by":"crossref","unstructured":"Mou C, Zhang J, Wu Z (2021) Dynamic attentive graph learning for image restoration. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4328\u20134337","DOI":"10.1109\/ICCV48922.2021.00429"},{"key":"11368_CR322","doi-asserted-by":"crossref","unstructured":"Nah S, Hyun\u00a0Kim T, Mu\u00a0Lee K (2017) Deep multi-scale convolutional neural network for dynamic scene deblurring. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3883\u20133891","DOI":"10.1109\/CVPR.2017.35"},{"key":"11368_CR323","unstructured":"Nair V, Hinton GE (2010) Rectified linear units improve restricted Boltzmann machines. In: Proceedings of the 27th International Conference on Machine Learning (ICML-10), pp. 807\u2013814"},{"key":"11368_CR324","doi-asserted-by":"crossref","unstructured":"Nam S, Hwang Y, Matsushita Y, Kim SJ (2016) A holistic approach to cross-channel image noise modeling and its application to image denoising. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1683\u20131691","DOI":"10.1109\/CVPR.2016.186"},{"key":"11368_CR325","doi-asserted-by":"crossref","unstructured":"Neuhold G, Ollmann T, Rota\u00a0Bulo S, Kontschieder P (2017) The mapillary vistas dataset for semantic understanding of street scenes. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4990\u20134999 (2017)","DOI":"10.1109\/ICCV.2017.534"},{"key":"11368_CR326","unstructured":"Nichol AQ, Dhariwal P (2021) Improved denoising diffusion probabilistic models. In: International Conference on Machine Learning, pp. 8162\u20138171. 
PMLR"},{"key":"11368_CR327","volume-title":"Neural networks and deep learning","author":"MA Nielsen","year":"2015","unstructured":"Nielsen MA (2015) Neural networks and deep learning, vol 25. Determination Press, San Francisco"},{"key":"11368_CR328","unstructured":"Oei K, Gomaa A, Feit AM, Belo J (2024) Self-supervised contrastive learning for videos using differentiable local alignment. arXiv preprint arXiv:2409.04607"},{"key":"11368_CR329","unstructured":"Oktay O, Schlemper J, Folgoc LL, Lee M, Heinrich M, Misawa K, Mori K, McDonagh S, Hammerla NY, Kainz B (2018) Attention u-net: Learning where to look for the pancreas. arXiv preprint arXiv:1804.03999"},{"key":"11368_CR330","doi-asserted-by":"crossref","unstructured":"Pan X, Shi J, Luo P, Wang X, Tang X (2018) Spatial as deep: Spatial cnn for traffic scene understanding. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32","DOI":"10.1609\/aaai.v32i1.12301"},{"key":"11368_CR331","unstructured":"Pan J, Ferrer CC, McGuinness K, O\u2019Connor NE, Torres J, Sayrol E, Giro-i-Nieto X (2017) Salgan: Visual saliency prediction with generative adversarial networks. arXiv preprint arXiv:1701.01081"},{"key":"11368_CR332","doi-asserted-by":"crossref","unstructured":"Papalampid, P, Koppula S, Pathak S, Chiu J, Heyward J, Patraucean V, Shen J, Miech A, Zisserman A, Nematzdeh A (2024) A simple recipe for contrastively pre-training video-first encoders beyond 16 frames. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14386\u201314397","DOI":"10.1109\/CVPR52733.2024.01364"},{"key":"11368_CR333","doi-asserted-by":"crossref","unstructured":"Park JJ, Florence P, Straub J, Newcombe R, Lovegrove S (2019) Deepsdf: Learning continuous signed distance functions for shape representation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
165\u2013174","DOI":"10.1109\/CVPR.2019.00025"},{"key":"11368_CR334","doi-asserted-by":"crossref","unstructured":"Parkhi O, Vedaldi A, Zisserman A (2015) Deep face recognition In: BMVC 2015-Proceedings of the British Machine Vision Conference 2015. British Machine Vision Association","DOI":"10.5244\/C.29.41"},{"key":"11368_CR335","unstructured":"Paszke A, Gross S, Chintala S, Chanan G (2017) Pytorch: Tensors and dynamic neural networks in python with strong gpu acceleration 6(3), 67"},{"issue":"4","key":"11368_CR336","doi-asserted-by":"publisher","first-page":"505","DOI":"10.1109\/TSMCB.2002.1018769","volume":"32","author":"JC Patra","year":"2002","unstructured":"Patra JC, Kot AC (2002) Nonlinear dynamic system identification using chebyshev functional link artificial neural networks. IEEE Trans Syst Man Cybern B 32(4):505\u2013511","journal-title":"IEEE Trans Syst Man Cybern B"},{"key":"11368_CR337","doi-asserted-by":"crossref","unstructured":"Pennington J, Socher R, Manning CD (2014) Glove: Global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1532\u20131543","DOI":"10.3115\/v1\/D14-1162"},{"key":"11368_CR338","doi-asserted-by":"crossref","unstructured":"Pham H, Dai Z, Xie Q, Le QV (2021) Meta pseudo labels. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11557\u201311568","DOI":"10.1109\/CVPR46437.2021.01139"},{"key":"11368_CR339","doi-asserted-by":"crossref","unstructured":"Plotz T, Roth S (2017) Benchmarking denoising algorithms with real photographs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1586\u20131595","DOI":"10.1109\/CVPR.2017.294"},{"key":"11368_CR340","unstructured":"Qi CR, Su H, Mo K, Guibas LJ (2017a) Pointnet: Deep learning on point sets for 3d classification and segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 
652\u2013660"},{"key":"11368_CR341","unstructured":"Qi CR, Yi L, Su H, Guibas LJ (2017b) Pointnet++: Deep hierarchical feature learning on point sets in a metric space. In: Advances in neural information processing systems 30"},{"key":"11368_CR342","first-page":"23192","volume":"35","author":"G Qian","year":"2022","unstructured":"Qian G, Li Y, Peng H, Mai J, Hammoud H, Elhoseiny M, Ghanem B (2022) Pointnext: Revisiting pointnet++ with improved training and scaling strategies. Adv Neural Inf Process Syst 35:23192\u201323204","journal-title":"Adv Neural Inf Process Syst"},{"key":"11368_CR343","doi-asserted-by":"crossref","unstructured":"Qing Z, Zhang S, Wang J, Wang X, Wei Y, Zhang Y, Gao C, Sang N (2024) Hierarchical spatio-temporal decoupling for text-to-video generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6635\u20136645","DOI":"10.1109\/CVPR52733.2024.00634"},{"key":"11368_CR344","doi-asserted-by":"crossref","unstructured":"Qiu Z, Yao T, Mei T (2017) Learning spatio-temporal representation with pseudo-3d residual networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5533\u20135541","DOI":"10.1109\/ICCV.2017.590"},{"key":"11368_CR345","unstructured":"Radford A, Kim JW, Hallacy C, Ramesh A, Goh G, Agarwal S, Sastry G, Askell A, Mishkin P, Clark J, et al (2021) Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PmLR"},{"issue":"8","key":"11368_CR346","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford A, Wu J, Child R, Luan D, Amodei D, Sutskever I et al (2019) Language models are unsupervised multitask learners. 
OpenAI Blog 1(8):9","journal-title":"OpenAI Blog"},{"key":"11368_CR347","unstructured":"Rajpurkar P, Irvin J, Zhu K, Yang B, Mehta H, Duan T, Ding D, Bagul A, Langlotz C, Shpanskaya K (2017) Chexnet: Radiologist-level pneumonia detection on chest x-rays with deep learning. arXiv preprint arXiv:1711.05225"},{"key":"11368_CR348","unstructured":"Ramesh A, Pavlov M, Goh G, Gray S, Voss C, Radford A, Chen M, Sutskever I (2021) Zero-shot text-to-image generation. In: International Conference on Machine Learning, pp. 8821\u20138831. Pmlr"},{"issue":"7","key":"11368_CR349","doi-asserted-by":"publisher","first-page":"1146","DOI":"10.3390\/electronics11071146","volume":"11","author":"A Raza","year":"2022","unstructured":"Raza A, Ayub H, Khan JA, Ahmad I, Salama AS, Daradkeh YI, Javeed D, Ur Rehman A, Hamam H (2022) A hybrid deep learning-based approach for brain tumor classification. Electronics 11(7):1146","journal-title":"Electronics"},{"key":"11368_CR350","doi-asserted-by":"crossref","unstructured":"Redmon J (2016) You only look once: Unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","DOI":"10.1109\/CVPR.2016.91"},{"key":"11368_CR351","doi-asserted-by":"crossref","unstructured":"Redmon J, Farhadi A (2017) Yolo9000: better, faster, stronger. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7263\u20137271","DOI":"10.1109\/CVPR.2017.690"},{"key":"11368_CR352","unstructured":"Redmon J, Farhadi A (2018) Yolov3: An incremental improvement. arXiv preprint arXiv:1804.02767"},{"key":"11368_CR353","unstructured":"Reed S, Akata Z, Yan X, Logeswaran L, Schiele B, Lee H (2016) Generative adversarial text to image synthesis. In: International Conference on Machine Learning, pp. 1060\u20131069. 
PMLR"},{"issue":"3","key":"11368_CR354","doi-asserted-by":"publisher","first-page":"929","DOI":"10.1109\/TCYB.2017.2668395","volume":"48","author":"R Ren","year":"2017","unstructured":"Ren R, Hung T, Tan KC (2017) A generic deep-learning-based approach for automated surface inspection. IEEE Trans Cybern 48(3):929\u2013940","journal-title":"IEEE Trans Cybern"},{"key":"11368_CR355","doi-asserted-by":"publisher","first-page":"240","DOI":"10.1007\/s11263-019-01235-8","volume":"128","author":"W Ren","year":"2020","unstructured":"Ren W, Pan J, Zhang H, Cao X, Yang M-H (2020) Single image dehazing via multi-scale convolutional neural networks with holistic edges. Int J Comput Vis 128:240\u2013259","journal-title":"Int J Comput Vis"},{"key":"11368_CR356","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: Towards real-time object detection with region proposal networks. In: Advances in neural information processing systems 28"},{"key":"11368_CR357","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: Towards real-time object detection with region proposal networks. In: Advances in neural information processing systems 28"},{"key":"11368_CR358","unstructured":"Ren W, Zhang J, Ma L, Pan J, Cao X, Zuo W, Liu W, Yang M-H (2018) Deep non-blind deconvolution via generalized low-rank approximation. In: Advances in neural information processing systems 31"},{"key":"11368_CR359","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1023\/B:VLSI.0000028532.53893.82","volume":"38","author":"AM Reza","year":"2004","unstructured":"Reza AM (2004) Realization of the contrast limited adaptive histogram equalization (clahe) for real-time image enhancement. 
J VLSI Signal Process Syst Signal Image Video Technol 38:35\u201344","journal-title":"J VLSI Signal Process Syst Signal Image Video Technol"},{"key":"11368_CR360","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1016\/j.imavis.2018.04.004","volume":"75","author":"P Rodr\u00edguez","year":"2018","unstructured":"Rodr\u00edguez P, Bautista MA, Gonzalez J, Escalera S (2018) Beyond one-hot encoding: Lower dimensional target embedding. Image Vis Comput 75:21\u201331","journal-title":"Image Vis Comput"},{"key":"11368_CR361","doi-asserted-by":"crossref","unstructured":"Rombach R, Blattmann A, Lorenz D, Esser P, Ommer B (2022) High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"11368_CR362","doi-asserted-by":"crossref","unstructured":"Ronneberger O, Fischer P, Brox T (2015) U-net: Convolutional networks for biomedical image segmentation. In: Medical Image Computing and Computer-assisted intervention\u2013MICCAI 2015: 18th International Conference, Munich, Germany, October 5\u20139, 2015, Proceedings, Part III 18, pp. 234\u2013241. Springer","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"11368_CR363","doi-asserted-by":"crossref","unstructured":"Ronneberger O, Fischer P, Brox T (2015) U-net: Convolutional networks for biomedical image segmentation In: Medical Image Computing and Computer-assisted intervention\u2013MICCAI 2015: 18th International Conference, Munich, Germany, October 5\u20139, 2015, Proceedings, Part III 18, pp. 234\u2013241 . Springer","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"11368_CR364","doi-asserted-by":"crossref","unstructured":"Roth S, Black MJ (2005) Fields of experts: A framework for learning image priors. In: 2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR\u201905), vol. 2, pp. 860\u2013867. 
IEEE","DOI":"10.1109\/CVPR.2005.160"},{"key":"11368_CR365","doi-asserted-by":"crossref","unstructured":"Rozenberszki D, Litany O, Dai A (2024) Unscene3d: Unsupervised 3d instance segmentation for indoor scenes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19957\u201319967","DOI":"10.1109\/CVPR52733.2024.01886"},{"key":"11368_CR366","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia C, Chan W, Saxena S, Li L, Whang J, Denton EL, Ghasemipour K, Gontijo Lopes R, Karagol Ayan B, Salimans T et al (2022a) Photorealistic text-to-image diffusion models with deep language understanding. Adv Neural Inf Process Syst 35:36479\u201336494","journal-title":"Adv Neural Inf Process Syst"},{"issue":"4","key":"11368_CR367","first-page":"4713","volume":"45","author":"C Saharia","year":"2022","unstructured":"Saharia C, Ho J, Chan W, Salimans T, Fleet DJ, Norouzi M (2022b) Image super-resolution via iterative refinement. IEEE Trans Pattern Anal Mach Intell 45(4):4713\u20134726","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11368_CR368","doi-asserted-by":"publisher","first-page":"475","DOI":"10.1016\/j.procs.2019.11.147","volume":"161","author":"SM Sam","year":"2019","unstructured":"Sam SM, Kamardin K, Sjarif NNA, Mohamed N et al (2019) Offline signature verification using deep learning convolutional neural network (cnn) architectures googlenet inception-v1 and inception-v3. Procedia Comput Sci 161:475\u2013483","journal-title":"Procedia Comput Sci"},{"issue":"9","key":"11368_CR369","first-page":"33","volume":"6","author":"B Sanchez-Lengeling","year":"2021","unstructured":"Sanchez-Lengeling B, Reif E, Pearce A, Wiltschko AB (2021) A gentle introduction to graph neural networks. 
Distill 6(9):33","journal-title":"Distill"},{"key":"11368_CR370","volume-title":"CUDA by example: an introduction to general-purpose GPU programming","author":"J Sanders","year":"2010","unstructured":"Sanders J, Kandrot E (2010) CUDA by example: an introduction to general-purpose GPU programming. Addison-Wesley Professional, Boston"},{"key":"11368_CR371","doi-asserted-by":"crossref","unstructured":"Sandler M, Howard A, Zhu M, Zhmoginov A, Chen L-C (2018) Mobilenetv2: Inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520","DOI":"10.1109\/CVPR.2018.00474"},{"issue":"3","key":"11368_CR372","doi-asserted-by":"publisher","first-page":"8","DOI":"10.4236\/jcc.2019.73002","volume":"7","author":"U Sara","year":"2019","unstructured":"Sara U, Akter M, Uddin MS (2019) Image quality assessment through fsim, ssim, mse and psnr\u2013a comparative study. J Comput Commun 7(3):8\u201318","journal-title":"J Comput Commun"},{"issue":"1","key":"11368_CR373","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1109\/TNN.2008.2005605","volume":"20","author":"F Scarselli","year":"2008","unstructured":"Scarselli F, Gori M, Tsoi AC, Hagenbuchner M, Monfardini G (2008) The graph neural network model. IEEE Trans Neural Netw 20(1):61\u201380","journal-title":"IEEE Trans Neural Netw"},{"key":"11368_CR374","volume-title":"Artificial neural networks","author":"RJ Schalkoff","year":"1997","unstructured":"Schalkoff RJ (1997) Artificial neural networks. McGraw-Hill Higher Education, New York"},{"key":"11368_CR375","doi-asserted-by":"crossref","unstructured":"Schlichtkrull M, Kipf TN, Bloem P, Van Den\u00a0Berg R, Titov I, Welling M (2018) Modeling relational data with graph convolutional networks. In: The Semantic Web: 15th International Conference, ESWC 2018, Heraklion, Crete, Greece, June 3\u20137, 2018, Proceedings 15, pp. 593\u2013607. 
Springer","DOI":"10.1007\/978-3-319-93417-4_38"},{"key":"11368_CR376","doi-asserted-by":"crossref","unstructured":"Schmidt U, Roth S (2014) Shrinkage fields for effective image restoration. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2774\u20132781","DOI":"10.1109\/CVPR.2014.349"},{"key":"11368_CR377","doi-asserted-by":"crossref","unstructured":"Schmidt U, Roth S (2014) Shrinkage fields for effective image restoration. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2774\u20132781","DOI":"10.1109\/CVPR.2014.349"},{"key":"11368_CR378","doi-asserted-by":"crossref","unstructured":"Schroff F, Kalenichenko D, Philbin J (2015) Facenet: A unified embedding for face recognition and clustering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 815\u2013823","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"11368_CR379","doi-asserted-by":"crossref","unstructured":"Schuler CJ, Christopher\u00a0Burger H, Harmeling S, Scholkopf B (2013) A machine learning approach for non-blind image deconvolution. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1067\u20131074","DOI":"10.1109\/CVPR.2013.142"},{"key":"11368_CR380","doi-asserted-by":"crossref","unstructured":"Seitz SM, Curless B, Diebel J, Scharstein D, Szeliski R (2006) A comparison and evaluation of multi-view stereo reconstruction algorithms. In: 2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR\u201906), vol. 1, pp. 519\u2013528. IEEE","DOI":"10.1109\/CVPR.2006.19"},{"key":"11368_CR381","unstructured":"Sermanet P, Eigen D, Zhang X, Mathieu M, Fergus R, LeCun Y (2013) Overfeat: Integrated recognition, localization and detection using convolutional networks. 
arXiv preprint arXiv:1312.6229"},{"key":"11368_CR382","unstructured":"Sermanet P, Eigen D, Zhang X, Mathieu M, Fergus R, LeCun Y (2013) Overfeat: Integrated recognition, localization and detection using convolutional networks. arXiv preprint arXiv:1312.6229"},{"issue":"6","key":"11368_CR383","doi-asserted-by":"publisher","first-page":"8423","DOI":"10.1007\/s11042-020-10035-z","volume":"80","author":"DRIM Setiadi","year":"2021","unstructured":"Setiadi DRIM (2021) Psnr vs ssim: imperceptibility quality assessment for image steganography. Multimed Tools Appl 80(6):8423\u20138444","journal-title":"Multimed Tools Appl"},{"key":"11368_CR384","doi-asserted-by":"publisher","DOI":"10.1016\/j.aei.2023.101882","volume":"55","author":"H Shang","year":"2023","unstructured":"Shang H, Sun C, Liu J, Chen X, Yan R (2023) Defect-aware transformer network for intelligent visual surface defect detection. Adv Eng Inform 55:101882","journal-title":"Adv Eng Inform"},{"key":"11368_CR385","unstructured":"Shetty S (2016) Application of convolutional neural network for image classification on pascal voc challenge 2012 dataset. arXiv preprint arXiv:1607.03785"},{"issue":"03","key":"11368_CR386","doi-asserted-by":"publisher","first-page":"445","DOI":"10.1142\/S0218001408006284","volume":"22","author":"FY Shih","year":"2008","unstructured":"Shih FY, Chuang C-F, Wang PS (2008) Performance comparisons of facial expression recognition in jaffe database. Int J Pattern Recognit Artif Intell 22(03):445\u2013459","journal-title":"Int J Pattern Recognit Artif Intell"},{"key":"11368_CR387","unstructured":"Siarohin A, Lathuili\u00e8re S, Tulyakov S, Ricci E, Sebe N (2019) First order motion model for image animation. In: Advances in neural information processing systems 32"},{"key":"11368_CR388","doi-asserted-by":"crossref","unstructured":"Silberman N, Hoiem D, Kohli P, Fergus R (2012) Indoor segmentation and support inference from rgbd images. 
In: Computer Vision\u2013ECCV 2012: 12th European Conference on Computer Vision, Florence, Italy, October 7\u201313, 2012, Proceedings, Part V 12, pp. 746\u2013760. Springer","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"11368_CR389","doi-asserted-by":"crossref","unstructured":"Simonovsky M, Komodakis N (2017) Dynamic edge-conditioned filters in convolutional neural networks on graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3693\u20133702","DOI":"10.1109\/CVPR.2017.11"},{"key":"11368_CR390","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556"},{"issue":"10","key":"11368_CR391","doi-asserted-by":"publisher","first-page":"1590","DOI":"10.3390\/electronics11101590","volume":"11","author":"A Singh","year":"2022","unstructured":"Singh A, Kushwaha S, Alarfaj M, Singh M (2022) Comprehensive overview of backpropagation algorithm for digital image denoising. Electronics 11(10):1590","journal-title":"Electronics"},{"issue":"3","key":"11368_CR392","doi-asserted-by":"publisher","first-page":"1894","DOI":"10.1109\/TRO.2021.3122069","volume":"38","author":"K Sirohi","year":"2021","unstructured":"Sirohi K, Mohan R, B\u00fcscher D, Burgard W, Valada A (2021) Efficientlps: Efficient lidar panoptic segmentation. IEEE Trans Rob 38(3):1894\u20131914","journal-title":"IEEE Trans Rob"},{"key":"11368_CR393","doi-asserted-by":"crossref","unstructured":"Soh JW, Cho NI (2021) Deep universal blind image denoising. In: 2020 25th International Conference on Pattern Recognition (ICPR), pp. 747\u2013754 . IEEE","DOI":"10.1109\/ICPR48806.2021.9412605"},{"key":"11368_CR394","unstructured":"Soltanayev S, Chun SY (2018) Training deep learning based denoisers without ground truth data. 
In: Advances in neural information processing systems 31"},{"key":"11368_CR395","doi-asserted-by":"crossref","unstructured":"Song S, Lichtenberg SP, Xiao J (2015) Sun rgb-d: A rgb-d scene understanding benchmark suite. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 567\u2013576","DOI":"10.1109\/CVPR.2015.7298655"},{"issue":"17","key":"11368_CR396","doi-asserted-by":"publisher","first-page":"3809","DOI":"10.3390\/s19173809","volume":"19","author":"Y Song","year":"2019","unstructured":"Song Y, Zhu Y, Du X (2019) Dynamic residual dense network for image denoising. Sensors 19(17):3809","journal-title":"Sensors"},{"key":"11368_CR397","unstructured":"Song Y, Sohl-Dickstein J, Kingma DP, Kumar A, Ermon S, Poole B (2021) Score-based generative modeling through stochastic differential equations. In: International Conference on Learning Representations"},{"issue":"3","key":"11368_CR398","doi-asserted-by":"publisher","first-page":"1179","DOI":"10.1109\/TCSS.2022.3153557","volume":"10","author":"A Soni","year":"2023","unstructured":"Soni A, Dutta T, Nigam N, Verma D, Gupta HP (2023) Supervised attention network for arbitrary-shaped text detection in edge-fainted noisy scene images. IEEE Trans Comput Soc Syst 10(3):1179\u20131188. https:\/\/doi.org\/10.1109\/TCSS.2022.3153557","journal-title":"IEEE Trans Comput Soc Syst"},{"key":"11368_CR399","unstructured":"Srivastava N (2013) Improving neural networks with dropout. Thesis, University of Toronto"},{"issue":"2","key":"11368_CR400","doi-asserted-by":"publisher","first-page":"388","DOI":"10.1603\/0022-0493-94.2.388","volume":"94","author":"S Stewart","year":"2001","unstructured":"Stewart S, Layton M, Williams M, Ingram D, Maily W (2001) Response of cotton to prebloom square loss. 
J Econ Entomol 94(2):388\u2013396","journal-title":"J Econ Entomol"},{"issue":"3","key":"11368_CR401","doi-asserted-by":"publisher","first-page":"66","DOI":"10.1109\/MCSE.2010.69","volume":"12","author":"JE Stone","year":"2010","unstructured":"Stone JE, Gohara D, Shi G (2010) Opencl: A parallel programming standard for heterogeneous computing systems. Comput Sci Eng 12(3):66","journal-title":"Comput Sci Eng"},{"key":"11368_CR402","doi-asserted-by":"crossref","unstructured":"Strudel R, Garcia R, Laptev I, Schmid C (2021) Segmenter: Transformer for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7262\u20137272","DOI":"10.1109\/ICCV48922.2021.00717"},{"key":"11368_CR403","doi-asserted-by":"crossref","unstructured":"Su W-t, Cheung G, Wildes R, Lin C-W (2020) Graph neural net using analytical graph filters and topology optimization for image denoising. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 8464\u20138468. IEEE","DOI":"10.1109\/ICASSP40776.2020.9054201"},{"key":"11368_CR404","doi-asserted-by":"crossref","unstructured":"Su S, Delbracio M, Wang J, Sapiro G, Heidrich W, Wang O (2017) Deep video deblurring for hand-held cameras. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1279\u20131288","DOI":"10.1109\/CVPR.2017.33"},{"key":"11368_CR405","unstructured":"Sun Y, Chen Y, Wang X, Tang X (2014) Deep learning face representation by joint identification-verification. In: Advances in neural information processing systems 27"},{"key":"11368_CR406","series-title":"Integrated series in information system","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4899-7641-3","volume-title":"Machine learning models and algorithms for big data classification","author":"S Suthaharan","year":"2016","unstructured":"Suthaharan S, Suthaharan S (2016) Support vector machine. 
Machine learning models and algorithms for big data classification, vol 36. Integrated series in information system. Springer, Boston"},{"issue":"4","key":"11368_CR407","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1561\/2200000013","volume":"4","author":"C Sutton","year":"2012","unstructured":"Sutton C, McCallum A et al (2012) An introduction to conditional random fields. Found Trends Mach Learn 4(4):267\u2013373","journal-title":"Found Trends Mach Learn"},{"key":"11368_CR408","doi-asserted-by":"crossref","unstructured":"Szegedy C, Ioffe S, Vanhoucke V, Alemi A (2017) Inception-v4, inception-resnet and the impact of residual connections on learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 31","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"11368_CR409","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1\u20139","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"11368_CR410","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"issue":"3","key":"11368_CR411","doi-asserted-by":"publisher","first-page":"759","DOI":"10.1007\/s10845-019-01476-x","volume":"31","author":"D Tabernik","year":"2020","unstructured":"Tabernik D, \u0160ela S, Skvar\u010d J, Sko\u010daj D (2020) Segmentation-based deep-learning approach for surface-defect detection. J Intell Manuf 31(3):759\u2013776","journal-title":"J Intell Manuf"},{"key":"11368_CR412","doi-asserted-by":"crossref","unstructured":"Tai Y, Yang J, Liu X, Xu C (2017) Memnet: A persistent memory network for image restoration. 
In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4539\u20134547","DOI":"10.1109\/ICCV.2017.486"},{"key":"11368_CR413","doi-asserted-by":"crossref","unstructured":"Tai Y, Yang J, Liu X, Xu C (2017) Memnet: A persistent memory network for image restoration. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4539\u20134547","DOI":"10.1109\/ICCV.2017.486"},{"key":"11368_CR414","doi-asserted-by":"crossref","unstructured":"Taigman Y, Yang M, Ranzato M, Wolf L (2014) Deepface: Closing the gap to human-level performance in face verification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1701\u20131708","DOI":"10.1109\/CVPR.2014.220"},{"key":"11368_CR415","doi-asserted-by":"crossref","unstructured":"Tan Z, Wang M, Xie J, Chen Y, Shi X (2018) Deep semantic role labeling with self-attention. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32","DOI":"10.1609\/aaai.v32i1.11928"},{"key":"11368_CR416","unstructured":"Tan M, Le Q (2019) Efficientnet: Rethinking model scaling for convolutional neural networks. In: International Conference on Machine Learning, pp. 6105\u20136114. PMLR"},{"issue":"9","key":"11368_CR417","doi-asserted-by":"publisher","first-page":"1575","DOI":"10.3390\/app8091575","volume":"8","author":"X Tao","year":"2018","unstructured":"Tao X, Zhang D, Ma W, Liu X, Xu D (2018) Automatic metallic surface defect detection and recognition with convolutional neural networks. Appl Sci 8(9):1575","journal-title":"Appl Sci"},{"key":"11368_CR418","doi-asserted-by":"crossref","unstructured":"Tao X, Gao H, Shen X, Wang J, Jia J (2018) Scale-recurrent network for deep image deblurring. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 
8174\u20138182","DOI":"10.1109\/CVPR.2018.00853"},{"key":"11368_CR419","doi-asserted-by":"crossref","unstructured":"Tchapmi L, Choy C, Armeni I, Gwak J, Savarese S (2017) Segcloud: Semantic segmentation of 3d point clouds. In: 2017 International Conference on 3D Vision (3DV), pp. 537\u2013547. IEEE","DOI":"10.1109\/3DV.2017.00067"},{"key":"11368_CR420","unstructured":"Team G, Anil R, Borgeaud S, Alayrac J-B, Yu J, Soricut R, Schalkwyk J, Dai AM, Hauth A, Millican K, et al (2023) Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805"},{"key":"11368_CR421","doi-asserted-by":"crossref","unstructured":"Thomas H, Qi CR, Deschaud J-E, Marcotegui B, Goulette F, Guibas LJ (2019) Kpconv: Flexible and deformable convolution for point clouds. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6411\u20136420","DOI":"10.1109\/ICCV.2019.00651"},{"key":"11368_CR422","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.102043","volume":"102","author":"C Tian","year":"2024","unstructured":"Tian C, Zheng M, Zuo W, Zhang S, Zhang Y, Lin C-W (2024) A cross transformer for image denoising. Inf Fusion 102:102043. https:\/\/doi.org\/10.1016\/j.inffus.2023.102043","journal-title":"Inf Fusion"},{"issue":"1","key":"11368_CR423","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1049\/trit.2018.1054","volume":"4","author":"C Tian","year":"2019","unstructured":"Tian C, Xu Y, Fei L, Wang J, Wen J, Luo N (2019) Enhanced cnn for image denoising. CAAI Trans Intell Technol 4(1):17\u201323","journal-title":"CAAI Trans Intell Technol"},{"key":"11368_CR424","doi-asserted-by":"publisher","first-page":"461","DOI":"10.1016\/j.neunet.2019.08.022","volume":"121","author":"C Tian","year":"2020","unstructured":"Tian C, Xu Y, Zuo W (2020) Image denoising using deep cnn with batch renormalization. 
Neural Netw 121:461\u2013473","journal-title":"Neural Netw"},{"key":"11368_CR425","doi-asserted-by":"publisher","first-page":"461","DOI":"10.1016\/j.neunet.2019.08.022","volume":"121","author":"C Tian","year":"2020","unstructured":"Tian C, Xu Y, Zuo W (2020) Image denoising using deep cnn with batch renormalization. Neural Netw 121:461\u2013473","journal-title":"Neural Netw"},{"key":"11368_CR426","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1016\/j.neunet.2020.07.025","volume":"131","author":"C Tian","year":"2020","unstructured":"Tian C, Fei L, Zheng W, Xu Y, Zuo W, Lin C-W (2020) Deep learning on image denoising: An overview. Neural Netw 131:251\u2013275","journal-title":"Neural Netw"},{"key":"11368_CR427","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1016\/j.neunet.2019.12.024","volume":"124","author":"C Tian","year":"2020","unstructured":"Tian C, Xu Y, Li Z, Zuo W, Fei L, Liu H (2020) Attention-guided cnn for image denoising. Neural Netw 124:117\u2013129","journal-title":"Neural Netw"},{"key":"11368_CR428","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2021.106949","volume":"226","author":"C Tian","year":"2021","unstructured":"Tian C, Xu Y, Zuo W, Du B, Lin C-W, Zhang D (2021) Designing and training of a dual cnn for image denoising. Knowl-Based Syst 226:106949","journal-title":"Knowl-Based Syst"},{"key":"11368_CR429","doi-asserted-by":"crossref","unstructured":"Tian Z, Shen C, Chen H, He T (2019) Fcos: Fully convolutional one-stage object detection. arXiv preprint arXiv:1904.01355","DOI":"10.1109\/ICCV.2019.00972"},{"key":"11368_CR430","doi-asserted-by":"crossref","unstructured":"Timofte R, Agustsson E, Van\u00a0Gool L, Yang M-H, Zhang L (2017) Ntire 2017 challenge on single image super-resolution: Methods and results. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 
114\u2013125","DOI":"10.1109\/CVPRW.2017.150"},{"key":"11368_CR431","doi-asserted-by":"crossref","unstructured":"Timofte, R, Agustsson, E, Van\u00a0Gool, L, Yang, M-H, Zhang, L (2017) Ntire 2017 challenge on single image super-resolution: Methods and results. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 114\u2013125","DOI":"10.1109\/CVPRW.2017.150"},{"key":"11368_CR432","first-page":"24261","volume":"34","author":"IO Tolstikhin","year":"2021","unstructured":"Tolstikhin IO, Houlsby N, Kolesnikov A, Beyer L, Zhai X, Unterthiner T, Yung J, Steiner A, Keysers D, Uszkoreit J et al (2021) Mlp-mixer: An all-mlp architecture for vision. Adv Neural Inf Process Syst 34:24261\u201324272","journal-title":"Adv Neural Inf Process Syst"},{"key":"11368_CR433","doi-asserted-by":"crossref","unstructured":"Tong T, Li G, Liu X, Gao Q (2017) Image super-resolution using dense skip connections In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4799\u20134807","DOI":"10.1109\/ICCV.2017.514"},{"key":"11368_CR434","unstructured":"Touvron H, Vedaldi A, Douze M, J\u00e9gou H (2019) Fixing the train-test resolution discrepancy. In: Advances in neural information processing systems 32"},{"key":"11368_CR435","doi-asserted-by":"crossref","unstructured":"Tran D, Bourdev L, Fergus R, Torresani L, Paluri M (2015) Learning spatiotemporal features with 3d convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4489\u20134497","DOI":"10.1109\/ICCV.2015.510"},{"key":"11368_CR436","doi-asserted-by":"crossref","unstructured":"Tsai F-J, Peng Y-T, Lin Y-Y, Tsai C-C, Lin C-W (2022) Stripformer: Strip transformer for fast image deblurring. In: European Conference on Computer Vision, pp. 146\u2013162. 
Springer","DOI":"10.1007\/978-3-031-19800-7_9"},{"key":"11368_CR437","doi-asserted-by":"crossref","unstructured":"Tu Z, Talebi H, Zhang H, Yang F, Milanfar P, Bovik A, Li Y (2022) Maxim: Multi-axis mlp for image processing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5769\u20135780","DOI":"10.1109\/CVPR52688.2022.00568"},{"key":"11368_CR438","doi-asserted-by":"crossref","unstructured":"Tu J, Ji W, Zhao H, Zhang C, Zimmermann R, Qian H (2024) Driveditfit: Fine-tuning diffusion transformers for autonomous driving. arXiv preprint arXiv:2407.15661","DOI":"10.1145\/3712064"},{"key":"11368_CR439","doi-asserted-by":"crossref","unstructured":"Tulyakov S, Liu M-Y, Yang X, Kautz J (2018) Mocogan: Decomposing motion and content for video generation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1526\u20131535","DOI":"10.1109\/CVPR.2018.00165"},{"key":"11368_CR440","doi-asserted-by":"publisher","first-page":"14076","DOI":"10.1109\/ACCESS.2022.3147495","volume":"10","author":"T Turay","year":"2022","unstructured":"Turay T, Vladimirova T (2022) Toward performing image classification and object detection with convolutional neural networks in autonomous driving systems: A survey. IEEE Access 10:14076\u201314119","journal-title":"IEEE Access"},{"key":"11368_CR441","doi-asserted-by":"crossref","unstructured":"Ulyanov D, Vedaldi A, Lempitsky V (2018) Deep image prior. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 9446\u20139454","DOI":"10.1109\/CVPR.2018.00984"},{"key":"11368_CR442","doi-asserted-by":"publisher","first-page":"8226","DOI":"10.1109\/TIP.2020.3013166","volume":"29","author":"D Valsesia","year":"2020","unstructured":"Valsesia D, Fracastoro G, Magli E (2020) Deep graph-convolutional image denoising. 
IEEE Trans Image Process 29:8226\u20138237","journal-title":"IEEE Trans Image Process"},{"key":"11368_CR443","doi-asserted-by":"crossref","unstructured":"Van\u00a0Gansbeke W, De\u00a0Brabandere B (2025) A simple latent diffusion approach for panoptic segmentation and mask inpainting. In: European Conference on Computer Vision, pp. 78\u201397. Springer","DOI":"10.1007\/978-3-031-72633-0_5"},{"key":"11368_CR444","unstructured":"Vaswani A (2017) Attention is all you need. In: Advances in Neural Information Processing Systems"},{"key":"11368_CR445","doi-asserted-by":"crossref","unstructured":"Vedaldi A, Lenc K (2015) Matconvnet: Convolutional neural networks for matlab In: Proceedings of the 23rd ACM International Conference on Multimedia, pp. 689\u2013692","DOI":"10.1145\/2733373.2807412"},{"key":"11368_CR446","unstructured":"Veli\u010dkovi\u0107 P, Cucurull G, Casanova A, Romero A, Lio P, Bengio Y (2017) Graph attention networks. arXiv preprint arXiv:1710.10903"},{"key":"11368_CR447","doi-asserted-by":"crossref","unstructured":"Vinyals O, Toshev A, Bengio S, Erhan D (2015) Show and tell: A neural image caption generator. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3156\u20133164","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"11368_CR448","doi-asserted-by":"publisher","first-page":"131988","DOI":"10.1109\/ACCESS.2020.3010018","volume":"8","author":"T-H Vo","year":"2020","unstructured":"Vo T-H, Lee G-S, Yang H-J, Kim S-H (2020) Pyramid with super resolution for in-the-wild facial expression recognition. IEEE Access 8:131988\u2013132001","journal-title":"IEEE Access"},{"key":"11368_CR449","unstructured":"Walmer M, Kanjirathinkal R, Tai KS, Muzumdar K, Tian T, Shrivastava A (2023) Multi-entity video transformers for fine-grained video representation learning. 
arXiv preprint arXiv:2311.10873"},{"key":"11368_CR450","doi-asserted-by":"crossref","unstructured":"Wang C-Y, Bochkovskiy A, Liao H-YM (2023) Yolov7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7464\u20137475","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"11368_CR451","doi-asserted-by":"crossref","unstructured":"Wang C-Y, Yeh I-H, Liao H-YM (2024) Yolov9: Learning what you want to learn using programmable gradient information. arXiv preprint arXiv:2402.13616","DOI":"10.1007\/978-3-031-72751-1_1"},{"key":"11368_CR452","unstructured":"Wang T-C, Liu M-Y, Zhu J-Y, Liu G, Tao A, Kautz J, Catanzaro B (2018) Video-to-video synthesis. arXiv preprint arXiv:1808.06601"},{"key":"11368_CR453","doi-asserted-by":"crossref","unstructured":"Wang R, Chen D, Wu Z, Chen Y, Dai X, Liu M, Jiang Y-G, Zhou L, Yuan L (2022) Bevt: Bert pretraining of video transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14733\u201314743","DOI":"10.1109\/CVPR52688.2022.01432"},{"key":"11368_CR454","doi-asserted-by":"crossref","unstructured":"Wang P, Chen P, Yuan Y, Liu D, Huang Z, Hou X, Cottrell G (2018) Understanding convolution for semantic segmentation. In: 2018 IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 1451\u20131460. IEEE","DOI":"10.1109\/WACV.2018.00163"},{"key":"11368_CR455","unstructured":"Wang L, Guo S, Huang W, Qiao Y (2015) Places205-vggnet models for scene recognition. arXiv preprint arXiv:1508.01667"},{"key":"11368_CR456","unstructured":"Wang Y, Li K, Li Y, He Y, Huang B, Zhao Z, Zhang H, Xu J, Liu Y, Wang Z, et al (2022) Internvideo: General video foundation models via generative and discriminative learning. 
arXiv preprint arXiv:2212.03191"},{"key":"11368_CR457","doi-asserted-by":"crossref","unstructured":"Wang T, Sun M, Hu K (2017) Dilated deep residual network for image denoising. In: 2017 IEEE 29th International Conference on Tools with Artificial Intelligence (ICTAI), pp. 1272\u20131279. IEEE","DOI":"10.1109\/ICTAI.2017.00192"},{"key":"11368_CR458","doi-asserted-by":"crossref","unstructured":"Wang T, Sun M, Hu K (2017) Dilated deep residual network for image denoising. In: 2017 IEEE 29th International Conference on Tools with Artificial Intelligence (ICTAI), pp. 1272\u20131279. IEEE","DOI":"10.1109\/ICTAI.2017.00192"},{"key":"11368_CR459","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1109\/TIP.2024.3520008","volume":"34","author":"C Wang","year":"2024","unstructured":"Wang C, Zha Y, He J, Yang W, Zhang T (2024) Rethinking masked representation learning for 3d point cloud understanding. IEEE Trans Image Process 34:247\u2013262","journal-title":"IEEE Trans Image Process"},{"key":"11368_CR460","doi-asserted-by":"crossref","unstructured":"Wang L, Zhang J, Wang O, Lin Z, Lu H (2020) Sdc-depth: Semantic divide-and-conquer network for monocular depth estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 541\u2013550","DOI":"10.1109\/CVPR42600.2020.00062"},{"key":"11368_CR461","doi-asserted-by":"crossref","unstructured":"Wang Q, Hu X, Wang H, Men A, Jiang Z (2021) Multi-dip: A general framework for unsupervised multi-degraded image restoration. In: International Conference on Neural Information Processing, pp. 378\u2013389 Springer","DOI":"10.1007\/978-3-030-92273-3_31"},{"key":"11368_CR462","doi-asserted-by":"crossref","unstructured":"Wang Y, Perazzi F, McWilliams B, Sorkine-Hornung A, Sorkine-Hornung O, Schroers C (2018) A fully progressive approach to single-image super-resolution. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 
864\u2013873","DOI":"10.1109\/CVPRW.2018.00131"},{"issue":"5","key":"11368_CR463","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3326362","volume":"38","author":"Y Wang","year":"2019","unstructured":"Wang Y, Sun Y, Liu Z, Sarma SE, Bronstein MM, Solomon JM (2019) Dynamic graph cnn for learning on point clouds. ACM Trans Graph 38(5):1\u201312","journal-title":"ACM Trans Graph"},{"issue":"10","key":"11368_CR464","doi-asserted-by":"publisher","first-page":"3365","DOI":"10.1109\/TPAMI.2020.2982166","volume":"43","author":"Z Wang","year":"2020","unstructured":"Wang Z, Chen J, Hoi SC (2020) Deep learning for image super-resolution: A survey. IEEE Trans Pattern Anal Mach Intell 43(10):3365\u20133387","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11368_CR465","doi-asserted-by":"publisher","first-page":"4057","DOI":"10.1109\/TIP.2019.2956143","volume":"29","author":"K Wang","year":"2020","unstructured":"Wang K, Peng X, Yang J, Meng D, Qiao Y (2020) Region attention networks for pose and occlusion robust facial expression recognition. IEEE Trans Image Process 29:4057\u20134069","journal-title":"IEEE Trans Image Process"},{"key":"11368_CR466","first-page":"17721","volume":"33","author":"X Wang","year":"2020","unstructured":"Wang X, Zhang R, Kong T, Li L, Shen C (2020) Solov2: Dynamic and fast instance segmentation. Adv Neural Inf Process Syst 33:17721\u201317732","journal-title":"Adv Neural Inf Process Syst"},{"issue":"6","key":"11368_CR467","doi-asserted-by":"publisher","first-page":"2533","DOI":"10.1109\/TCSVT.2022.3224940","volume":"33","author":"Y Wang","year":"2022","unstructured":"Wang Y, Lu T, Zhang Y, Wang Z, Jiang J, Xiong Z (2022) Faceformer: Aggregating global and local representation for face hallucination. 
IEEE Trans Circuits Syst Video Technol 33(6):2533\u20132545","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"11368_CR468","doi-asserted-by":"publisher","first-page":"2314","DOI":"10.1109\/TMM.2023.3294808","volume":"26","author":"Y Wang","year":"2023","unstructured":"Wang Y, Lu T, Yao Y, Zhang Y, Xiong Z (2023) Learning to hallucinate face in the dark. IEEE Trans Multimed 26:2314\u20132326","journal-title":"IEEE Trans Multimed"},{"key":"11368_CR469","doi-asserted-by":"crossref","unstructured":"Wang Z, Cun X, Bao J, Zhou W, Liu J, Li H (2022) Uformer: A general u-shaped transformer for image restoration. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17683\u201317693","DOI":"10.1109\/CVPR52688.2022.01716"},{"key":"11368_CR470","doi-asserted-by":"crossref","unstructured":"Wang Z, Fu Y, Liu J, Zhang Y (2023) Lg-bpn: Local and global blind-patch network for self-supervised real-world denoising. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18156\u201318165","DOI":"10.1109\/CVPR52729.2023.01741"},{"key":"11368_CR471","doi-asserted-by":"crossref","unstructured":"Wang L, Xiong Y, Wang Z, Qiao Y, Lin D, Tang X, Van\u00a0Gool L (2016) Temporal segment networks: Towards good practices for deep action recognition. In: European Conference on Computer Vision, pp. 20\u201336. Springer","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"11368_CR472","doi-asserted-by":"crossref","unstructured":"Wan J, Wang D, Hoi SCH, Wu P, Zhu J, Zhang Y, Li J (2014) Deep learning for content-based image retrieval: A comprehensive study. In: Proceedings of the 22nd ACM International Conference on Multimedia, pp. 
157\u2013166","DOI":"10.1145\/2647868.2654948"},{"key":"11368_CR473","doi-asserted-by":"publisher","first-page":"703","DOI":"10.1007\/s10994-017-5675-z","volume":"107","author":"T Wei","year":"2018","unstructured":"Wei T, Guo L-Z, Li Y-F, Gao W (2018) Learning safe multi-label prediction for weakly labeled data. Mach Learn 107:703\u2013725","journal-title":"Mach Learn"},{"key":"11368_CR474","doi-asserted-by":"crossref","unstructured":"Wen Y, Zhang K, Li Z, Qiao Y (2016) A discriminative feature learning approach for deep face recognition. In: Computer vision\u2013ECCV 2016: 14th European Conference, Amsterdam, the Netherlands, October 11\u201314, 2016, Proceedings, Part VII 14, pp. 499\u2013515. Springer","DOI":"10.1007\/978-3-319-46478-7_31"},{"key":"11368_CR475","unstructured":"Williams T, Li R (2018) Wavelet pooling for convolutional neural networks. In: International Conference on Learning Representations"},{"key":"11368_CR476","unstructured":"Wu Z, Song S, Khosla A, Yu F, Zhang L, Tang X, Xiao J (2015) 3d shapenets: A deep representation for volumetric shapes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1912\u20131920"},{"key":"11368_CR477","unstructured":"Wu J, Zhang C, Xue T, Freeman B, Tenenbaum J (2016) Learning a probabilistic latent space of object shapes via 3d generative-adversarial modeling. In: Advances in neural information processing systems 29"},{"issue":"1","key":"11368_CR478","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1109\/TNNLS.2020.2978386","volume":"32","author":"Z Wu","year":"2020","unstructured":"Wu Z, Pan S, Chen F, Long G, Zhang C, Philip SY (2020) A comprehensive survey on graph neural networks. 
IEEE Trans Neural Netw Learn Syst 32(1):4\u201324","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"issue":"10","key":"11368_CR479","doi-asserted-by":"publisher","first-page":"0259283","DOI":"10.1371\/journal.pone.0259283","volume":"16","author":"W Wu","year":"2021","unstructured":"Wu W, Liu H, Li L, Long Y, Wang X, Wang Z, Li J, Chang Y (2021) Application of local fully convolutional neural network combined with yolo v5 algorithm in small target detection of remote sensing image. PLoS ONE 16(10):0259283","journal-title":"PLoS ONE"},{"key":"11368_CR480","doi-asserted-by":"crossref","unstructured":"Wu Y, He K (2018) Group normalization. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 3\u201319","DOI":"10.1007\/978-3-030-01261-8_1"},{"key":"11368_CR481","doi-asserted-by":"crossref","unstructured":"Wu X, Liu M, Cao Y, Ren D, Zuo W (2020) Unpaired learning of deep image denoising. In: European Conference on Computer Vision, pp. 352\u2013368 . Springer","DOI":"10.1007\/978-3-030-58548-8_21"},{"key":"11368_CR482","doi-asserted-by":"crossref","unstructured":"Wu H, Xiao B, Codella N, Liu M, Dai X, Yuan L, Zhang L (2021) Cvt: Introducing convolutions to vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 22\u201331","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"11368_CR483","doi-asserted-by":"crossref","unstructured":"Xiang Y, Kim W, Chen W, Ji J, Choy C, Su H, Mottaghi R, Guibas L, Savarese S (2016) Objectnet3d: A large scale database for 3d object recognition. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part VIII 14, pp. 160\u2013176. Springer","DOI":"10.1007\/978-3-319-46484-8_10"},{"key":"11368_CR484","doi-asserted-by":"crossref","unstructured":"Xiao J, Owens A, Torralba A (2013) Sun3d: A database of big spaces reconstructed using sfm and object labels. 
In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1625\u20131632","DOI":"10.1109\/ICCV.2013.458"},{"issue":"11","key":"11368_CR485","doi-asserted-by":"publisher","first-page":"13265","DOI":"10.1109\/TPAMI.2023.3292266","volume":"45","author":"J Xiao","year":"2023","unstructured":"Xiao J, Zhou P, Yao A, Li Y, Hong R, Yan S, Chua T-S (2023) Contrastive video question answering via video graph transformer. IEEE Trans Pattern Anal Mach Intell 45(11):13265\u201313280","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11368_CR486","doi-asserted-by":"crossref","unstructured":"Xie S, Girshick R, Doll\u00e1r P, Tu Z, He K (2017) Aggregated residual transformations for deep neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1492\u20131500","DOI":"10.1109\/CVPR.2017.634"},{"key":"11368_CR487","doi-asserted-by":"crossref","unstructured":"Xie Q, Luong M-T, Hovy E, Le QV (2020) Self-training with noisy student improves imagenet classification. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10687\u201310698","DOI":"10.1109\/CVPR42600.2020.01070"},{"issue":"5","key":"11368_CR488","doi-asserted-by":"publisher","first-page":"2787","DOI":"10.1109\/TCYB.2019.2925095","volume":"51","author":"W Xie","year":"2019","unstructured":"Xie W, Shen L, Duan J (2019) Adaptive weighting of handcrafted feature losses for facial expression recognition. IEEE Trans Cybern 51(5):2787\u20132800","journal-title":"IEEE Trans Cybern"},{"issue":"9","key":"11368_CR489","doi-asserted-by":"publisher","first-page":"5385","DOI":"10.1109\/TPAMI.2021.3080324","volume":"44","author":"E Xie","year":"2021","unstructured":"Xie E, Wang W, Ding M, Zhang R, Luo P (2021) Polarmask++: Enhanced polar representation for single-shot instance segmentation and beyond. 
IEEE Trans Pattern Anal Mach Intell 44(9):5385\u20135400","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11368_CR490","doi-asserted-by":"crossref","unstructured":"Xie S, Girshick R, Doll\u00e1r P, Tu Z, He K (2017) Aggregated residual transformations for deep neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1492\u20131500","DOI":"10.1109\/CVPR.2017.634"},{"key":"11368_CR491","unstructured":"Xie J, Xu L, Chen E (2012) Image denoising and inpainting with deep neural networks. In: Advances in neural information processing systems 25"},{"key":"11368_CR492","doi-asserted-by":"crossref","unstructured":"Xiong Y, Liao R, Zhao H, Hu R, Bai M, Yumer E, Urtasun R (2019) Upsnet: A unified panoptic segmentation network. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8818\u20138826","DOI":"10.1109\/CVPR.2019.00902"},{"key":"11368_CR493","doi-asserted-by":"crossref","unstructured":"Xiong Y, Liao R, Zhao H, Hu R, Bai M, Yumer E, Urtasun R (2019) Upsnet: A unified panoptic segmentation network. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 8818\u20138826","DOI":"10.1109\/CVPR.2019.00902"},{"key":"11368_CR494","doi-asserted-by":"crossref","unstructured":"Xu J, Zhang L, Zhang D (2018) A trilateral weighted sparse coding scheme for real-world image denoising. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 20\u201336","DOI":"10.1007\/978-3-030-01237-3_2"},{"issue":"6","key":"11368_CR495","doi-asserted-by":"publisher","first-page":"2996","DOI":"10.1109\/TIP.2018.2811546","volume":"27","author":"J Xu","year":"2018","unstructured":"Xu J, Zhang L, Zhang D (2018) External prior guided internal prior learning for real-world noisy image denoising. 
IEEE Trans Image Process 27(6):2996\u20133010","journal-title":"IEEE Trans Image Process"},{"key":"11368_CR496","doi-asserted-by":"publisher","first-page":"33432","DOI":"10.1109\/ACCESS.2018.2845840","volume":"6","author":"B Xu","year":"2018","unstructured":"Xu B, Shi X, Zhao Z, Zheng W (2018) Leveraging biomedical resources in bi-lstm for drug-drug interaction extraction. IEEE Access 6:33432\u201333439","journal-title":"IEEE Access"},{"key":"11368_CR497","doi-asserted-by":"crossref","unstructured":"Xue H, Liu C, Wan F, Jiao J, Ji X, Ye Q (2019) Danet: Divergent activation for weakly supervised object localization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6589\u20136598","DOI":"10.1109\/ICCV.2019.00669"},{"key":"11368_CR498","unstructured":"Xu L, Ren JS, Liu C, Jia J (2014) Deep convolutional neural network for image deconvolution. In: Advances in neural information processing systems 27"},{"key":"11368_CR499","doi-asserted-by":"crossref","unstructured":"Yan Z, Li X, Li M, Zuo W, Shan S (2018) Shift-net: Image inpainting via deep feature rearrangement. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 1\u201317","DOI":"10.1007\/978-3-030-01264-9_1"},{"key":"11368_CR500","doi-asserted-by":"publisher","first-page":"4905","DOI":"10.1109\/TIP.2021.3077135","volume":"30","author":"Y Yan","year":"2021","unstructured":"Yan Y, Ren W, Hu X, Li K, Shen H, Cao X (2021) Srgat: Single image super-resolution with graph attention network. IEEE Trans Image Process 30:4905\u20134918","journal-title":"IEEE Trans Image Process"},{"key":"11368_CR501","unstructured":"Yang T-J, Collins MD, Zhu Y, Hwang J-J, Liu T, Zhang X, Sze V, Papandreou G, Chen L-C (2019) Deeperlab: Single-shot image parser. arXiv preprint arXiv:1902.05093"},{"key":"11368_CR502","unstructured":"Yang X, He X, Zhao J, Zhang Y, Zhang S, Xie P (2020) Covid-ct-dataset: a ct scan dataset about covid-19. 
arXiv preprint arXiv:2003.13865"},{"key":"11368_CR503","doi-asserted-by":"crossref","unstructured":"Yang Y, Ni X, Hao Y, Liu C, Wang W, Liu Y, Xie H (2022) Mf-gan: Multi-conditional fusion generative adversarial network for text-to-image synthesis. In: International Conference on Multimedia Modeling, pp. 41\u201353. Springer","DOI":"10.1007\/978-3-030-98358-1_4"},{"key":"11368_CR504","unstructured":"Yang T, Zhu Y, Xie Y, Zhang A, Chen C, Li M (2023) Aim: Adapting image models for efficient video action recognition. arXiv preprint arXiv:2302.03024"},{"issue":"1","key":"11368_CR505","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1109\/LSP.2017.2768660","volume":"25","author":"D Yang","year":"2017","unstructured":"Yang D, Sun J (2017) Bm3d-net: A convolutional neural network for transform-domain collaborative filtering. IEEE Signal Process Lett 25(1):55\u201359","journal-title":"IEEE Signal Process Lett"},{"key":"11368_CR506","doi-asserted-by":"publisher","first-page":"5783","DOI":"10.1109\/TIP.2020.2984904","volume":"29","author":"H Yang","year":"2020","unstructured":"Yang H, Yuan C, Zhang L, Sun Y, Hu W, Maybank SJ (2020) Sta-cnn: Convolutional spatial-temporal attention learning for action recognition. IEEE Trans Image Process 29:5783\u20135793","journal-title":"IEEE Trans Image Process"},{"key":"11368_CR507","doi-asserted-by":"crossref","unstructured":"Yang Y, Fu H, Aviles-Rivero AI, Sch\u00f6nlieb C-B, Zhu L (2023) Diffmic: Dual-guidance diffusion network for medical image classification. In: International Conference on Medical Image Computing and Computer-Assisted Intervention, pp. 95\u2013105. Springer","DOI":"10.1007\/978-3-031-43987-2_10"},{"key":"11368_CR508","doi-asserted-by":"crossref","unstructured":"Yang T, Ren P, Xie X, Zhang L (2021) Gan prior embedded network for blind face restoration in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
672\u2013681","DOI":"10.1109\/CVPR46437.2021.00073"},{"key":"11368_CR509","doi-asserted-by":"crossref","unstructured":"Ye Q, Xu G, Yan M, Xu H, Qian Q, Zhang J, Huang F (2023) Hitea: Hierarchical temporal-aware video-language pre-training. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15405\u201315416","DOI":"10.1109\/ICCV51070.2023.01413"},{"key":"11368_CR510","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.121007","volume":"234","author":"W Ye","year":"2023","unstructured":"Ye W, Zhang W, Lei W, Zhang W, Chen X, Wang Y (2023) Remote sensing image instance segmentation network with transformer and multi-scale feature representation. Expert Syst Appl 234:121007","journal-title":"Expert Syst Appl"},{"key":"11368_CR511","doi-asserted-by":"publisher","first-page":"655","DOI":"10.1007\/s10278-018-0056-0","volume":"31","author":"X Yi","year":"2018","unstructured":"Yi X, Babyn P (2018) Sharpness-aware low-dose ct denoising using conditional generative adversarial network. J Digit Imaging 31:655\u2013669","journal-title":"J Digit Imaging"},{"issue":"2","key":"11368_CR512","doi-asserted-by":"publisher","first-page":"1600068","DOI":"10.1002\/srin.201600068","volume":"88","author":"L Yi","year":"2017","unstructured":"Yi L, Li G, Jiang M (2017) An end-to-end steel strip surface defects recognition system based on convolutional neural networks. Steel Res Int 88(2):1600068","journal-title":"Steel Res Int"},{"key":"11368_CR513","doi-asserted-by":"crossref","unstructured":"Yu F, Huang K, Wang M, Cheng Y, Chu W, Cui L (2022) Width & depth pruning for vision transformers. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36, pp. 3143\u20133151","DOI":"10.1609\/aaai.v36i3.20222"},{"key":"11368_CR514","doi-asserted-by":"crossref","unstructured":"Yu X, Tang L, Rao Y, Huang T, Zhou J, Lu J (2022) Point-bert: Pre-training 3d point cloud transformers with masked point modeling. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19313\u201319322","DOI":"10.1109\/CVPR52688.2022.01871"},{"key":"11368_CR515","doi-asserted-by":"crossref","unstructured":"Yu C, Wang J, Peng C, Gao C, Yu G, Sang N (2018) Bisenet: Bilateral segmentation network for real-time semantic segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 325\u2013341","DOI":"10.1007\/978-3-030-01261-8_20"},{"key":"11368_CR516","first-page":"76749","volume":"36","author":"S Yu","year":"2023","unstructured":"Yu S, Cho J, Yadav P, Bansal M (2023) Self-chained image-language model for video localization and question answering. Adv Neural Inf Process Syst 36:76749\u201376771","journal-title":"Adv Neural Inf Process Syst"},{"key":"11368_CR517","first-page":"5021714","volume":"73","author":"Y Yu","year":"2024","unstructured":"Yu Y, Yuan J, Liao L, Li X, Zhong X, Wu J (2024) Ensemble cross unet transformers for augmentation of atomic electron tomography. IEEE Trans Instrum Meas 73:5021714","journal-title":"IEEE Trans Instrum Meas"},{"key":"11368_CR518","doi-asserted-by":"crossref","unstructured":"Yuan Y, Liu S, Zhang J, Zhang Y, Dong C, Lin L (2018) Unsupervised image super-resolution using cycle-in-cycle generative adversarial networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 701\u2013710","DOI":"10.1109\/CVPRW.2018.00113"},{"key":"11368_CR519","doi-asserted-by":"crossref","unstructured":"Yuan L, Chen Y, Wang T, Yu W, Shi Y, Jiang Z-H, Tay FE, Feng J, Yan S (2021) Tokens-to-token vit: Training vision transformers from scratch on imagenet. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 558\u2013567","DOI":"10.1109\/ICCV48922.2021.00060"},{"key":"11368_CR520","unstructured":"Yue Z, Yong H, Zhao Q, Meng D, Zhang L (2019) Variational denoising network: Toward blind noise modeling and removal. 
In: Advances in neural information processing systems 32"},{"key":"11368_CR521","doi-asserted-by":"crossref","unstructured":"Yu Z, Li A, Au OC, Xu C (2012) Bag of textons for image segmentation via soft clustering and convex shift. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 781\u2013788. IEEE","DOI":"10.1109\/CVPR.2012.6247749"},{"key":"11368_CR522","doi-asserted-by":"crossref","unstructured":"Yu D, Wang H, Chen P, Wei Z (2014) Mixed pooling for convolutional neural networks. In: Rough Sets and Knowledge Technology: 9th International Conference, RSKT 2014, Shanghai, China, October 24\u201326, 2014, Proceedings 9, pp. 364\u2013375 . Springer","DOI":"10.1007\/978-3-319-11740-9_34"},{"key":"11368_CR523","doi-asserted-by":"crossref","unstructured":"Zagoruyko S, Komodakis N (2016) Wide residual networks. arXiv preprint arXiv:1605.07146","DOI":"10.5244\/C.30.87"},{"key":"11368_CR524","unstructured":"Zagoruyko S, Komodakis N (2017) Diracnets: Training very deep neural networks without skip-connections. arXiv preprint arXiv:1706.00388"},{"key":"11368_CR525","doi-asserted-by":"crossref","unstructured":"Zamir SW, Arora A, Khan S, Hayat M, Khan FS, Yang M-H (2022) Restormer: Efficient transformer for high-resolution image restoration. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5728\u20135739","DOI":"10.1109\/CVPR52688.2022.00564"},{"key":"11368_CR526","doi-asserted-by":"crossref","unstructured":"Zamir SW, Arora A, Khan S, Hayat M, Khan FS, Yang M-H, Shao L (2021) Multi-stage progressive image restoration. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14821\u201314831","DOI":"10.1109\/CVPR46437.2021.01458"},{"key":"11368_CR527","doi-asserted-by":"crossref","unstructured":"Zeiler M (2014) Visualizing and understanding convolutional networks. In: European Conference on Computer vision\/arXiv, vol. 
1311","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"11368_CR528","doi-asserted-by":"crossref","unstructured":"Zeiler MD, Krishnan D, Taylor GW, Fergus R (2010) Deconvolutional networks In: 2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, pp. 2528\u20132535. IEEE","DOI":"10.1109\/CVPR.2010.5539957"},{"key":"11368_CR529","doi-asserted-by":"crossref","unstructured":"Zeng Y, Wei G, Zheng J, Zou J, Wei Y, Zhang Y, Li H (2024) Make pixels dance: High-dynamic video generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8850\u20138860","DOI":"10.1109\/CVPR52733.2024.00845"},{"issue":"10","key":"11368_CR530","doi-asserted-by":"publisher","first-page":"9419","DOI":"10.1016\/j.jksuci.2022.09.019","volume":"34","author":"J Zeng","year":"2022","unstructured":"Zeng J, Ouyang H, Liu M, Leng L, Fu X (2022) Multi-scale yolact for instance segmentation. J King Saud Univ Comput Inf Sci 34(10):9419\u20139427","journal-title":"J King Saud Univ Comput Inf Sci"},{"key":"11368_CR531","doi-asserted-by":"crossref","unstructured":"Zeyde R, Elad M, Protter M (2012) On single image scale-up using sparse-representations. In: Curves and Surfaces: 7th International Conference, Avignon, France, June 24\u201330, 2010, Revised Selected Papers 7, pp. 711\u2013730. Springer","DOI":"10.1007\/978-3-642-27413-8_47"},{"key":"11368_CR532","unstructured":"Zhang R (2019) Making convolutional networks shift-invariant again. In: International Conference on Machine Learning, pp. 7324\u20137334. PMLR"},{"key":"11368_CR533","doi-asserted-by":"crossref","unstructured":"Zhang H, Dana K, Shi J, Zhang Z, Wang X, Tyagi A, Agrawal A (2018a) Context encoding for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 
7151\u20137160","DOI":"10.1109\/CVPR.2018.00747"},{"key":"11368_CR534","doi-asserted-by":"crossref","unstructured":"Zhang P, Li X, Hu X, Yang J, Zhang L, Wang L, Choi Y, Gao J (2021) Vinvl: Revisiting visual representations in vision-language models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5579\u20135588","DOI":"10.1109\/CVPR46437.2021.00553"},{"key":"11368_CR535","doi-asserted-by":"crossref","unstructured":"Zhang Y, Sun S, Galley M, Chen Y-C, Brockett C, Gao X, Gao J, Liu J, Dolan B (2019) Dialogpt: Large-scale generative pre-training for conversational response generation. arXiv preprint arXiv:1911.00536","DOI":"10.18653\/v1\/2020.acl-demos.30"},{"key":"11368_CR536","doi-asserted-by":"crossref","unstructured":"Zhang Y, Tian Y, Kong Y, Zhong B, Fu Y (2018) Residual dense network for image super-resolution. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2472\u20132481","DOI":"10.1109\/CVPR.2018.00262"},{"key":"11368_CR537","doi-asserted-by":"crossref","unstructured":"Zhang Z, Zhang X, Peng C, Xue X, Sun J (2018b) Exfuse: Enhancing feature fusion for semantic segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 269\u2013284 (2018)","DOI":"10.1007\/978-3-030-01249-6_17"},{"key":"11368_CR538","doi-asserted-by":"crossref","unstructured":"Zhang X, Zhou X, Lin M, Sun J (2018) Shufflenet: An extremely efficient convolutional neural network for mobile devices. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6848\u20136856 (2018)","DOI":"10.1109\/CVPR.2018.00716"},{"key":"11368_CR539","unstructured":"Zhang Y (2023) Lung segmentation with nasnet-large-decoder net. 
arXiv preprint arXiv:2303.10315"},{"issue":"10","key":"11368_CR540","doi-asserted-by":"publisher","first-page":"2761","DOI":"10.1007\/s11263-021-01501-8","volume":"129","author":"H Zhang","year":"2021","unstructured":"Zhang H, Ma J (2021) Sdnet: A versatile squeeze-and-decomposition network for real-time image fusion. Int J Comput Vis 129(10):2761\u20132785","journal-title":"Int J Comput Vis"},{"key":"11368_CR541","doi-asserted-by":"publisher","first-page":"14340","DOI":"10.1109\/ACCESS.2023.3243829","volume":"11","author":"D Zhang","year":"2023","unstructured":"Zhang D, Zhou F (2023) Self-supervised image denoising for real-world images with context-aware transformer. IEEE Access 11:14340\u201314349","journal-title":"IEEE Access"},{"issue":"1","key":"11368_CR542","doi-asserted-by":"publisher","first-page":"35","DOI":"10.1016\/S0169-2070(97)00044-7","volume":"14","author":"G Zhang","year":"1998","unstructured":"Zhang G, Patuwo BE, Hu MY (1998) Forecasting with artificial neural networks: The state of the art. Int J Forecast 14(1):35\u201362","journal-title":"Int J Forecast"},{"key":"11368_CR543","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1007\/s13042-010-0001-0","volume":"1","author":"Y Zhang","year":"2010","unstructured":"Zhang Y, Jin R, Zhou Z-H (2010) Understanding bag-of-words model: a statistical framework. Int J Mach Learn Cybern 1:43\u201352","journal-title":"Int J Mach Learn Cybern"},{"issue":"2","key":"11368_CR544","doi-asserted-by":"publisher","first-page":"023016","DOI":"10.1117\/1.3600632","volume":"20","author":"L Zhang","year":"2011","unstructured":"Zhang L, Wu X, Buades A, Li X (2011) Color demosaicking by local directional interpolation and nonlocal adaptive thresholding. 
J Electron Imaging 20(2):023016\u201302301616","journal-title":"J Electron Imaging"},{"issue":"7","key":"11368_CR545","doi-asserted-by":"publisher","first-page":"3142","DOI":"10.1109\/TIP.2017.2662206","volume":"26","author":"K Zhang","year":"2017","unstructured":"Zhang K, Zuo W, Chen Y, Meng D, Zhang L (2017) Beyond a gaussian denoiser: Residual learning of deep cnn for image denoising. IEEE Trans Image Process 26(7):3142\u20133155","journal-title":"IEEE Trans Image Process"},{"issue":"7","key":"11368_CR546","doi-asserted-by":"publisher","first-page":"3142","DOI":"10.1109\/TIP.2017.2662206","volume":"26","author":"K Zhang","year":"2017","unstructured":"Zhang K, Zuo W, Chen Y, Meng D, Zhang L (2017) Beyond a gaussian denoiser: Residual learning of deep cnn for image denoising. IEEE Trans Image Process 26(7):3142\u20133155","journal-title":"IEEE Trans Image Process"},{"issue":"7","key":"11368_CR547","doi-asserted-by":"publisher","first-page":"3142","DOI":"10.1109\/TIP.2017.2662206","volume":"26","author":"K Zhang","year":"2017","unstructured":"Zhang K, Zuo W, Chen Y, Meng D, Zhang L (2017) Beyond a gaussian denoiser: Residual learning of deep cnn for image denoising. IEEE Trans Image Process 26(7):3142\u20133155","journal-title":"IEEE Trans Image Process"},{"issue":"7","key":"11368_CR548","doi-asserted-by":"publisher","first-page":"3142","DOI":"10.1109\/TIP.2017.2662206","volume":"26","author":"K Zhang","year":"2017","unstructured":"Zhang K, Zuo W, Chen Y, Meng D, Zhang L (2017) Beyond a gaussian denoiser: Residual learning of deep cnn for image denoising. IEEE Trans Image Process 26(7):3142\u20133155","journal-title":"IEEE Trans Image Process"},{"issue":"7","key":"11368_CR549","doi-asserted-by":"publisher","first-page":"3142","DOI":"10.1109\/TIP.2017.2662206","volume":"26","author":"K Zhang","year":"2017","unstructured":"Zhang K, Zuo W, Chen Y, Meng D, Zhang L (2017) Beyond a gaussian denoiser: Residual learning of deep cnn for image denoising. 
IEEE Trans Image Process 26(7):3142\u20133155","journal-title":"IEEE Trans Image Process"},{"issue":"7","key":"11368_CR550","doi-asserted-by":"publisher","first-page":"3142","DOI":"10.1109\/TIP.2017.2662206","volume":"26","author":"K Zhang","year":"2017","unstructured":"Zhang K, Zuo W, Chen Y, Meng D, Zhang L (2017) Beyond a gaussian denoiser: Residual learning of deep cnn for image denoising. IEEE Trans Image Process 26(7):3142\u20133155","journal-title":"IEEE Trans Image Process"},{"issue":"9","key":"11368_CR551","doi-asserted-by":"publisher","first-page":"4608","DOI":"10.1109\/TIP.2018.2839891","volume":"27","author":"K Zhang","year":"2018","unstructured":"Zhang K, Zuo W, Zhang L (2018) Ffdnet: Toward a fast and flexible solution for cnn-based image denoising. IEEE Trans Image Process 27(9):4608\u20134622","journal-title":"IEEE Trans Image Process"},{"issue":"9","key":"11368_CR552","doi-asserted-by":"publisher","first-page":"4608","DOI":"10.1109\/TIP.2018.2839891","volume":"27","author":"K Zhang","year":"2018","unstructured":"Zhang K, Zuo W, Zhang L (2018) Ffdnet: Toward a fast and flexible solution for cnn-based image denoising. IEEE Trans Image Process 27(9):4608\u20134622","journal-title":"IEEE Trans Image Process"},{"key":"11368_CR553","doi-asserted-by":"publisher","first-page":"79936","DOI":"10.1109\/ACCESS.2021.3084951","volume":"9","author":"J Zhang","year":"2021","unstructured":"Zhang J, Zhu Y, Li W, Fu W, Cao L (2021) Drnet: A deep neural network with multi-layer residual blocks improves image denoising. IEEE Access 9:79936\u201379946","journal-title":"IEEE Access"},{"key":"11368_CR554","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107571","volume":"109","author":"J Zhang","year":"2021","unstructured":"Zhang J, Su H, Zou W, Gong X, Zhang Z, Shen F (2021) Cadn: A weakly supervised learning-based category-aware object detection network for surface defect detection. 
Pattern Recogn 109:107571","journal-title":"Pattern Recogn"},{"issue":"6","key":"11368_CR555","doi-asserted-by":"publisher","first-page":"822","DOI":"10.1007\/s11633-023-1466-0","volume":"20","author":"K Zhang","year":"2023","unstructured":"Zhang K, Li Y, Liang J, Cao J, Zhang Y, Tang H, Fan D-P, Timofte R, Gool LV (2023) Practical blind image denoising via swin-conv-unet and data synthesis. Mach Intell Res 20(6):822\u2013836","journal-title":"Mach Intell Res"},{"issue":"4","key":"11368_CR556","doi-asserted-by":"publisher","first-page":"13517","DOI":"10.1111\/exsy.13517","volume":"41","author":"X Zhang","year":"2024","unstructured":"Zhang X, Fu X, Qi G, Zhang N (2024) A multi-scale feature fusion convolutional neural network for facial expression recognition. Expert Syst 41(4):13517","journal-title":"Expert Syst"},{"key":"11368_CR557","unstructured":"Zhang Z, Bu J, Ester M, Zhang J, Yao C, Yu Z, Wang C (2019) Hierarchical graph pooling with structure learning. arXiv preprint arXiv:1911.05954"},{"key":"11368_CR558","doi-asserted-by":"crossref","unstructured":"Zhang K, Gool LV, Timofte R (2020) Deep unfolding network for image super-resolution In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3217\u20133226","DOI":"10.1109\/CVPR42600.2020.00328"},{"key":"11368_CR559","doi-asserted-by":"crossref","unstructured":"Zhang Y, Li D, Law KL, Wang X, Qin H, Li H (2022) Idr: Self-supervised image denoising via iterative data refinement. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2098\u20132107","DOI":"10.1109\/CVPR52688.2022.00214"},{"key":"11368_CR560","doi-asserted-by":"crossref","unstructured":"Zhang Y, Li K, Li K, Wang L, Zhong B, Fu Y (2018) Image super-resolution using very deep residual channel attention networks. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 
286\u2013301","DOI":"10.1007\/978-3-030-01234-2_18"},{"key":"11368_CR561","doi-asserted-by":"crossref","unstructured":"Zhang Y, Tian Y, Kong Y, Zhong B, Fu Y (2018) Residual dense network for image super-resolution. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2472\u20132481","DOI":"10.1109\/CVPR.2018.00262"},{"key":"11368_CR562","doi-asserted-by":"crossref","unstructured":"Zhang L, Xiang T, Gong S (2017) Learning a deep embedding model for zero-shot learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2021\u20132030","DOI":"10.1109\/CVPR.2017.321"},{"key":"11368_CR563","unstructured":"Zhang H, Zhang L, Qi X, Li H, Torr PH, Koniusz P (2020) Few-shot action recognition via improved attention with self-supervision. arXiv preprint arXiv:2001.03905"},{"key":"11368_CR564","doi-asserted-by":"crossref","unstructured":"Zhang K, Zuo W, Gu S, Zhang L (2017) Learning deep cnn denoiser prior for image restoration. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3929\u20133938","DOI":"10.1109\/CVPR.2017.300"},{"key":"11368_CR565","doi-asserted-by":"crossref","unstructured":"Zhao H, Jiang L, Jia J, Torr PH, Koltun V (2021) Point transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 16259\u201316268","DOI":"10.1109\/ICCV48922.2021.01595"},{"key":"11368_CR566","doi-asserted-by":"crossref","unstructured":"Zhao H, Shao W, Bao B, Li H (2019) A simple and robust deep convolutional approach to blind image denoising. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops","DOI":"10.1109\/ICCVW.2019.00488"},{"key":"11368_CR567","doi-asserted-by":"crossref","unstructured":"Zhao H, Shi J, Qi X, Wang X, Jia J (2017) Pyramid scene parsing network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 
2881\u20132890","DOI":"10.1109\/CVPR.2017.660"},{"key":"11368_CR568","doi-asserted-by":"crossref","unstructured":"Zhao H, Zhang Y, Liu S, Shi J, Loy CC, Lin D, Jia J (2018) Psanet: Point-wise spatial attention network for scene parsing. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 267\u2013283","DOI":"10.1007\/978-3-030-01240-3_17"},{"issue":"17","key":"11368_CR569","doi-asserted-by":"publisher","first-page":"24265","DOI":"10.1007\/s11042-022-12670-0","volume":"81","author":"C Zhao","year":"2022","unstructured":"Zhao C, Shuai R, Ma L, Liu W, Wu M (2022) Improving cervical cancer classification with imbalanced datasets combining taming transformers with t2t-vit. Multimed Tools Appl 81(17):24265\u201324300","journal-title":"Multimed Tools Appl"},{"key":"11368_CR570","doi-asserted-by":"crossref","unstructured":"Zhao Y, Jiang Z, Men A, Ju G (2019) Pyramid real image denoising network. In: 2019 IEEE Visual Communications and Image Processing (VCIP), pp. 1\u20134 . IEEE","DOI":"10.1109\/VCIP47243.2019.8966075"},{"key":"11368_CR571","doi-asserted-by":"crossref","unstructured":"Zhao H, Shi J, Qi X, Wang X, Jia J (2017) Pyramid scene parsing network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2881\u20132890","DOI":"10.1109\/CVPR.2017.660"},{"key":"11368_CR572","doi-asserted-by":"crossref","unstructured":"Zhong X, Tu S, Ma X, Jiang K, Huang W, Wang Z (2022) Rainy wcity: A real rainfall dataset with diverse conditions for semantic driving scene understanding. In: IJCAI, pp. 1743\u20131749","DOI":"10.24963\/ijcai.2022\/243"},{"key":"11368_CR573","doi-asserted-by":"crossref","unstructured":"Zhou B, Zhao H, Puig X, Fidler S, Barriuso A, Torralba A (2017) Scene parsing through ade20k dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 
633\u2013641","DOI":"10.1109\/CVPR.2017.544"},{"key":"11368_CR574","doi-asserted-by":"crossref","unstructured":"Zhou X, Zhuo J, Krahenbuhl P (2019) Bottom-up object detection by grouping extreme and center points. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 850\u2013859","DOI":"10.1109\/CVPR.2019.00094"},{"key":"11368_CR575","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1007\/s11263-018-1140-0","volume":"127","author":"B Zhou","year":"2019","unstructured":"Zhou B, Zhao H, Puig X, Xiao T, Fidler S, Barriuso A, Torralba A (2019) Semantic understanding of scenes through the ade20k dataset. Int J Comput Vis 127:302\u2013321","journal-title":"Int J Comput Vis"},{"key":"11368_CR576","unstructured":"Zhou X, Wang D, Kr\u00e4henb\u00fchl P (2019) Objects as points. arXiv preprint arXiv:1904.07850"},{"key":"11368_CR577","doi-asserted-by":"crossref","unstructured":"Zhu J-Y, Park T, Isola P, Efros AA (2017) Unpaired image-to-image translation using cycle-consistent adversarial networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2223\u20132232","DOI":"10.1109\/ICCV.2017.244"},{"key":"11368_CR578","doi-asserted-by":"publisher","DOI":"10.1016\/j.aei.2023.102061","volume":"57","author":"W Zhu","year":"2023","unstructured":"Zhu W, Zhang H, Zhang C, Zhu X, Guan Z, Jia J (2023) Surface defect detection and classification of steel using an efficient swin transformer. Adv Eng Inform 57:102061","journal-title":"Adv Eng Inform"},{"key":"11368_CR579","doi-asserted-by":"crossref","unstructured":"Zoran D, Weiss Y (2011) From learning models of natural image patches to whole image restoration. In: 2011 International Conference on Computer Vision, pp. 479\u2013486. IEEE","DOI":"10.1109\/ICCV.2011.6126278"},{"key":"11368_CR580","doi-asserted-by":"crossref","unstructured":"Zou Y, Yan C, Fu Y (2023) Iterative denoiser and noise estimator for self-supervised image denoising. 
In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13265\u201313274","DOI":"10.1109\/ICCV51070.2023.01220"}],"container-title":["Artificial Intelligence Review"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-025-11368-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10462-025-11368-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10462-025-11368-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T02:03:12Z","timestamp":1764986592000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10462-025-11368-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,17]]},"references-count":577,"journal-issue":{"issue":"12","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["11368"],"URL":"https:\/\/doi.org\/10.1007\/s10462-025-11368-7","relation":{},"ISSN":["1573-7462"],"issn-type":[{"value":"1573-7462","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,17]]},"assertion":[{"value":"15 August 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 October 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"381"}}