{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T16:35:07Z","timestamp":1779381307730,"version":"3.53.1"},"reference-count":144,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2022,11,18]],"date-time":"2022-11-18T00:00:00Z","timestamp":1668729600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,11,18]],"date-time":"2022-11-18T00:00:00Z","timestamp":1668729600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Process Lett"],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1007\/s11063-022-11072-5","type":"journal-article","created":{"date-parts":[[2022,11,18]],"date-time":"2022-11-18T12:02:56Z","timestamp":1668772976000},"page":"2471-2520","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["Scene Level Image Classification: A Literature Review"],"prefix":"10.1007","volume":"55","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7478-6814","authenticated-orcid":false,"given":"Sagar","family":"Chavda","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mahesh","family":"Goyani","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2022,11,18]]},"reference":[{"issue":"3","key":"11072_CR1","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1109\/TGRS.2004.839547","volume":"43","author":"S Aksoy","year":"2005","unstructured":"Aksoy S, Koperski K, Tusk C, Marchisio G, Tilton JC (2005) Learning Bayesian classifiers for scene classification with a visual grammar. IEEE Trans Geosci Remote Sens 43(3):581\u2013589. https:\/\/doi.org\/10.1109\/TGRS.2004.839547","journal-title":"IEEE Trans Geosci Remote Sens"},{"issue":"5","key":"11072_CR2","doi-asserted-by":"publisher","first-page":"1986","DOI":"10.1080\/01431161.2019.1681602","volume":"41","author":"K Amiri","year":"2020","unstructured":"Amiri K, Farah M, Leloglu UM (2020) BoVSG: bag of visual SubGraphs for remote sensing scene classification. Int J Remote Sens 41(5):1986\u20132003. https:\/\/doi.org\/10.1080\/01431161.2019.1681602","journal-title":"Int J Remote Sens"},{"key":"11072_CR3","unstructured":"Anil R, Gupta V, Koren T, Regan K, Singer Y (2020) Scalable second order optimization for deep learning. arxiv:2002.09018"},{"key":"11072_CR4","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1016\/j.neunet.2021.01.026","volume":"138","author":"A Apicella","year":"2021","unstructured":"Apicella A, Donnarumma F, Isgro F, Prevete R (2021) A survey on modern trainable activation functions. Neural Netw 138:14\u201332. https:\/\/doi.org\/10.1016\/j.neunet.2021.01.026","journal-title":"Neural Netw"},{"key":"11072_CR5","unstructured":"Bahdanau D, Cho K, Bengio Y (2015) Neural machine translation by jointly learning to align and translate. In: 3rd International conference on learning representations, pp 1\u201315. arxiv:1409.0473"},{"key":"11072_CR6","doi-asserted-by":"publisher","first-page":"112","DOI":"10.1016\/j.neucom.2019.10.008","volume":"378","author":"SS Basha","year":"2020","unstructured":"Basha SS, Dubey SR, Pulabaigari V, Mukherjee S (2020) Impact of fully connected layers on performance of convolutional neural networks for image classification. Neurocomputing 378:112\u2013119. https:\/\/doi.org\/10.1016\/j.neucom.2019.10.008","journal-title":"Neurocomputing"},{"issue":"9","key":"11072_CR7","doi-asserted-by":"publisher","first-page":"3974","DOI":"10.3390\/app11093974","volume":"11","author":"L Bashmal","year":"2021","unstructured":"Bashmal L, Bazi Y, Al Rahhal MM, Alhichri H, Al Ajlan N (2021) Uav image multi-labeling with data-efficient transformers. Appl Sci 11(9):3974. https:\/\/doi.org\/10.3390\/app11093974","journal-title":"Appl Sci"},{"key":"11072_CR8","doi-asserted-by":"publisher","unstructured":"Bashmal L, Bazi Y, Rahhal MA (2021b) Deep vision transformers for remote sensing scene classification. In: International geoscience and remote sensing symposium. IEEE, pp 2815\u20132818. https:\/\/doi.org\/10.1109\/IGARSS47720.2021.9553684","DOI":"10.1109\/IGARSS47720.2021.9553684"},{"key":"11072_CR9","doi-asserted-by":"publisher","unstructured":"Basu S, Ganguly S, Mukhopadhyay S, DiBiano R, Karki M, Nemani R (2015) Deepsat: a learning framework for satellite imagery. In: Advances in geographic information systems, vol 37. ACM, pp 1\u201310. https:\/\/doi.org\/10.1145\/2820783.2820816","DOI":"10.1145\/2820783.2820816"},{"issue":"3","key":"11072_CR10","doi-asserted-by":"publisher","first-page":"516:1","DOI":"10.3390\/rs13030516","volume":"13","author":"Y Bazi","year":"2021","unstructured":"Bazi Y, Bashmal L, Al\u00a0Rahhal MM, Dayil RA, Ajlan NA (2021) Vision transformers for remote sensing image classification. Remote Sens 13(3):516:1\u201320. https:\/\/doi.org\/10.3390\/rs13030516","journal-title":"Remote Sens"},{"key":"11072_CR11","unstructured":"Bharathi N (2018) Scene classification dataset. https:\/\/www.kaggle.com\/nitishabharathi\/scene-classification"},{"key":"11072_CR12","unstructured":"Bhilare A (2021) Complexity of CNN using MACC and flops. https:\/\/www.kaggle.com\/general\/240788"},{"key":"11072_CR13","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1016\/j.neucom.2019.11.068","volume":"377","author":"Q Bi","year":"2020","unstructured":"Bi Q, Qin K, Zhang H, Li Z, Xu K (2020) RADC-Net: a residual attention based convolution network for aerial scene classification. Neurocomputing 377:345\u2013359. https:\/\/doi.org\/10.1016\/j.neucom.2019.11.068","journal-title":"Neurocomputing"},{"issue":"6","key":"11072_CR14","first-page":"12","volume":"14","author":"T Blaschke","year":"2001","unstructured":"Blaschke T, Strobl J (2001) What\u2019s wrong with pixels? Some recent developments interfacing remote sensing and GIS. Z Geoinformationssyst 14(6):12\u201317","journal-title":"Z Geoinformationssyst"},{"issue":"4","key":"11072_CR15","doi-asserted-by":"publisher","first-page":"712","DOI":"10.1109\/TPAMI.2007.70716","volume":"30","author":"A Bosch","year":"2008","unstructured":"Bosch A, Zisserman A, Munoz X (2008) Scene classification using a hybrid generative\/discriminative approach. IEEE Trans Pattern Anal Mach Intell 30(4):712\u2013727. https:\/\/doi.org\/10.1109\/TPAMI.2007.70716","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"2","key":"11072_CR16","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1137\/16M1080173","volume":"60","author":"L Bottou","year":"2018","unstructured":"Bottou L, Curtis FE, Nocedal J (2018) Optimization methods for large-scale machine learning. SIAM Rev 60(2):223\u2013311. https:\/\/doi.org\/10.1137\/16M1080173","journal-title":"SIAM Rev"},{"key":"11072_CR17","doi-asserted-by":"publisher","DOI":"10.1109\/lgrs.2019.2911855","author":"Y Boualleg","year":"2019","unstructured":"Boualleg Y, Farah M, Farah IR (2019) Remote sensing scene classification using convolutional features and deep forest classifier. IEEE Geosci Remote Sens Lett. https:\/\/doi.org\/10.1109\/lgrs.2019.2911855","journal-title":"IEEE Geosci Remote Sens Lett"},{"key":"11072_CR18","doi-asserted-by":"publisher","first-page":"623","DOI":"10.1109\/CVPR.2004.1315222","volume":"2","author":"M Boutell","year":"2004","unstructured":"Boutell M, Luo J (2004) Bayesian fusion of camera metadata cues in semantic scene classification. IEEE Comput Vis Pattern Recogn 2:623\u2013630. https:\/\/doi.org\/10.1109\/CVPR.2004.1315222","journal-title":"IEEE Comput Vis Pattern Recogn"},{"issue":"9","key":"11072_CR19","doi-asserted-by":"publisher","first-page":"1757","DOI":"10.1016\/j.patcog.2004.03.009","volume":"37","author":"MR Boutell","year":"2004","unstructured":"Boutell MR, Luo J, Shen X, Brown CM (2004) Learning multi-label scene classification. Pattern Recogn 37(9):1757\u20131771. https:\/\/doi.org\/10.1016\/j.patcog.2004.03.009","journal-title":"Pattern Recogn"},{"issue":"8","key":"11072_CR20","doi-asserted-by":"publisher","first-page":"4775","DOI":"10.1109\/TGRS.2017.2700322","volume":"55","author":"S Chaib","year":"2017","unstructured":"Chaib S, Liu H, Gu Y, Yao H (2017) Deep feature fusion for VHR remote sensing scene classification. IEEE Trans Geosci Remote Sens 55(8):4775\u20134784. https:\/\/doi.org\/10.1109\/TGRS.2017.2700322","journal-title":"IEEE Trans Geosci Remote Sens"},{"issue":"5","key":"11072_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3465055","volume":"12","author":"S Chaudhari","year":"2021","unstructured":"Chaudhari S, Mithal V, Polatkan G, Ramanath R (2021) An attentive survey of attention models. ACM Trans Intell Syst Technol 12(5):1\u201332. https:\/\/doi.org\/10.1145\/3465055","journal-title":"ACM Trans Intell Syst Technol"},{"issue":"2","key":"11072_CR22","doi-asserted-by":"publisher","first-page":"1144","DOI":"10.1109\/TGRS.2017.2760909","volume":"56","author":"B Chaudhuri","year":"2018","unstructured":"Chaudhuri B, Demir B, Chaudhuri S, Bruzzone L (2018) Multilabel remote sensing image retrieval using a semisupervised graph-theoretic method. IEEE Trans Geosci Remote Sens 56(2):1144\u20131158. https:\/\/doi.org\/10.1109\/TGRS.2017.2760909","journal-title":"IEEE Trans Geosci Remote Sens"},{"issue":"4","key":"11072_CR23","doi-asserted-by":"publisher","first-page":"745","DOI":"10.1007\/s11760-015-0804-2","volume":"10","author":"C Chen","year":"2016","unstructured":"Chen C, Zhang B, Su H, Li W, Wang L (2016) Land-use scene classification using multi-scale completed local binary patterns. SIViP 10(4):745\u2013752. https:\/\/doi.org\/10.1007\/s11760-015-0804-2","journal-title":"SIViP"},{"key":"11072_CR24","unstructured":"Chen J, Huang H, Peng J, Zhu J, Chen L, Li W, Sun B, Li H (2020) Convolution neural network architecture learning for remote sensing scene classification. arxiv:2001.09614"},{"issue":"1","key":"11072_CR25","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1080\/01431161.2012.705443","volume":"34","author":"G Cheng","year":"2013","unstructured":"Cheng G, Guo L, Zhao T, Han J, Li H, Fang J (2013) Automatic landslide detection from remote-sensing imagery using a scene classification method based on boVW and pLSA. Int J Remote Sens 34(1):45\u201359. https:\/\/doi.org\/10.1080\/01431161.2012.705443","journal-title":"Int J Remote Sens"},{"issue":"10","key":"11072_CR26","doi-asserted-by":"publisher","first-page":"1865","DOI":"10.1109\/JPROC.2017.2675998","volume":"105","author":"G Cheng","year":"2017","unstructured":"Cheng G, Han J, Lu X (2017) Remote sensing image scene classification: benchmark and state of the art. Proc IEEE 105(10):1865\u20131883. https:\/\/doi.org\/10.1109\/JPROC.2017.2675998","journal-title":"Proc IEEE"},{"issue":"10","key":"11072_CR27","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1109\/LGRS.2017.2731997","volume":"14","author":"G Cheng","year":"2017","unstructured":"Cheng G, Li Z, Yao X, Guo L, Wei Z (2017) Remote sensing image scene classification using bag of convolutional features. IEEE Geosci Remote Sens Lett 14(10):1735\u20131739. https:\/\/doi.org\/10.1109\/LGRS.2017.2731997","journal-title":"IEEE Geosci Remote Sens Lett"},{"key":"11072_CR28","doi-asserted-by":"publisher","unstructured":"Chollet F (2017) Xception: deep learning with depthwise separable convolutions. In: Computer vision and pattern recognition. IEEE, pp 1800\u20131807. https:\/\/doi.org\/10.1109\/CVPR.2017.195, http:\/\/ieeexplore.ieee.org\/document\/8099678\/","DOI":"10.1109\/CVPR.2017.195"},{"issue":"1","key":"11072_CR29","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1109\/MSP.2017.2765202","volume":"35","author":"A Creswell","year":"2018","unstructured":"Creswell A, White T, Dumoulin V, Arulkumaran K, Sengupta B, Bharath AA (2018) Generative adversarial networks: an overview. IEEE Signal Process Mag 35(1):53\u201365. https:\/\/doi.org\/10.1109\/MSP.2017.2765202","journal-title":"IEEE Signal Process Mag"},{"key":"11072_CR30","unstructured":"Datta L (2020) A survey on activation functions and their relation with Xavier and He normal initialization. arxiv:2004.06632"},{"issue":"5","key":"11072_CR31","doi-asserted-by":"publisher","first-page":"732","DOI":"10.1109\/LGRS.2018.2880136","volume":"16","author":"MA Dede","year":"2019","unstructured":"Dede MA, Aptoula E, Genc Y (2019) Deep network ensembles for aerial scene classification. IEEE Geosci Remote Sens Lett 16(5):732\u2013735. https:\/\/doi.org\/10.1109\/LGRS.2018.2880136","journal-title":"IEEE Geosci Remote Sens Lett"},{"key":"11072_CR32","doi-asserted-by":"publisher","unstructured":"Derpanis K, Lecce M, Daniilidis K, Wildes R (2012) Dynamic scene understanding: the role of orientation features in space and time in scene classification. In: Computer vision and pattern recognition. IEEE, pp 1306\u20131313. https:\/\/doi.org\/10.1109\/CVPR.2012.6247815","DOI":"10.1109\/CVPR.2012.6247815"},{"issue":"2","key":"11072_CR33","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1007\/s13748-014-0060-7","volume":"3","author":"J Diez","year":"2015","unstructured":"Diez J, Luaces O, del Coz JJ, Bahamonde A (2015) Optimizing different loss functions in multilabel classifications. Progr Artif Intell 3(2):107\u2013118. https:\/\/doi.org\/10.1007\/s13748-014-0060-7","journal-title":"Progr Artif Intell"},{"key":"11072_CR34","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S, Uszkoreit J, Houlsby N (2020) An image is worth 16$$\\times $$16 words: transformers for image recognition at scale, pp 1\u201322. arxiv:2010.11929v2"},{"key":"11072_CR35","doi-asserted-by":"publisher","unstructured":"Frank E, Hall M (2001) A simple approach to ordinal classification. In: Lecture notes in computer science, vol 2167. Springer, pp 145\u2013156. https:\/\/doi.org\/10.1007\/3-540-44795-4_13","DOI":"10.1007\/3-540-44795-4_13"},{"key":"11072_CR36","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y (2014) Generative adversarial nets. In: Ghahramani Z, Welling M, Cortes C, Lawrence N, Weinberger K (eds) Advances in neural information processing systems, vol\u00a027. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper\/2014\/file\/5ca3e9b122f61f8f06494c97b1afccf3-Paper.pdf"},{"key":"11072_CR37","doi-asserted-by":"publisher","first-page":"6344","DOI":"10.1109\/ACCESS.2019.2963769","volume":"8","author":"D Guo","year":"2020","unstructured":"Guo D, Xia Y, Luo X (2020) Scene classification of remote sensing images based on saliency dual attention residual network. IEEE Access 8:6344\u20136357. https:\/\/doi.org\/10.1109\/ACCESS.2019.2963769","journal-title":"IEEE Access"},{"key":"11072_CR38","unstructured":"Guo MH, Xu TX, Liu JJ, Liu ZN, Jiang PT, Mu TJ, Zhang SH, Martin RR, Cheng MM, Hu SM (2021) Attention mechanisms in computer vision: a survey, pp 1\u201327. http:\/\/arxiv.org\/abs\/2111.07624"},{"key":"11072_CR39","doi-asserted-by":"crossref","unstructured":"Hafiz AM, Parah SA, Bhat RUA (2021) Attention mechanisms and deep learning for machine vision: a survey of the state of the art, pp 1\u201324. arxiv:2106.07550","DOI":"10.21203\/rs.3.rs-510910\/v1"},{"key":"11072_CR40","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Computer vision and pattern recognition, pp 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"issue":"7","key":"11072_CR41","doi-asserted-by":"publisher","first-page":"2217","DOI":"10.1109\/JSTARS.2019.2918242","volume":"12","author":"P Helber","year":"2019","unstructured":"Helber P, Bischke B, Dengel A, Borth D (2019) EuroSAT: a novel dataset and deep learning benchmark for land use and land cover classification. IEEE J Sel Top Appl Earth Observ Remote Sens 12(7):2217\u20132226. https:\/\/doi.org\/10.1109\/JSTARS.2019.2918242","journal-title":"IEEE J Sel Top Appl Earth Observ Remote Sens"},{"key":"11072_CR42","unstructured":"Hendrycks D, Gimpel K (2016) Gaussian error linear units (GELUs). arxiv:1606.08415"},{"issue":"5214","key":"11072_CR43","doi-asserted-by":"publisher","first-page":"1158","DOI":"10.1126\/science.7761831","volume":"268","author":"GE Hinton","year":"1995","unstructured":"Hinton GE, Dayan P, Frey BJ, Neal RM (1995) The wake sleep algorithm for unsupervised neural networks. Science 268(5214):1158\u20131161","journal-title":"Science"},{"key":"11072_CR44","unstructured":"Howard AG, Zhu M, Chen B, Kalenichenko D, Wang W, Weyand T, Andreetto M, Adam H (2017) MobileNets: efficient convolutional neural networks for mobile vision applications. arxiv:1704.04861"},{"issue":"8","key":"11072_CR45","doi-asserted-by":"publisher","first-page":"2011","DOI":"10.1109\/TPAMI.2019.2913372","volume":"42","author":"J Hu","year":"2020","unstructured":"Hu J, Shen L, Albanie S, Sun G, Wu E (2020) Squeeze-and-excitation networks. IEEE Trans Pattern Anal Mach Intell 42(8):2011\u20132023. https:\/\/doi.org\/10.1109\/TPAMI.2019.2913372","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11072_CR46","doi-asserted-by":"publisher","first-page":"821","DOI":"10.1109\/ICME.2002.1035908","volume":"1","author":"W Hua","year":"2002","unstructured":"Hua W, Han M, Gong Y (2002) Baseball scene classification using multimedia features. IEEE Multim Expo 1:821\u2013824. https:\/\/doi.org\/10.1109\/ICME.2002.1035908","journal-title":"IEEE Multim Expo"},{"key":"11072_CR47","doi-asserted-by":"publisher","first-page":"188","DOI":"10.1016\/j.isprsjprs.2019.01.015","volume":"149","author":"Y Hua","year":"2019","unstructured":"Hua Y, Mou L, Zhu XX (2019) Recurrently exploring class-wise attention in a hybrid convolutional and bidirectional LSTM network for multi-label aerial image classification. ISPRS J Photogramm Remote Sens 149:188\u2013199. https:\/\/doi.org\/10.1016\/j.isprsjprs.2019.01.015","journal-title":"ISPRS J Photogramm Remote Sens"},{"issue":"7","key":"11072_CR48","doi-asserted-by":"publisher","first-page":"4558","DOI":"10.1109\/TGRS.2019.2963364","volume":"58","author":"Y Hua","year":"2020","unstructured":"Hua Y, Mou L, Zhu XX (2020) Relation network for multilabel aerial image classification. IEEE Trans Geosci Remote Sens 58(7):4558\u20134572. https:\/\/doi.org\/10.1109\/TGRS.2019.2963364","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"11072_CR49","doi-asserted-by":"publisher","unstructured":"Huang G, Liu Z, Van Der Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. In: Computer vision and pattern recognition. IEEE, pp 2261\u20132269 https:\/\/doi.org\/10.1109\/CVPR.2017.243","DOI":"10.1109\/CVPR.2017.243"},{"key":"11072_CR50","doi-asserted-by":"publisher","first-page":"6951","DOI":"10.1109\/JSTARS.2021.3091134","volume":"14","author":"R Huang","year":"2021","unstructured":"Huang R, Zheng F, Huang W (2021) Multilabel remote sensing image annotation with multiscale attention and label correlation. IEEE J Sel Top Appl Earth Observ Remote Sens 14:6951\u20136961. https:\/\/doi.org\/10.1109\/JSTARS.2021.3091134","journal-title":"IEEE J Sel Top Appl Earth Observ Remote Sens"},{"issue":"1","key":"11072_CR51","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1214\/aoms\/1177703732","volume":"35","author":"PJ Huber","year":"1964","unstructured":"Huber PJ (1964) Robust estimation of a location parameter. Ann Math Stat 35(1):73\u2013101. https:\/\/doi.org\/10.1214\/aoms\/1177703732","journal-title":"Ann Math Stat"},{"key":"11072_CR52","unstructured":"Hui J (2017) Understanding dynamic routing between capsules (capsule networks). https:\/\/jhui.github.io\/2017\/11\/03\/Dynamic-Routing-Between-Capsules\/"},{"key":"11072_CR53","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: accelerating deep network training by reducing internal covariate shift. In: Machine learning. JMLR, pp 448\u2013456"},{"key":"11072_CR54","doi-asserted-by":"publisher","unstructured":"Jin P, Xia GS, Hu F, Lu Q, Zhang L (2018) AID++: an updated version of AID on scene classification. In: IEEE international geoscience and remote sensing symposium. IEEE, pp 4721\u20134724. https:\/\/doi.org\/10.1109\/IGARSS.2018.8518882, https:\/\/ieeexplore.ieee.org\/document\/8518882\/","DOI":"10.1109\/IGARSS.2018.8518882"},{"key":"11072_CR55","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1016\/j.neucom.2019.05.024","volume":"357","author":"N Khan","year":"2019","unstructured":"Khan N, Chaudhuri U, Banerjee B, Chaudhuri S (2019) Graph convolutional network for multi-label VHR remote sensing scene recognition. Neurocomputing 357:36\u201346. https:\/\/doi.org\/10.1016\/j.neucom.2019.05.024","journal-title":"Neurocomputing"},{"key":"11072_CR56","unstructured":"Khan S, Naseer M, Hayat M, Zamir SW, Khan FS, Shah M (2021) Transformers in vision: a survey. http:\/\/arxiv.org\/abs\/2101.01169"},{"key":"11072_CR57","first-page":"1097","volume":"25","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) ImageNet classification with deep convolutional neural networks. Adv Neural Inf Process Syst 25:1097\u20131105","journal-title":"Adv Neural Inf Process Syst"},{"key":"11072_CR58","doi-asserted-by":"publisher","unstructured":"Lazebnik S, Schmid C, Ponce J (2006) Beyond bags of features: spatial pyramid matching for recognizing natural scene categories. In: Computer vision and pattern recognition. IEEE, New York, pp 2169\u20132178. https:\/\/doi.org\/10.1109\/CVPR.2006.68","DOI":"10.1109\/CVPR.2006.68"},{"issue":"11","key":"11072_CR59","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y Lecun","year":"1998","unstructured":"Lecun Y, Bottou L, Bengio Y, Haffner P (1998) Gradient-based learning applied to document recognition. Proc IEEE 86(11):2278\u20132323. https:\/\/doi.org\/10.1109\/5.726791","journal-title":"Proc IEEE"},{"key":"11072_CR60","unstructured":"Li H, Tao C, Wu Z, Chen J, Gong J, Deng M (2017a) RSI-CB: a large scale remote sensing image classification benchmark via crowdsource data. arxiv:1705.10450"},{"key":"11072_CR61","doi-asserted-by":"publisher","unstructured":"Li LJ, Fei-Fei L (2007) What, where and who? Classifying events by scene and object recognition. In: Computer vision. IEEE, Rio de Janeiro, pp 1\u20138. https:\/\/doi.org\/10.1109\/ICCV.2007.4408872","DOI":"10.1109\/ICCV.2007.4408872"},{"key":"11072_CR62","doi-asserted-by":"publisher","unstructured":"Li Lj, Socher R, Fei-Fei L (2009) Towards total scene understanding: classification, annotation and segmentation in an automatic framework. In: Computer vision and pattern recognition. IEEE, pp 2036\u20132043. https:\/\/doi.org\/10.1109\/CVPR.2009.5206718","DOI":"10.1109\/CVPR.2009.5206718"},{"issue":"10","key":"11072_CR63","doi-asserted-by":"publisher","first-page":"2424:1","DOI":"10.3390\/rs14102424","volume":"14","author":"P Li","year":"2022","unstructured":"Li P, Chen P, Zhang D (2022) Cross-modal feature representation learning and label graph mining in a residual multi-attentional CNN-LSTM network for multi-label aerial scene classification. Remote Sens 14(10):2424:1\u201327. https:\/\/doi.org\/10.3390\/rs14102424","journal-title":"Remote Sens"},{"key":"11072_CR64","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1016\/j.isprsjprs.2021.07.007","volume":"179","author":"X Li","year":"2021","unstructured":"Li X, Du Z, Huang Y, Tan Z (2021) A deep translation (GAN) based change detection network for optical and SAR remote sensing images. ISPRS J Photogramm Remote Sens 179:14\u201334. https:\/\/doi.org\/10.1016\/j.isprsjprs.2021.07.007","journal-title":"ISPRS J Photogramm Remote Sens"},{"key":"11072_CR65","doi-asserted-by":"publisher","unstructured":"Li Y, Song Y, Luo J (2017) Improving pairwise ranking for multi-label image classification. In: Computer vision and pattern recognition, pp 1837\u20131845. https:\/\/doi.org\/10.1109\/CVPR.2017.199","DOI":"10.1109\/CVPR.2017.199"},{"issue":"23","key":"11072_CR66","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3390\/rs12234003","volume":"12","author":"Y Li","year":"2020","unstructured":"Li Y, Chen R, Zhang Y, Zhang M, Chen L (2020) Multi-label remote sensing image scene classification by combining a convolutional neural network and a graph neural network. Remote Sens 12(23):1\u201317. https:\/\/doi.org\/10.3390\/rs12234003","journal-title":"Remote Sens"},{"key":"11072_CR67","doi-asserted-by":"publisher","DOI":"10.1155\/2022\/1822539","author":"D Lin","year":"2022","unstructured":"Lin D, Chen Z (2022) Semantic understandings for aerial images via multigrained feature grouping. Sci Program. https:\/\/doi.org\/10.1155\/2022\/1822539","journal-title":"Sci Program"},{"issue":"5602112","key":"11072_CR68","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TGRS.2020.3041461","volume":"60","author":"D Lin","year":"2022","unstructured":"Lin D, Lin J, Zhao L, Wang ZJ, Chen Z (2022) Multilabel aerial image classification with a concept attention graph neural network. IEEE Trans Geosci Remote Sens 60(5602112):1\u201312. https:\/\/doi.org\/10.1109\/TGRS.2020.3041461","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"11072_CR69","unstructured":"Lin M, Chen Q, Yan S (2013) Network in network, pp 1\u201310. arxiv:1312.4400"},{"issue":"2","key":"11072_CR70","doi-asserted-by":"publisher","first-page":"318","DOI":"10.1109\/TPAMI.2018.2858826","volume":"42","author":"TY Lin","year":"2020","unstructured":"Lin TY, Goyal P, Girshick R, He K, Dollar P (2020) Focal loss for dense object detection. IEEE Trans Pattern Anal Mach Intell 42(2):318\u2013327. https:\/\/doi.org\/10.1109\/TPAMI.2018.2858826","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11072_CR71","doi-asserted-by":"publisher","unstructured":"Lipson P, Grimson E, Sinha P (1997) Configuration based scene classification and image indexing. In: Computer vision and pattern recognition. IEEE, pp 1007\u20131013. https:\/\/doi.org\/10.1109\/CVPR.1997.609453","DOI":"10.1109\/CVPR.1997.609453"},{"issue":"1","key":"11072_CR72","doi-asserted-by":"publisher","first-page":"188","DOI":"10.1016\/j.patcog.2012.06.001","volume":"46","author":"GH Liu","year":"2013","unstructured":"Liu GH, Yang JY (2013) Content-based image retrieval using color difference histogram. Pattern Recogn 46(1):188\u2013198. https:\/\/doi.org\/10.1016\/j.patcog.2012.06.001","journal-title":"Pattern Recogn"},{"issue":"5","key":"11072_CR73","doi-asserted-by":"publisher","first-page":"2494","DOI":"10.1109\/TGRS.2018.2873966","volume":"57","author":"Y Liu","year":"2019","unstructured":"Liu Y, Suen C, Liu Y, Ding L (2019) Scene classification using hierarchical Wasserstein CNN. IEEE Trans Geosci Remote Sens 57(5):2494\u20132509. https:\/\/doi.org\/10.1109\/TGRS.2018.2873966","journal-title":"IEEE Trans Geosci Remote Sens"},{"issue":"10","key":"11072_CR74","doi-asserted-by":"publisher","first-page":"7894","DOI":"10.1109\/TGRS.2019.2917161","volume":"57","author":"X Lu","year":"2019","unstructured":"Lu X, Sun H, Zheng X (2019) A feature aggregation convolutional neural network for remote sensing scene classification. IEEE Trans Geosci Remote Sens 57(10):7894\u20137906. https:\/\/doi.org\/10.1109\/TGRS.2019.2917161","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"11072_CR75","first-page":"745","volume":"2","author":"J Luo","year":"2001","unstructured":"Luo J, Savakis A (2001) Indoor vs outdoor classification of consumer photographs using low-level and semantic features. IEEE Image Process 2:745\u2013748","journal-title":"IEEE Image Process"},{"key":"11072_CR76","doi-asserted-by":"crossref","unstructured":"Ma N, Zhang X, Zheng HT, Sun J (2018) Shufflenet v2: practical guidelines for efficient CNN architecture design. In: The European conference on computer vision. Springer, Munich, pp 116\u2013131. https:\/\/openaccess.thecvf.com\/content_ECCV_2018\/html\/Ningning_Light-weight_CNN_Architecture_ECCV_2018_paper.html","DOI":"10.1007\/978-3-030-01264-9_8"},{"key":"11072_CR77","doi-asserted-by":"publisher","unstructured":"Maron O, Ratan AL (1998) Multiple-instance learning for natural scene classification. In: Machine learning. MKP, pp 341\u2013349. https:\/\/doi.org\/10.1016\/S0735-1097(86)80281-9","DOI":"10.1016\/S0735-1097(86)80281-9"},{"key":"11072_CR78","unstructured":"Martins AF, Astudillo RF (2016) From softmax to sparsemax: a sparse model of attention and multi-label classification. In: Machine learning, vol\u00a04. JMLR, pp 2432\u20132443"},{"issue":"4","key":"11072_CR79","doi-asserted-by":"publisher","first-page":"1404","DOI":"10.1109\/18.243457","volume":"39","author":"J Miller","year":"1993","unstructured":"Miller J, Goodman R, Smyth P (1993) On loss functions which minimize to conditional expected values and posterior probabilities. IEEE Trans Inform Theory 39(4):1404\u20131408. https:\/\/doi.org\/10.1109\/18.243457","journal-title":"IEEE Trans Inform Theory"},{"key":"11072_CR80","unstructured":"Ng A (2017) Deep learning specialization. DeepLearning.AI\/Coursera. https:\/\/www.deeplearning.ai\/program\/deep-learning-specialization\/. Accessed 10 Oct 2020"},{"key":"11072_CR81","doi-asserted-by":"publisher","unstructured":"Ng AY (2004) Feature selection, L 1 vs. L 2 regularization, and rotational invariance. In: Machine learning. ACM, Banff, pp 1\u20138. https:\/\/doi.org\/10.1145\/1015330.1015435, http:\/\/portal.acm.org\/citation.cfm?doid=1015330.1015435","DOI":"10.1145\/1015330.1015435"},{"issue":"3","key":"11072_CR82","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1023\/A:1011139631724","volume":"42","author":"A Oliva","year":"2001","unstructured":"Oliva A, Torralba A (2001) Modeling the shape of the scene: a holistic representation of the spatial envelope. Int J Comput Vis 42(3):145\u2013175. https:\/\/doi.org\/10.1023\/A:1011139631724","journal-title":"Int J Comput Vis"},{"key":"11072_CR83","unstructured":"Ozyildirim BM, Kiran M (2020) Do optimization methods in deep learning applications matter? arxiv:2002.12642"},{"issue":"1","key":"11072_CR84","doi-asserted-by":"publisher","first-page":"1295","DOI":"10.1016\/j.jksuci.2019.09.014","volume":"34","author":"KM Patrick","year":"2022","unstructured":"Patrick KM, Adekoya FA, Mighty AA, Edward BY (2022) Capsule networks\u2013a survey. J King Saud Univ Comput Inf Sci 34(1):1295\u20131310. https:\/\/doi.org\/10.1016\/j.jksuci.2019.09.014","journal-title":"J King Saud Univ Comput Inf Sci"},{"issue":"10","key":"11072_CR85","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1016\/j.patcog.2004.12.014","volume":"38","author":"A Payne","year":"2005","unstructured":"Payne A, Singh S (2005) Indoor vs. outdoor scene classification in digital photographs. Pattern Recogn 38(10):1533\u20131545. https:\/\/doi.org\/10.1016\/j.patcog.2004.12.014","journal-title":"Pattern Recogn"},{"key":"11072_CR86","doi-asserted-by":"publisher","unstructured":"Penatti OAB, Nogueira K, dos Santos JA (2015) Do deep features generalize from everyday objects to remote sensing and aerial scenes domains? In: Computer vision and pattern recognition workshops. IEEE, Boston, pp 44\u201351. https:\/\/doi.org\/10.1109\/CVPRW.2015.7301382","DOI":"10.1109\/CVPRW.2015.7301382"},{"key":"11072_CR87","unstructured":"Punjabi A, Schmid J, Katsaggelos AK (2020) Examining the benefits of capsule neural networks, pp 1\u201313. http:\/\/arxiv.org\/abs\/2001.10964"},{"key":"11072_CR88","doi-asserted-by":"publisher","first-page":"337","DOI":"10.1016\/j.isprsjprs.2020.09.020","volume":"169","author":"X Qi","year":"2020","unstructured":"Qi X, Zhu P, Wang Y, Zhang L, Peng J, Wu M, Chen J, Zhao X, Zang N, Mathiopoulos PT (2020) MLRSNet: a multi-label high spatial resolution remote sensing dataset for semantic scene understanding. ISPRS J Photogramm Remote Sens 169:337\u2013350. https:\/\/doi.org\/10.1016\/j.isprsjprs.2020.09.020","journal-title":"ISPRS J Photogramm Remote Sens"},{"key":"11072_CR89","doi-asserted-by":"publisher","unstructured":"Quattoni A, Torralba A (2009) Recognizing indoor scenes. In: Computer vision and pattern recognition. IEEE, Miami, pp 413\u2013420. https:\/\/doi.org\/10.1109\/CVPR.2009.5206537","DOI":"10.1109\/CVPR.2009.5206537"},{"issue":"5","key":"11072_CR90","doi-asserted-by":"publisher","first-page":"1063","DOI":"10.1162\/089976604773135104","volume":"16","author":"L Rosasco","year":"2004","unstructured":"Rosasco L, De Vito E, Caponnetto A, Piana M, Verri A (2004) Are loss functions all the same? Neural Comput 16(5):1063\u20131076. https:\/\/doi.org\/10.1162\/089976604773135104","journal-title":"Neural Comput"},{"key":"11072_CR91","unstructured":"Sabour S, Frosst N, Hinton GE (2017) Dynamic routing between capsules. In: The 31st international conference on neural information processing systems. NIPS\u201917. Curran Associates Inc., Red Hook, p 3859\u20133869"},{"key":"11072_CR92","doi-asserted-by":"publisher","unstructured":"Sandler M, Howard A, Zhu M, Zhmoginov A, Chen LC (2018) MobileNetV2: inverted residuals and linear bottlenecks. In: Computer vision and pattern recognition. IEEE, pp 4510\u20134520. https:\/\/doi.org\/10.1109\/CVPR.2018.00474, https:\/\/ieeexplore.ieee.org\/document\/8578572\/","DOI":"10.1109\/CVPR.2018.00474"},{"key":"11072_CR93","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1109\/ICPR.2002.1047420","volume":"16","author":"N Serrano","year":"2002","unstructured":"Serrano N, Savakis A, Luo J (2002) A computationally efficient approach to indoor\/outdoor scene classification. IEEE Pattern Recogn 16:146\u2013149. https:\/\/doi.org\/10.1109\/ICPR.2002.1047420","journal-title":"IEEE Pattern Recogn"},{"issue":"9","key":"11072_CR94","doi-asserted-by":"publisher","first-page":"1773","DOI":"10.1016\/j.patcog.2004.03.003","volume":"37","author":"N Serrano","year":"2004","unstructured":"Serrano N, Savakis AE, Luo J (2004) Improved scene classification using efficient low-level features and semantic cues. Pattern Recogn 37(9):1773\u20131784. https:\/\/doi.org\/10.1016\/j.patcog.2004.03.003","journal-title":"Pattern Recogn"},{"key":"11072_CR95","doi-asserted-by":"publisher","unstructured":"Shen X, Boutell M, Luo J, Brown C (2004) Multi-label machine learning and its application to semantic scene classification. In: Storage and retrieval methods and applications for multimedia. SPIE, pp 188\u2013199. https:\/\/doi.org\/10.1117\/12.523428","DOI":"10.1117\/12.523428"},{"issue":"8","key":"11072_CR96","doi-asserted-by":"publisher","first-page":"2395","DOI":"10.1080\/01431161.2011.608740","volume":"33","author":"G Sheng","year":"2011","unstructured":"Sheng G, Yang W, Xu T, Sun H (2011) High-resolution satellite scene classification using a sparse coding based multiple feature combination. Int J Remote Sens 33(8):2395\u20132412. https:\/\/doi.org\/10.1080\/01431161.2011.608740","journal-title":"Int J Remote Sens"},{"issue":"60","key":"11072_CR97","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-019-0197-0","volume":"6","author":"C Shorten","year":"2019","unstructured":"Shorten C, Khoshgoftaar TM (2019) A survey on Image data augmentation for deep learning. J Big Data 6(60):1\u201348. https:\/\/doi.org\/10.1186\/s40537-019-0197-0","journal-title":"J Big Data"},{"key":"11072_CR98","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition, pp 1\u201314. arxiv:1409.1556"},{"issue":"13","key":"11072_CR99","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1142\/S0218001420520138","volume":"34","author":"D Soydaner","year":"2020","unstructured":"Soydaner D (2020) A comparison of optimization algorithms for deep learning. Int J Pattern Recogn Artif Intell 34(13):1\u201326. https:\/\/doi.org\/10.1142\/S0218001420520138","journal-title":"Int J Pattern Recogn Artif Intell"},{"issue":"1","key":"11072_CR100","doi-asserted-by":"publisher","first-page":"1929","DOI":"10.5555\/2627435.2670313","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout: a simple way to prevent neural networks from overfitting. J Mach Learn Res 15(1):1929\u20131958. https:\/\/doi.org\/10.5555\/2627435.2670313","journal-title":"J Mach Learn Res"},{"key":"11072_CR101","doi-asserted-by":"publisher","first-page":"12377","DOI":"10.1007\/s11042-017-4894-4","volume":"77","author":"P Srivastava","year":"2018","unstructured":"Srivastava P, Khare A (2018) Utilizing multiscale local binary pattern for content-based image retrieval. Multim Tools Appl 77:12377\u201312403. https:\/\/doi.org\/10.1007\/s11042-017-4894-4","journal-title":"Multim Tools Appl"},{"issue":"7","key":"11072_CR102","doi-asserted-by":"publisher","first-page":"1031","DOI":"10.1109\/LGRS.2019.2893306","volume":"16","author":"R Stivaktakis","year":"2019","unstructured":"Stivaktakis R, Tsagkatakis G, Tsakalides P (2019) Deep learning for multilabel land cover scene categorization using data augmentation. IEEE Geosci Remote Sens Lett 16(7):1031\u20131035. https:\/\/doi.org\/10.1109\/LGRS.2019.2893306","journal-title":"IEEE Geosci Remote Sens Lett"},{"key":"11072_CR103","doi-asserted-by":"publisher","first-page":"95934","DOI":"10.1109\/ACCESS.2020.2995805","volume":"8","author":"G Sumbul","year":"2020","unstructured":"Sumbul G, Demir B (2020) A deep multi-attention driven approach for multi-label remote sensing image classification. IEEE Access 8:95934\u201395946. https:\/\/doi.org\/10.1109\/ACCESS.2020.2995805","journal-title":"IEEE Access"},{"key":"11072_CR104","doi-asserted-by":"publisher","unstructured":"Sumbul G, Charfuelan M, Demir B, Markl V (2019) Bigearthnet: a large-scale benchmark archive for remote sensing image understanding. In: International geoscience and remote sensing symposium. IEEE, Yokohama, pp 5901\u20135904. https:\/\/doi.org\/10.1109\/IGARSS.2019.8900532","DOI":"10.1109\/IGARSS.2019.8900532"},{"issue":"1","key":"11072_CR105","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1109\/TGRS.2019.2931801","volume":"58","author":"H Sun","year":"2020","unstructured":"Sun H, Li S, Zheng X, Lu X (2020) Remote sensing scene classification by gated bidirectional network. IEEE Trans Geosci Remote Sens 58(1):82\u201396. https:\/\/doi.org\/10.1109\/TGRS.2019.2931801","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"11072_CR106","unstructured":"Sun R (2019) Optimization for deep learning: theory and algorithms. arxiv:1912.08957"},{"issue":"2","key":"11072_CR107","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1007\/s40305-020-00309-6","volume":"8","author":"RY Sun","year":"2020","unstructured":"Sun RY (2020) Optimization for deep learning: an overview. J Oper Res Soc China 8(2):249\u2013294. https:\/\/doi.org\/10.1007\/s40305-020-00309-6","journal-title":"J Oper Res Soc China"},{"issue":"8","key":"11072_CR108","doi-asserted-by":"publisher","first-page":"3668","DOI":"10.1109\/TCYB.2019.2950779","volume":"50","author":"S Sun","year":"2020","unstructured":"Sun S, Cao Z, Zhu H, Zhao J (2020) A survey of optimization methods from a machine learning perspective. IEEE Trans Cybern 50(8):3668\u20133681. https:\/\/doi.org\/10.1109\/TCYB.2019.2950779","journal-title":"IEEE Trans Cybern"},{"key":"11072_CR109","doi-asserted-by":"publisher","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: Computer vision and pattern recognition. IEEE, pp 1\u20139. https:\/\/doi.org\/10.1109\/CVPR.2015.7298594","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"11072_CR110","doi-asserted-by":"crossref","unstructured":"Szegedy C, Ioffe S, Vanhoucke V, Alemi A (2016) Inception-v4, inception-ResNet and the impact of residual connections on learning","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"11072_CR111","doi-asserted-by":"publisher","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Computer vision and pattern recognition. IEEE, pp 2818\u20132826. https:\/\/doi.org\/10.1109\/CVPR.2016.308","DOI":"10.1109\/CVPR.2016.308"},{"key":"11072_CR112","unstructured":"Tan M, Le QV (2019) EfficientNet: rethinking model scaling for convolutional neural networks. In: Machine learning research. JMLR, pp 6105\u20136114. http:\/\/proceedings.mlr.press\/v97\/tan19a.html"},{"key":"11072_CR113","unstructured":"Touvron H, Cord M, Douze M, Massa F, Sablayrolles A, Jegou H (2021) Training data-efficient image transformers and distillation through attention. In: International conference on machine learning. PMLR, pp 10347\u201310357. arxiv:2012.12877"},{"issue":"2","key":"11072_CR114","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1007\/s10772-019-09663-0","volume":"23","author":"S Umamaheswaran","year":"2019","unstructured":"Umamaheswaran S, Lakshmanan R, Vinothkumar V, Arvind K, Nagarajan S (2019) New and robust composite micro structure descriptor (CMSD) for CBIR. Int J Speech Technol 23(2):243\u2013249. https:\/\/doi.org\/10.1007\/s10772-019-09663-0","journal-title":"Int J Speech Technol"},{"issue":"12","key":"11072_CR115","doi-asserted-by":"publisher","first-page":"1921","DOI":"10.1016\/S0031-3203(98)00079-X","volume":"31","author":"A Vailaya","year":"1998","unstructured":"Vailaya A, Jain A, Jiang Zhangs H (1998) On image classification: city images vs landscapes. Pattern Recogn 31(12):1921\u20131935. https:\/\/doi.org\/10.1016\/S0031-3203(98)00079-X","journal-title":"Pattern Recogn"},{"key":"11072_CR116","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez A, Kaiser L, Polosukhin I (2017) Attention is all you need Ashish. In: Advances in neural information processing systems, pp 5999\u20136009"},{"key":"11072_CR117","unstructured":"Vinyals O, Povey D (2012) Krylov subspace descent for deep learning. In: Artificial intelligence and statistics, La Palma, Canary Islands, vol 22, pp 1261\u20131268"},{"issue":"8","key":"11072_CR118","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1016\/j.compeleceng.2014.04.018","volume":"40","author":"SK Vipparthi","year":"2014","unstructured":"Vipparthi SK, Nagar SK (2014) Multi-joint histogram based modelling for image indexing and retrieval. Comput Electr Eng 40(8):163\u2013173. https:\/\/doi.org\/10.1016\/j.compeleceng.2014.04.018","journal-title":"Comput Electr Eng"},{"issue":"9","key":"11072_CR119","doi-asserted-by":"publisher","first-page":"947","DOI":"10.1109\/34.955109","volume":"23","author":"J Wang","year":"2001","unstructured":"Wang J, Li J, Wiederhold G (2001) Simplicity: semantics-sensitive integrated matching for picture libraries. IEEE Trans Pattern Anal Mach Intell 23(9):947\u2013963. https:\/\/doi.org\/10.1109\/34.955109","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"6","key":"11072_CR120","doi-asserted-by":"publisher","first-page":"598","DOI":"10.1080\/2150704X.2020.1746854","volume":"11","author":"Y Wei","year":"2020","unstructured":"Wei Y, Luo X, Lixin Hu YP, Feng J (2020) An improved unsupervised representation learning generative adversarial network for remote sensing image scene classification. Remote Sens Lett 11(6):598\u2013607. https:\/\/doi.org\/10.1080\/2150704X.2020.1746854","journal-title":"Remote Sens Lett"},{"key":"11072_CR121","doi-asserted-by":"publisher","first-page":"4788","DOI":"10.1109\/TIP.2021.3074804","volume":"30","author":"Y Wei","year":"2021","unstructured":"Wei Y, Zhang Z, Wang Y, Xu M, Yang Y, Yan S, Wang M (2021) Deraincyclegan: Rain attentive cyclegan for single image deraining and rainmaking. IEEE Trans Image Process 30:4788\u20134801. https:\/\/doi.org\/10.1109\/TIP.2021.3074804","journal-title":"IEEE Trans Image Process"},{"key":"11072_CR122","unstructured":"Weng L (2018) Attention? Attention! lilianwenggithubio\/lil-log.https:\/\/lilianweng.github.io\/lil-log\/2018\/06\/24\/attention-attention.html"},{"key":"11072_CR123","unstructured":"Weng L (2020) The transformer family. lilianwenggithubio\/lil-log. https:\/\/lilianweng.github.io\/lil-log\/2020\/03\/27\/the-transformer-family.html"},{"issue":"7","key":"11072_CR124","doi-asserted-by":"publisher","first-page":"3965","DOI":"10.1109\/TGRS.2017.2685945","volume":"55","author":"GS Xia","year":"2017","unstructured":"Xia GS, Hu J, Hu F, Shi B, Bai X, Zhong Y, Zhang L, Lu X (2017) AID: a benchmark data set for performance evaluation of aerial scene classification. IEEE Trans Geosci Remote Sens 55(7):3965\u20133981. https:\/\/doi.org\/10.1109\/TGRS.2017.2685945","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"11072_CR125","doi-asserted-by":"publisher","unstructured":"Xiao J, Hays J, Ehinger K, Oliva A, Torralba A (2010) Sun database: large-scale scene recognition from abbey to zoo. In: Computer vision and pattern recognition. IEEE, San Francisco, CA, pp 3485\u20133492. https:\/\/doi.org\/10.1109\/CVPR.2010.5539970","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"11072_CR126","doi-asserted-by":"publisher","unstructured":"Xie S, Girshick R, Dollar P, Tu Z, He K (2017) Aggregated residual transformations for deep neural networks. In: Computer vision and pattern recognition. IEEE, pp 5987\u20135995. https:\/\/doi.org\/10.1109\/CVPR.2017.634, http:\/\/ieeexplore.ieee.org\/document\/8100117\/","DOI":"10.1109\/CVPR.2017.634"},{"key":"11072_CR127","doi-asserted-by":"publisher","unstructured":"Yan R, Liu Y, Jin R, Hauptmann A (2003) On predicting rare classes with SVM ensembles in scene classification. In: Acoustics, speech, and signal processing. IEEE, pp 3\u201321. https:\/\/doi.org\/10.1109\/ICASSP.2003.1199097","DOI":"10.1109\/ICASSP.2003.1199097"},{"key":"11072_CR128","doi-asserted-by":"publisher","unstructured":"Yang J, Jiang YG, Hauptmann AG, Ngo CW (2007) Evaluating bag-of-visual-words representations in scene classification. In: Multimedia conference and exhibition. ACM, pp 197\u2013206. https:\/\/doi.org\/10.1145\/1290082.1290111","DOI":"10.1145\/1290082.1290111"},{"key":"11072_CR129","doi-asserted-by":"publisher","unstructured":"Yang Y, Newsam S (2010) Bag-of-visual-words and spatial extensions for land-use classification. In: GIS: Proceedings of the ACM international symposium on advances in geographic information systems, pp 270\u2013279. https:\/\/doi.org\/10.1145\/1869790.1869829","DOI":"10.1145\/1869790.1869829"},{"issue":"2","key":"11072_CR130","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1007\/s00365-006-0663-2","volume":"26","author":"Y Yao","year":"2007","unstructured":"Yao Y, Rosasco L, Caponnetto A (2007) On early stopping in gradient descent learning. Constr Approx 26(2):289\u2013315. https:\/\/doi.org\/10.1007\/s00365-006-0663-2","journal-title":"Constr Approx"},{"key":"11072_CR131","unstructured":"yzimm (2021) The amount of parameters (parameters) and the amount of calculation (flops) in the convolutional neural network CNN. https:\/\/chowdera.com\/2021\/04\/20210420120616773r.html"},{"key":"11072_CR132","doi-asserted-by":"publisher","unstructured":"Zeiler MD, Fergus R (2014) Visualizing and understanding convolutional networks. In: Lecture notes in computer science, vol 8689 LNCS. Springer, pp 818\u2013833. https:\/\/doi.org\/10.1007\/978-3-319-10590-1_53","DOI":"10.1007\/978-3-319-10590-1_53"},{"issue":"5","key":"11072_CR133","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3390\/rs11050494","volume":"11","author":"W Zhang","year":"2019","unstructured":"Zhang W, Tang P, Zhao L (2019) Remote sensing image scene classification using CNN-CapsNet. Remote Sens 11(5):1\u201322. https:\/\/doi.org\/10.3390\/rs11050494","journal-title":"Remote Sens"},{"key":"11072_CR134","doi-asserted-by":"crossref","unstructured":"Zhang X, Zhou X, Lin M, Sun J (2018) Shufflenet: an extremely efficient convolutional neural network for mobile devices. In: computer vision and pattern recognition. IEEE, Salt Lake City, pp 6848\u20136856. https:\/\/openaccess.thecvf.com\/content_cvpr_2018\/html\/Zhang_ShuffleNet_An_Extremely_CVPR_2018_paper.html","DOI":"10.1109\/CVPR.2018.00716"},{"issue":"4","key":"11072_CR135","doi-asserted-by":"publisher","first-page":"2108","DOI":"10.1109\/TGRS.2015.2496185","volume":"54","author":"B Zhao","year":"2016","unstructured":"Zhao B, Zhong Y, Xia GS, Zhang L (2016) Dirichlet-derived multiple topic scene classification model for high spatial resolution remote sensing imagery. IEEE Trans Geosci Remote Sens 54(4):2108\u20132123. https:\/\/doi.org\/10.1109\/TGRS.2015.2496185","journal-title":"IEEE Trans Geosci Remote Sens"},{"issue":"3","key":"11072_CR136","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1117\/1.JRS.10.035004","volume":"10","author":"J ZhaoLi","year":"2016","unstructured":"ZhaoLi J, Tang P, Huo L (2016) Feature significance-based multibag-of-visual-words model for remote sensing image scene classification. J Appl Remote Sens 10(3):1\u201321. https:\/\/doi.org\/10.1117\/1.JRS.10.035004","journal-title":"J Appl Remote Sens"},{"key":"11072_CR137","doi-asserted-by":"publisher","first-page":"4706576","DOI":"10.1155\/2020\/4706576","volume":"2020","author":"Q Zheng","year":"2020","unstructured":"Zheng Q, Yang M, Tian X, Jiang N, Wang D (2020) A full stage data augmentation method in deep convolutional neural network for natural image classification. Discret Dyn Nat Soc 2020:4706576. https:\/\/doi.org\/10.1155\/2020\/4706576","journal-title":"Discret Dyn Nat Soc"},{"key":"11072_CR138","doi-asserted-by":"publisher","first-page":"7723","DOI":"10.1007\/s00521-020-05514-1","volume":"33","author":"Q Zheng","year":"2021","unstructured":"Zheng Q, Zhao P, Yang Li HW, Yang Y (2021) Spectrum interference-based two-level data augmentation method in deep learning for automatic modulation classification. Neural Comput Appl 33:7723\u20137745. https:\/\/doi.org\/10.1007\/s00521-020-05514-1","journal-title":"Neural Comput Appl"},{"issue":"7","key":"11072_CR139","doi-asserted-by":"publisher","first-page":"4799","DOI":"10.1109\/TGRS.2019.2893115","volume":"57","author":"X Zheng","year":"2019","unstructured":"Zheng X, Yuan Y, Lu X (2019) A deep scene representation for aerial scene classification. IEEE Trans Geosci Remote Sens 57(7):4799\u20134809. https:\/\/doi.org\/10.1109\/TGRS.2019.2893115","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"11072_CR140","doi-asserted-by":"publisher","unstructured":"Zhou B, Lapedriza A, Xiao J, Torralba A, Oliva A (2014) Learning deep features for scene recognition using places database. In: Neural information processing systems, Boston, MA, pp 487\u2013495. https:\/\/doi.org\/10.5555\/2968826.2968881","DOI":"10.5555\/2968826.2968881"},{"issue":"Part A","key":"11072_CR141","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1016\/j.isprsjprs.2018.01.004","volume":"145","author":"W Zhou","year":"2018","unstructured":"Zhou W, Newsam S, Li C, Shao Z (2018) PatternNet: a benchmark dataset for performance evaluation of remote sensing image retrieval. ISPRS J Photogram Remote Sens 145(Part A):197\u2013209. https:\/\/doi.org\/10.1016\/j.isprsjprs.2018.01.004","journal-title":"ISPRS J Photogram Remote Sens"},{"issue":"1","key":"11072_CR142","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/1827\/1\/012165","volume":"1827","author":"M Zhu","year":"2021","unstructured":"Zhu M (2021) A brief analysis of GAN variants on image classification and generation. J Phys: Conf Ser 1827(1):012165. https:\/\/doi.org\/10.1088\/1742-6596\/1827\/1\/012165","journal-title":"J Phys: Conf Ser"},{"key":"11072_CR143","doi-asserted-by":"publisher","unstructured":"Zoph B, Vasudevan V, Shlens J, Le QV (2018) Learning transferable architectures for scalable image recognition. In: Computer vision and pattern recognition. IEEE, pp 8697\u20138710. https:\/\/doi.org\/10.1109\/CVPR.2018.00907, https:\/\/ieeexplore.ieee.org\/document\/8579005\/","DOI":"10.1109\/CVPR.2018.00907"},{"issue":"11","key":"11072_CR144","doi-asserted-by":"publisher","first-page":"2321","DOI":"10.1109\/LGRS.2015.2475299","volume":"12","author":"Q Zou","year":"2015","unstructured":"Zou Q, Ni L, Zhang T, Wang Q (2015) Deep learning based feature selection for remote sensing scene classification. IEEE Trans Geosci Remote Sens Lett 12(11):2321\u20132325. https:\/\/doi.org\/10.1109\/LGRS.2015.2475299","journal-title":"IEEE Trans Geosci Remote Sens Lett"}],"container-title":["Neural Processing Letters"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-022-11072-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11063-022-11072-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-022-11072-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,8]],"date-time":"2023-07-08T12:18:20Z","timestamp":1688818700000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11063-022-11072-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,18]]},"references-count":144,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2023,6]]}},"alternative-id":["11072"],"URL":"https:\/\/doi.org\/10.1007\/s11063-022-11072-5","relation":{},"ISSN":["1370-4621","1573-773X"],"issn-type":[{"value":"1370-4621","type":"print"},{"value":"1573-773X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,11,18]]},"assertion":[{"value":"16 October 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 November 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 November 2022","order":3,"name":"change_date","label":"Change Date","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Update","order":4,"name":"change_type","label":"Change Type","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"This article was retracted on 14 October 2022","order":5,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}