{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T09:55:34Z","timestamp":1766138134584},"reference-count":112,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,10,18]],"date-time":"2022-10-18T00:00:00Z","timestamp":1666051200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,10,18]],"date-time":"2022-10-18T00:00:00Z","timestamp":1666051200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2023,4]]},"DOI":"10.1007\/s00530-022-01010-9","type":"journal-article","created":{"date-parts":[[2022,10,18]],"date-time":"2022-10-18T16:04:36Z","timestamp":1666109076000},"page":"669-691","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Semantic embedding: scene image classification using scene-specific objects"],"prefix":"10.1007","volume":"29","author":[{"given":"Mohammad Javad","family":"Parseh","sequence":"first","affiliation":[]},{"given":"Mohammad","family":"Rahmanimanesh","sequence":"additional","affiliation":[]},{"given":"Parviz","family":"Keshavarzi","sequence":"additional","affiliation":[]},{"given":"Zohreh","family":"Azimifar","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,18]]},"reference":[{"key":"1010_CR1","doi-asserted-by":"crossref","unstructured":"Zeng, H., Chen, G.: Scene recognition with comprehensive regions graph modeling. In: International Conference on Image and Graphics, Springer, pp. 630\u2013641 (2019)","DOI":"10.1007\/978-3-030-34113-8_52"},{"key":"1010_CR2","doi-asserted-by":"crossref","unstructured":"L\u00f3pez-Garc\u00eda, F., Garc\u00eda-D\u00edaz, A., Fdez-Vidal, X. R., Pardo, X. M., Dosil, R., Luna, D.: Improving scene recognition through visual attention. In: Iberian Conference on Pattern Recognition and Image Analysis, Springer, pp. 16\u201323 (2009)","DOI":"10.1007\/978-3-642-02172-5_4"},{"key":"1010_CR3","doi-asserted-by":"crossref","unstructured":"Kwitt, R., Vasconcelos, N., Rasiwasia, N.: Scene recognition on the semantic manifold. In: European Conference on Computer Vision, Springer, pp. 359\u2013372 (2012)","DOI":"10.1007\/978-3-642-33765-9_26"},{"key":"1010_CR4","doi-asserted-by":"publisher","first-page":"440","DOI":"10.1016\/j.procs.2020.03.253","volume":"167","author":"A Basu","year":"2020","unstructured":"Basu, A., Petropoulakis, L., Di Caterina, G., Soraghan, J.: Indoor home scene recognition using capsule neural networks. Proc. Comput. Sci. 167, 440\u2013448 (2020)","journal-title":"Proc. Comput. Sci."},{"key":"1010_CR5","doi-asserted-by":"publisher","first-page":"1005","DOI":"10.1016\/j.procs.2020.03.400","volume":"167","author":"S Masood","year":"2020","unstructured":"Masood, S., Ahsan, U., Munawwar, F., Rizvi, D.R., Ahmed, M.: Scene recognition from image using convolutional neural network. Proc. Comput. Sci. 167, 1005\u20131012 (2020)","journal-title":"Proc. Comput. Sci."},{"issue":"3","key":"1010_CR6","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1023\/A:1011139631724","volume":"42","author":"A Oliva","year":"2001","unstructured":"Oliva, A., Torralba, A.: Modeling the shape of the scene: A holistic representation of the spatial envelope. Int. J. Comput. Vis. 42(3), 145\u2013175 (2001)","journal-title":"Int. J. Comput. Vis."},{"key":"1010_CR7","doi-asserted-by":"crossref","unstructured":"Vogel, J., Schiele, B.: A semantic typicality measure for natural scene categorization. In: Joint Pattern Recognition Symposium, Springer, pp. 195\u2013203 (2004)","DOI":"10.1007\/978-3-540-28649-3_24"},{"issue":"10","key":"1010_CR8","doi-asserted-by":"publisher","first-page":"1533","DOI":"10.1016\/j.patcog.2004.12.014","volume":"38","author":"A Payne","year":"2005","unstructured":"Payne, A., Singh, S.: Indoor vs. outdoor scene classification in digital photographs. Pattern Recogn. 38(10), 1533\u20131545 (2005)","journal-title":"Pattern Recogn."},{"issue":"8","key":"1010_CR9","first-page":"1489","volume":"33","author":"J Wu","year":"2010","unstructured":"Wu, J., Rehg, J.M.: Centrist: a visual descriptor for scene categorization. IEEE Trans. Pattern Anal. Mach. Intell. 33(8), 1489\u20131501 (2010)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"1","key":"1010_CR10","doi-asserted-by":"publisher","first-page":"373","DOI":"10.1016\/j.patcog.2011.06.012","volume":"45","author":"X Meng","year":"2012","unstructured":"Meng, X., Wang, Z., Wu, L.: Building global image features for scene recognition. Pattern Recogn. 45(1), 373\u2013380 (2012)","journal-title":"Pattern Recogn."},{"issue":"2","key":"1010_CR11","doi-asserted-by":"publisher","first-page":"823","DOI":"10.1109\/TIP.2013.2295756","volume":"23","author":"Y Xiao","year":"2013","unstructured":"Xiao, Y., Wu, J., Yuan, J.: mCENTRIST: a multi-channel feature generation mechanism for scene categorization. IEEE Trans. Image Process. 23(2), 823\u2013836 (2013)","journal-title":"IEEE Trans. Image Process."},{"issue":"1","key":"1010_CR12","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1016\/0031-3203(95)00067-4","volume":"29","author":"T Ojala","year":"1996","unstructured":"Ojala, T., Pietik\u00e4inen, M., Harwood, D.: A comparative study of texture measures with classification based on featured distributions. Pattern Recogn. 29(1), 51\u201359 (1996)","journal-title":"Pattern Recogn."},{"issue":"2","key":"1010_CR13","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe, D.G.: Distinctive image features from scale-invariant keypoints. Int. J. Comput. Vis. 60(2), 91\u2013110 (2004)","journal-title":"Int. J. Comput. Vis."},{"key":"1010_CR14","doi-asserted-by":"crossref","unstructured":"Dalal, N., Triggs, B.: Histograms of oriented gradients for human detection. In: 2005 IEEE computer society conference on computer vision and pattern recognition (CVPR'05), vol. 1: IEEE, pp. 886\u2013893 (2005)","DOI":"10.1109\/CVPR.2005.177"},{"key":"1010_CR15","doi-asserted-by":"crossref","unstructured":"Bay, H., Tuytelaars, T., Van Gool, L.: Surf: Speeded up robust features. In: European conference on computer vision, Springer, pp. 404\u2013417 (2006)","DOI":"10.1007\/11744023_32"},{"key":"1010_CR16","doi-asserted-by":"crossref","unstructured":"Margolin, R., Zelnik-Manor, L., Tal, A.: Otc: a novel local descriptor for scene classification. In: European Conference on Computer Vision, Springer, pp. 377\u2013391 (2014)","DOI":"10.1007\/978-3-319-10584-0_25"},{"key":"1010_CR17","doi-asserted-by":"crossref","unstructured":"Sivic, J., Zisserman, A.: Video google: a text retrieval approach to object matching in videos. In: Computer Vision, IEEE International Conference on, vol. 3: IEEE Computer Society, pp. 1470\u20131470 (2003)","DOI":"10.1109\/ICCV.2003.1238663"},{"key":"1010_CR18","unstructured":"Csurka, G., Dance, C., Fan, L., Willamowski, J., Bray, C.: Visual categorization with bags of keypoints. In: Workshop on statistical learning in computer vision, ECCV, vol. 1, no. 1\u201322: Prague, pp. 1\u20132 (2004)"},{"key":"1010_CR19","doi-asserted-by":"crossref","unstructured":"Fei-Fei, L., Perona, P.: A bayesian hierarchical model for learning natural scene categories. In: 2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'05), vol. 2: IEEE, pp. 524\u2013531 (2005)","DOI":"10.1109\/CVPR.2005.16"},{"key":"1010_CR20","doi-asserted-by":"crossref","unstructured":"Grauman, K., Darrell, T.: The pyramid match kernel: Discriminative classification with sets of image features. In: Tenth IEEE International Conference on Computer Vision (ICCV'05) Volume 1, vol. 2: IEEE, pp. 1458\u20131465 (2005)","DOI":"10.1109\/ICCV.2005.239"},{"key":"1010_CR21","doi-asserted-by":"crossref","unstructured":"Lazebnik, S., Schmid, C., Ponce, J.: Beyond bags of features: spatial pyramid matching for recognizing natural scene categories. In: 2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06), vol. 2: IEEE, pp. 2169\u20132178 (2006)","DOI":"10.1109\/CVPR.2006.68"},{"key":"1010_CR22","doi-asserted-by":"crossref","unstructured":"Van Gemert, J. C., Geusebroek, J.-M., Veenman, C. J., Smeulders, A. W.: Kernel codebooks for scene categorization. In: European conference on computer vision, Springer, pp. 696\u2013709 (2008)","DOI":"10.1007\/978-3-540-88690-7_52"},{"key":"1010_CR23","doi-asserted-by":"crossref","unstructured":"Wu, J., Rehg, J. M.: Beyond the euclidean distance: Creating effective visual codebooks using the histogram intersection kernel. In: 2009 IEEE 12th International Conference on Computer Vision, IEEE, pp. 630\u2013637 (2009)","DOI":"10.1109\/ICCV.2009.5459178"},{"key":"1010_CR24","unstructured":"Yang, J., Yu, K., Gong, Y., Huang, T.: Linear spatial pyramid matching using sparse coding for image classification. In: 2009 IEEE Conference on computer vision and pattern recognition, IEEE, pp. 1794\u20131801 (2009)"},{"key":"1010_CR25","doi-asserted-by":"crossref","unstructured":"Gao, S., Tsang, I. W.-H., Chia, L.-T., Zhao, P.: Local features are not lonely\u2013Laplacian sparse coding for image classification. In: 2010 IEEE computer society conference on computer vision and pattern recognition, IEEE, pp. 3555\u20133561 (2010)","DOI":"10.1109\/CVPR.2010.5539943"},{"key":"1010_CR26","doi-asserted-by":"crossref","unstructured":"Wang, J., Yang, J., Yu, K., Lv, F., Huang, T., Gong, Y.: Locality-constrained linear coding for image classification. In: 2010 IEEE computer society conference on computer vision and pattern recognition, IEEE, pp. 3360\u20133367 (2010)","DOI":"10.1109\/CVPR.2010.5540018"},{"key":"1010_CR27","doi-asserted-by":"crossref","unstructured":"Zhang, C., Liu, J., Tian, Q., Xu, C., Lu, H., Ma, S.: Image classification by non-negative sparse coding, low-rank and sparse decomposition. In: CVPR 2011, IEEE, pp. 1673\u20131680 (2011)","DOI":"10.1109\/CVPR.2011.5995484"},{"issue":"7","key":"1010_CR28","doi-asserted-by":"publisher","first-page":"3372","DOI":"10.1109\/TIP.2016.2567076","volume":"25","author":"SH Khan","year":"2016","unstructured":"Khan, S.H., Hayat, M., Bennamoun, M., Togneri, R., Sohel, F.A.: A discriminative representation of convolutional features for indoor scene recognition. IEEE Trans. Image Process. 25(7), 3372\u20133383 (2016)","journal-title":"IEEE Trans. Image Process."},{"issue":"5","key":"1010_CR29","doi-asserted-by":"publisher","first-page":"1874","DOI":"10.1016\/j.patcog.2009.11.009","volume":"43","author":"J Qin","year":"2010","unstructured":"Qin, J., Yung, N.H.: Scene categorization via contextual visual words. Pattern Recogn. 43(5), 1874\u20131888 (2010)","journal-title":"Pattern Recogn."},{"issue":"4","key":"1010_CR30","doi-asserted-by":"publisher","first-page":"1627","DOI":"10.1016\/j.patcog.2011.09.020","volume":"45","author":"NM Elfiky","year":"2012","unstructured":"Elfiky, N.M., Khan, F.S., Van De Weijer, J., Gonzalez, J.: Discriminative compact pyramids for object and scene recognition. Pattern Recogn. 45(4), 1627\u20131636 (2012)","journal-title":"Pattern Recogn."},{"issue":"1","key":"1010_CR31","doi-asserted-by":"publisher","first-page":"424","DOI":"10.1016\/j.patcog.2012.07.017","volume":"46","author":"L Zhou","year":"2013","unstructured":"Zhou, L., Zhou, Z., Hu, D.: Scene classification using a multi-resolution bag-of-features model. Pattern Recogn. 46(1), 424\u2013433 (2013)","journal-title":"Pattern Recogn."},{"key":"1010_CR32","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1016\/j.patcog.2018.06.017","volume":"84","author":"M Clement","year":"2018","unstructured":"Clement, M., Kurtz, C., Wendling, L.: Learning spatial relations and shapes for structural object description and scene recognition. Pattern Recogn. 84, 197\u2013210 (2018)","journal-title":"Pattern Recogn."},{"key":"1010_CR33","doi-asserted-by":"crossref","unstructured":"Sydorov, V., Sakurada, M., Lampert, C. H.: Deep fisher kernels-end to end learning of the fisher kernel gmm parameters. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 1402\u20131409 (2014)","DOI":"10.1109\/CVPR.2014.182"},{"issue":"10","key":"1010_CR34","doi-asserted-by":"publisher","first-page":"2222","DOI":"10.1109\/TNNLS.2014.2359471","volume":"26","author":"Y Yuan","year":"2015","unstructured":"Yuan, Y., Mou, L., Lu, X.: Scene recognition by manifold regularized deep learning architecture. IEEE Trans. Neural Netw. Learn. Syst. 26(10), 2222\u20132233 (2015)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"issue":"12","key":"1010_CR35","doi-asserted-by":"publisher","first-page":"2212","DOI":"10.1109\/TNNLS.2014.2307532","volume":"25","author":"H Goh","year":"2014","unstructured":"Goh, H., Thome, N., Cord, M., Lim, J.-H.: Learning deep hierarchical visual feature coding. IEEE Trans. Neural Netw. Learn. Syst. 25(12), 2212\u20132225 (2014)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"1010_CR36","doi-asserted-by":"crossref","unstructured":"Xie, G.-S., Zhang, X.-Y., Liu, C.-L.: Efficient feature coding based on auto-encoder network for image classification. In: Asian Conference on Computer Vision, Springer, pp. 628\u2013642 (2014).","DOI":"10.1007\/978-3-319-16865-4_41"},{"key":"1010_CR37","doi-asserted-by":"publisher","first-page":"118","DOI":"10.1016\/j.patcog.2018.04.025","volume":"82","author":"L Xie","year":"2018","unstructured":"Xie, L., et al.: Improved spatial pyramid matching for scene recognition. Pattern Recogn. 82, 118\u2013129 (2018)","journal-title":"Pattern Recogn."},{"issue":"7","key":"1010_CR38","doi-asserted-by":"publisher","first-page":"1271","DOI":"10.1109\/TPAMI.2009.132","volume":"32","author":"JC Van Gemert","year":"2009","unstructured":"Van Gemert, J.C., Veenman, C.J., Smeulders, A.W., Geusebroek, J.-M.: Visual word ambiguity. IEEE Trans. Pattern Anal. Mach. Intell. 32(7), 1271\u20131283 (2009)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1010_CR39","doi-asserted-by":"crossref","unstructured":"Boureau, Y.-L., Bach, F., LeCun, Y., Ponce, J.: Learning mid-level features for recognition. In: 2010 IEEE computer society conference on computer vision and pattern recognition, IEEE, pp. 2559\u20132566 (2010)","DOI":"10.1109\/CVPR.2010.5539963"},{"issue":"8","key":"1010_CR40","doi-asserted-by":"publisher","first-page":"702","DOI":"10.1016\/j.patrec.2009.12.010","volume":"31","author":"X Zhou","year":"2010","unstructured":"Zhou, X., Zhuang, X., Tang, H., Hasegawa-Johnson, M., Huang, T.S.: Novel Gaussianized vector representation for improved natural scene categorization. Pattern Recogn. Lett. 31(8), 702\u2013708 (2010)","journal-title":"Pattern Recogn. Lett."},{"key":"1010_CR41","doi-asserted-by":"crossref","unstructured":"Harada, T., Ushiku, Y., Yamashita, Y., Kuniyoshi, Y.: Discriminative spatial pyramid. In: CVPR 2011, IEEE, pp. 1617\u20131624 (2011).","DOI":"10.1109\/CVPR.2011.5995691"},{"key":"1010_CR42","doi-asserted-by":"crossref","unstructured":"Xie, L., Wang, J., Guo, B., Zhang, B., Tian, Q.: Orientational pyramid matching for recognizing indoor scenes. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3734\u20133741 (2014)","DOI":"10.1109\/CVPR.2014.477"},{"key":"1010_CR43","unstructured":"Jaakkola, T. S., Haussler, D.: Exploiting generative models in discriminative classifiers. Adv. Neural Inform. Process. Syst. pp. 487\u2013493 (1999)"},{"issue":"3","key":"1010_CR44","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1007\/s11263-013-0636-x","volume":"105","author":"J S\u00e1nchez","year":"2013","unstructured":"S\u00e1nchez, J., Perronnin, F., Mensink, T., Verbeek, J.: Image classification with the fisher vector: theory and practice. Int. J. Comput. Vis. 105(3), 222\u2013245 (2013)","journal-title":"Int. J. Comput. Vis."},{"issue":"9","key":"1010_CR45","doi-asserted-by":"publisher","first-page":"1704","DOI":"10.1109\/TPAMI.2011.235","volume":"34","author":"H J\u00e9gou","year":"2011","unstructured":"J\u00e9gou, H., Perronnin, F., Douze, M., S\u00e1nchez, J., P\u00e9rez, P., Schmid, C.: Aggregating local image descriptors into compact codes. IEEE Trans. Pattern Anal. Mach. Intell. 34(9), 1704\u20131716 (2011)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1010_CR46","unstructured":"Cimpoi, M., Maji, S., Vedaldi, A.: Deep filter banks for texture recognition and segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 3828\u20133836."},{"key":"1010_CR47","doi-asserted-by":"crossref","unstructured":"Gong, Y., Wang, L., Guo, R., Lazebnik, S.: Multi-scale orderless pooling of deep convolutional activation features. In: European conference on computer vision, Springer, pp. 392\u2013407 (2014)","DOI":"10.1007\/978-3-319-10584-0_26"},{"key":"1010_CR48","doi-asserted-by":"crossref","unstructured":"Jiang, Y., Yuan, J., Yu, G.: Randomized spatial partition for scene recognition. In: European conference on computer vision, Springer, pp. 730\u2013743 (2012)","DOI":"10.1007\/978-3-642-33709-3_52"},{"issue":"8","key":"1010_CR49","doi-asserted-by":"publisher","first-page":"1143","DOI":"10.1109\/LSP.2016.2641020","volume":"24","author":"C Weng","year":"2016","unstructured":"Weng, C., Wang, H., Yuan, J., Jiang, X.: Discovering class-specific spatial layouts for scene recognition. IEEE Signal Process. Lett. 24(8), 1143\u20131147 (2016)","journal-title":"IEEE Signal Process. Lett."},{"issue":"9","key":"1010_CR50","doi-asserted-by":"publisher","first-page":"1904","DOI":"10.1109\/TPAMI.2015.2389824","volume":"37","author":"K He","year":"2015","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Spatial pyramid pooling in deep convolutional networks for visual recognition. IEEE Trans. Pattern Anal. Mach. Intell. 37(9), 1904\u20131916 (2015)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1010_CR51","doi-asserted-by":"crossref","unstructured":"Yang, M., Li, B., Fan, H., Jiang, Y.: Randomized spatial pooling in deep convolutional networks for scene recognition. In: 2015 IEEE International Conference on Image Processing (ICIP), IEEE, pp. 402\u2013406 (2015).","DOI":"10.1109\/ICIP.2015.7350829"},{"issue":"10","key":"1010_CR52","doi-asserted-by":"publisher","first-page":"4829","DOI":"10.1109\/TIP.2016.2599292","volume":"25","author":"M Hayat","year":"2016","unstructured":"Hayat, M., Khan, S.H., Bennamoun, M., An, S.: A spatial layout and scale invariant feature representation for indoor scene classification. IEEE Trans. Image Process. 25(10), 4829\u20134841 (2016)","journal-title":"IEEE Trans. Image Process."},{"key":"1010_CR53","unstructured":"Li, L.-J., Su, H., Li, F.-F., Xing, E. P.: Object bank: a high-level image representation for scene classification and semantic feature sparsification (2010)."},{"key":"1010_CR54","doi-asserted-by":"crossref","unstructured":"Pandey, M., Lazebnik, S.: Scene recognition and weakly supervised object localization with deformable part-based models. In: 2011 International Conference on Computer Vision, IEEE, pp. 1307\u20131314 (2011).","DOI":"10.1109\/ICCV.2011.6126383"},{"key":"1010_CR55","doi-asserted-by":"crossref","unstructured":"Singh, S., Gupta, A., Efros, A. A.: Unsupervised discovery of mid-level discriminative patches. In: European Conference on Computer Vision, Springer, pp. 73\u201386 (2012)","DOI":"10.1007\/978-3-642-33709-3_6"},{"key":"1010_CR56","doi-asserted-by":"crossref","unstructured":"Juneja, M., Vedaldi, A., Jawahar, C., Zisserman, A.: Blocks that shout: Distinctive parts for scene classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 923\u2013930 (2013)","DOI":"10.1109\/CVPR.2013.124"},{"key":"1010_CR57","doi-asserted-by":"publisher","first-page":"159","DOI":"10.1016\/j.patcog.2016.03.020","volume":"56","author":"Y Yuan","year":"2016","unstructured":"Yuan, Y., Wan, J., Wang, Q.: Congested scene classification via efficient unsupervised feature learning and density estimation. Pattern Recogn. 56, 159\u2013169 (2016)","journal-title":"Pattern Recogn."},{"key":"1010_CR58","doi-asserted-by":"crossref","unstructured":"Lin, D., Lu, C., Liao, R., Jia, J.: Learning important spatial pooling regions for scene classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3726\u20133733 (2014)","DOI":"10.1109\/CVPR.2014.476"},{"key":"1010_CR59","doi-asserted-by":"crossref","unstructured":"Zuo, Z., Wang, G., Shuai, B., Zhao, L., Yang, Q., Jiang, X.: Learning discriminative and shareable features for scene classification. In: European Conference on Computer Vision. Springer, pp. 552\u2013568 (2014).","DOI":"10.1007\/978-3-319-10590-1_36"},{"key":"1010_CR60","doi-asserted-by":"publisher","first-page":"45230","DOI":"10.1109\/ACCESS.2019.2908448","volume":"7","author":"J Shi","year":"2019","unstructured":"Shi, J., Zhu, H., Yu, S., Wu, W., Shi, H.: Scene categorization model using deep visually sensitive features. IEEE Access 7, 45230\u201345239 (2019)","journal-title":"IEEE Access"},{"key":"1010_CR61","doi-asserted-by":"crossref","unstructured":"Cao, L., Fei-Fei, L.: Spatially coherent latent topic model for concurrent segmentation and classification of objects and scenes. In: 2007 IEEE 11th International Conference on Computer Vision, IEEE, pp. 1\u20138 (2007)","DOI":"10.1109\/ICCV.2007.4408965"},{"key":"1010_CR62","doi-asserted-by":"crossref","unstructured":"Niu, Z., Hua, G., Gao, X., Tian, Q.: Context aware topic model for scene recognition. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, IEEE, pp. 2743\u20132750 (2012)","DOI":"10.1109\/CVPR.2012.6247997"},{"key":"1010_CR63","doi-asserted-by":"crossref","unstructured":"Parizi, S. N., Oberlin, J. G., Felzenszwalb, P. F.: Reconfigurable models for scene recognition. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition.IEEE, pp. 2775\u20132782 (2012)","DOI":"10.1109\/CVPR.2012.6248001"},{"issue":"6","key":"1010_CR64","doi-asserted-by":"publisher","first-page":"2721","DOI":"10.1109\/TIP.2017.2686017","volume":"26","author":"X Song","year":"2017","unstructured":"Song, X., Jiang, S., Herranz, L.: Multi-scale multi-feature context modeling for scene recognition in the semantic manifold. IEEE Trans. Image Process. 26(6), 2721\u20132735 (2017)","journal-title":"IEEE Trans. Image Process."},{"key":"1010_CR65","doi-asserted-by":"crossref","unstructured":"Wu, R., Wang, B., Wang, W., Yu, Y.: Harvesting discriminative meta objects with deep CNN features for scene classification. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1287\u20131295 (2015)","DOI":"10.1109\/ICCV.2015.152"},{"key":"1010_CR66","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1016\/j.patcog.2016.01.019","volume":"59","author":"X Song","year":"2016","unstructured":"Song, X., Jiang, S., Herranz, L., Kong, Y., Zheng, K.: Category co-occurrence modeling for large scale scene recognition. Pattern Recogn. 59, 98\u2013111 (2016)","journal-title":"Pattern Recogn."},{"key":"1010_CR67","doi-asserted-by":"publisher","first-page":"474","DOI":"10.1016\/j.patcog.2017.09.025","volume":"74","author":"X Cheng","year":"2018","unstructured":"Cheng, X., Lu, J., Feng, J., Yuan, B., Zhou, J.: Scene recognition with objectness. Pattern Recogn. 74, 474\u2013487 (2018)","journal-title":"Pattern Recogn."},{"key":"1010_CR68","doi-asserted-by":"crossref","unstructured":"Yang, S., Ramanan, D.: Multi-scale recognition with DAG-CNNs. In: Proceedings of the IEEE international conference on computer vision, pp. 1215\u20131223 (2015)","DOI":"10.1109\/ICCV.2015.144"},{"key":"1010_CR69","doi-asserted-by":"publisher","first-page":"188","DOI":"10.1016\/j.neucom.2016.11.023","volume":"225","author":"P Tang","year":"2017","unstructured":"Tang, P., Wang, H., Kwong, S.: G-MS2F: GoogLeNet based multi-stage feature fusion of deep CNN for scene recognition. Neurocomputing 225, 188\u2013197 (2017)","journal-title":"Neurocomputing"},{"issue":"6","key":"1010_CR70","doi-asserted-by":"publisher","first-page":"1263","DOI":"10.1109\/TCSVT.2015.2511543","volume":"27","author":"G-S Xie","year":"2015","unstructured":"Xie, G.-S., Zhang, X.-Y., Yan, S., Liu, C.-L.: Hybrid CNN and dictionary-based models for scene recognition and domain adaptation. IEEE Transa. Circ. Syst. Video Technol. 27(6), 1263\u20131274 (2015)","journal-title":"IEEE Transa. Circ. Syst. Video Technol."},{"issue":"2","key":"1010_CR71","doi-asserted-by":"publisher","first-page":"808","DOI":"10.1109\/TIP.2016.2629443","volume":"26","author":"S Guo","year":"2016","unstructured":"Guo, S., Huang, W., Wang, L., Qiao, Y.: Locally supervised deep hybrid model for scene recognition. IEEE Trans. Image Process. 26(2), 808\u2013820 (2016)","journal-title":"IEEE Trans. Image Process."},{"key":"1010_CR72","doi-asserted-by":"crossref","unstructured":"Liu, Y., Chen, Q., Chen, W., Wassell, I.: Dictionary learning inspired deep network for scene recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32, no. 1 (2018)","DOI":"10.1609\/aaai.v32i1.12312"},{"issue":"4","key":"1010_CR73","doi-asserted-by":"publisher","first-page":"2028","DOI":"10.1109\/TIP.2017.2666739","volume":"26","author":"Z Wang","year":"2017","unstructured":"Wang, Z., Wang, L., Wang, Y., Zhang, B., Qiao, Y.: Weakly supervised patchnets: describing and aggregating local patches for scene recognition. IEEE Trans. Image Process. 26(4), 2028\u20132041 (2017)","journal-title":"IEEE Trans. Image Process."},{"issue":"1","key":"1010_CR74","first-page":"1","volume":"15","author":"S Jiang","year":"2019","unstructured":"Jiang, S., Chen, G., Song, X., Liu, L.: Deep patch representations with shared codebook for scene classification. ACM Trans. Multimed. Comput. Commun. Appl. (TOMM) 15(1), 1\u201317 (2019)","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl. (TOMM)"},{"key":"1010_CR75","doi-asserted-by":"publisher","first-page":"82066","DOI":"10.1109\/ACCESS.2020.2989863","volume":"8","author":"H Seong","year":"2020","unstructured":"Seong, H., Hyun, J., Kim, E.: Fosnet: an end-to-end trainable deep neural network for scene recognition. IEEE Access 8, 82066\u201382077 (2020)","journal-title":"IEEE Access"},{"issue":"25","key":"1010_CR76","doi-asserted-by":"publisher","first-page":"18033","DOI":"10.1007\/s11042-019-08264-y","volume":"79","author":"AG Sorkhi","year":"2020","unstructured":"Sorkhi, A.G., Hassanpour, H., Fateh, M.: A comprehensive system for image scene classification. Multimed. Tools Appl. 79(25), 18033\u201318058 (2020)","journal-title":"Multimed. Tools Appl."},{"key":"1010_CR77","doi-asserted-by":"crossref","unstructured":"Speer, R., Chin, J., Havasi, C.: Conceptnet 5.5: an open multilingual graph of general knowledge. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 31, no. 1 (2017)","DOI":"10.1609\/aaai.v31i1.11164"},{"issue":"1","key":"1010_CR78","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna, R., et al.: Visual genome: connecting language and vision using crowdsourced dense image annotations. Int. J. Comput. Vis. 123(1), 32\u201373 (2017)","journal-title":"Int. J. Comput. Vis."},{"key":"1010_CR79","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., et al.: Microsoft coco: common objects in context. In: European conference on computer vision. Springer, pp. 740\u2013755 (2014).","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1010_CR80","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)"},{"key":"1010_CR81","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: towards real-time object detection with region proposal networks. arXiv preprint arXiv:1506.01497 (2015)"},{"key":"1010_CR82","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1010_CR83","doi-asserted-by":"crossref","unstructured":"Yang, X.-S.: Firefly algorithms for multimodal optimization. In: International symposium on stochastic algorithms. Springer, pp. 169\u2013178 (2009)","DOI":"10.1007\/978-3-642-04944-6_14"},{"key":"1010_CR84","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1016\/j.patrec.2018.10.022","volume":"130","author":"J Guo","year":"2020","unstructured":"Guo, J., Yuan, X., Xu, P., Bai, H., Liu, B.: Improved image clustering with deep semantic embedding. Pattern Recogn. Lett. 130, 225\u2013233 (2020)","journal-title":"Pattern Recogn. Lett."},{"key":"1010_CR85","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1016\/j.sigpro.2014.07.027","volume":"112","author":"J Yu","year":"2015","unstructured":"Yu, J., Hong, C., Tao, D., Wang, M.: Semantic embedding for indoor scene recognition by weighted hypergraph learning. Signal Process. 112, 129\u2013136 (2015)","journal-title":"Signal Process."},{"key":"1010_CR86","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1016\/j.knosys.2018.07.020","volume":"160","author":"F Huang","year":"2018","unstructured":"Huang, F., Zhang, X., Li, Z., Zhao, Z., He, Y.: From content to links: social image embedding with deep multimodal model. Knowl.-Based Syst. 160, 251\u2013264 (2018)","journal-title":"Knowl.-Based Syst."},{"key":"1010_CR87","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1016\/j.eswa.2016.10.038","volume":"71","author":"S Bai","year":"2017","unstructured":"Bai, S.: Growing random forest on deep convolutional neural networks for scene categorization. Expert Syst. Appl. 71, 279\u2013287 (2017)","journal-title":"Expert Syst. Appl."},{"issue":"09","key":"1010_CR88","doi-asserted-by":"publisher","first-page":"1755013","DOI":"10.1142\/S0218001417550138","volume":"31","author":"S Bai","year":"2017","unstructured":"Bai, S.: Scene categorization through using objects represented by deep features. Int. J. Pattern Recogn. Artif. Intell. 31(09), 1755013 (2017)","journal-title":"Int. J. Pattern Recogn. Artif. Intell."},{"issue":"10","key":"1010_CR89","doi-asserted-by":"publisher","first-page":"3004","DOI":"10.1016\/j.patcog.2015.02.003","volume":"48","author":"Z Zuo","year":"2015","unstructured":"Zuo, Z., Wang, G., Shuai, B., Zhao, L., Yang, Q.: Exemplar based deep discriminative and shareable feature learning for scene image classification. Pattern Recogn. 48(10), 3004\u20133015 (2015)","journal-title":"Pattern Recogn."},{"key":"1010_CR90","doi-asserted-by":"crossref","unstructured":"Li, L.-J., Fei-Fei, L.: What, where and who? Classifying events by scene and object recognition. In: 2007 IEEE 11th international conference on computer vision. IEEE, pp. 1\u20138 (2007).","DOI":"10.1109\/ICCV.2007.4408872"},{"key":"1010_CR91","unstructured":"Baldassano, C.: Visual Scene Perception in the Human Brain: Connections to Memory, Categorization, and Social Cognition. Stanford University (2015)."},{"key":"1010_CR92","unstructured":"Wei, P., Wang, B.: Food image classification and image retrieval based on visual features and machine learning. Multimed. Syst. pp. 1\u201312 (2020)"},{"key":"1010_CR93","doi-asserted-by":"publisher","first-page":"339","DOI":"10.1016\/j.patcog.2017.10.039","volume":"76","author":"B Chen","year":"2018","unstructured":"Chen, B., Li, J., Wei, G., Ma, B.: A novel localized and second order feature coding network for image recognition. Pattern Recogn. 76, 339\u2013348 (2018)","journal-title":"Pattern Recogn."},{"key":"1010_CR94","unstructured":"Gamage, BMSV: An embarrassingly simple comparison of machine learning algorithms for indoor scene classification. arXiv preprint arXiv:2109.12261 (2021)"},{"key":"1010_CR95","doi-asserted-by":"crossref","unstructured":"Qiu, J., Yang, Y., Wang, X., Tao, D.: Scene essence. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 8322\u20138333 (2021).","DOI":"10.1109\/CVPR46437.2021.00822"},{"key":"1010_CR96","doi-asserted-by":"crossref","unstructured":"Herranz, L., Jiang, S., Li, X.: Scene recognition with cnns: objects, scales and dataset bias. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 571\u2013579 (2016).","DOI":"10.1109\/CVPR.2016.68"},{"issue":"1","key":"1010_CR97","doi-asserted-by":"publisher","first-page":"82","DOI":"10.1007\/s00530-005-0202-7","volume":"11","author":"M Boutell","year":"2005","unstructured":"Boutell, M., Luo, J., Brown, C.: A generalized temporal context model for classifying image collections. Multimed. Syst. 11(1), 82\u201392 (2005)","journal-title":"Multimed. Syst."},{"key":"1010_CR98","doi-asserted-by":"crossref","unstructured":"Li, Y., Dixit, M., Vasconcelos, N.: Deep scene image classification with the MFAFVNet. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5746\u20135754 (2017)","DOI":"10.1109\/ICCV.2017.613"},{"key":"1010_CR99","doi-asserted-by":"publisher","first-page":"5877","DOI":"10.1109\/TIP.2020.2986599","volume":"29","author":"G Chen","year":"2020","unstructured":"Chen, G., Song, X., Zeng, H., Jiang, S.: Scene recognition with prototype-agnostic scene layout. IEEE Trans. Image Process. 29, 5877\u20135888 (2020)","journal-title":"IEEE Trans. Image Process."},{"issue":"5","key":"1010_CR100","doi-asserted-by":"publisher","first-page":"679","DOI":"10.1007\/s00371-018-1503-0","volume":"35","author":"L Kabbai","year":"2019","unstructured":"Kabbai, L., Abdellaoui, M., Douik, A.: Image classification by combining local and global features. Vis. Comput. 35(5), 679\u2013693 (2019)","journal-title":"Vis. Comput."},{"key":"1010_CR101","first-page":"5638","volume":"101","author":"SH Khan","year":"2017","unstructured":"Khan, S.H., Hayat, M., Porikli, F.: Scene categorization with spectral features. Proc. IEEE Int. Conf. Comput. Vis. 101, 5638\u20135648 (2017)","journal-title":"Proc. IEEE Int. Conf. Comput. Vis."},{"issue":"10","key":"1010_CR102","doi-asserted-by":"publisher","first-page":"1227","DOI":"10.1007\/s00371-016-1348-3","volume":"33","author":"M Liu","year":"2017","unstructured":"Liu, M., Guo, Y., Wang, J.: Indoor scene modeling from a single image using normal inference and edge features. Vis. Comput. 33(10), 1227\u20131240 (2017)","journal-title":"Vis. Comput."},{"key":"1010_CR103","doi-asserted-by":"crossref","unstructured":"Liu L., et al.: Compositional model based fisher vector coding for image classification. IEEE Trans Pattern Anal Mach Intell. 39(12), 2335\u20132348 (2017).","DOI":"10.1109\/TPAMI.2017.2651061"},{"key":"1010_CR104","unstructured":"Dixit, M. D., Vasconcelos, N.: Object based scene representations using fisher scores of local subspace projections. Adv. Neural Inform. Process. Syst. 29, 2811\u20132819 (2016)."},{"key":"1010_CR105","doi-asserted-by":"crossref","unstructured":"Quattoni A., Torralba A.: Recognizing indoor scenes. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition. IEEE, pp. 413\u2013420 (2009)","DOI":"10.1109\/CVPR.2009.5206537"},{"key":"1010_CR106","first-page":"106","volume":"23","author":"L-J Li","year":"2010","unstructured":"Li, L.-J., Su, H., Fei-Fei, L., Xing, E.: Object bank: a high-level image representation for scene classification and semantic feature sparsification. Adv. Neural. Inf. Process. Syst. 23, 106 (2010)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"1","key":"1010_CR107","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1007\/s11263-013-0660-x","volume":"107","author":"L-J Li","year":"2014","unstructured":"Li, L.-J., Su, H., Lim, Y., Fei-Fei, L.: Object bank: an object-level image representation for high-level visual recognition. Int. J. Comput. Vis. 107(1), 20\u201339 (2014)","journal-title":"Int. J. Comput. Vis."},{"key":"1010_CR108","doi-asserted-by":"crossref","unstructured":"Zhang, L., Xie, S., Zhen, X.: Towards optimal object bank for scene classification. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing. IEEE, pp. 1967\u20131970 (2013).","DOI":"10.1109\/ICASSP.2013.6637997"},{"issue":"8","key":"1010_CR109","doi-asserted-by":"publisher","first-page":"3241","DOI":"10.1109\/TIP.2014.2328894","volume":"23","author":"L Zhang","year":"2014","unstructured":"Zhang, L., Zhen, X., Shao, L.: Learning object-to-class kernels for scene classification. IEEE Trans. Image Process. 23(8), 3241\u20133253 (2014)","journal-title":"IEEE Trans. Image Process."},{"key":"1010_CR110","doi-asserted-by":"crossref","unstructured":"Pennington J., Socher, R., Manning, C. D.: Glove: global vectors for word representation. In: Proceedings of the 2014 conference on empirical methods in natural language processing (EMNLP), 1532\u20131543 (2014).","DOI":"10.3115\/v1\/D14-1162"},{"issue":"1","key":"1010_CR111","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1111\/j.1467-9922.2010.00616.x","volume":"61","author":"R Edwards","year":"2011","unstructured":"Edwards, R., Collins, L.: Lexical frequency profiles and Zipf\u2019s law. Lang. Learn. 61(1), 1\u201330 (2011)","journal-title":"Lang. Learn."},{"issue":"6","key":"1010_CR112","doi-asserted-by":"publisher","first-page":"1056","DOI":"10.1109\/TPAMI.2015.2476802","volume":"38","author":"Y Zheng","year":"2015","unstructured":"Zheng, Y., Zhang, Y.-J., Larochelle, H.: A deep and autoregressive approach for topic modeling of multimodal data. IEEE Trans. Pattern Anal. Mach. Intell. 38(6), 1056\u20131069 (2015)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-022-01010-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-022-01010-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-022-01010-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,6]],"date-time":"2024-10-06T01:06:42Z","timestamp":1728176802000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-022-01010-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,18]]},"references-count":112,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,4]]}},"alternative-id":["1010"],"URL":"https:\/\/doi.org\/10.1007\/s00530-022-01010-9","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"type":"print","value":"0942-4962"},{"type":"electronic","value":"1432-1882"}],"subject":[],"published":{"date-parts":[[2022,10,18]]},"assertion":[{"value":"14 January 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 September 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 October 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}