{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T20:47:06Z","timestamp":1775594826532,"version":"3.50.1"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2023,10]]},"DOI":"10.1007\/s00530-023-01141-7","type":"journal-article","created":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T21:01:26Z","timestamp":1690923686000},"page":"2483-2494","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Image aesthetics assessment using composite features from transformer and CNN"],"prefix":"10.1007","volume":"29","author":[{"given":"Yongzhen","family":"Ke","sequence":"first","affiliation":[]},{"given":"Yin","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Kai","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Fan","family":"Qin","sequence":"additional","affiliation":[]},{"given":"Jing","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Shuai","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,8,1]]},"reference":[{"key":"1141_CR1","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. Commun. ACM 60, 84\u201390 (2017). https:\/\/doi.org\/10.1145\/3065386","journal-title":"Commun. ACM"},{"key":"1141_CR2","doi-asserted-by":"publisher","unstructured":"She, D., Lai, Y.-K., Yi, G., Xu, K.: Hierarchical layout-aware graph convolutional network for unified aesthetics assessment. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). pp. 8471\u20138480 (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.00837","DOI":"10.1109\/CVPR46437.2021.00837"},{"key":"1141_CR3","unstructured":"Vaswani, A., Shazeer, N.M., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., Polosukhin, I.: Attention is all you need. In Presented at the NIPS June 12 (2017)"},{"key":"1141_CR4","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., Houlsby, N.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv. (2020)"},{"key":"1141_CR5","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable DETR: deformable transformers for end-to-end object detection. arXiv (2020)"},{"key":"1141_CR6","doi-asserted-by":"publisher","unstructured":"Strudel, R., Garcia, R., Laptev, I., Schmid, C.: Segmenter: transformer for semantic segmentation. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV). pp. 7242\u20137252 (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.00717","DOI":"10.1109\/ICCV48922.2021.00717"},{"key":"1141_CR7","doi-asserted-by":"publisher","first-page":"2274","DOI":"10.1109\/TPAMI.2012.120","volume":"34","author":"R Achanta","year":"2012","unstructured":"Achanta, R., Shaji, A., Smith, K., Lucchi, A., Fua, P., S\u00fcsstrunk, S.: SLIC superpixels compared to state-of-the-art superpixel methods. IEEE Trans. Pattern Anal. Mach. Intell. 34, 2274\u20132282 (2012). https:\/\/doi.org\/10.1109\/TPAMI.2012.120","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1141_CR8","doi-asserted-by":"publisher","unstructured":"Lu, X., Lin, Z., Jin, H., Yang, J., Wang, J.Z.: RAPID: rating pictorial aesthetics using deep learning. In: Proceedings of the 22nd ACM International Conference on Multimedia. pp. 457\u2013466 (2014). https:\/\/doi.org\/10.1145\/2647868.2654927","DOI":"10.1145\/2647868.2654927"},{"key":"1141_CR9","doi-asserted-by":"publisher","first-page":"3998","DOI":"10.1109\/TIP.2018.2831899","volume":"27","author":"H Talebi","year":"2018","unstructured":"Talebi, H., Milanfar, P.: NIMA: neural image assessment. IEEE Trans. Image Process. 27, 3998\u20134011 (2018). https:\/\/doi.org\/10.1109\/TIP.2018.2831899","journal-title":"IEEE Trans. Image Process."},{"key":"1141_CR10","unstructured":"Howard, A.G., Zhu, M., Chen, B., Kalenichenko, D., Wang, W., Weyand, T., Andreetto, M., Adam, H.: MobileNets: efficient convolutional neural networks for mobile vision applications. arXiv (2017)"},{"key":"1141_CR11","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. CoRR. (2014)"},{"key":"1141_CR12","doi-asserted-by":"publisher","unstructured":"Szegedy, C., Wei Liu, Yangqing Jia, Sermanet, P., Reed, S., Anguelov, D., Erhan, D., Vanhoucke, V., Rabinovich, A.: Going deeper with convolutions. In: 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). pp. 1\u20139 (2015). https:\/\/doi.org\/10.1109\/CVPR.2015.7298594","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"1141_CR13","doi-asserted-by":"publisher","unstructured":"Wu, O., Hu, W., Gao, J: Learning to predict the perceived visual quality of photos. In: 2011 International Conference on Computer Vision. pp. 225\u2013232 (2011). https:\/\/doi.org\/10.1109\/ICCV.2011.6126246","DOI":"10.1109\/ICCV.2011.6126246"},{"key":"1141_CR14","doi-asserted-by":"publisher","unstructured":"Kong, S., Shen, X., Lin, Z., Mech, R., Fowlkes, C.: Photo aesthetics ranking network with attributes and content adaptation. Vol. 9905, pp. 662\u2013679 (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_40","DOI":"10.1007\/978-3-319-46448-0_40"},{"key":"1141_CR15","doi-asserted-by":"publisher","first-page":"247","DOI":"10.1016\/j.neucom.2018.06.099","volume":"395","author":"F Gao","year":"2020","unstructured":"Gao, F., Li, Z., Yu, J., Yu, J., Huang, Q., Tian, Q.: Style-adaptive photo aesthetic rating via convolutional neural networks and multi-task learning. Neurocomputing 395, 247\u2013254 (2020). https:\/\/doi.org\/10.1016\/j.neucom.2018.06.099","journal-title":"Neurocomputing"},{"key":"1141_CR16","doi-asserted-by":"publisher","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). pp. 770\u2013778 (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"1141_CR17","doi-asserted-by":"publisher","unstructured":"Murray, N., Marchesotti, L., Perronnin, F.: AVA: a large-scale database for aesthetic visual analysis. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition. pp. 2408\u20132415 (2012). https:\/\/doi.org\/10.1109\/CVPR.2012.6247954","DOI":"10.1109\/CVPR.2012.6247954"},{"key":"1141_CR18","doi-asserted-by":"publisher","unstructured":"Yang, Y., Xu, L., Li, L., Qie, N., Li, Y., Zhang, P., Guo, Y.: Personalized image aesthetics assessment with rich attributes. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). pp. 19829\u201319837 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01924","DOI":"10.1109\/CVPR52688.2022.01924"},{"key":"1141_CR19","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1109\/TMM.2021.3123468","volume":"25","author":"H Zhu","year":"2023","unstructured":"Zhu, H., Zhou, Y., Li, L., Li, Y., Guo, Y.: Learning personalized image aesthetics from subjective and objective attributes. IEEE Trans. Multimedia 25, 179\u2013190 (2023). https:\/\/doi.org\/10.1109\/TMM.2021.3123468","journal-title":"IEEE Trans. Multimedia"},{"key":"1141_CR20","doi-asserted-by":"publisher","first-page":"1798","DOI":"10.1109\/TCYB.2020.2984670","volume":"52","author":"H Zhu","year":"2022","unstructured":"Zhu, H., Li, L., Wu, J., Zhao, S., Ding, G., Shi, G.: Personalized image aesthetics assessment via meta-learning with bilevel gradient optimization. IEEE Trans. Cybern. 52, 1798\u20131811 (2022). https:\/\/doi.org\/10.1109\/TCYB.2020.2984670","journal-title":"IEEE Trans. Cybern."},{"key":"1141_CR21","doi-asserted-by":"publisher","unstructured":"Liu, D., Puri, R., Kamath, N., Bhattacharya, S.: Composition-aware image aesthetics assessment. In: 2020 IEEE Winter Conference on Applications of Computer Vision (WACV). pp. 3558\u20133567 (2020). https:\/\/doi.org\/10.1109\/WACV45572.2020.9093412","DOI":"10.1109\/WACV45572.2020.9093412"},{"key":"1141_CR22","doi-asserted-by":"publisher","unstructured":"Peng, Z., Huang, W., Gu, S., Xie, L., Wang, Y., Jiao, J., Ye, Q.: Conformer: local features coupling global representations for visual recognition. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV). pp. 357\u2013366 (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.00042","DOI":"10.1109\/ICCV48922.2021.00042"},{"key":"1141_CR23","doi-asserted-by":"publisher","unstructured":"Srinivas, A., Lin, T.-Y., Parmar, N., Shlens, J., Abbeel, P., Vaswani, A.: bottleneck transformers for visual recognition. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). pp. 16514\u201316524 (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.01625","DOI":"10.1109\/CVPR46437.2021.01625"},{"key":"1141_CR24","doi-asserted-by":"publisher","unstructured":"Guo, J., Han, K., Wu, H., Tang, Y., Chen, X., Wang, Y., Xu, C.: CMT: Convolutional neural networks meet vision transformers. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). pp. 12165\u201312175 (2022). https:\/\/doi.org\/10.1109\/CVPR52688.2022.01186","DOI":"10.1109\/CVPR52688.2022.01186"},{"key":"1141_CR25","doi-asserted-by":"publisher","unstructured":"Wu, H., Xiao, B., Codella, N., Liu, M., Dai, X., Yuan, L., Zhang, L.: CvT: introducing convolutions to vision transformers. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV). pp. 22\u201331 (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.00009","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"1141_CR26","unstructured":"Li, K., Wang, Y., Gao, P., Song, G., Liu, Y., Li, H., Qiao, Y.: UniFormer: unified transformer for efficient spatiotemporal representation learning. arXiv (2022)"},{"key":"1141_CR27","doi-asserted-by":"publisher","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Kai, L., Li, F.-F.: ImageNet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition. pp. 248\u2013255 (2009). https:\/\/doi.org\/10.1109\/CVPR.2009.5206848","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1141_CR28","doi-asserted-by":"publisher","unstructured":"Achanta, R., Susstrunk, S.: Superpixels and polygons using simple non-iterative clustering. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). pp. 4895\u20134904 (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.520","DOI":"10.1109\/CVPR.2017.520"},{"key":"1141_CR29","doi-asserted-by":"publisher","first-page":"298","DOI":"10.1007\/s11263-014-0744-2","volume":"111","author":"M Van Den Bergh","year":"2015","unstructured":"Van Den Bergh, M., Boix, X., Roig, G., Van Gool, L.: SEEDS: superpixels extracted via energy-driven sampling. Int. J. Comput. Vis. 111, 298\u2013314 (2015). https:\/\/doi.org\/10.1007\/s11263-014-0744-2","journal-title":"Int. J. Comput. Vis."},{"key":"1141_CR30","doi-asserted-by":"publisher","unstructured":"Yao, J., Boben, M., Fidler, S., Urtasun, R.: Real-time coarse-to-fine topologically preserving segmentation. In: 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). pp. 2947\u20132955 (2015). https:\/\/doi.org\/10.1109\/CVPR.2015.7298913","DOI":"10.1109\/CVPR.2015.7298913"},{"key":"1141_CR31","doi-asserted-by":"publisher","first-page":"3898","DOI":"10.1109\/TIP.2020.2968285","volume":"29","author":"L Li","year":"2020","unstructured":"Li, L., Zhu, H., Zhao, S., Ding, G., Lin, W.: Personality-assisted multi-task learning for generic and personalized image aesthetics assessment. IEEE Trans. Image Process. 29, 3898\u20133910 (2020). https:\/\/doi.org\/10.1109\/TIP.2020.2968285","journal-title":"IEEE Trans. Image Process."},{"key":"1141_CR32","doi-asserted-by":"publisher","unstructured":"Chen, Q., Zhang, W., Zhou, N., Lei, P., Xu, Y., Zheng, Y., Fan, J.: Adaptive fractional dilated convolution network for image aesthetics assessment. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). pp. 14102\u201314111 (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.01412","DOI":"10.1109\/CVPR42600.2020.01412"},{"key":"1141_CR33","doi-asserted-by":"publisher","unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Oliva, A., Torralba, A.: Learning deep features for discriminative localization. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). pp. 2921\u20132929 (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.319","DOI":"10.1109\/CVPR.2016.319"},{"key":"1141_CR34","doi-asserted-by":"publisher","unstructured":"Ma, S., Liu, J., Chen, C.W.: A-lamp: adaptive layout-aware multi-patch deep convolutional neural network for photo aesthetic assessment. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). pp. 722\u2013731 (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.84","DOI":"10.1109\/CVPR.2017.84"},{"key":"1141_CR35","doi-asserted-by":"publisher","unstructured":"Fu, X., Yan, J., Fan, C.: Image aesthetics assessment using composite features from off-the-shelf deep models. In: 2018 25th IEEE International Conference on Image Processing (ICIP). pp. 3528\u20133532 (2018). https:\/\/doi.org\/10.1109\/ICIP.2018.8451133","DOI":"10.1109\/ICIP.2018.8451133"},{"key":"1141_CR36","doi-asserted-by":"publisher","unstructured":"Hosu, V., Goldlucke, B., Saupe, D.: Effective aesthetics prediction with multi-level spatially pooled features. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). pp. 9367\u20139375 (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00960","DOI":"10.1109\/CVPR.2019.00960"},{"key":"1141_CR37","doi-asserted-by":"publisher","unstructured":"Ko, K., Lee, J.-T., Kim, C.-S.: PAC-Net: pairwise aesthetic comparison network for image aesthetic assessment. In: 2018 25th IEEE International Conference on Image Processing (ICIP). pp. 2491\u20132495 (2018). https:\/\/doi.org\/10.1109\/ICIP.2018.8451621","DOI":"10.1109\/ICIP.2018.8451621"},{"key":"1141_CR38","doi-asserted-by":"publisher","unstructured":"Lee, J.-T., Kim, C.-S.: Image aesthetic assessment based on pairwise comparison\u2014a unified approach to score regression, binary classification, and personalization. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV). pp. 1191\u20131200 (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00128","DOI":"10.1109\/ICCV.2019.00128"},{"key":"1141_CR39","doi-asserted-by":"publisher","first-page":"1548","DOI":"10.1109\/TIP.2019.2941778","volume":"29","author":"H Zeng","year":"2020","unstructured":"Zeng, H., Cao, Z., Zhang, L., Bovik, A.C.: A unified probabilistic formulation of image aesthetic assessment. IEEE Trans. Image Process. 29, 1548\u20131561 (2020). https:\/\/doi.org\/10.1109\/TIP.2019.2941778","journal-title":"IEEE Trans. Image Process."},{"key":"1141_CR40","unstructured":"Murray, N., Gordo, A.: A deep architecture for unified aesthetic prediction. arXiv (2017)"},{"key":"1141_CR41","doi-asserted-by":"publisher","unstructured":"Sheng, K., Dong, W., Ma, C., Mei, X., Huang, F., Hu, B.-G.: Attention-based multi-patch aggregation for image aesthetic assessment. In: Proceedings of the 26th ACM international conference on Multimedia. pp. 879\u2013886 (2018). https:\/\/doi.org\/10.1145\/3240508.3240554","DOI":"10.1145\/3240508.3240554"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-023-01141-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-023-01141-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-023-01141-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,15]],"date-time":"2023-09-15T13:12:55Z","timestamp":1694783575000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-023-01141-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,1]]},"references-count":41,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2023,10]]}},"alternative-id":["1141"],"URL":"https:\/\/doi.org\/10.1007\/s00530-023-01141-7","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8,1]]},"assertion":[{"value":"30 December 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 July 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 August 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"The authors have no financial or proprietary interests in any material discussed in this article.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}