{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T18:04:10Z","timestamp":1770833050485,"version":"3.50.1"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"21-23","license":[{"start":{"date-parts":[[2021,7,31]],"date-time":"2021-07-31T00:00:00Z","timestamp":1627689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,7,31]],"date-time":"2021-07-31T00:00:00Z","timestamp":1627689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2021,9]]},"DOI":"10.1007\/s11042-021-11215-1","type":"journal-article","created":{"date-parts":[[2021,7,31]],"date-time":"2021-07-31T17:02:58Z","timestamp":1627750978000},"page":"32567-32593","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Action recognition in still images using a multi-attention guided network with weakly supervised saliency detection"],"prefix":"10.1007","volume":"80","author":[{"given":"Seyed Sajad","family":"Ashrafi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6266-6607","authenticated-orcid":false,"given":"Shahriar B.","family":"Shokouhi","sequence":"additional","affiliation":[]},{"given":"Ahmad","family":"Ayatollahi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,7,31]]},"reference":[{"issue":"17","key":"11215_CR1","doi-asserted-by":"publisher","first-page":"24923","DOI":"10.1007\/s11042-019-7674-5","volume":"78","author":"S Aly","year":"2019","unstructured":"Aly S, Sayed A (2019) Human action recognition using bag of global and local Zernike moment features. Multimed Tools Appl 78(17):24923\u201324953. https:\/\/doi.org\/10.1007\/s11042-019-7674-5","journal-title":"Multimed Tools Appl"},{"key":"11215_CR2","doi-asserted-by":"publisher","unstructured":"Amirkhani D, Bastanfard A (2019) Inpainted image quality evaluation based on saliency map features. https:\/\/doi.org\/10.1109\/ICSPIS48872.2019.9066140","DOI":"10.1109\/ICSPIS48872.2019.9066140"},{"key":"11215_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11042-020-09004-3","volume":"79","author":"DR Beddiar","year":"2020","unstructured":"Beddiar DR, Nini B, Sabokrou M, Hadid A (2020) Vision-based human activity recognition: a survey. Multimed Tools Appl 79:1\u201347. https:\/\/doi.org\/10.1007\/s11042-020-09004-3","journal-title":"Multimed Tools Appl"},{"issue":"15","key":"11215_CR4","doi-asserted-by":"publisher","first-page":"21085","DOI":"10.1007\/s11042-019-7365-2","volume":"78","author":"MF Bulbul","year":"2019","unstructured":"Bulbul MF, Islam S, Ali H (2019) 3D human action analysis and recognition through GLAC descriptor on 2D motion and static posture images. Multimed Tools Appl 78(15):21085\u201321111. https:\/\/doi.org\/10.1007\/s11042-019-7365-2","journal-title":"Multimed Tools Appl"},{"key":"11215_CR5","unstructured":"Cao Z, Hidalgo G, Simon T, Wei S-E, Sheikh Y (2018) OpenPose: Realtime multi-person 2D pose estimation using part affinity field. Accessed: Jun. 18, 2020. [online]. Available: http:\/\/arxiv.org\/abs\/1812.08008"},{"issue":"3","key":"11215_CR6","doi-asserted-by":"publisher","first-page":"4405","DOI":"10.1007\/s11042-015-3177-1","volume":"76","author":"C Chen","year":"2017","unstructured":"Chen C, Jafari R, Kehtarnavaz N (2017) A survey of depth and inertial sensor fusion for human action recognition. Multimed Tools Appl 76(3):4405\u20134425. https:\/\/doi.org\/10.1007\/s11042-015-3177-1","journal-title":"Multimed Tools Appl"},{"key":"11215_CR7","doi-asserted-by":"publisher","first-page":"97.1","DOI":"10.5244\/C.24.97","volume-title":"Procedings of the British machine vision conference 2010","author":"V Delaitre","year":"2010","unstructured":"Delaitre V, Laptev I, Sivic J (2010) Recognizing human actions in still images: a study of bag-of-features and part-based representations. In: Procedings of the British machine vision conference 2010, pp 97.1\u201397.11. https:\/\/doi.org\/10.5244\/C.24.97"},{"issue":"2","key":"11215_CR8","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham M, Van Gool L, Williams CKI, Winn J, Zisserman A (2010) The pascal visual object classes (VOC) challenge. Int J Comput Vis 88(2):303\u2013338. https:\/\/doi.org\/10.1007\/s11263-009-0275-4","journal-title":"Int J Comput Vis"},{"key":"11215_CR9","doi-asserted-by":"publisher","unstructured":"Girshick R (2015) Fast R-CNN. Proc IEEE Int Conf Comput Vis 2015:1440\u20131448. https:\/\/doi.org\/10.1109\/ICCV.2015.169","DOI":"10.1109\/ICCV.2015.169"},{"key":"11215_CR10","doi-asserted-by":"publisher","first-page":"580","DOI":"10.1109\/CVPR.2014.81","volume-title":"Proceedings of the IEEE computer society conference on computer vision and pattern recognition","author":"R Girshick","year":"2014","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, pp 580\u2013587. https:\/\/doi.org\/10.1109\/CVPR.2014.81"},{"key":"11215_CR11","doi-asserted-by":"publisher","unstructured":"Gkioxari G, Girshick R, Malik J (2015) Contextual action recognition with R\u2217CNN, Proc IEEE Int Conf Comput Vis vol. 2015 inter, pp. 1080\u20131088 https:\/\/doi.org\/10.1109\/ICCV.2015.129","DOI":"10.1109\/ICCV.2015.129"},{"issue":"10","key":"11215_CR12","doi-asserted-by":"publisher","first-page":"3343","DOI":"10.1016\/j.patcog.2014.04.018","volume":"47","author":"G Guo","year":"2014","unstructured":"Guo G, Lai A (2014) A survey on still image based human action recognition. Pattern Recogn 47(10):3343\u20133361. https:\/\/doi.org\/10.1016\/j.patcog.2014.04.018","journal-title":"Pattern Recogn"},{"issue":"10","key":"11215_CR13","doi-asserted-by":"publisher","first-page":"1775","DOI":"10.1109\/TPAMI.2009.83","volume":"31","author":"A Gupta","year":"2009","unstructured":"Gupta A, Kembhavi A, Davis LS (2009) Observing human-object interactions: using spatial and functional compatibility for recognition. IEEE Trans Pattern Anal Mach Intell 31(10):1775\u20131789. https:\/\/doi.org\/10.1109\/TPAMI.2009.83","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11215_CR14","doi-asserted-by":"publisher","first-page":"770","DOI":"10.1109\/CVPR.2016.90","volume-title":"Proceedings of the IEEE computer society conference on computer vision and pattern recognition, vol. 2016-December","author":"K He","year":"2016","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, vol. 2016-December, pp 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90"},{"key":"11215_CR15","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/j.imavis.2017.01.010","volume":"60","author":"S Herath","year":"2017","unstructured":"Herath S, Harandi M, Porikli F (2017) Going deeper into action recognition: a survey. Image Vis Comput 60:4\u201321. https:\/\/doi.org\/10.1016\/j.imavis.2017.01.010","journal-title":"Image Vis Comput"},{"key":"11215_CR16","unstructured":"Hu T, Qi H, Huang Q, Lu Y (2019) See better before looking closer: weakly supervised data augmentation network for fine-grained visual classification. Accessed: Apr. 13, 2020. [online]. Available: http:\/\/arxiv.org\/abs\/1901.09891"},{"key":"11215_CR17","doi-asserted-by":"publisher","unstructured":"Ikizler N, Cinbis RG, Pehlivan S, Duygulu P (2008) Recognizing actions from still images. https:\/\/doi.org\/10.1109\/icpr.2008.4761663","DOI":"10.1109\/icpr.2008.4761663"},{"key":"11215_CR18","doi-asserted-by":"publisher","unstructured":"Li LJ, Fei-Fei L (2007) What, where and who? Classifying events by scene and object recognition. https:\/\/doi.org\/10.1109\/ICCV.2007.4408872","DOI":"10.1109\/ICCV.2007.4408872"},{"issue":"14","key":"11215_CR19","doi-asserted-by":"publisher","first-page":"19587","DOI":"10.1007\/s11042-019-7356-3","volume":"78","author":"Z Li","year":"2019","unstructured":"Li Z, Zheng Z, Lin F, Leung H, Li Q (2019) Action recognition from depth sequence using depth motion maps-based local ternary patterns and CNN. Multimed Tools Appl 78(14):19587\u201319601. https:\/\/doi.org\/10.1007\/s11042-019-7356-3","journal-title":"Multimed Tools Appl"},{"key":"11215_CR20","doi-asserted-by":"publisher","first-page":"107341","DOI":"10.1016\/j.patcog.2020.107341","volume":"104","author":"Y Li","year":"2020","unstructured":"Li Y, Li K, Wang X (2020) Recognizing actions in images by fusing multiple body structure cues. Pattern Recogn 104:107341. https:\/\/doi.org\/10.1016\/j.patcog.2020.107341","journal-title":"Pattern Recogn"},{"issue":"5","key":"11215_CR21","doi-asserted-by":"publisher","first-page":"955","DOI":"10.1109\/JSTSP.2020.3002391","volume":"14","author":"X Liao","year":"2020","unstructured":"Liao X, Li K, Zhu X, Liu KJR (2020) Robust detection of image operator chain with two-stream convolutional neural network. IEEE J Sel Top Signal Process 14(5):955\u2013968. https:\/\/doi.org\/10.1109\/JSTSP.2020.3002391","journal-title":"IEEE J Sel Top Signal Process"},{"key":"11215_CR22","doi-asserted-by":"publisher","first-page":"152","DOI":"10.1007\/978-3-030-20873-8_10","volume-title":"Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics), vol. 11365 LNCS","author":"L Liu","year":"2019","unstructured":"Liu L, Tan RT, You S (2019) Loss guided activation for action recognition in still images. In: Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics), vol. 11365 LNCS, pp 152\u2013167. https:\/\/doi.org\/10.1007\/978-3-030-20873-8_10"},{"key":"11215_CR23","unstructured":"Lu X, Wang W, Ma C, Shen J, Shao L, Porikli F (2020) See more, know more: unsupervised video object segmentation with co-attention Siamese networks,\u201d Proc. IEEE Comput. Soc. Conf. Comput. Vis. Pattern Recognit., vol. 2019-June, pp. 3618\u20133627. Accessed: Dec. 22, 2020. [Online]. Available: http:\/\/arxiv.org\/abs\/2001.06810"},{"key":"11215_CR24","doi-asserted-by":"publisher","first-page":"581","DOI":"10.1109\/ITSC.2019.8917128","volume-title":"2019 IEEE intelligent transportation systems conference, ITSC 2019","author":"D Ludl","year":"2019","unstructured":"Ludl D, Gulde T, Curio C (2019) Simple yet efficient real-time pose-based action recognition. In: 2019 IEEE intelligent transportation systems conference, ITSC 2019, pp 581\u2013588. https:\/\/doi.org\/10.1109\/ITSC.2019.8917128"},{"key":"11215_CR25","doi-asserted-by":"publisher","first-page":"334","DOI":"10.1016\/j.patcog.2017.01.027","volume":"68","author":"S Ma","year":"2017","unstructured":"Ma S, Bargal SA, Zhang J, Sigal L, Sclaroff S (2017) Do less and achieve more: training CNNs for action recognition utilizing action images from the web. Pattern Recogn 68:334\u2013345. https:\/\/doi.org\/10.1016\/j.patcog.2017.01.027","journal-title":"Pattern Recogn"},{"key":"11215_CR26","doi-asserted-by":"publisher","first-page":"828","DOI":"10.1007\/978-3-642-33765-9_59","volume-title":"Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics), vol. 7575 LNCS, no. PART 4","author":"J McAuley","year":"2012","unstructured":"McAuley J, Leskovec J (2012) Image labeling on a network: using social-network metadata for image classification. In: Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics), vol. 7575 LNCS, no. PART 4, pp 828\u2013841. https:\/\/doi.org\/10.1007\/978-3-642-33765-9_59"},{"key":"11215_CR27","doi-asserted-by":"publisher","first-page":"483","DOI":"10.1007\/978-3-319-46484-8_29","volume-title":"Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics), vol. 9912 LNCS","author":"A Newell","year":"2016","unstructured":"Newell A, Yang K, Deng J (2016) Stacked hourglass networks for human pose estimation. In: Lecture notes in computer science (including subseries lecture notes in artificial intelligence and lecture notes in bioinformatics), vol. 9912 LNCS, pp 483\u2013499. https:\/\/doi.org\/10.1007\/978-3-319-46484-8_29"},{"issue":"6","key":"11215_CR28","doi-asserted-by":"publisher","first-page":"865","DOI":"10.1109\/TSMCC.2011.2178594","volume":"42","author":"OP Popoola","year":"2012","unstructured":"Popoola OP, Wang K (2012) Video-based abnormal human behavior recognitiona review. IEEE Trans Syst Man Cybern Part C Appl Rev 42(6):865\u2013878. https:\/\/doi.org\/10.1109\/TSMCC.2011.2178594","journal-title":"IEEE Trans Syst Man Cybern Part C Appl Rev"},{"key":"11215_CR29","unstructured":"PyTorch. (2016) https:\/\/pytorch.org\/ (accessed September 1, 2016)."},{"key":"11215_CR30","doi-asserted-by":"publisher","first-page":"475","DOI":"10.1016\/j.neucom.2017.06.041","volume":"267","author":"T Qi","year":"2017","unstructured":"Qi T, Xu Y, Quan Y, Wang Y, Ling H (2017) Image-based action recognition using hint-enhanced deep neural networks. Neurocomputing 267:475\u2013488. https:\/\/doi.org\/10.1016\/j.neucom.2017.06.041","journal-title":"Neurocomputing"},{"key":"11215_CR31","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1109\/ICIP.2011.6116197","volume-title":"Proceedings - international conference on image processing, ICIP","author":"K Raja","year":"2011","unstructured":"Raja K, Laptev I, P\u00e9rez P, Oisel L (2011) Joint pose estimation and action recognition in image graphs. In: Proceedings - international conference on image processing, ICIP, pp 25\u201328. https:\/\/doi.org\/10.1109\/ICIP.2011.6116197"},{"key":"11215_CR32","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2015) You only look once: unified, real-time object detection. Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit vol. 2016-December, pp. 779\u2013788. Accessed: Apr. 12, 2020. [Online]. Available: http:\/\/arxiv.org\/abs\/1506.02640"},{"issue":"6","key":"11215_CR33","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren S, He K, Girshick R, Sun J (2017) Faster R-CNN: towards real-time object detection with region proposal networks. IEEE Trans Pattern Anal Mach Intell 39(6):1137\u20131149. https:\/\/doi.org\/10.1109\/TPAMI.2016.2577031","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"11215_CR34","doi-asserted-by":"publisher","first-page":"152","DOI":"10.1016\/j.jvcir.2019.05.004","volume":"62","author":"H Sadeghi","year":"2019","unstructured":"Sadeghi H, Raie AA (2019) Histogram distance metric learning for facial expression recognition. J Vis Commun Image Represent 62:152\u2013165. https:\/\/doi.org\/10.1016\/j.jvcir.2019.05.004","journal-title":"J Vis Commun Image Represent"},{"key":"11215_CR35","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/CVPR.2015.7298594","volume-title":"Proceedings of the IEEE computer society conference on computer vision and pattern recognition, vol. 07\u201312-June-2015","author":"C Szegedy","year":"2015","unstructured":"Szegedy C et al (2015) Going deeper with convolutions. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, vol. 07\u201312-June-2015, pp 1\u20139. https:\/\/doi.org\/10.1109\/CVPR.2015.7298594"},{"key":"11215_CR36","doi-asserted-by":"publisher","unstructured":"Thurau C, Hlav\u00e1\u010d V (2008) Pose primitive based human action recognition in videos or still images. https:\/\/doi.org\/10.1109\/CVPR.2008.4587721","DOI":"10.1109\/CVPR.2008.4587721"},{"issue":"17\u201318","key":"11215_CR37","doi-asserted-by":"publisher","first-page":"12679","DOI":"10.1007\/s11042-020-08611-4","volume":"79","author":"D Tian","year":"2020","unstructured":"Tian D, Lu ZM, Chen X, Ma LH (2020) An attentional spatial temporal graph convolutional network with co-occurrence feature learning for action recognition. Multimed Tools Appl 79(17\u201318):12679\u201312697. https:\/\/doi.org\/10.1007\/s11042-020-08611-4","journal-title":"Multimed Tools Appl"},{"key":"11215_CR38","doi-asserted-by":"publisher","first-page":"1654","DOI":"10.1109\/CVPR.2006.321","volume":"2","author":"Y Wang","year":"2006","unstructured":"Wang Y, Jiang H, Drew MS, Li ZN, Mori G (2006) Unsupervised discovery of action classes. Proc IEEE Comput Soc Confer Comput Vis Pattern Recog 2:1654\u20131661. https:\/\/doi.org\/10.1109\/CVPR.2006.321","journal-title":"Proc IEEE Comput Soc Confer Comput Vis Pattern Recog"},{"key":"11215_CR39","doi-asserted-by":"publisher","first-page":"1042","DOI":"10.1109\/ICME.2019.00183","volume-title":"Proceedings - IEEE international conference on multimedia and expo, vol. 2019-July","author":"M Xin","year":"2019","unstructured":"Xin M, Wang S, Cheng J (2019) Entanglement loss for context-based still image action recognition. In: Proceedings - IEEE international conference on multimedia and expo, vol. 2019-July, pp 1042\u20131047. https:\/\/doi.org\/10.1109\/ICME.2019.00183"},{"key":"11215_CR40","doi-asserted-by":"publisher","first-page":"118","DOI":"10.1016\/j.image.2017.03.010","volume":"54","author":"S Yan","year":"2017","unstructured":"Yan S, Smith JS, Zhang B (2017) Action recognition from still images based on deep VLAD spatial pyramids. Signal Process Image Commun 54:118\u2013129. https:\/\/doi.org\/10.1016\/j.image.2017.03.010","journal-title":"Signal Process Image Commun"},{"issue":"4","key":"11215_CR41","doi-asserted-by":"publisher","first-page":"1116","DOI":"10.1109\/TCDS.2017.2783944","volume":"10","author":"S Yan","year":"2018","unstructured":"Yan S, Smith JS, Lu W, Zhang B (2018) Multibranch attention networks for action recognition in still images. IEEE Trans Cogn Dev Syst 10(4):1116\u20131125. https:\/\/doi.org\/10.1109\/TCDS.2017.2783944","journal-title":"IEEE Trans Cogn Dev Syst"},{"key":"11215_CR42","doi-asserted-by":"publisher","first-page":"1389","DOI":"10.1109\/CVPR.2019.00148","volume-title":"Proceedings of the IEEE computer society conference on computer vision and pattern recognition, vol. 2019-June","author":"W Yang","year":"2019","unstructured":"Yang W, Huang H, Zhang Z, Chen X, Huang K, Zhang S (2019) Towards rich feature discovery with class activation maps augmentation for person re-identification. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, vol. 2019-June, pp 1389\u20131398. https:\/\/doi.org\/10.1109\/CVPR.2019.00148"},{"key":"11215_CR43","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1109\/CVPR.2010.5540235","volume-title":"Proceedings of the IEEE computer society conference on computer vision and pattern recognition","author":"B Yao","year":"2010","unstructured":"Yao B, Fei-Fei L (2010) Modeling mutual context of object and human pose in human-object interaction activities. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, pp 17\u201324. https:\/\/doi.org\/10.1109\/CVPR.2010.5540235"},{"key":"11215_CR44","doi-asserted-by":"publisher","first-page":"1331","DOI":"10.1109\/ICCV.2011.6126386","volume-title":"Proceedings of the IEEE international conference on computer vision","author":"B Yao","year":"2011","unstructured":"Yao B, Jiang X, Khosla A, Lin AL, Guibas L, Fei-Fei L (2011) Human action recognition by learning bases of action attributes and parts. In: Proceedings of the IEEE international conference on computer vision, pp 1331\u20131338. https:\/\/doi.org\/10.1109\/ICCV.2011.6126386"},{"issue":"6","key":"11215_CR45","doi-asserted-by":"publisher","first-page":"2860","DOI":"10.1109\/TIP.2019.2891888","volume":"28","author":"H Yao","year":"2019","unstructured":"Yao H, Zhang S, Hong R, Zhang Y, Xu C, Tian Q (2019) Deep representation learning with part loss for person re-identification. IEEE Trans Image Process 28(6):2860\u20132871. https:\/\/doi.org\/10.1109\/TIP.2019.2891888","journal-title":"IEEE Trans Image Process"},{"key":"11215_CR46","unstructured":"Zagoruyko S, Komodakis N (2016) Paying more attention to attention: improving the performance of convolutional neural networks via attention transfer, 5th Int. Conf. Learn. Represent. ICLR 2017 - Conf. Track proc. Accessed: Jun. 19, 2020. [Online]. Available: http:\/\/arxiv.org\/abs\/1612.03928"},{"key":"11215_CR47","doi-asserted-by":"publisher","first-page":"3411","DOI":"10.1109\/ICCV.2017.367","volume-title":"Proceedings of the IEEE international conference on computer vision, vol. 2017-October","author":"Z Zhao","year":"2017","unstructured":"Zhao Z, Ma H, You S (2017) Single image action recognition using semantic body part actions. In: Proceedings of the IEEE international conference on computer vision, vol. 2017-October, pp 3411\u20133419. https:\/\/doi.org\/10.1109\/ICCV.2017.367"},{"key":"11215_CR48","doi-asserted-by":"publisher","first-page":"5219","DOI":"10.1109\/ICCV.2017.557","volume-title":"Proceedings of the IEEE international conference on computer vision, vol. 2017-October","author":"H Zheng","year":"2017","unstructured":"Zheng H, Fu J, Mei T, Luo J (2017) Learning multi-attention convolutional neural network for fine-grained image recognition. In: Proceedings of the IEEE international conference on computer vision, vol. 2017-October, pp 5219\u20135227. https:\/\/doi.org\/10.1109\/ICCV.2017.557"},{"key":"11215_CR49","unstructured":"Zhou W, Li H, Tian Q (2020) Recent advance in content-based image retrieval: a literature survey, Jun. 2017. Accessed: Jun. 20, 2020. [Online]. Available: http:\/\/arxiv.org\/abs\/1706.06064"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-021-11215-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-021-11215-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-021-11215-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,10]],"date-time":"2021-10-10T05:05:56Z","timestamp":1633842356000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-021-11215-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,31]]},"references-count":49,"journal-issue":{"issue":"21-23","published-print":{"date-parts":[[2021,9]]}},"alternative-id":["11215"],"URL":"https:\/\/doi.org\/10.1007\/s11042-021-11215-1","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,7,31]]},"assertion":[{"value":"24 August 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 January 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 July 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 July 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}