{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,21]],"date-time":"2025-12-21T06:24:28Z","timestamp":1766298268012,"version":"3.37.3"},"reference-count":21,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2021,3,30]],"date-time":"2021-03-30T00:00:00Z","timestamp":1617062400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,3,30]],"date-time":"2021-03-30T00:00:00Z","timestamp":1617062400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"published-print":{"date-parts":[[2021,5]]},"DOI":"10.1007\/s42979-021-00563-2","type":"journal-article","created":{"date-parts":[[2021,3,30]],"date-time":"2021-03-30T16:02:52Z","timestamp":1617120172000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Deep Learning-Based Image Retrieval System with Clustering on Attention-Based Representations"],"prefix":"10.1007","volume":"2","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7036-1336","authenticated-orcid":false,"given":"Sumanth S.","family":"Rao","sequence":"first","affiliation":[]},{"given":"Shahid","family":"Ikram","sequence":"additional","affiliation":[]},{"given":"Parashara","family":"Ramesh","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,3,30]]},"reference":[{"key":"563_CR1","doi-asserted-by":"crossref","unstructured":"Lin TY, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick L. Microsoft COCO: common objects in context. In: Computer vision\u2014ECCV, vol. 8693. Springer, Cham; 2014.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"563_CR2","unstructured":"Xu K, Ba J, Kiros R, Cho K, Courville A, Salakhudinov R, Zemel R, Bengio Y. Show, attend and tell: neural image caption generation with visual attention. In: Proceedings of the 32nd international conference on ICML; 2015. pp. 2048\u201357."},{"key":"563_CR3","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I. Attention is all you need. In: Advances in neural information processing systems; 2017, pp. 5998\u20136008."},{"key":"563_CR4","unstructured":"Weston J, Bengio S, Usunier N, WSABIE: scaling up to large vocabulary image annotation. In: IJCAI international joint conference on artificial intelligence; 2017. pp. 2764\u201370."},{"key":"563_CR5","unstructured":"Vendrov I, Kiros R, Fidler S, Urtasun R. Order-embeddings of images and language. In: International conference on learning representations. 2016."},{"key":"563_CR6","doi-asserted-by":"crossref","unstructured":"Gu J, Cai J, Joty S, Niu L, Wang G. Look, imagine and match: Improving textual-visual cross-modal retrieval with generative models. In: Proceedings of the IEEE conference on computer vision and pattern recognition; 2018. pp. 7181\u201389.","DOI":"10.1109\/CVPR.2018.00750"},{"key":"563_CR7","doi-asserted-by":"crossref","unstructured":"Chen Y, Wang JZ, Krovetz R. Content-based image retrieval by clustering. In: Proceedings of the 5th ACM SIGMM international workshop on Multimedia information retrieval. ACM Press; 2003. pp. 193\u2013200.","DOI":"10.1145\/973264.973295"},{"issue":"5","key":"563_CR8","doi-asserted-by":"publisher","first-page":"988","DOI":"10.1109\/TKDE.2002.1033769","volume":"14","author":"G Sheikholeslami","year":"2002","unstructured":"Sheikholeslami G, Chang W, Zhang A. SemQuery: semantic clustering and querying on heterogeneous features for visual data. IEEE Trans Knowl Data Eng. 2002;14(5):988\u20131002.","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"563_CR9","doi-asserted-by":"crossref","unstructured":"Smith JR, Chang SF. VisualSEEK: a fully automated content-based query system. In: Proc. 4th acm int\u2019l conf. on multimedia; 1996. pp. 87\u20138.","DOI":"10.1145\/244130.244151"},{"issue":"1","key":"563_CR10","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1109\/83.892448","volume":"10","author":"A Vailaya","year":"2001","unstructured":"Vailaya A, Figueiredo MAT, Jain AK, Zhang H-J. Image classification for content-based indexing. IEEE Trans Image Process. 2001;10(1):117\u201330.","journal-title":"IEEE Trans Image Process"},{"key":"563_CR11","unstructured":"Maria-Florina B, Dick T, White C. Data-driven clustering via parameterized Lloyd's families. In: Proceedings of the 32nd international conference on neural information processing systems 2018. pp. 10664\u201374."},{"key":"563_CR12","unstructured":"Kingma DP, Ba J. Adam: a method for stochastic optimization. In: 3rd International conference for learning representations, San Diego 2015. arXiv:1412.6980."},{"key":"563_CR13","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A. Going deeper with convolutions. In: The IEEE conference on computer vision and pattern recognition (CVPR); 2015. pp. 1\u20139.","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"563_CR14","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J. Deep residual learning for image recognition. In: The IEEE conference on computer vision and pattern recognition (CVPR); 2016. pp. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"563_CR15","doi-asserted-by":"crossref","unstructured":"Bojanowski P, Grave E, Joulin A, Mikolov T. Enriching word vectors with subword information. In: Transactions of the association for computational linguistics. 2016.","DOI":"10.1162\/tacl_a_00051"},{"key":"563_CR16","doi-asserted-by":"crossref","unstructured":"Joulin A, Grave E, Bojanowski P, Mikolov T. Bag of tricks for efficient text classification. In: 15th conference of the european chapter of the association for computational linguistics, vol. 2; 2017. pp. 427\u201331.","DOI":"10.18653\/v1\/E17-2068"},{"key":"563_CR17","doi-asserted-by":"crossref","unstructured":"Wang S, Wang R, Yao Z, Shan S, Chen X. Cross-modal scene graph matching for relationship-aware image-text retrieval. In WACV; 2020. pp. 1497\u2013506.","DOI":"10.1109\/WACV45572.2020.9093614"},{"key":"563_CR18","doi-asserted-by":"crossref","unstructured":"Li K, Zhang Y, Li K, Li Y, Fu Y. Visual Semantic Reasoning for Image-Text Matching. In ICCV; 2019. pp. 4653\u2013661.","DOI":"10.1109\/ICCV.2019.00475"},{"key":"563_CR19","doi-asserted-by":"crossref","unstructured":"Chen H, Ding G, Liu X, Lin Z, Liu J, Han J. IMRAM: iterative matching with recurrent attention memory for cross-modal image-text retrieval. In CVPR; 2020. pp. 12655\u2013663.","DOI":"10.1109\/CVPR42600.2020.01267"},{"key":"563_CR20","doi-asserted-by":"crossref","unstructured":"Wei X, Zhang T, Li Y, Zhang Y, Wu F. 2020. Multi-modality cross attention network for image and sentence matching. In CVPR; 2020. pp. 10941\u2013950.","DOI":"10.1109\/CVPR42600.2020.01095"},{"key":"563_CR21","doi-asserted-by":"crossref","unstructured":"Lee KH, Chen X, Hua G, Hu H, He X. Stacked cross attention for image-text matching. In ECCV; 2018. pp. 212\u201328.","DOI":"10.1007\/978-3-030-01225-0_13"}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-021-00563-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-021-00563-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-021-00563-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,5,13]],"date-time":"2021-05-13T17:38:21Z","timestamp":1620927501000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-021-00563-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,3,30]]},"references-count":21,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2021,5]]}},"alternative-id":["563"],"URL":"https:\/\/doi.org\/10.1007\/s42979-021-00563-2","relation":{},"ISSN":["2662-995X","2661-8907"],"issn-type":[{"type":"print","value":"2662-995X"},{"type":"electronic","value":"2661-8907"}],"subject":[],"published":{"date-parts":[[2021,3,30]]},"assertion":[{"value":"19 December 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 March 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 March 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}}],"article-number":"179"}}