{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T18:47:49Z","timestamp":1771958869835,"version":"3.50.1"},"reference-count":26,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,11,26]],"date-time":"2021-11-26T00:00:00Z","timestamp":1637884800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,11,26]],"date-time":"2021-11-26T00:00:00Z","timestamp":1637884800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,11,26]],"date-time":"2021-11-26T00:00:00Z","timestamp":1637884800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,11,26]]},"DOI":"10.1109\/iscmi53840.2021.9654828","type":"proceedings-article","created":{"date-parts":[[2021,12,30]],"date-time":"2021-12-30T00:46:48Z","timestamp":1640825208000},"page":"242-246","source":"Crossref","is-referenced-by-count":16,"title":["A Linear Sub-Structure with Co-Variance Shift for Image Captioning"],"prefix":"10.1109","author":[{"given":"Shaik","family":"Rafi","sequence":"first","affiliation":[]},{"given":"Ranjita","family":"Das","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.345"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298754"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/s13369-019-04262-2"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.524"},{"key":"ref14","article-title":"Rethinking the Inception Architecture for CVision","author":"szegedy","year":"2016","journal-title":"IEEE Conf on CVPR 2016"},{"key":"ref15","article-title":"Christopher Manning GloVe: Global Vectors for Word Representation","author":"pennington","year":"0","journal-title":"Proceedings of the 2014 Conference on Empirical Methods in NLP(EMNLP)"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/EAIT.2018.8470438"},{"key":"ref18","first-page":"3156","article-title":"Show and tell: A neural im-age caption generator","author":"vinyals","year":"2015","journal-title":"Proc IEEE Conf CVPR"},{"key":"ref19","first-page":"2048","article-title":"Show, attend and tell: Neural image caption generation with visual attention","author":"xu","year":"2015","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref4","first-page":"15","article-title":"Every picture tells a story: generating sentences from images","author":"farhadi","year":"2010","journal-title":"Euro CVD conf"},{"key":"ref3","year":"0"},{"key":"ref6","first-page":"1412","author":"luong","year":"2015","journal-title":"Effective approaches to attention-based neural ML in EMNLP"},{"key":"ref5","first-page":"1143","article-title":"Im2Text: Describing images using 1 million captioned photographs","author":"ordonez","year":"2011","journal-title":"Proceedings of the Advances in NIPS"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2915033"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3994"},{"key":"ref2","first-page":"6298","article-title":"SCA-CNN: Spatial and channel-wise attention in CNN for image captioning","author":"chen","year":"2017","journal-title":"Proc IEEE\/CVF Conf Comput CVPR"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2598339"},{"key":"ref9","volume":"pr 98","author":"wang","year":"2020","journal-title":"Learning visual relationship and context-aware attention for image captioning"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.277"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"ref21","article-title":"Deep captioning with multimodal recurrent neural networks (m-rnn)","author":"mao","year":"2015","journal-title":"Proc IEEE Int Conf Learn Represent"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2896516"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_31"},{"key":"ref26","first-page":"311","author":"papineni","year":"2002","journal-title":"BLEU a Method for Automatic Evaluation of Machine Translation Proceedings of the 40th Annual Meeting of the ACL"},{"key":"ref25","first-page":"107928","volume":"115","author":"ji","year":"2021","journal-title":"Divergent-convergent attention for image captioning Pattern Recognition"}],"event":{"name":"2021 8th International Conference on Soft Computing & Machine Intelligence (ISCMI)","location":"Cario, Egypt","start":{"date-parts":[[2021,11,26]]},"end":{"date-parts":[[2021,11,27]]}},"container-title":["2021 8th International Conference on Soft Computing &amp; Machine Intelligence (ISCMI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9654797\/9654798\/09654828.pdf?arnumber=9654828","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T17:00:06Z","timestamp":1652202006000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9654828\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,11,26]]},"references-count":26,"URL":"https:\/\/doi.org\/10.1109\/iscmi53840.2021.9654828","relation":{},"subject":[],"published":{"date-parts":[[2021,11,26]]}}}