{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:20:51Z","timestamp":1750220451027,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,12,9]],"date-time":"2020-12-09T00:00:00Z","timestamp":1607472000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,12,9]]},"DOI":"10.1145\/3448823.3448825","type":"proceedings-article","created":{"date-parts":[[2021,3,4]],"date-time":"2021-03-04T11:33:39Z","timestamp":1614857619000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Generative Adversarial and Self-Attention Based Fine-Grained Cross-Media Retrieval"],"prefix":"10.1145","author":[{"given":"Jin","family":"Hong","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"}]},{"given":"Haonan","family":"Luo","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"}]},{"given":"Yazhou","family":"Yao","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"}]},{"given":"Zhenmin","family":"Tang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"}]}],"member":"320","published-online":{"date-parts":[[2021,3,4]]},"reference":[{"volume-title":"Deep supervised cross-modal retrieval,\" IEEE Conference on Computer Vision and Pattern Recognition, 10394--10403","year":"2019","author":"Zhen L.","key":"e_1_3_2_1_1_1"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2966220"},{"volume-title":"Finegrained Cross-media Representation Learning with Deep Quantization Attention Network,\" ACM International Conference on Multimedia, 1313--1321","year":"2019","author":"Liang M.","key":"e_1_3_2_1_3_1"},{"volume-title":"Generalising fine-grained sketch-based image retrieval,\" IEEE Conference on Computer Vision and Pattern Recognition, 677--686","year":"2019","author":"Pang K.","key":"e_1_3_2_1_4_1"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2688133"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2688133"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019291"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2013.2276704"},{"key":"e_1_3_2_1_9_1","article-title":"Mhtn: Modal-adversarial hybrid transfer network for cross-modal retrieval","author":"Huang X.","year":"2018","journal-title":"IEEE Transactions on Cybernetics"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2869721"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2018.2847248"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2017.2684626"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2966644"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2019.2903036"},{"volume-title":"Learning cross-media joint representation with sparse and semisupervised regularization,\" ACM International Conference on Multimedia, 154--162","year":"2017","author":"Wang B.","key":"e_1_3_2_1_15_1"},{"volume-title":"Generative adversarial nets,\" Advances in neural information processing systems, 2672--2680","year":"2014","author":"Ian G.","key":"e_1_3_2_1_16_1"},{"volume-title":"Segeqa:Video segmentation based visual attention for embodied question answering,\" IEEE International Conference on Computer Vision, 9667--9676","year":"2019","author":"Luo H.","key":"e_1_3_2_1_17_1"},{"volume-title":"Cross-Media Shared Representation by Hierarchical Learning with Multiple Deep Net-works,\" International Joint Conference on Artificial Intelligence, 3846--3853","year":"2016","author":"Peng Y.","key":"e_1_3_2_1_18_1"},{"volume-title":"A structured self-attentive sentence embedding,\" arXiv preprint arXiv:1703.03130","year":"2017","author":"Lin Z.","key":"e_1_3_2_1_19_1"},{"volume-title":"Stacked cross attention for image-text matching,\" European Conference on Computer Vision, 201--216","year":"2018","author":"Lee K.","key":"e_1_3_2_1_20_1"},{"key":"e_1_3_2_1_21_1","unstructured":"A. Vaswani N. Shazeer N. Parmar Uszkoreit \"Attention is all you need \" Advances in neural information processing systems 5998--6008 2017.  A. Vaswani N. Shazeer N. Parmar Uszkoreit \"Attention is all you need \" Advances in neural information processing systems 5998--6008 2017."},{"volume-title":"Fine-grained video categorization with redundancy reduction attention,\" European Conference on Computer Vision, 136--152","year":"2018","author":"Zhu C.","key":"e_1_3_2_1_23_1"},{"key":"e_1_3_2_1_24_1","unstructured":"Gr\u00f6chenig Karlheinz \"Foundations of time-frequency analysis \" Springer Science and Business Media 2013.  Gr\u00f6chenig Karlheinz \"Foundations of time-frequency analysis \" Springer Science and Business Media 2013."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.5555\/2946645.2946704"},{"volume-title":"Generalized semantic preserving hashing for n-label cross-modal retrieval,\" IEEE Conference on Computer Vision and Pattern Recognition, 4076--4084","year":"2017","author":"Mandal D.","key":"e_1_3_2_1_26_1"},{"volume-title":"Look, imagine and match: Improving textual-visual cross-modal retrieval with generative models,\" IEEE Conference on Computer Vision and Pattern Recognition, 7181--7189","year":"2018","author":"Gu J.","key":"e_1_3_2_1_27_1"},{"volume-title":"ImageNet: A large-scale hierarchical image database,\" IEEE Conference on Computer Vision and Pattern Recognition, 248--255","year":"2009","author":"Deng J.","key":"e_1_3_2_1_28_1"},{"volume-title":"Deep Residual Learning for Image Recognition,\" IEEE Conference on Computer Vision and Pattern Recognition, 770--778","year":"2016","author":"He K.","key":"e_1_3_2_1_29_1"},{"volume-title":"Attention Is All You Need,\" Advances in neural information processing systems, 5998--6008","year":"2017","author":"Vaswani A.","key":"e_1_3_2_1_30_1"},{"volume-title":"Fine-grained Visual Textual Alignment for Cross-Modal Retrieval using Transformer Encoders,\" Computing Research Repository","year":"2020","author":"Messina N.","key":"e_1_3_2_1_31_1"},{"volume-title":"Deep Multimodal Image-Text Embeddings for Automatic Cross-Media Retrieval,\" Computing Research Repository","year":"2020","author":"Abdi K.","key":"e_1_3_2_1_32_1"},{"volume-title":"Semisupervised Cross Media Retrieval by Distance-Preserving Correlation Learning and Multi-modal Manifold Regularization,\" Pacific Rim International Conference on Artificial Intelligence, 30--42","year":"2019","author":"Wang T.","key":"e_1_3_2_1_33_1"}],"event":{"name":"ICVISP 2020: 2020 4th International Conference on Vision, Image and Signal Processing","acronym":"ICVISP 2020","location":"Bangkok Thailand"},"container-title":["Proceedings of the 2020 4th International Conference on Vision, Image and Signal Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3448823.3448825","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3448823.3448825","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:47:53Z","timestamp":1750193273000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3448823.3448825"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,12,9]]},"references-count":32,"alternative-id":["10.1145\/3448823.3448825","10.1145\/3448823"],"URL":"https:\/\/doi.org\/10.1145\/3448823.3448825","relation":{},"subject":[],"published":{"date-parts":[[2020,12,9]]},"assertion":[{"value":"2021-03-04","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}