{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:16:31Z","timestamp":1750220191936,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":15,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,9,23]],"date-time":"2022-09-23T00:00:00Z","timestamp":1663891200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,9,23]]},"DOI":"10.1145\/3573942.3574065","type":"proceedings-article","created":{"date-parts":[[2023,5,16]],"date-time":"2023-05-16T23:45:42Z","timestamp":1684280742000},"page":"581-586","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Image Description Generation Method Based on X-Linear Attention Mechanism"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4821-2518","authenticated-orcid":false,"given":"Pingan","family":"Qiao","sequence":"first","affiliation":[{"name":"Xi'an University of Posts and Telecommunications, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3383-3573","authenticated-orcid":false,"given":"Yuan","family":"Li","sequence":"additional","affiliation":[{"name":"Xi'an University of Posts and Telecommunications, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2705-1436","authenticated-orcid":false,"given":"Ruixue","family":"Shen","sequence":"additional","affiliation":[{"name":"Xi'an University of Posts and Telecommunications, China"}]}],"member":"320","published-online":{"date-parts":[[2023,5,16]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Ordonez V","author":"Kulkarni G","year":"2013","unstructured":"Kulkarni G, Premraj V, Ordonez V, Babytalk: Understanding and generating simple image descriptions[J]. IEEE transactions on pattern analysis and machine intelligence, 2013, 35(12): 2891-2903."},{"key":"e_1_3_2_1_2_1","unstructured":"Sutskever I Vinyals O Le Q V. Sequence to sequence learning with neural networks[C]\/\/Advances in neural information processing systems. 2014: 3104-3112."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Vinyals O Toshev A Bengio S Show and tell: A neural image caption generator[C]\/\/Proceedings of the IEEE conference on computer vision and pattern recognition. 2015: 3156-3164.","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"e_1_3_2_1_4_1","volume-title":"Faster r-cnn: Towards real-time object detection with region proposal networks[J]. Advances in neural information processing systems","author":"Ren S","year":"2015","unstructured":"Ren S, He K, Girshick R, Faster r-cnn: Towards real-time object detection with region proposal networks[J]. Advances in neural information processing systems, 2015, 28: 91-99."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Lin T Y RoyChowdhury A Maji S. Bilinear cnn models for fine-grained visual recognition[C]\/\/Proceedings of the IEEE international conference on computer vision. 2015: 1449-1457.","DOI":"10.1109\/ICCV.2015.170"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Gao Y Beijbom O Zhang N Compact bilinear pooling[C]\/\/Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 317-326.","DOI":"10.1109\/CVPR.2016.41"},{"key":"e_1_3_2_1_7_1","volume-title":"Multimodal compact bilinear pooling for visual question answering and visual grounding[J]. arXiv preprint arXiv:1606.01847","author":"Fukui A","year":"2016","unstructured":"Fukui A, Park D H, Yang D, Multimodal compact bilinear pooling for visual question answering and visual grounding[J]. arXiv preprint arXiv:1606.01847, 2016."},{"key":"e_1_3_2_1_8_1","volume-title":"Hadamard product for low-rank bilinear pooling[J]. arXiv preprint arXiv:1610.04325","author":"Kim J H","year":"2016","unstructured":"Kim J H, On K W, Lim W, Hadamard product for low-rank bilinear pooling[J]. arXiv preprint arXiv:1610.04325, 2016."},{"key":"e_1_3_2_1_9_1","volume-title":"Adaptively aligned image captioning via adaptive attention time[J]. Advances in neural information processing systems","author":"Huang L","year":"2019","unstructured":"Huang L, Wang W, Xia Y, Adaptively aligned image captioning via adaptive attention time[J]. Advances in neural information processing systems, 2019, 32."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"[10] Hu J Shen L Sun G. Squeeze-and-excitation networks[C]\/\/Proceedings of the IEEE conference on computer vision and pattern recognition. 2018: 7132-7141.","DOI":"10.1109\/CVPR.2018.00745"},{"key":"e_1_3_2_1_11_1","volume-title":"Continuously differentiable exponential linear units[J]. arXiv preprint arXiv:1704.07483","author":"Barron J T","year":"2017","unstructured":"Barron J T. Continuously differentiable exponential linear units[J]. arXiv preprint arXiv:1704.07483, 2017."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Karpathy A Fei-Fei L. Deep visual-semantic alignments for generating image descriptions[C]\/\/Proceedings of the IEEE conference on computer vision and pattern recognition. 2015: 3128-3137.","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"e_1_3_2_1_13_1","volume-title":"Scheduled sampling for sequence prediction with recurrent neural networks[J]. arXiv preprint arXiv:1506.03099","author":"Bengio S","year":"2015","unstructured":"Bengio S, Vinyals O, Jaitly N, Scheduled sampling for sequence prediction with recurrent neural networks[J]. arXiv preprint arXiv:1506.03099, 2015."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Rennie S J Marcheret E Mroueh Y Self-critical sequence training for image captioning[C]\/\/Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 7008-7024.","DOI":"10.1109\/CVPR.2017.131"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Anderson P He X Buehler C Bottom-up and top-down attention for image captioning and visual question answering[C]\/\/Proceedings of the IEEE conference on computer vision and pattern recognition. 2018: 6077-6086.","DOI":"10.1109\/CVPR.2018.00636"}],"event":{"name":"AIPR 2022: 2022 5th International Conference on Artificial Intelligence and Pattern Recognition","acronym":"AIPR 2022","location":"Xiamen China"},"container-title":["Proceedings of the 2022 5th International Conference on Artificial Intelligence and Pattern Recognition"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3573942.3574065","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3573942.3574065","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:32Z","timestamp":1750186952000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3573942.3574065"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9,23]]},"references-count":15,"alternative-id":["10.1145\/3573942.3574065","10.1145\/3573942"],"URL":"https:\/\/doi.org\/10.1145\/3573942.3574065","relation":{},"subject":[],"published":{"date-parts":[[2022,9,23]]},"assertion":[{"value":"2023-05-16","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}