{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T09:58:58Z","timestamp":1766138338018,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":22,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,11,17]],"date-time":"2022-11-17T00:00:00Z","timestamp":1668643200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,11,17]]},"DOI":"10.1145\/3581807.3581879","type":"proceedings-article","created":{"date-parts":[[2023,5,23]],"date-time":"2023-05-23T00:02:28Z","timestamp":1684800148000},"page":"491-495","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["RGFGM-LXMERT-An Improve Architecture Based On LXMERT"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-6942-1580","authenticated-orcid":false,"given":"Renjie","family":"Yu","sequence":"first","affiliation":[{"name":"Department of Computer and Information Technology, Guangxi University Xingjian College of Sciences and Liberal Arts, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,5,22]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"a","author":"Tan H.","year":"2019","unstructured":"H. Tan and M. J. a . p. a. Bansal, \"Lxmert: Learning cross-modality encoder representations from transformers,\" 2019 . H. Tan and M. J. a. p. a. Bansal, \"Lxmert: Learning cross-modality encoder representations from transformers,\" 2019."},{"key":"e_1_3_2_1_2_1","volume-title":"a","author":"Devlin J.","year":"2018","unstructured":"J. Devlin , M.-W. Chang , K. Lee , and K. J. a . p. a. Toutanova, \"Bert: Pretraining of de ep bidirectional transformers for language understanding,\" 2018 . J. Devlin, M.-W. Chang, K. Lee, and K. J. a. p. a. Toutanova, \"Bert: Pretraining of deep bidirectional transformers for language understanding,\" 2018."},{"key":"e_1_3_2_1_3_1","volume-title":"a","author":"Liu P.","year":"2021","unstructured":"P. Liu , W. Yuan , J. Fu , Z. Jiang , H. Hayashi , and G. J. a . p. a. Neubig, \"Pre-train, prompt, and predict: A systematic survey of prompting methods in natural language processing,\" 2021 . P. Liu, W. Yuan, J. Fu, Z. Jiang, H. Hayashi, and G. J. a. p. a. Neubig, \"Pre-train, prompt, and predict: A systematic survey of prompting methods in natural language processing,\" 2021."},{"key":"e_1_3_2_1_4_1","volume-title":"a","author":"Sinha K.","year":"2021","unstructured":"K. Sinha , R. Jia , D. Hupkes , J. Pineau , A. Williams , and D. J. a . p. a. Kiela, \"Masked language modeling and the distributional hypothesis: Order word matters pre-training for little,\" 2021 . K. Sinha, R. Jia, D. Hupkes, J. Pineau, A. Williams, and D. J. a. p. a. Kiela, \"Masked language modeling and the distributional hypothesis: Order word matters pre-training for little,\" 2021."},{"key":"e_1_3_2_1_5_1","first-page":"2425","volume-title":"Visual question answering,\" in Proceedings of the IEEE international conference on computer vision","author":"Antol S.","year":"2015","unstructured":"S. Antol , \"Vqa : Visual question answering,\" in Proceedings of the IEEE international conference on computer vision , 2015 , pp. 2425 - 2433 . S. Antol , \"Vqa: Visual question answering,\" in Proceedings of the IEEE international conference on computer vision, 2015, pp. 2425-2433."},{"key":"e_1_3_2_1_6_1","first-page":"3","volume-title":"Visual question answering: Datasets, algorithms, and future challenges","author":"Kafle K.","year":"2017","unstructured":"K. Kafle , C. J. C. V. Kanan , and I. Understanding , \" Visual question answering: Datasets, algorithms, and future challenges ,\" vol. 163 , pp. 3 - 20 , 2017 . K. Kafle, C. J. C. V. Kanan, and I. Understanding, \"Visual question answering: Datasets, algorithms, and future challenges,\" vol. 163, pp. 3-20, 2017."},{"key":"e_1_3_2_1_7_1","first-page":"6700","volume-title":"Gqa: A new dataset for real-world visual reasoning and compositional question answering,\" in Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","author":"Hudson D. A.","year":"2019","unstructured":"D. A. Hudson and C. D. Manning , \" Gqa: A new dataset for real-world visual reasoning and compositional question answering,\" in Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition , 2019 , pp. 6700 - 6709 . D. A. Hudson and C. D. Manning, \"Gqa: A new dataset for real-world visual reasoning and compositional question answering,\" in Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 2019, pp. 6700-6709."},{"key":"e_1_3_2_1_8_1","volume-title":"a","author":"Suhr A.","year":"2019","unstructured":"A. Suhr and Y. J. a . p. a. Artzi , \"Nlvr2 visual bias analysis,\" 2019 . A. Suhr and Y. J. a. p. a. Artzi, \"Nlvr2 visual bias analysis,\" 2019."},{"key":"e_1_3_2_1_9_1","first-page":"1352","author":"Bachlechner T.","year":"2021","unstructured":"T. Bachlechner , B. P. Majumder , H. Mao , G. Cottrell , and J. McAuley , \"Rezero is all you need: Fast convergence at large depth,\" in Uncertainty in Artificial Intelligence , 2021 , pp. 1352 - 1361 : PMLR. T. Bachlechner, B. P. Majumder, H. Mao, G. Cottrell, and J. McAuley, \"Rezero is all you need: Fast convergence at large depth,\" in Uncertainty in Artificial Intelligence, 2021, pp. 1352-1361: PMLR.","journal-title":"Uncertainty in Artificial Intelligence"},{"key":"e_1_3_2_1_10_1","volume-title":"a","author":"Goodfellow I. J.","year":"2014","unstructured":"I. J. Goodfellow , J. Shlens , and C. J. a . p. a. Szegedy , \"Explaining and harnessing adversarial examples,\" 2014 . I. J. Goodfellow, J. Shlens, and C. J. a. p. a. Szegedy, \"Explaining and harnessing adversarial examples,\" 2014."},{"volume-title":"t. o. n. n. Li, and l. systems, \"Adversarial examples: Attacks and defenses for deep learning","author":"Yuan X.","key":"e_1_3_2_1_11_1","unstructured":"X. Yuan , P. He , Q. Zhu , X. J. I. t. o. n. n. Li, and l. systems, \"Adversarial examples: Attacks and defenses for deep learning ,\" vol. 30 , no. 9, pp. 2805-2824, 2019. X. Yuan, P. He, Q. Zhu, X. J. I. t. o. n. n. Li, and l. systems, \"Adversarial examples: Attacks and defenses for deep learning,\" vol. 30, no. 9, pp. 2805-2824, 2019."},{"key":"e_1_3_2_1_12_1","first-page":"933","volume-title":"Language modeling with gated convolutional networks,\" in International conference on machine learning","author":"Dauphin Y. N.","year":"2017","unstructured":"Y. N. Dauphin , A. Fan , M. Auli , and D. Grangier , \" Language modeling with gated convolutional networks,\" in International conference on machine learning , 2017 , pp. 933 - 941 : PMLR. Y. N. Dauphin, A. Fan, M. Auli, and D. Grangier, \"Language modeling with gated convolutional networks,\" in International conference on machine learning, 2017, pp. 933-941: PMLR."},{"key":"e_1_3_2_1_13_1","first-page":"1362","volume-title":"no. 1","author":"K\u00f6gel M.","year":"2011","unstructured":"M. K\u00f6gel and R. J. I. P. V. Findeisen , \" A fast gradient method for embedded linear predictive control,\" vol. 44 , no. 1 , pp. 1362 - 1367 , 2011 . M. K\u00f6gel and R. J. I. P. V. Findeisen, \"A fast gradient method for embedded linear predictive control,\" vol. 44, no. 1, pp. 1362-1367, 2011."},{"key":"e_1_3_2_1_14_1","unstructured":"A. Vaswani \"Attention is all you need \" vol. 30 2017.  A. Vaswani \"Attention is all you need \" vol. 30 2017."},{"key":"e_1_3_2_1_15_1","volume-title":"i. N. I. P. S. Lin, \"Understanding and improving layer normalization","author":"Xu J.","year":"2019","unstructured":"J. Xu , X. Sun , Z. Zhang , G. Zhao , and J. J. A. i. N. I. P. S. Lin, \"Understanding and improving layer normalization ,\" vol. 32 , 2019 . J. Xu, X. Sun, Z. Zhang, G. Zhao, and J. J. A. i. N. I. P. S. Lin, \"Understanding and improving layer normalization,\" vol. 32, 2019."},{"key":"e_1_3_2_1_16_1","first-page":"2096","volume-title":"no. 1","author":"Ganin Y.","year":"2016","unstructured":"Y. Ganin , \"Domain-adversarial training of neural networks,\" vol. 17 , no. 1 , pp. 2096 - 2030 , 2016 . Y. Ganin , \"Domain-adversarial training of neural networks,\" vol. 17, no. 1, pp. 2096-2030, 2016."},{"key":"e_1_3_2_1_17_1","unstructured":"A. Shafahi \"Adversarial training for free! \" vol. 32 2019.  A. Shafahi \"Adversarial training for free! \" vol. 32 2019."},{"key":"e_1_3_2_1_18_1","volume-title":"a","author":"Wong E.","year":"2020","unstructured":"E. Wong , L. Rice , and J. Z. J. a . p. a. Kolter, \"Fast is better than free: Revisiting adversarial training,\" 2020 . E. Wong, L. Rice, and J. Z. J. a. p. a. Kolter, \"Fast is better than free: Revisiting adversarial training,\" 2020."},{"key":"e_1_3_2_1_19_1","first-page":"10015","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"35","author":"Veness J.","year":"2021","unstructured":"J. Veness , \"Gated linear networks,\" in Proceedings of the AAAI Conference on Artificial Intelligence , 2021 , vol. 35 , no. 11, pp. 10015 - 10023 . J. Veness , \"Gated linear networks,\" in Proceedings of the AAAI Conference on Artificial Intelligence, 2021, vol. 35, no. 11, pp. 10015-10023."},{"key":"e_1_3_2_1_20_1","volume-title":"a","author":"Hendrycks D.","year":"2016","unstructured":"D. Hendrycks and K. J. a . p. a. Gimpel , \"Gaussian error linear units (gelus),\" 2016 . D. Hendrycks and K. J. a. p. a. Gimpel, \"Gaussian error linear units (gelus),\" 2016."},{"key":"e_1_3_2_1_21_1","first-page":"740","volume-title":"Common objects in context,\" in European conference on computer vision","author":"Lin T.-Y.","year":"2014","unstructured":"T.-Y. Lin , \"Microsoft coco : Common objects in context,\" in European conference on computer vision , 2014 , pp. 740 - 755 : Springer . T.-Y. Lin , \"Microsoft coco: Common objects in context,\" in European conference on computer vision, 2014, pp. 740-755: Springer."},{"volume-title":"Connecting language and vision using crowdsourced dense image annotations","author":"Krishna R.","key":"e_1_3_2_1_22_1","unstructured":"R. Krishna , \"Visual genome : Connecting language and vision using crowdsourced dense image annotations ,\" vol. 123 , no. 1, pp. 32-73, 2017. R. Krishna , \"Visual genome: Connecting language and vision using crowdsourced dense image annotations,\" vol. 123, no. 1, pp. 32-73, 2017."}],"event":{"name":"ICCPR 2022: 2022 11th International Conference on Computing and Pattern Recognition","acronym":"ICCPR 2022","location":"Beijing China"},"container-title":["Proceedings of the 2022 11th International Conference on Computing and Pattern Recognition"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581807.3581879","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581807.3581879","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:49:30Z","timestamp":1750182570000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581807.3581879"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,17]]},"references-count":22,"alternative-id":["10.1145\/3581807.3581879","10.1145\/3581807"],"URL":"https:\/\/doi.org\/10.1145\/3581807.3581879","relation":{},"subject":[],"published":{"date-parts":[[2022,11,17]]},"assertion":[{"value":"2023-05-22","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}