{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,9]],"date-time":"2026-01-09T14:16:09Z","timestamp":1767968169231,"version":"3.49.0"},"reference-count":40,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100004826","name":"Beijing Natural Science Foundation","doi-asserted-by":"publisher","award":["4192057"],"award-info":[{"award-number":["4192057"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,7,18]]},"DOI":"10.1109\/ijcnn55064.2022.9892312","type":"proceedings-article","created":{"date-parts":[[2022,9,30]],"date-time":"2022-09-30T19:56:04Z","timestamp":1664567764000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["Contrastive Learning Based Visual Representation Enhancement for Multimodal Machine Translation"],"prefix":"10.1109","author":[{"given":"Shike","family":"Wang","sequence":"first","affiliation":[{"name":"School of Computer Science, Beijing Language and Culture University,Beijing,China"}]},{"given":"Wen","family":"Zhang","sequence":"additional","affiliation":[{"name":"Xiaomi AI Lab,Beijing,China"}]},{"given":"Wenyu","family":"Guo","sequence":"additional","affiliation":[{"name":"School of Computer Science, Beijing Language and Culture University,Beijing,China"}]},{"given":"Dong","family":"Yu","sequence":"additional","affiliation":[{"name":"School of Computer Science, Beijing Language and Culture University,Beijing,China"}]},{"given":"Pengyuan","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Computer Science, Beijing Language and Culture University,Beijing,China"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-4746"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3348"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W16-3210"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413715"},{"key":"ref31","article-title":"Auxiliary tasks in multi-task learning","volume":"abs 1805 6334","author":"liebel","year":"2018","journal-title":"CoRR"},{"key":"ref30","author":"ba","year":"2016","journal-title":"Layer normalization"},{"key":"ref37","first-page":"311","article-title":"Bleu: a method for automatic evaluation of machine translation","author":"papineni","year":"2002","journal-title":"Proceedings of the 40th Annual Meeting on Association for Computational Linguistics  - ACL '02"},{"key":"ref36","author":"kingma","year":"2017","journal-title":"Adam A method for stochastic optimization"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-4718"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00166"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1422"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-4009"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.480"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref13","article-title":"A simple framework for contrastive learning of visual representations","volume":"abs 2002 5709","author":"chen","year":"2020","journal-title":"CoRR"},{"key":"ref14","article-title":"Understanding contrastive representation learning through alignment and uniformity on the hyper-sphere","volume":"abs 2005 10242","author":"wang","year":"2020","journal-title":"CoRR"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"key":"ref16","article-title":"Deep residual learning for image recognition","volume":"abs 1512 3385","author":"he","year":"2015","journal-title":"CoRR"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-4746"},{"key":"ref18","article-title":"Attention is all you need","volume":"abs 1706 3762","author":"vaswani","year":"2017","journal-title":"CoRR"},{"key":"ref19","first-page":"1320","author":"lin","year":"2020","journal-title":"Dynamic Context-Guided Capsule Network for Multimodal Machine Translation"},{"key":"ref28","article-title":"Unsupervised learning of visual features by contrasting cluster assignments","volume":"abs 2006 9882","author":"caron","year":"2020","journal-title":"CoRR"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1105"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3031549"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W16-2360"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.400"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00692"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1653"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1329"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.273"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W16-2346"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-6442"},{"key":"ref1","article-title":"Sequence-to-sequence models can directly transcribe foreign speech","volume":"abs 1703 8581","author":"weiss","year":"2017","journal-title":"CoRR"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2020.06.010"},{"key":"ref22","author":"liu","year":"2021","journal-title":"Gumbel-attention for multi-modal machine translation"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-6441"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.457"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.380"},{"key":"ref26","author":"hjelm","year":"2019","journal-title":"Learning deep representations by mutual information estimation and maximization"},{"key":"ref25","article-title":"Deep clustering for unsupervised learning of visual features","volume":"abs 1807 5520","author":"caron","year":"2018","journal-title":"CoRR"}],"event":{"name":"2022 International Joint Conference on Neural Networks (IJCNN)","location":"Padua, Italy","start":{"date-parts":[[2022,7,18]]},"end":{"date-parts":[[2022,7,23]]}},"container-title":["2022 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9891857\/9889787\/09892312.pdf?arnumber=9892312","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T22:58:28Z","timestamp":1667516308000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9892312\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,18]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/ijcnn55064.2022.9892312","relation":{},"subject":[],"published":{"date-parts":[[2022,7,18]]}}}