{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T16:21:33Z","timestamp":1761582093502,"version":"3.37.3"},"reference-count":19,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,6,6]]},"DOI":"10.1109\/icassp39728.2021.9413389","type":"proceedings-article","created":{"date-parts":[[2021,5,13]],"date-time":"2021-05-13T19:53:45Z","timestamp":1620935625000},"page":"7548-7552","source":"Crossref","is-referenced-by-count":6,"title":["Image-Assisted Transformer in Zero-Resource Multi-Modal Translation"],"prefix":"10.1109","author":[{"given":"Ping","family":"Huang","sequence":"first","affiliation":[{"name":"East China Normal University,School of Computer Science and Technology,Shanghai,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shiliang","family":"Sun","sequence":"additional","affiliation":[{"name":"East China Normal University,School of Computer Science and Technology,Shanghai,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hao","family":"Yang","sequence":"additional","affiliation":[{"name":"Huawei Technologies CO., LTD,2012 Labs,Beijing,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1009"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W16-3210"},{"key":"ref12","first-page":"13","article-title":"The iapr tc-12 benchmark: A new evaluation resource for visual information systems","volume":"2","author":"grubinger","year":"2006","journal-title":"Int Workshop OntoImage"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1162"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.3115\/1557769.1557821"},{"key":"ref15","first-page":"311","article-title":"Bleu: a method for automatic evaluation of machine translation","author":"papineni","year":"2002","journal-title":"Proceedings of the 40th Annual Meeting on Association for Computational Linguistics  - ACL '02"},{"article-title":"Zero-resource neural machine translation with multi-agent communi-cation game","year":"2018","author":"chen","key":"ref16"},{"article-title":"Emergent translation in multi-agent communication","year":"2017","author":"lee","key":"ref17"},{"key":"ref18","first-page":"10482","article-title":"Unsupervised multi-modal neural ma-chine translation","author":"su","year":"2019","journal-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1422"},{"article-title":"Neural machine translation by jointly learning to align and translate","year":"2014","author":"bahdanau","key":"ref4"},{"key":"ref3","first-page":"2048","article-title":"Show, attend and tell: Neural image caption generation with visual attention","author":"xu","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref6","first-page":"6000","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-6439"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/685"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018207"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s10590-017-9197-z"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/s10590-020-09250-0"},{"key":"ref9","first-page":"5926","article-title":"Mass: Masked sequence to sequence pre-training for language generation","author":"song","year":"2019","journal-title":"International Conference on Machine Learning"}],"event":{"name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2021,6,6]]},"location":"Toronto, ON, Canada","end":{"date-parts":[[2021,6,11]]}},"container-title":["ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9413349\/9413350\/09413389.pdf?arnumber=9413389","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,3]],"date-time":"2022-08-03T00:20:38Z","timestamp":1659486038000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9413389\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,6]]},"references-count":19,"URL":"https:\/\/doi.org\/10.1109\/icassp39728.2021.9413389","relation":{},"subject":[],"published":{"date-parts":[[2021,6,6]]}}}