{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,26]],"date-time":"2025-08-26T13:10:05Z","timestamp":1756213805882,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":11,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,10]],"date-time":"2024-06-10T00:00:00Z","timestamp":1717977600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-sa\/4.0\/"}],"funder":[{"name":"JSPS KAKENHI Grant Numbers","award":["22H00540 and 22H00548"],"award-info":[{"award-number":["22H00540 and 22H00548"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,10]]},"DOI":"10.1145\/3643488.3660303","type":"proceedings-article","created":{"date-parts":[[2024,6,11]],"date-time":"2024-06-11T12:21:00Z","timestamp":1718108460000},"page":"1-4","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Improving Cross-Modal Recipe Embeddings with Cross Decoder"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2103-6825","authenticated-orcid":false,"given":"Jing","family":"Yang","sequence":"first","affiliation":[{"name":"The University of Electro-Communications, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1355-4128","authenticated-orcid":false,"given":"Junwen","family":"Chen","sequence":"additional","affiliation":[{"name":"The University of Electro-Communications, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0431-183X","authenticated-orcid":false,"given":"Keiji","family":"Yanai","sequence":"additional","affiliation":[{"name":"The University of Electro-Communications, Japan"}]}],"member":"320","published-online":{"date-parts":[[2024,6,11]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"Mustafa Shukor Nicolas Thome\u00a0Matthieu Cord. 2023. Vision and Structured-Language Pretraining for Cross-Modal Food Retrieval. In arXiv:2212.04267v2.","DOI":"10.2139\/ssrn.4511116"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Ricardo Guerrero 2021. Cross-modal Retrieval and Synthesis (X-MRS): Closing the modality gap in shared representation learning. In ACMMM.","DOI":"10.1145\/3474085.3475465"},{"key":"e_1_3_2_1_3_1","unstructured":"Junnan Li 2021. Align before Fuse: Vision and Language Representation Learning with Momentum Distillation. In NIPS."},{"key":"e_1_3_2_1_4_1","unstructured":"Alec Radford 2021. Learning Transferable Visual Models From Natural Language Supervision. In ICML."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Amaia Salvador 2017. Learning Cross-Modal Embeddings for Cooking Recipes and Food Images. In CVPR.","DOI":"10.1109\/CVPR.2017.327"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Amaia Salvador 2021. Revamping Cross-Modal Recipe Retrieval with Hierarchical Transformers and Self-supervised Learning. In CVPR.","DOI":"10.1109\/CVPR46437.2021.01522"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Mustafa Shukor 2022. Transformer Decoders with MultiModal Regularization for Cross-Modal Food Retrieval. In CVPR.","DOI":"10.1109\/CVPRW56347.2022.00503"},{"key":"e_1_3_2_1_8_1","unstructured":"Ashish Vaswani 2017. Attention is all you need. In NIPS."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Hao Wang 2019. Learning Cross-Modal Embeddings With Adversarial Networks for Cooking Recipes and Food Images. In CVPR.","DOI":"10.1109\/CVPR.2019.01184"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Jing Yang Junwen Chen and Keiji Yanai. 2023. Transformer-Based Cross-Modal Recipe Embeddings with Large Batch Training. In MMM.","DOI":"10.1007\/978-3-031-27818-1_39"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Bin Zhu 2019. R2GAN: Cross-Modal Recipe Retrieval With Generative Adversarial Network. In CVPR.","DOI":"10.1109\/CVPR.2019.01174"}],"event":{"name":"ICMR '24: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Phuket Thailand","acronym":"ICMR '24"},"container-title":["The Fifth Workshop on Intelligent Cross-Data Analysis and Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643488.3660303","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3643488.3660303","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,26]],"date-time":"2025-08-26T12:48:56Z","timestamp":1756212536000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643488.3660303"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,10]]},"references-count":11,"alternative-id":["10.1145\/3643488.3660303","10.1145\/3643488"],"URL":"https:\/\/doi.org\/10.1145\/3643488.3660303","relation":{},"subject":[],"published":{"date-parts":[[2024,6,10]]},"assertion":[{"value":"2024-06-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}