{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T06:39:35Z","timestamp":1743143975464,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030858988"},{"type":"electronic","value":"9783030858995"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-85899-5_19","type":"book-chapter","created":{"date-parts":[[2021,8,18]],"date-time":"2021-08-18T10:06:38Z","timestamp":1629281198000},"page":"253-266","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Multimodal Encoders for Food-Oriented Cross-Modal Retrieval"],"prefix":"10.1007","author":[{"given":"Ying","family":"Chen","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3310-8347","authenticated-orcid":false,"given":"Dong","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Lin","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jun-mei","family":"Han","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,8,19]]},"reference":[{"key":"19_CR1","doi-asserted-by":"crossref","unstructured":"Carvalho, M., Cad\u00e8ne, R., Picard, D., Soulier, L., Thome, N., Cord, M.: Cross-modal retrieval in the cooking context: learning semantic text-image embeddings. In: Proceedings of the 41st International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 35\u201344 (2018)","DOI":"10.1145\/3209978.3210036"},{"issue":"9","key":"19_CR2","doi-asserted-by":"publisher","first-page":"2372","DOI":"10.1109\/TCSVT.2017.2705068","volume":"28","author":"Y Peng","year":"2017","unstructured":"Peng, Y., Huang, X., Zhao, Y.: An overview of cross-media retrieval: concepts, methodologies, benchmarks, and challenges. IEEE Trans. Circuits Syst. Video Technol. 28(9), 2372\u20132385 (2017)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"3","key":"19_CR3","doi-asserted-by":"publisher","first-page":"1393","DOI":"10.1109\/TIP.2017.2655449","volume":"26","author":"Y Wang","year":"2017","unstructured":"Wang, Y., Lin, X., Wu, L., Zhang, W.: Effective multi-query expansions: collaborative deep networks for robust landmark retrieval. IEEE Trans. Image Process. 26(3), 1393\u20131404 (2017)","journal-title":"IEEE Trans. Image Process."},{"key":"19_CR4","doi-asserted-by":"crossref","unstructured":"Wang, Y., Lin, X., Wu, L., Zhang, W., Zhang, Q.: LBMCH: learning bridging mapping for cross-modal hashing. In: Proceedings of the 38th international ACM SIGIR conference on research and development in information retrieval, pp. 999\u20131002 (2015)","DOI":"10.1145\/2766462.2767825"},{"issue":"4","key":"19_CR5","doi-asserted-by":"publisher","first-page":"1602","DOI":"10.1109\/TIP.2018.2878970","volume":"28","author":"L Wu","year":"2018","unstructured":"Wu, L., Wang, Y., Shao, L.: Cycle-consistent deep generative hashing for cross-modal retrieval. IEEE Trans. Image Process. 28(4), 1602\u20131612 (2018)","journal-title":"IEEE Trans. Image Process."},{"key":"19_CR6","doi-asserted-by":"crossref","unstructured":"Wang, B., Yang, Y., Xu, X., Hanjalic, A., Shen, H.T.: Adversarial cross-modal retrieval. In: Proceedings of the 25th ACM international conference on Multimedia, pp. 154\u2013162 (2017)","DOI":"10.1145\/3123266.3123326"},{"key":"19_CR7","doi-asserted-by":"crossref","unstructured":"Zhu, B., Ngo, C.H., Chen, J.J., Hao, Y.: R2GAN: cross-modal recipe retrieval with generative adversarial network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 11477\u201311486 (2019)","DOI":"10.1109\/CVPR.2019.01174"},{"key":"19_CR8","doi-asserted-by":"crossref","unstructured":"Wang, H., Sahoo, D., Liu, C.H., Lim, E.P., Hoi, S.C.H.: Learning cross-modal embeddings with adversarial networks for cooking recipes and food images. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 11572\u201311581 (2019)","DOI":"10.1109\/CVPR.2019.01184"},{"key":"19_CR9","unstructured":"Gulrajani, I., Ahmed, F., Arjovsky, M., Dumoulin, V., Courville, A.: Improved training of wasserstein gans. arXiv:1704.00028 (2017)"},{"key":"19_CR10","doi-asserted-by":"crossref","unstructured":"Fu, H., Wu, R., Liu, C., Sun, J.: MCEN: bridging cross-modal gap between cooking recipes and dish images with latent variable model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14570\u201314580 (2020)","DOI":"10.1109\/CVPR42600.2020.01458"},{"key":"19_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"597","DOI":"10.1007\/978-3-319-46493-0_36","volume-title":"Computer Vision \u2013 ECCV 2016","author":"M Ghifary","year":"2016","unstructured":"Ghifary, M., Kleijn, W.B., Zhang, M., Balduzzi, D., Li, W.: Deep reconstruction-classification networks for unsupervised domain adaptation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 597\u2013613. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_36"},{"key":"19_CR12","unstructured":"Vaswani, A., et al.: Attention is all you need. arXiv:1706.03762 (2017)"},{"key":"19_CR13","doi-asserted-by":"crossref","unstructured":"Salvador, A., et al.: Learning cross-modal embeddings for cooking recipes and food images. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3020\u20133028 (2017)","DOI":"10.1109\/CVPR.2017.327"},{"issue":"2","key":"19_CR14","doi-asserted-by":"publisher","first-page":"825","DOI":"10.1007\/s11280-018-0581-2","volume":"22","author":"F Zou","year":"2018","unstructured":"Zou, F., Bai, X., Luan, C., Li, K., Wang, Y., Ling, H.: Semi-supervised cross-modal learning for cross modal retrieval and image annotation. World Wide Web 22(2), 825\u2013841 (2018)","journal-title":"World Wide Web"},{"issue":"2","key":"19_CR15","doi-asserted-by":"publisher","first-page":"657","DOI":"10.1007\/s11280-018-0541-x","volume":"22","author":"X Xu","year":"2018","unstructured":"Xu, X., He, L., Lu, H., Gao, L., Ji, Y.: Deep adversarial metric learning for cross-modal retrieval. World Wide Web 22(2), 657\u2013672 (2018)","journal-title":"World Wide Web"},{"key":"19_CR16","doi-asserted-by":"crossref","unstructured":"Yu, Z., Wang, W., Li, G.: Multi-step self-attention network for cross-modal retrieval Based on a limited text space. In: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 2082\u20132086 (2019)","DOI":"10.1109\/ICASSP.2019.8682424"},{"key":"19_CR17","doi-asserted-by":"crossref","unstructured":"Ye, L., Rochan, M., Liu, Z., Wang, Y.: Cross-modal self-attention network for referring image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.10502\u201310511 (2019)","DOI":"10.1109\/CVPR.2019.01075"},{"key":"19_CR18","doi-asserted-by":"publisher","first-page":"530","DOI":"10.1016\/j.ins.2017.08.026","volume":"432","author":"X Gao","year":"2018","unstructured":"Gao, X., Mu, T., Goulermas, J., Wang, M.: Attention driven multimodal similarity learning. Inf. Sci. 432, 530\u2013542 (2018)","journal-title":"Inf. Sci."},{"key":"19_CR19","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Wang, J., Huang, H., Huang, X., Gong, Y.: Hashtag recommendation for multimodal microblog using co-attention network. In: Proceedings of the International Joint Conference on Artificial Intelligence, pp. 3420\u20133426 (2017)","DOI":"10.24963\/ijcai.2017\/478"},{"key":"19_CR20","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Fu, J., Liu, X., Huang, X.: Adaptive co-attention network for named entity recognition in tweets. In: Proceedings of the AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.11962"},{"key":"19_CR21","doi-asserted-by":"crossref","unstructured":"Ma, R., Zhang, Q., Wang, J., Cui, L., Huang, X.: Mention recommendation for multimodal microblog with cross-attention memory network. In: Proceedings of the 41st International ACM SIGIR Conference on Research & Development in Information Retrieval, pp. 195\u2013204 (2018)","DOI":"10.1145\/3209978.3210026"},{"key":"19_CR22","doi-asserted-by":"crossref","unstructured":"Lee, K., Chen, X., Hua, G., Hu, H., He, X.: Stacked cross attention for image-text matching. In: Proceedings of the European Conference on Computer Vision, pp. 201\u2013216 (2018)","DOI":"10.1007\/978-3-030-01225-0_13"},{"key":"19_CR23","unstructured":"Sutskever, I., Vinyals, O., Le, Q.V.: Sequence to sequence learning with neural networks. arXiv:1409.3215 (2014)"},{"key":"19_CR24","unstructured":"Lu, J., Batra, D., Parikh, D., Lee, S.: ViLBERT: pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. arXiv:1908.02265 (2019)"},{"key":"19_CR25","doi-asserted-by":"crossref","unstructured":"Sun, C., Myers, A., Vondrick, C., Murphy, K., Schmid, C.: VideoBERT: a joint model for video and language representation learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7464\u20137473 (2019)","DOI":"10.1109\/ICCV.2019.00756"},{"key":"19_CR26","doi-asserted-by":"crossref","unstructured":"Tsai, Y.H.H., et al.: Multimodal transformer for unaligned multimodal language sequences. In: Proceedings of the 57th Conference of the Association for Computational Linguistics, pp. 6558\u20136569 (2019)","DOI":"10.18653\/v1\/P19-1656"},{"issue":"12","key":"19_CR27","doi-asserted-by":"publisher","first-page":"4467","DOI":"10.1109\/TCSVT.2019.2947482","volume":"30","author":"J Yu","year":"2019","unstructured":"Yu, J., Li, J., Yu, Z., Huang, Q.M.: Multimodal transformer with multi-view visual representation for image captioning. IEEE Trans. Circuits Syst. Video Technol. 30(12), 4467\u20134480 (2019)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"19_CR28","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the 2016 IEEE conference on computer vision and pattern recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"8","key":"19_CR29","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"19_CR30","doi-asserted-by":"crossref","unstructured":"Zan, Z., Li, L., Liu, J., Zhou, D.: Sentence-based and noise-robust cross-modal retrieval on cooking recipes and Food Images. In: Proceedings of the 2020 International Conference on Multimedia Retrieval, pp.117\u2013125 (2020)","DOI":"10.1145\/3372278.3390681"},{"key":"19_CR31","unstructured":"Rezende, D.J., Mohamed, S., Wierstra, D.: Stochastic backpropagation and approximate inference in deep generative models. In: Proceedings of the 31st International Conference on Machine Learning, pp.1278\u20131286 (2014)"},{"key":"19_CR32","doi-asserted-by":"publisher","unstructured":"Hotelling, H.: Relations between two sets of variates. In: Breakthroughs in Statistics, pp. 162\u2013190. Springer, New York (1992). https:\/\/doi.org\/10.1007\/978-1-4612-4380-9_14","DOI":"10.1007\/978-1-4612-4380-9_14"}],"container-title":["Lecture Notes in Computer Science","Web and Big Data"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-85899-5_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,7]],"date-time":"2023-01-07T15:08:51Z","timestamp":1673104131000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-85899-5_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030858988","9783030858995"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-85899-5_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"19 August 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"APWeb-WAIM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asia-Pacific Web (APWeb) and Web-Age Information Management (WAIM) Joint International Conference on Web and Big Data","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 August 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"apwebwaim2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"184","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"44","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.6","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6.38","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}