{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T02:50:12Z","timestamp":1742957412101,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030922726"},{"type":"electronic","value":"9783030922733"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-92273-3_48","type":"book-chapter","created":{"date-parts":[[2021,12,4]],"date-time":"2021-12-04T21:34:27Z","timestamp":1638653667000},"page":"585-596","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Mutual Information-Based Disentanglement Framework for\u00a0Cross-Modal Retrieval"],"prefix":"10.1007","author":[{"given":"Han","family":"Wu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaowang","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiachen","family":"Tian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shaojuan","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chunliu","family":"Dou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yue","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhiyong","family":"Feng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,12,5]]},"reference":[{"key":"48_CR1","series-title":"Springer Undergraduate Texts in Mathematics and Technology","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-10-8001-2","volume-title":"An Introduction to Single-User Information Theory","author":"F Alajaji","year":"2018","unstructured":"Alajaji, F., Chen, P.-N.: An Introduction to Single-User Information Theory. SUTMT, Springer, Singapore (2018). https:\/\/doi.org\/10.1007\/978-981-10-8001-2"},{"key":"48_CR2","unstructured":"Belghazi, M.I., et al.: Mutual information neural estimation. In: 35th International Conference on Machine Learning, pp. 530\u2013539. PMLR, Stockholmsm\u00e4ssan, Stockholm, Sweden (2018)"},{"issue":"8","key":"48_CR3","doi-asserted-by":"publisher","first-page":"1798","DOI":"10.1109\/TPAMI.2013.50","volume":"35","author":"Y Bengio","year":"2013","unstructured":"Bengio, Y., Courville, A., Vincent, P.: Representation learning: a review and new perspectives. IEEE Trans. Pattern Anal. Mach. Intell. 35(8), 1798\u20131828 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"48_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"549","DOI":"10.1007\/978-3-030-58601-0_33","volume-title":"Computer Vision \u2013 ECCV 2020","author":"T Chen","year":"2020","unstructured":"Chen, T., Deng, J., Luo, J.: Adaptive offline quintuplet loss for image-text matching. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12358, pp. 549\u2013565. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58601-0_33"},{"key":"48_CR5","unstructured":"Faghri, F., Fleet, D. J., Kiros, J. R., Fidler, S.: VSE++: improving visual-semantic embeddings with hard negatives. In: 29th British Machine Vision Conference, Article 12. BMVA Press Newcastle, UK (2018)"},{"key":"48_CR6","doi-asserted-by":"crossref","unstructured":"Guo, W., Huang, H., Kong, X., He, R.: Learning disentangled representation for cross-modal retrieval with deep mutual information estimation. In: 27th ACM International Conference on Multimedia, pp. 1712\u20131720. ACM, Nice, France (2019)","DOI":"10.1145\/3343031.3351053"},{"key":"48_CR7","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: 26th IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778. IEEE Computer Society, Las Vegas, NV, USA (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"48_CR8","doi-asserted-by":"crossref","unstructured":"Karpathy, A., Fei-Fei, L.: Deep visual-semantic alignments for generating image descriptions. In: 25th IEEE Conference on Computer Vision and Pattern Recognition, pp. 3128\u20133137. IEEE Computer Society, Boston, MA, USA (2015)","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"48_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"212","DOI":"10.1007\/978-3-030-01225-0_13","volume-title":"Computer Vision \u2013 ECCV 2018","author":"K-H Lee","year":"2018","unstructured":"Lee, K.-H., Chen, X., Hua, G., Hu, H., He, X.: Stacked cross attention for image-text matching. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11208, pp. 212\u2013228. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01225-0_13"},{"key":"48_CR10","doi-asserted-by":"crossref","unstructured":"Li, C., Deng, C., Li, N., Liu, W., Gao, X., Tao, D.: Self-supervised adversarial hashing networks for cross-modal retrieval. In: 28th IEEE Conference on Computer Vision and Pattern Recognition, pp. 4242\u20134251. IEEE Computer Society, Salt Lake City, UT, USA (2018)","DOI":"10.1109\/CVPR.2018.00446"},{"key":"48_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"48_CR12","doi-asserted-by":"crossref","unstructured":"Liu, C., Mao, Z., Liu, A. A., Zhang, T., Wang, B., Zhang, Y.: Focus your attention: a bidirectional focal attention network for image-text matching. In: 27th ACM International Conference on Multimedia, pp. 3\u201311. ACM, Nice, France (2019)","DOI":"10.1145\/3343031.3350869"},{"key":"48_CR13","doi-asserted-by":"crossref","unstructured":"Ma, D., Zhai, X., Peng, Y.: Cross-media retrieval by cluster-based correlation analysis. In: 20th IEEE International Conference on Image Processing, pp. 3986\u20133990. IEEE, Melbourne, Australia (2013)","DOI":"10.1109\/ICIP.2013.6738821"},{"issue":"9","key":"48_CR14","doi-asserted-by":"publisher","first-page":"2372","DOI":"10.1109\/TCSVT.2017.2705068","volume":"28","author":"Y Peng","year":"2017","unstructured":"Peng, Y., Huang, X., Zhao, Y.: An overview of cross-media retrieval: concepts, methodologies, benchmarks, and challenges. IEEE Trans. Circuits Syst. Video Technol. 28(9), 2372\u20132385 (2017)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"11","key":"48_CR15","doi-asserted-by":"publisher","first-page":"5585","DOI":"10.1109\/TIP.2018.2852503","volume":"27","author":"Y Peng","year":"2018","unstructured":"Peng, Y., Qi, J., Yuan, Y.: Modality-specific cross-modal similarity measurement with recurrent attention network. IEEE Trans. Image Process. 27(11), 5585\u20135599 (2018)","journal-title":"IEEE Trans. Image Process."},{"key":"48_CR16","doi-asserted-by":"crossref","unstructured":"Plummer, B.A., Wang, L., Cervantes, C.M., Caicedo, J.C., Hockenmaier, J., Lazebnik, S.: Flickr30k entities: collecting region-to-phrase correspondences for richer image-to-sentence models. In: 15th IEEE International Conference on Computer Vision, pp. 2641\u20132649. IEEE Computer Society, Santiago, Chile (2015)","DOI":"10.1109\/ICCV.2015.303"},{"key":"48_CR17","doi-asserted-by":"crossref","unstructured":"Rasiwasia, N., Costa Pereira, J., Coviello, E., Doyle, G., Lanckriet, G. R., Levy, R., Vasconcelos, N.: A new approach to cross-modal multimedia retrieval. In: 18th ACM International Conference on Multimedia, pp. 251\u2013260. ACM, Firenze, Italy (2010)","DOI":"10.1145\/1873951.1873987"},{"key":"48_CR18","doi-asserted-by":"crossref","unstructured":"Song, Y., Soleymani, M.: Polysemous visual-semantic embedding for cross-modal retrieval. In: 29th IEEE Conference on Computer Vision and Pattern Recognition, pp. 1979\u20131988. IEEE, Long Beach, CA, USA (2019)","DOI":"10.1109\/CVPR.2019.00208"},{"key":"48_CR19","doi-asserted-by":"crossref","unstructured":"Wang, B., Yang, Y., Xu, X., Hanjalic, A., Shen, H.T.: Adversarial cross-modal retrieval. In: 25th ACM International Conference on Multimedia, pp. 154\u2013162. ACM, Mountain View, CA, USA (2017)","DOI":"10.1145\/3123266.3123326"},{"key":"48_CR20","doi-asserted-by":"crossref","unstructured":"Wang, T., Xu, X., Yang, Y., Hanjalic, A., Shen, H.T., Song, J.: Matching images and text with multi-modal tensor fusion and re-ranking. In: 27th ACM International Conference on Multimedia, pp. 12\u201320. ACM, Nice, France (2019)","DOI":"10.1145\/3343031.3350875"},{"key":"48_CR21","doi-asserted-by":"crossref","unstructured":"Wang, Z., et al.: Camp: cross-modal adaptive message passing for text-image retrieval. In: 17th IEEE International Conference on Computer Vision, pp. 5763\u20135772. IEEE, Seoul, Korea (South) (2019)","DOI":"10.1109\/ICCV.2019.00586"},{"key":"48_CR22","doi-asserted-by":"crossref","unstructured":"Wehrmann, J., Kolling, C., Barros, R. C.: Adaptive cross-modal embeddings for image-text alignment. In: 32nd AAAI Conference on Artificial Intelligence, pp. 12313\u201312320. AAAI Press, New York, NY, USA (2020)","DOI":"10.1609\/aaai.v34i07.6915"},{"key":"48_CR23","doi-asserted-by":"crossref","unstructured":"Wei, J., Xu, X., Yang, Y., Ji, Y., Wang, Z., Shen, H.T.: Universal weighting metric learning for cross-moda matching. In: 30th IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13002\u201313011. IEEE, Seattle, WA, USA (2020)","DOI":"10.1109\/CVPR42600.2020.01302"},{"key":"48_CR24","doi-asserted-by":"crossref","unstructured":"Wu, H., Merler, M., Uceda-Sosa, R., Smith, J. R.: Learning to make better mistakes: Semantics-aware visual food recognition. In: 24th ACM International Conference on Multimedia, pp. 172\u2013176. ACM, Amsterdam, The Netherlands (2016)","DOI":"10.1145\/2964284.2967205"},{"key":"48_CR25","doi-asserted-by":"crossref","unstructured":"Yang, Y., Xu, D., Nie, F., Luo, J., Zhuang, Y.: Ranking with local regression and global alignment for cross media retrieval. In: 17th ACM international Conference on Multimedia, pp. 175\u2013184. ACM, Vancouver, British Columbia, Canada (2009)","DOI":"10.1145\/1631272.1631298"},{"key":"48_CR26","doi-asserted-by":"crossref","unstructured":"Zhang, W., et al.: Photo stream question answer. In: 28th ACM International Conference on Multimedia, pp. 3966\u20133975. ACM, Virtual Event (2020)","DOI":"10.1145\/3394171.3413745"},{"issue":"2","key":"48_CR27","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TMM.2007.911822","volume":"10","author":"YT Zhuang","year":"2008","unstructured":"Zhuang, Y.T., Yang, Y., Wu, F.: Mining semantic correlation of heterogeneous multimedia data for cross-media retrieval. IEEE Trans. Multimedia 10(2), 221\u2013229 (2008)","journal-title":"IEEE Trans. Multimedia"},{"key":"48_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"831","DOI":"10.1007\/978-3-030-01261-8_49","volume-title":"Computer Vision \u2013 ECCV 2018","author":"C Li","year":"2018","unstructured":"Li, C., Zhu, C., Huang, Y., Tang, J., Wang, L.: Cross-modal ranking with soft consistency and noisy labels for robust RGB-T tracking. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11217, pp. 831\u2013847. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01261-8_49"}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-92273-3_48","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,12,4]],"date-time":"2021-12-04T21:42:31Z","timestamp":1638654151000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-92273-3_48"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030922726","9783030922733"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-92273-3_48","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"5 December 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Sanur, Bali","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Indonesia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 December 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 December 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iconip2021.apnns.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1093","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"226","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"177","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"21% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.57","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the COVID-19 pandemic the conference was held online.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}