{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T20:08:06Z","timestamp":1778789286292,"version":"3.51.4"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031200588","type":"print"},{"value":"9783031200595","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-20059-5_37","type":"book-chapter","created":{"date-parts":[[2022,10,28]],"date-time":"2022-10-28T16:02:50Z","timestamp":1666972970000},"page":"644-661","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["NewsStories: Illustrating Articles with\u00a0Visual Summaries"],"prefix":"10.1007","author":[{"given":"Reuben","family":"Tan","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bryan A.","family":"Plummer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kate","family":"Saenko","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"JP","family":"Lewis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Avneesh","family":"Sud","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thomas","family":"Leung","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,10,29]]},"reference":[{"key":"37_CR1","unstructured":"Aneja, S., Bregler, C., Nie\u00dfner, M.: COSMOS: catching out-of-context misinformation with self-supervised learning. CoRR abs\/2101.06278 (2021). https:\/\/arxiv.org\/abs\/2101.06278"},{"key":"37_CR2","doi-asserted-by":"publisher","unstructured":"Antol, S., et al.: VQA: visual question answering. In: 2015 IEEE International Conference on Computer Vision, ICCV 2015, Santiago, Chile, 7\u201313 December 2015, pp. 2425\u20132433. IEEE Computer Society (2015). https:\/\/doi.org\/10.1109\/ICCV.2015.279","DOI":"10.1109\/ICCV.2015.279"},{"key":"37_CR3","doi-asserted-by":"crossref","unstructured":"Bain, M., Nagrani, A., Varol, G., Zisserman, A.: Frozen in time: a joint video and image encoder for end-to-end retrieval. CoRR abs\/2104.00650 (2021). https:\/\/arxiv.org\/abs\/2104.00650","DOI":"10.1109\/ICCV48922.2021.00175"},{"key":"37_CR4","doi-asserted-by":"crossref","unstructured":"Biten, A.F., Gomez, L., Rusinol, M., Karatzas, D.: Good news, everyone! context driven entity-aware captioning for news images. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12466\u201312475 (2019)","DOI":"10.1109\/CVPR.2019.01275"},{"key":"37_CR5","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.E.: A simple framework for contrastive learning of visual representations. In: Proceedings of the 37th International Conference on Machine Learning, ICML 2020, 13\u201318 July 2020, Virtual Event, pp. 1597\u20131607 (2020). https:\/\/proceedings.mlr.press\/v119\/chen20j.html"},{"key":"37_CR6","doi-asserted-by":"crossref","unstructured":"Chun, S., Oh, S.J., de Rezende, R.S., Kalantidis, Y., Larlus, D.: Probabilistic embeddings for cross-modal retrieval. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2021, virtual, 19\u201325 June 2021 (2021). https:\/\/openaccess.thecvf.com\/content\/CVPR2021\/html\/Chun_Probabilistic_Embeddings_for_Cross-Modal_Retrieval_CVPR_2021_paper.html","DOI":"10.1109\/CVPR46437.2021.00831"},{"key":"37_CR7","unstructured":"https:\/\/commoncrawl.org"},{"key":"37_CR8","doi-asserted-by":"crossref","unstructured":"Desai, K., Johnson, J.: Virtex: Learning visual representations from textual annotations. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2021, virtual, 19\u201325 June 2021, pp. 11162\u201311173. Computer Vision Foundation \/ IEEE (2021). https:\/\/openaccess.thecvf.com\/content\/CVPR2021\/html\/Desai_VirTex_Learning_Visual_Representations_From_Textual_Annotations_CVPR_2021_paper.html","DOI":"10.1109\/CVPR46437.2021.01101"},{"issue":"1\u20132","key":"37_CR9","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1016\/S0004-3702(96)00034-3","volume":"89","author":"TG Dietterich","year":"1997","unstructured":"Dietterich, T.G., Lathrop, R.H., Lozano-P\u00e9rez, T.: Solving the multiple instance problem with axis-parallel rectangles. Artif. Intell. 89(1\u20132), 31\u201371 (1997). https:\/\/doi.org\/10.1016\/S0004-3702(96)00034-3","journal-title":"Artif. Intell."},{"key":"37_CR10","doi-asserted-by":"publisher","unstructured":"Faghri, F., Fleet, D.J., Kiros, J.R., Fidler, S.: VSE++: improving visual-semantic embeddings with hard negatives. In: British Machine Vision Conference 2018, BMVC 2018, Newcastle, UK, 3\u20136 September 2018, p. 12. BMVA Press (2018). https:\/\/doi.org\/10.1016\/S0004-3702(96)00034-3, https:\/\/bmvc2018.org\/contents\/papers\/0344.pdf","DOI":"10.1016\/S0004-3702(96)00034-3"},{"key":"37_CR11","unstructured":"Frome, A., Corrado, G.S., Shlens, J., Bengio, S., Dean, J., Ranzato, M., Mikolov, T.: DeViSE: a deep visual-semantic embedding model. In: Advances in Neural Information Processing Systems 26: 27th Annual Conference on Neural Information Processing Systems 2013. Proceedings of a Meeting held 5\u20138 December 2013, Lake Tahoe, Nevada, United States (2013). https:\/\/proceedings.neurips.cc\/paper\/2013\/hash\/7cce53cf90577442771720a370c3c723-Abstract.html"},{"key":"37_CR12","doi-asserted-by":"crossref","unstructured":"Gu, X., et al.: Generating representative headlines for news stories. In: Proceeding of the the Web Conference 2020 (2020)","DOI":"10.1145\/3366423.3380247"},{"key":"37_CR13","unstructured":"Gurevych, I., Miyao, Y. (eds.): Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics, ACL 2018, Melbourne, Australia, 15\u201320 July 2018, Volume 1: Long Papers. Association for Computational Linguistics (2018). https:\/\/aclanthology.org\/volumes\/P18-1\/"},{"key":"37_CR14","unstructured":"Huang, T.K., et al.: Visual storytelling. CoRR abs\/1604.03968 (2016). https:\/\/arxiv.org\/abs\/1604.03968"},{"key":"37_CR15","unstructured":"Jia, C., et al.: Scaling up visual and vision-language representation learning with noisy text supervision. In: Proceedings of the 38th International Conference on Machine Learning, ICML (2021)"},{"key":"37_CR16","doi-asserted-by":"publisher","unstructured":"Joulin, A., van der Maaten, L., Jabri, A., Vasilache, N.: Learning visual features from large weakly supervised data. In: Computer Vision - ECCV 2016\u201314th European Conference, Amsterdam, The Netherlands, 11\u201314 October 2016, Proceedings, Part VII (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_5","DOI":"10.1007\/978-3-319-46478-7_5"},{"key":"37_CR17","doi-asserted-by":"publisher","unstructured":"Karpathy, A., Fei-Fei, L.: Deep visual-semantic alignments for generating image descriptions. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2015, Boston, MA, USA, 7\u201312 June 2015, pp. 3128\u20133137. IEEE Computer Society (2015). https:\/\/doi.org\/10.1109\/CVPR.2015.7298932","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"37_CR18","unstructured":"Kim, G., Moon, S., Sigal, L.: Ranking and retrieval of image sequences from multiple paragraph queries. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1993\u20132001 (2015)"},{"key":"37_CR19","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, 7\u20139 May 2015, Conference Track Proceedings (2015). https:\/\/arxiv.org\/abs\/1412.6980"},{"key":"37_CR20","doi-asserted-by":"crossref","unstructured":"Lee, K.H., Chen, X., Hua, G., Hu, H., He, X.: Stacked cross attention for image-text matching. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 201\u2013216 (2018)","DOI":"10.1007\/978-3-030-01225-0_13"},{"key":"37_CR21","doi-asserted-by":"publisher","unstructured":"Li, A., Jabri, A., Joulin, A., van der Maaten, L.: Learning visual n-grams from web data. In: IEEE International Conference on Computer Vision, ICCV 2017, Venice, Italy, 22\u201329 October 2017, pp. 4193\u20134202. IEEE Computer Society (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.449https:\/\/doi.ieeecomputersociety.org\/10.1109\/ICCV.2017.449","DOI":"10.1109\/ICCV.2017.449"},{"key":"37_CR22","doi-asserted-by":"crossref","unstructured":"Li, M., Chen, X., Gao, S., Chan, Z., Zhao, D., Yan, R.: Vmsmo: learning to generate multimodal summary for video-based news articles. arXiv preprint arXiv:2010.05406 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.752"},{"key":"37_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"37_CR24","doi-asserted-by":"crossref","unstructured":"Liu, F., Wang, Y., Wang, T., Ordonez, V.: Visual news: benchmark and challenges in news image captioning. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 6761\u20136771 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.542"},{"key":"37_CR25","doi-asserted-by":"crossref","unstructured":"Liu, J., Liu, T., Yu, C.: NewsEmbed: modeling news through pre-trained document representations. arXiv preprint arXiv:2106.00590 (2021)","DOI":"10.1145\/3447548.3467392"},{"key":"37_CR26","doi-asserted-by":"crossref","unstructured":"Loper, E., Bird, S.: NLTK: the natural language toolkit. CoRR cs.CL\/0205028 (2002). https:\/\/dblp.uni-trier.de\/db\/journals\/corr\/corr0205.html#cs-CL-0205028","DOI":"10.3115\/1118108.1118117"},{"key":"37_CR27","doi-asserted-by":"publisher","unstructured":"Miech, A., Alayrac, J., Smaira, L., Laptev, I., Sivic, J., Zisserman, A.: End-to-end learning of visual representations from uncurated instructional videos. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2020, Seattle, WA, USA, 13\u201319 June 2020 (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.00990, https:\/\/openaccess.thecvf.com\/content_CVPR_2020\/html\/Miech_End-to-End_Learning_of_Visual_Representations_From_Uncurated_Instructional_Videos_CVPR_2020_paper.html","DOI":"10.1109\/CVPR42600.2020.00990"},{"key":"37_CR28","unstructured":"Oh, S.J., Murphy, K.P., Pan, J., Roth, J., Schroff, F., Gallagher, A.C.: Modeling uncertainty with hedged instance embeddings. In: 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, 6\u20139 May 2019 (2019). https:\/\/openreview.net\/forum?id=r1xQQhAqKX"},{"key":"37_CR29","unstructured":"van den Oord, A., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. CoRR abs\/1807.03748 (2018). https:\/\/arxiv.org\/abs\/1807.03748"},{"key":"37_CR30","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Proceedings of the 38th International Conference on Machine Learning, ICML (2021)"},{"key":"37_CR31","doi-asserted-by":"publisher","unstructured":"Sariyildiz, M.B., Perez, J., Larlus, D.: Learning visual representations with caption annotations. In: Computer Vision - ECCV 2020\u201316th European Conference, Glasgow, UK, 23\u201328 August 2020, Proceedings, Part VIII (2020). https:\/\/doi.org\/10.1007\/978-3-030-58598-3_10","DOI":"10.1007\/978-3-030-58598-3_10"},{"key":"37_CR32","doi-asserted-by":"crossref","unstructured":"Song, Y., Soleymani, M.: Polysemous visual-semantic embedding for cross-modal retrieval. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR (2019)","DOI":"10.1109\/CVPR.2019.00208"},{"key":"37_CR33","doi-asserted-by":"publisher","unstructured":"Suhr, A., Zhou, S., Zhang, A., Zhang, I., Bai, H., Artzi, Y.: A corpus for reasoning about natural language grounded in photographs. In: Proceedings of the 57th Conference of the Association for Computational Linguistics, ACL 2019, Florence, Italy, July 28- August 2 2019, Volume 1: Long Papers (2019). https:\/\/doi.org\/10.18653\/v1\/p19-1644","DOI":"10.18653\/v1\/p19-1644"},{"key":"37_CR34","doi-asserted-by":"publisher","unstructured":"Tan, R., Plummer, B., Saenko, K.: Detecting cross-modal inconsistency to defend against neural fake news. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 2081\u20132106. Association for Computational Linguistics (2020). https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.163, https:\/\/aclanthology.org\/2020.emnlp-main.163","DOI":"10.18653\/v1\/2020.emnlp-main.163"},{"key":"37_CR35","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1007\/978-3-030-58523-5_19","volume-title":"Computer Vision \u2013 ECCV 2020","author":"C Thomas","year":"2020","unstructured":"Thomas, C., Kovashka, A.: Preserving semantic neighborhoods for robust cross-modal retrieval. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12363, pp. 317\u2013335. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58523-5_19"},{"key":"37_CR36","doi-asserted-by":"crossref","unstructured":"Tran, A., Mathews, A., Xie, L.: Transform and tell: entity-aware news image captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13035\u201313045 (2020)","DOI":"10.1109\/CVPR42600.2020.01305"},{"key":"37_CR37","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"key":"37_CR38","doi-asserted-by":"crossref","unstructured":"Wang, L., Li, Y., Lazebnik, S.: Learning deep structure-preserving image-text embeddings. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5005\u20135013 (2016)","DOI":"10.1109\/CVPR.2016.541"},{"key":"37_CR39","doi-asserted-by":"crossref","unstructured":"Yamada, I., Asai, A., Shindo, H., Takeda, H., Matsumoto, Y.: LUKE: deep contextualized entity representations with entity-aware self-attention. arXiv preprint arXiv:2010.01057 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.523"},{"key":"37_CR40","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1162\/tacl_a_00166","volume":"2","author":"P Young","year":"2014","unstructured":"Young, P., Lai, A., Hodosh, M., Hockenmaier, J.: From image descriptions to visual denotations: new similarity metrics for semantic inference over event descriptions. Trans. Assoc. Compu. Linguist. 2, 67\u201378 (2014)","journal-title":"Trans. Assoc. Compu. Linguist."},{"key":"37_CR41","unstructured":"Zhang, Y., Jiang, H., Miura, Y., Manning, C.D., Langlotz, C.P.: Contrastive learning of medical visual representations from paired images and text. CoRR abs\/2010.00747 (2020). https:\/\/arxiv.org\/abs\/2010.00747"},{"key":"37_CR42","doi-asserted-by":"publisher","unstructured":"Zhu, Y., Groth, O., Bernstein, M.S., Fei-Fei, L.: Visual7W: grounded question answering in images. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2016, Las Vegas, NV, USA, 27\u201330 June 2016, pp. 4995\u20135004. IEEE Computer Society (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.540,https:\/\/doi.org\/10.1109\/CVPR.2016.540","DOI":"10.1109\/CVPR.2016.540,"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-20059-5_37","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,28]],"date-time":"2022-10-28T16:14:45Z","timestamp":1666973685000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-20059-5_37"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031200588","9783031200595"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-20059-5_37","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"29 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}