{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T15:53:06Z","timestamp":1742917986642,"version":"3.40.3"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031408366"},{"type":"electronic","value":"9783031408373"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-40837-3_17","type":"book-chapter","created":{"date-parts":[[2023,8,21]],"date-time":"2023-08-21T23:02:25Z","timestamp":1692658945000},"page":"280-300","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Event and\u00a0Entity Extraction from\u00a0Generated Video Captions"],"prefix":"10.1007","author":[{"given":"Johannes","family":"Scherer","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1762-1578","authenticated-orcid":false,"given":"Deepayan","family":"Bhowmik","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2653-9245","authenticated-orcid":false,"given":"Ansgar","family":"Scherp","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,8,22]]},"reference":[{"key":"17_CR1","doi-asserted-by":"crossref","unstructured":"Aafaq, N., Mian, A., Liu, W., Gilani, S.Z., Shah, M.: Video description: a survey of methods, datasets, and evaluation metrics. ACM Comput. Surv. 52(6) (2019)","DOI":"10.1145\/3355390"},{"key":"17_CR2","unstructured":"Algur, S., Bhat, P.: Metadata construction model for web videos: a domain specific approach. IJECS 3 (2014)"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"Algur, S.P., Bhat, P.: Web video mining: metadata predictive analysis using classification techniques. IJ Inf. Technol. Comput. Sci. 2 (2016)","DOI":"10.5815\/ijitcs.2016.02.09"},{"key":"17_CR4","doi-asserted-by":"crossref","unstructured":"Del Corro, L., Gemulla, R.: Clausie: clause-based open information extraction. In: World Wide Web. ACM (2013)","DOI":"10.1145\/2488388.2488420"},{"key":"17_CR5","unstructured":"Devlin, J., Chang, M., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. CoRR abs\/1810.04805 (2018)"},{"key":"17_CR6","unstructured":"Fader, A., Soderland, S., Etzioni, O.: Identifying relations for open information extraction. In: EMNLP. ACL (2011)"},{"key":"17_CR7","doi-asserted-by":"crossref","unstructured":"Galke, L., Scherp, A.: Bag-of-words vs. graph vs. sequence in text classification: questioning the necessity of text-graphs and the surprising strength of a wide MLP. In: Association for Computational Linguistics. ACL (2022)","DOI":"10.18653\/v1\/2022.acl-long.279"},{"key":"17_CR8","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3053249","author":"L Jiao","year":"2021","unstructured":"Jiao, L.: New generation deep learning for video object detection: a survey. IEEE Trans. Neural Netw. Learn. Syst. (2021). https:\/\/doi.org\/10.1109\/TNNLS.2021.3053249","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"17_CR9","doi-asserted-by":"crossref","unstructured":"Kim, Y.: Convolutional neural networks for sentence classification. In: EMNLP. ACL (2014)","DOI":"10.3115\/v1\/D14-1181"},{"key":"17_CR10","doi-asserted-by":"publisher","unstructured":"Krishna, R., Hata, K., Ren, F., Fei-Fei, L., Niebles, J.C.: Dense-captioning events in videos. In: ICCV (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.83","DOI":"10.1109\/ICCV.2017.83"},{"key":"17_CR11","doi-asserted-by":"crossref","unstructured":"Lavie, A., Agarwal, A.: METEOR: an automatic metric for MT evaluation with high levels of correlation with human judgments. In: Workshop on Statistical Machine Translation. ACL (2007)","DOI":"10.3115\/1626355.1626389"},{"key":"17_CR12","unstructured":"Li, Q., et al.: A survey on text classification: from shallow to deep learning. CoRR abs\/2008.00364 (2020)"},{"key":"17_CR13","doi-asserted-by":"publisher","unstructured":"Li, Y., Yao, T., Pan, Y., Chao, H., Mei, T.: Jointly localizing and describing events for dense video captioning. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00782","DOI":"10.1109\/CVPR.2018.00782"},{"key":"17_CR14","doi-asserted-by":"publisher","unstructured":"Long, X., Gan, C., de Melo, G., Wu, J., Liu, X., Wen, S.: Attention clusters: purely attention based local feature integration for video classification. In: CVPR (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00817","DOI":"10.1109\/CVPR.2018.00817"},{"key":"17_CR15","doi-asserted-by":"crossref","unstructured":"Miller, G.A.: WordNet: a lexical database for English. Commun. ACM 38(11) (1995)","DOI":"10.1145\/219717.219748"},{"key":"17_CR16","unstructured":"Niklaus, C., Cetto, M., Freitas, A., Handschuh, S.: A survey on open information extraction. In: International Conference Computer Linguistics. ACL (2018)"},{"key":"17_CR17","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J.: Bleu: a method for automatic evaluation of machine translation. In: ACL. ACL (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"17_CR18","unstructured":"Sarhan, I.; Spruit, M.; Atzmueller, M.; Duivesteijn, W.: Uncovering algorithmic approaches in open information extraction: a literature review. In: 30th Benelux Conference on Artificial Intelligence (2018). https:\/\/dspace.library.uu.nl\/handle\/1874\/374300"},{"key":"17_CR19","doi-asserted-by":"crossref","unstructured":"Sarvas, R., Herrarte, E., Wilhelm, A., Davis, M.: Metadata creation system for mobile images. In: MobiSys. ACM (2004)","DOI":"10.1145\/990064.990072"},{"key":"17_CR20","doi-asserted-by":"publisher","unstructured":"Scherer, J., Scherp, A., Bhowmik, D.: Semantic metadata extraction from dense video captioning. CoRR abs\/2211.02982 (2022). https:\/\/doi.org\/10.48550\/arXiv.2211.02982","DOI":"10.48550\/arXiv.2211.02982"},{"key":"17_CR21","doi-asserted-by":"crossref","unstructured":"Shang, X., Di, D., Xiao, J., Cao, Y., Yang, X., Chua, T.S.: Annotating objects and relations in user-generated videos. In: Multimedia Retrieval. ACM (2019)","DOI":"10.1145\/3323873.3325056"},{"key":"17_CR22","doi-asserted-by":"crossref","unstructured":"Shang, X., Ren, T., Guo, J., Zhang, H., Chua, T.S.: Video visual relation detection. In: Multimedia. ACM (2017)","DOI":"10.1145\/3123266.3123380"},{"key":"17_CR23","doi-asserted-by":"crossref","unstructured":"Thomee, B., et al.: YFCC100M: the new data in multimedia research. Commun. ACM 59(2) (2016)","DOI":"10.1145\/2812802"},{"key":"17_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1007\/978-3-319-46484-8_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"L Wang","year":"2016","unstructured":"Wang, L., et al.: Temporal segment networks: towards good practices for deep action recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 20\u201336. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_2"},{"key":"17_CR25","doi-asserted-by":"crossref","unstructured":"Wang, T., Zhang, R., Lu, Z., Zheng, F., Cheng, R., Luo, P.: End-to-end dense video captioning with parallel decoding. ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00677"},{"key":"17_CR26","doi-asserted-by":"publisher","unstructured":"Zhou, H., et al.: Exploring emotion features and fusion strategies for audio-video emotion recognition. In: 2019 International Conference on Multimodal Interaction (2019). https:\/\/doi.org\/10.1145\/3340555.3355713","DOI":"10.1145\/3340555.3355713"},{"key":"17_CR27","doi-asserted-by":"publisher","unstructured":"Zhou, L., Zhou, Y., Corso, J.J., Socher, R., Xiong, C.: End-to-end dense video captioning with masked transformer. In: CVPR (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00911","DOI":"10.1109\/CVPR.2018.00911"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Extraction"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-40837-3_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T11:31:50Z","timestamp":1710329510000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-40837-3_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031408366","9783031408373"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-40837-3_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"22 August 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CD-MAKE","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Cross-Domain Conference for Machine Learning and Knowledge Extraction","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Benevento","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cd-make2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/cd-make.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"30","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"18","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"60% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}