{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:25:33Z","timestamp":1777656333559,"version":"3.51.4"},"publisher-location":"Cham","reference-count":66,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031732010","type":"print"},{"value":"9783031732027","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T00:00:00Z","timestamp":1732147200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73202-7_3","type":"book-chapter","created":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T14:16:54Z","timestamp":1732112214000},"page":"39-56","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Discovering Novel Actions from\u00a0Open World Egocentric Videos with\u00a0Object-Grounded Visual Commonsense Reasoning"],"prefix":"10.1007","author":[{"given":"Sanjoy","family":"Kundu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shubham","family":"Trehan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1062-8929","authenticated-orcid":false,"given":"Sathyanarayanan N.","family":"Aakur","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,21]]},"reference":[{"key":"3_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"70","DOI":"10.1007\/978-3-031-19839-7_5","volume-title":"Computer Vision \u2013 ECCV 2022","author":"S Aakur","year":"2022","unstructured":"Aakur, S., Sarkar, S.: Actor-centered representations for action localization in streaming videos. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022, Part XXXVIII. LNCS, vol. 13698, pp. 70\u201387. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19839-7_5"},{"issue":"2","key":"3_CR2","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1090\/qam\/1530","volume":"77","author":"S Aakur","year":"2019","unstructured":"Aakur, S., de Souza, F., Sarkar, S.: Generating open world descriptions of video using common sense knowledge in a pattern theory framework. Q. Appl. Math. 77(2), 323\u2013356 (2019)","journal-title":"Q. Appl. Math."},{"key":"3_CR3","doi-asserted-by":"crossref","unstructured":"Aakur, S.N., Bagavathi, A.: Unsupervised gaze prediction in egocentric videos by energy-based surprise modeling. In: International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications (2021)","DOI":"10.5220\/0010288009350942"},{"key":"3_CR4","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1016\/j.patrec.2022.03.007","volume":"156","author":"SN Aakur","year":"2022","unstructured":"Aakur, S.N., Kundu, S., Gunti, N.: Knowledge guided learning: open world egocentric action recognition with zero supervision. Pattern Recogn. Lett. 156, 38\u201345 (2022)","journal-title":"Pattern Recogn. Lett."},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Aakur, S.N., Sarkar, S.: Leveraging symbolic knowledge bases for commonsense natural language inference using pattern theory. IEEE Trans. Pattern Anal. Mach. Intell. (2023)","DOI":"10.1109\/TPAMI.2023.3287837"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Ashutosh, K., Girdhar, R., Torresani, L., Grauman, K.: HierVL: learning hierarchical video-language embeddings (2023)","DOI":"10.1109\/CVPR52729.2023.02209"},{"key":"3_CR7","doi-asserted-by":"crossref","unstructured":"Bain, M., Nagrani, A., Varol, G., Zisserman, A.: Frozen in time: a joint video and image encoder for end-to-end retrieval. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1728\u20131738 (2021)","DOI":"10.1109\/ICCV48922.2021.00175"},{"key":"3_CR8","unstructured":"Bertasius, G., Wang, H., Torresani, L.: Is space-time attention all you need for video understanding? In: International Conference on Machine Learning, pp. 813\u2013824. PMLR (2021)"},{"key":"3_CR9","unstructured":"Bommasani, R., et\u00a0al.: On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258 (2021)"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Bosselut, A., Rashkin, H., Sap, M., Malaviya, C., Celikyilmaz, A., Choi, Y.: COMET: commonsense transformers for automatic knowledge graph construction. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pp. 4762\u20134779 (2019)","DOI":"10.18653\/v1\/P19-1470"},{"key":"3_CR11","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"3_CR12","unstructured":"Bubeck, S., et\u00a0al.: Sparks of artificial general intelligence: early experiments with GPT-4. arXiv preprint arXiv:2303.12712 (2023)"},{"key":"3_CR13","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, pp. 1597\u20131607. PMLR (2020)"},{"key":"3_CR14","unstructured":"Clark, K., Luong, M.T., Le, Q.V., Manning, C.D.: ELECTRA: pre-training text encoders as discriminators rather than generators. arXiv preprint arXiv:2003.10555 (2020)"},{"issue":"11","key":"3_CR15","doi-asserted-by":"publisher","first-page":"4125","DOI":"10.1109\/TPAMI.2020.2991965","volume":"43","author":"D Damen","year":"2021","unstructured":"Damen, D., et al.: The epic-kitchens dataset: collection, challenges and baselines. IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI) 43(11), 4125\u20134141 (2021). https:\/\/doi.org\/10.1109\/TPAMI.2020.2991965","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell. (TPAMI)"},{"issue":"11","key":"3_CR16","doi-asserted-by":"publisher","first-page":"4125","DOI":"10.1109\/TPAMI.2020.2991965","volume":"43","author":"D Damen","year":"2020","unstructured":"Damen, D., et al.: The epic-kitchens dataset: collection, challenges and baselines. IEEE Trans. Pattern Anal. Mach. Intell. 43(11), 4125\u20134141 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3_CR17","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1007\/s11263-021-01531-2","volume":"130","author":"D Damen","year":"2022","unstructured":"Damen, D., et al.: Rescaling egocentric vision: collection, pipeline and challenges for epic-kitchens-100. Int. J. Comput. Vision (IJCV) 130, 33\u201355 (2022). https:\/\/doi.org\/10.1007\/s11263-021-01531-2","journal-title":"Int. J. Comput. Vision (IJCV)"},{"key":"3_CR18","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"3_CR19","unstructured":"Dong, N., Zhang, Y., Ding, M., Lee, G.H.: Open world DETR: transformer based open world object detection. arXiv preprint arXiv:2212.02969 (2022)"},{"key":"3_CR20","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16$$\\times $$16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (2020)"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Du, Y., Wei, F., Zhang, Z., Shi, M., Gao, Y., Li, G.: Learning to prompt for open-vocabulary object detection with vision-language model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14084\u201314093 (2022)","DOI":"10.1109\/CVPR52688.2022.01369"},{"key":"3_CR22","doi-asserted-by":"crossref","unstructured":"Fan, C.: EgoVQA-an egocentric video question answering benchmark dataset. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops (2019)","DOI":"10.1109\/ICCVW.2019.00536"},{"key":"3_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"314","DOI":"10.1007\/978-3-642-33718-5_23","volume-title":"Computer Vision \u2013 ECCV 2012","author":"A Fathi","year":"2012","unstructured":"Fathi, A., Li, Y., Rehg, J.M.: Learning to recognize daily actions using gaze. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7572, pp. 314\u2013327. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33718-5_23"},{"key":"3_CR24","unstructured":"Grauman, K., et\u00a0al.: Ego4D: around the world in 3,000 hours of egocentric video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18995\u201319012 (2022)"},{"key":"3_CR25","doi-asserted-by":"crossref","unstructured":"Grenander, U.: Elements of Pattern Theory. JHU Press (1996)","DOI":"10.56021\/9780801851872"},{"key":"3_CR26","unstructured":"Gu, X., Lin, T.Y., Kuo, W., Cui, Y.: Open-vocabulary object detection via vision and language knowledge distillation. In: International Conference on Learning Representations (2021). https:\/\/api.semanticscholar.org\/CorpusID:238744187"},{"key":"3_CR27","doi-asserted-by":"crossref","unstructured":"Han, S., et al.: MEgATrack: monochrome egocentric articulated hand-tracking for virtual reality. ACM Trans. Graph. (ToG) 39(4), 87-1 (2020)","DOI":"10.1145\/3386569.3392452"},{"key":"3_CR28","unstructured":"Jia, C., et al.: Scaling up visual and vision-language representation learning with noisy text supervision (2021)"},{"key":"3_CR29","unstructured":"Jiang, J., Ahn, S.: Generative neurosymbolic machines. In: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M., Lin, H. (eds.) Advances in Neural Information Processing Systems, vol.\u00a033, pp. 12572\u201312582. Curran Associates, Inc. (2020). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/94c28dcfc97557df0df6d1f7222fc384-Paper.pdf"},{"key":"3_CR30","unstructured":"Kay, W., et\u00a0al.: The kinetics human action video dataset. arXiv preprint arXiv:1705.06950 (2017)"},{"key":"3_CR31","first-page":"18661","volume":"33","author":"P Khosla","year":"2020","unstructured":"Khosla, P., et al.: Supervised contrastive learning. Adv. Neural. Inf. Process. Syst. 33, 18661\u201318673 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"3_CR32","doi-asserted-by":"crossref","unstructured":"Kundu, S., Aakur, S.N.: IS-GGT: iterative scene graph generation with generative transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6292\u20136301 (2023)","DOI":"10.1109\/CVPR52729.2023.00609"},{"key":"3_CR33","doi-asserted-by":"crossref","unstructured":"Li, H., Cai, Y., Zheng, W.S.: Deep dual relation modeling for egocentric interaction recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7932\u20137941 (2019)","DOI":"10.1109\/CVPR.2019.00812"},{"key":"3_CR34","unstructured":"Li, Y., et al.: Supervision exists everywhere: a data efficient contrastive language-image pre-training paradigm (2022)"},{"key":"3_CR35","doi-asserted-by":"crossref","unstructured":"Li, Y., Fathi, A., Rehg, J.M.: Learning to predict gaze in egocentric video. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3216\u20133223 (2013)","DOI":"10.1109\/ICCV.2013.399"},{"key":"3_CR36","first-page":"7575","volume":"35","author":"KQ Lin","year":"2022","unstructured":"Lin, K.Q., et al.: Egocentric video-language pretraining. Adv. Neural. Inf. Process. Syst. 35, 7575\u20137586 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"3_CR37","unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"3_CR38","doi-asserted-by":"crossref","unstructured":"Lu, Z., Grauman, K.: Story-driven summarization for egocentric video. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2714\u20132721 (2013)","DOI":"10.1109\/CVPR.2013.350"},{"key":"3_CR39","doi-asserted-by":"crossref","unstructured":"Ma, M., Fan, H., Kitani, K.M.: Going deeper into first-person activity recognition. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1894\u20131903 (2016)","DOI":"10.1109\/CVPR.2016.209"},{"key":"3_CR40","doi-asserted-by":"crossref","unstructured":"Maguire, M.J., Dove, G.O.: Speaking of events: event word learning and event representation. In: Understanding Events: How Humans See, Represent, and Act on Events, pp. 193\u2013218 (2008)","DOI":"10.1093\/acprof:oso\/9780195188370.003.0009"},{"key":"3_CR41","unstructured":"Menon, S., Vondrick, C.: Visual classification via description from large language models. arXiv preprint arXiv:2210.07183 (2022)"},{"key":"3_CR42","doi-asserted-by":"crossref","unstructured":"Miech, A., Alayrac, J.B., Smaira, L., Laptev, I., Sivic, J., Zisserman, A.: End-to-end learning of visual representations from uncurated instructional videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9879\u20139889 (2020)","DOI":"10.1109\/CVPR42600.2020.00990"},{"key":"3_CR43","doi-asserted-by":"crossref","unstructured":"Miech, A., Zhukov, D., Alayrac, J.B., Tapaswi, M., Laptev, I., Sivic, J.: HowTo100M: learning a text-video embedding by watching hundred million narrated video clips. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2630\u20132640 (2019)","DOI":"10.1109\/ICCV.2019.00272"},{"key":"3_CR44","doi-asserted-by":"crossref","unstructured":"Mounir, R., Shahabaz, A., Gula, R., Theuerkauf, J., Sarkar, S.: Towards automated ethogramming: Cognitively-inspired event segmentation for streaming wildlife video monitoring. Int. J. Comput. Vision 1\u201331 (2023)","DOI":"10.1007\/s11263-023-01781-2"},{"key":"3_CR45","first-page":"25192","volume":"34","author":"M Nye","year":"2021","unstructured":"Nye, M., Tessler, M., Tenenbaum, J., Lake, B.M.: Improving coherence and consistency in neural sequence models with dual-system, neuro-symbolic reasoning. Adv. Neural. Inf. Process. Syst. 34, 25192\u201325204 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"3_CR46","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.: GloVe: global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"3_CR47","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"3_CR48","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I.: Improving language understanding by generative pre-training (2018)"},{"issue":"8","key":"3_CR49","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I., et al.: Language models are unsupervised multitask learners. OpenAI blog 1(8), 9 (2019)","journal-title":"OpenAI blog"},{"key":"3_CR50","doi-asserted-by":"crossref","unstructured":"Ryoo, M.S., Rothrock, B., Matthies, L.: Pooled motion features for first-person videos. In: Proceedings of the IEEE conference on Computer Vision and Pattern Recognition (CVPR) (2015)","DOI":"10.1109\/CVPR.2015.7298691"},{"key":"3_CR51","doi-asserted-by":"crossref","unstructured":"Sigurdsson, G.A., Gupta, A., Schmid, C., Farhadi, A., Alahari, K.: Actor and observer: joint modeling of first and third-person videos. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7396\u20137404 (2018)","DOI":"10.1109\/CVPR.2018.00772"},{"key":"3_CR52","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. Adv. Neural Inf. Process. Syst. 27 (2014)"},{"key":"3_CR53","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1016\/j.patrec.2016.01.028","volume":"72","author":"FD de Souza","year":"2016","unstructured":"de Souza, F.D., Sarkar, S., Srivastava, A., Su, J.: Pattern theory for representation and inference of semantic structures in videos. Pattern Recogn. Lett. 72, 41\u201351 (2016)","journal-title":"Pattern Recogn. Lett."},{"key":"3_CR54","doi-asserted-by":"crossref","unstructured":"Speer, R., Chin, J., Havasi, C.: ConceptNet 5.5: an open multilingual graph of general knowledge. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a031 (2017)","DOI":"10.1609\/aaai.v31i1.11164"},{"key":"3_CR55","doi-asserted-by":"publisher","unstructured":"Speer, R., Lowry-Duda, J.: Luminoso at SemEval-2018 task 10: distinguishing attributes using text corpora and relational knowledge. In: Proceedings of the 12th International Workshop on Semantic Evaluation, pp. 985\u2013989. Association for Computational Linguistics, New Orleans (2018). https:\/\/doi.org\/10.18653\/v1\/S18-1162, https:\/\/aclanthology.org\/S18-1162","DOI":"10.18653\/v1\/S18-1162"},{"key":"3_CR56","doi-asserted-by":"crossref","unstructured":"Sudhakaran, S., Escalera, S., Lanz, O.: LSTA: long short-term attention for egocentric action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.01019"},{"key":"3_CR57","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"3_CR58","doi-asserted-by":"crossref","unstructured":"Wang, H., Schmid, C.: Action recognition with improved trajectories. In: IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 3551\u20133558 (2013)","DOI":"10.1109\/ICCV.2013.441"},{"key":"3_CR59","doi-asserted-by":"crossref","unstructured":"Wang, X., Zhu, L., Wang, H., Yang, Y.: Interactive prototype learning for egocentric action recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 8168\u20138177 (2021)","DOI":"10.1109\/ICCV48922.2021.00806"},{"key":"3_CR60","unstructured":"Wu, T., et al.: ZeroC: a neuro-symbolic model for zero-shot concept recognition and acquisition at inference time. In: Koyejo, S., Mohamed, S., Agarwal, A., Belgrave, D., Cho, K., Oh, A. (eds.) Advances in Neural Information Processing Systems, vol.\u00a035, pp. 9828\u20139840. Curran Associates, Inc. (2022). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/3ff48dde82306fe8f26f3e51dd1054d7-Paper-Conference.pdf"},{"key":"3_CR61","doi-asserted-by":"crossref","unstructured":"Xie, S., Sun, C., Huang, J., Tu, Z., Murphy, K.: Rethinking spatiotemporal feature learning: Speed-accuracy trade-offs in video classification. In: Proceedings of the European Conference on Computer Vision, pp. 305\u2013321 (2018)","DOI":"10.1007\/978-3-030-01267-0_19"},{"key":"3_CR62","unstructured":"Yu, J., Wang, Z., Vasudevan, V., Yeung, L., Seyedhosseini, M., Wu, Y.: CoCa: contrastive captioners are image-text foundation models (2022)"},{"key":"3_CR63","doi-asserted-by":"crossref","unstructured":"Zellers, R., Bisk, Y., Farhadi, A., Choi, Y.: From recognition to cognition: visual commonsense reasoning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00688"},{"key":"3_CR64","doi-asserted-by":"crossref","unstructured":"Zhang, Y.C., Li, Y., Rehg, J.M.: First-person action decomposition and zero-shot learning. In: IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 121\u2013129 (2017)","DOI":"10.1109\/WACV.2017.21"},{"key":"3_CR65","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Misra, I., Kr\u00e4henb\u00fchl, P., Girdhar, R.: Learning video representations from large language models. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00637"},{"key":"3_CR66","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Ni, B., Hong, R., Yang, X., Tian, Q.: Cascaded interactional targeting network for egocentric video analysis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016)","DOI":"10.1109\/CVPR.2016.210"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73202-7_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,20]],"date-time":"2024-11-20T15:05:33Z","timestamp":1732115133000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73202-7_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,21]]},"ISBN":["9783031732010","9783031732027"],"references-count":66,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73202-7_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,21]]},"assertion":[{"value":"21 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}