{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T03:00:49Z","timestamp":1767322849831,"version":"3.48.0"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032113160","type":"print"},{"value":"9783032113177","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-11317-7_37","type":"book-chapter","created":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T02:58:41Z","timestamp":1767322721000},"page":"445-456","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Benchmark of\u00a0Egocentric Scene Graph Prediction Methods for\u00a0Understanding Human-Object Interactions"],"prefix":"10.1007","author":[{"given":"Asfand","family":"Yaar","sequence":"first","affiliation":[]},{"given":"Ivan","family":"Rodin","sequence":"additional","affiliation":[]},{"given":"Giovanni Maria","family":"Farinella","sequence":"additional","affiliation":[]},{"given":"Antonino","family":"Furnari","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,2]]},"reference":[{"key":"37_CR1","doi-asserted-by":"crossref","unstructured":"Cong, Y., Liao, W., Ackermann, H., Rosenhahn, B., Yang, M.Y.: Spatial-temporal transformer for dynamic scene graph generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 16372\u201316382 (2021)","DOI":"10.1109\/ICCV48922.2021.01606"},{"key":"37_CR2","doi-asserted-by":"crossref","unstructured":"Damen, D., et\u00a0al.: Scaling egocentric vision: The epic-kitchens dataset. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 720\u2013736 (2018)","DOI":"10.1007\/978-3-030-01225-0_44"},{"issue":"1","key":"37_CR3","first-page":"65","volume":"47","author":"AG Del Molino","year":"2016","unstructured":"Del Molino, A.G., Tan, C., Lim, J.H., Tan, A.H.: Summarization of egocentric videos: a comprehensive survey. IEEE Trans. Hum. Mach. Syst. 47(1), 65\u201376 (2016)","journal-title":"IEEE Trans. Hum. Mach. Syst."},{"key":"37_CR4","doi-asserted-by":"crossref","unstructured":"Feng, S., Mostafa, H., Nassar, M., Majumdar, S., Tripathi, S.: Exploiting long-term dependencies for generating dynamic scene graphs. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 5130\u20135139 (2023)","DOI":"10.1109\/WACV56688.2023.00510"},{"key":"37_CR5","unstructured":"Grauman, K., et\u00a0al.: Ego4d: Around the world in 3,000 hours of egocentric video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18995\u201319012 (2022)"},{"key":"37_CR6","doi-asserted-by":"publisher","unstructured":"Herzig, R., Bar, A., Xu, H., Chechik, G., Darrell, T., Globerson, A.: Learning canonical representations for scene graph to image generation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12371, pp. 210\u2013227. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58574-7_13","DOI":"10.1007\/978-3-030-58574-7_13"},{"key":"37_CR7","doi-asserted-by":"crossref","unstructured":"Johnson, J., Gupta, A., Fei-Fei, L.: Image generation from scene graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1219\u20131228 (2018)","DOI":"10.1109\/CVPR.2018.00133"},{"key":"37_CR8","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"37_CR9","doi-asserted-by":"crossref","unstructured":"Kochakarn, P., De\u00a0Martini, D., Omeiza, D., Kunze, L.: Explainable action prediction through self-supervision on scene graphs. In: 2023 IEEE International Conference on Robotics and Automation (ICRA), pp. 1479\u20131485. IEEE (2023)","DOI":"10.1109\/ICRA48891.2023.10161132"},{"key":"37_CR10","doi-asserted-by":"crossref","unstructured":"Li, R., Zhang, S., He, X.: Sgtr: End-to-end scene graph generation with transformer. In: proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19486\u201319496 (2022)","DOI":"10.1109\/CVPR52688.2022.01888"},{"key":"37_CR11","doi-asserted-by":"crossref","unstructured":"Lu, Y., et al.: Context-aware scene graph generation with seq2seq transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15931\u201315941 (2021)","DOI":"10.1109\/ICCV48922.2021.01563"},{"key":"37_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2022.108245","volume":"242","author":"AV Malawade","year":"2022","unstructured":"Malawade, A.V., Yu, S.Y., Hsu, B., Kaeley, H., Karra, A., Al Faruque, M.A.: Roadscene2vec: a tool for extracting and embedding road scene-graphs. Knowl.-Based Syst. 242, 108245 (2022)","journal-title":"Knowl.-Based Syst."},{"key":"37_CR13","unstructured":"Min, K.: Intel labs at ego4d challenge 2022: a better baseline for audio-visual diarization. arXiv preprint arXiv:2210.07764 (2022)"},{"key":"37_CR14","unstructured":"Min, K.: Sthg: Spatial-temporal heterogeneous graph learning for advanced audio-visual diarization. arXiv preprint arXiv:2306.10608 (2023)"},{"key":"37_CR15","doi-asserted-by":"crossref","unstructured":"Nag, S., Min, K., Tripathi, S., Roy-Chowdhury, A.K.: Unbiased scene graph generation in videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22803\u201322813 (2023)","DOI":"10.1109\/CVPR52729.2023.02184"},{"key":"37_CR16","doi-asserted-by":"crossref","unstructured":"Nagarajan, T., Li, Y., Feichtenhofer, C., Grauman, K.: Ego-topo: environment affordances from egocentric video. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 163\u2013172 (2020)","DOI":"10.1109\/CVPR42600.2020.00024"},{"key":"37_CR17","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-CNN: towards real-time object detection with region proposal networks. Adv. Neural Inf. Process. Syst. 28 (2015)"},{"key":"37_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2021.103252","volume":"211","author":"I Rodin","year":"2021","unstructured":"Rodin, I., Furnari, A., Mavroeidis, D., Farinella, G.M.: Predicting the future from first person (egocentric) vision: a survey. Comput. Vis. Image Underst. 211, 103252 (2021)","journal-title":"Comput. Vis. Image Underst."},{"key":"37_CR19","doi-asserted-by":"crossref","unstructured":"Rodin, I., Furnari, A., Min, K., Tripathi, S., Farinella, G.M.: Action scene graphs for long-form understanding of egocentric videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18622\u201318632 (2024)","DOI":"10.1109\/CVPR52733.2024.01762"},{"key":"37_CR20","doi-asserted-by":"crossref","unstructured":"Shit, S., et\u00a0al.: Relationformer: a unified framework for image-to-graph generation. In: European Conference On Computer Vision, pp. 422\u2013439. Springer (2022)","DOI":"10.1007\/978-3-031-19836-6_24"},{"key":"37_CR21","doi-asserted-by":"crossref","unstructured":"Singh, K.P., Salvador, J., Weihs, L., Kembhavi, A.: Scene graph contrastive learning for embodied navigation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10884\u201310894 (2023)","DOI":"10.1109\/ICCV51070.2023.00999"},{"key":"37_CR22","doi-asserted-by":"crossref","unstructured":"Wang, X., Gupta, A.: Videos as space-time region graphs. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 399\u2013417 (2018)","DOI":"10.1007\/978-3-030-01228-1_25"},{"key":"37_CR23","doi-asserted-by":"publisher","first-page":"8821","DOI":"10.1109\/TIP.2020.3001693","volume":"29","author":"Y Wu","year":"2020","unstructured":"Wu, Y., Bourahla, O.E.F., Li, X., Wu, F., Tian, Q., Zhou, X.: Adaptive graph representation learning for video person re-identification. IEEE Trans. Image Process. 29, 8821\u20138830 (2020)","journal-title":"IEEE Trans. Image Process."},{"key":"37_CR24","doi-asserted-by":"crossref","unstructured":"Yuan, Y., Liang, X., Wang, X., Yeung, D.Y., Gupta, A.: Temporal dynamic graph LSTM for action-driven video object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1801\u20131810 (2017)","DOI":"10.1109\/ICCV.2017.200"}],"container-title":["Lecture Notes in Computer Science","Image Analysis and Processing - ICIAP 2025 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-11317-7_37","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T02:58:44Z","timestamp":1767322724000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-11317-7_37"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032113160","9783032113177"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-11317-7_37","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIAP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Image Analysis and Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Rome","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iciap2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.iciap.org\/home","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}