{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T05:13:36Z","timestamp":1768281216353,"version":"3.49.0"},"reference-count":101,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"EU project ELSA - European Lighthouse on Secure and Safe AI","award":["101070617"],"award-info":[{"award-number":["101070617"]}]},{"name":"European Union Next-GenerationEU"},{"name":"FAIR &#x2013; Future Artificial Intelligence Research"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1109\/tpami.2025.3621326","type":"journal-article","created":{"date-parts":[[2025,10,14]],"date-time":"2025-10-14T17:40:04Z","timestamp":1760463604000},"page":"1917-1931","source":"Crossref","is-referenced-by-count":0,"title":["Hier-EgoPack: Hierarchical Egocentric Video Understanding With Diverse Task Perspectives"],"prefix":"10.1109","volume":"48","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-3204-3207","authenticated-orcid":false,"given":"Simone Alberto","family":"Peirone","sequence":"first","affiliation":[{"name":"Department of Control and Computer Engineering, Politecnico di Torino, Turin, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9372-032X","authenticated-orcid":false,"given":"Francesca","family":"Pistilli","sequence":"additional","affiliation":[{"name":"Department of Control and Computer Engineering, Politecnico di Torino, Turin, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4151-9602","authenticated-orcid":false,"given":"Antonio","family":"Alliegro","sequence":"additional","affiliation":[{"name":"Department of Control and Computer Engineering, Politecnico di Torino, Turin, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8229-7159","authenticated-orcid":false,"given":"Tatiana","family":"Tommasi","sequence":"additional","affiliation":[{"name":"Department of Control and Computer Engineering, Politecnico di Torino, Turin, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1212-3465","authenticated-orcid":false,"given":"Giuseppe","family":"Averta","sequence":"additional","affiliation":[{"name":"Department of Control and Computer Engineering, Politecnico di Torino, Turin, Italy"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00333"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00601"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_29"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.579"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00229"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01730"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00247"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01842"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2015.2409731"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-02095-7"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2991965"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2009.30"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00547"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01531-2"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02042"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.11.081"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2992889"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01325"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2017.10.004"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01404"},{"key":"ref21","first-page":"28618","article-title":"SpotEM: Efficient video search for episodic memory","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Ramakrishnan"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72624-8_6"},{"key":"ref23","first-page":"46212","article-title":"EgoSchema: A diagnostic benchmark for very long-form video language understanding","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Mangalam"},{"key":"ref24","first-page":"3343","article-title":"EgoTaskQA: Understanding human tasks in egocentric videos","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Jia"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00020"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01431"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00642"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01256"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-01998-9"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3183112"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01047"},{"key":"ref32","first-page":"7575","article-title":"Egocentric video-language pretraining","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Lin"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00487"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02209"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00637"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/tcsvt.2025.3566695"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.679"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01357"},{"key":"ref39","article-title":"AntGPT: Can large language models help long-term action anticipation from videos?","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zhao"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00795"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.517"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02247"},{"key":"ref43","first-page":"34892","article-title":"Visual instruction tuning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Liu"},{"key":"ref44","article-title":"LLaVA-OneVision: Easy visual task transfer","author":"Li","year":"2025","journal-title":"Trans. Mach. Learn. Res."},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.342"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-demo.49"},{"key":"ref47","article-title":"GPT-4o system card","author":"Hurst","year":"2024"},{"key":"ref48","article-title":"Qwen2.5-VL technical report","author":"Bai","year":"2025"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3084827"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-020-09825-6"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2017.10.013"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.11"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/3326362"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3323220"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/s10822-016-9938-8"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313488"},{"key":"ref57","first-page":"8459","article-title":"Learning to simulate complex physics with graph networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Sanchez-Gonzalez"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00719"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093361"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093404"},{"key":"ref61","article-title":"All about knowledge graphs for actions","author":"Ghosh","year":"2020"},{"key":"ref62","article-title":"Egocentric object manipulation graphs","author":"Dessalene","year":"2020"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3055233"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00024"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007379606734"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3070203"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00794"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3007841"},{"key":"ref69","first-page":"27503","article-title":"Efficiently identifying task groupings for multi-task learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Fifty"},{"key":"ref70","first-page":"31333","article-title":"A unified sequence interface for vision tasks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Chen"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01591"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01824"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01711"},{"key":"ref74","first-page":"521","article-title":"Learning with whom to share in multi-task feature learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kang"},{"key":"ref75","first-page":"3854","article-title":"Learning to branch for multi-task learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Guo"},{"key":"ref76","first-page":"9120","article-title":"Which tasks should be learned together in multi-task learning?","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Standley"},{"key":"ref77","first-page":"8728","article-title":"AdaShare: Learning what to share for efficient deep multi-task learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Sun"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00540"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00806"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00781"},{"key":"ref81","first-page":"794","article-title":"GradNorm: Gradient normalization for adaptive loss balancing in deep multitask networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Chen"},{"key":"ref82","article-title":"Gradient adversarial training of neural networks","author":"Sinha","year":"2018"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01270-0_17"},{"key":"ref84","first-page":"5824","article-title":"Gradient surgery for multi-task learning","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Yu"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_31"},{"key":"ref86","article-title":"An overview of multi-task learning in deep neural networks","author":"Ruder","year":"2017"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01340"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01563"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref90","article-title":"NMS threshold matters for ego4D moment queries\u20132nd place solution to the ego4D moment queries challenge 2023","author":"Sui","year":"2023"},{"key":"ref91","first-page":"2999","article-title":"Focal loss for dense object detection","volume-title":"Proc. Int. Conf. Comput. Vis.","author":"Ross"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1911.08287"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00630"},{"key":"ref94","first-page":"11313","article-title":"Semi-supervised classification with graph convolutional networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kipf"},{"key":"ref95","article-title":"Graph attention networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Veli\u010dkovi\u0107"},{"key":"ref96","first-page":"1025","article-title":"Inductive representation learning on large graphs","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Hamilton"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2018.00113"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01238"},{"key":"ref99","article-title":"Action sensitivity learning for the ego4D episodic memory challenge 2023","author":"Shao","year":"2023"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00599"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73007-8_9"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/34\/11345188\/11202655.pdf?arnumber=11202655","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T22:01:07Z","timestamp":1768255267000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11202655\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2]]},"references-count":101,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2025.3621326","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2]]}}}