{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T16:17:16Z","timestamp":1775578636962,"version":"3.50.1"},"reference-count":63,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,8,1]],"date-time":"2023-08-01T00:00:00Z","timestamp":1690848000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2018AAA0102002"],"award-info":[{"award-number":["2018AAA0102002"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61925204"],"award-info":[{"award-number":["61925204"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62222207"],"award-info":[{"award-number":["62222207"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62072245"],"award-info":[{"award-number":["62072245"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172212"],"award-info":[{"award-number":["62172212"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu Province","doi-asserted-by":"publisher","award":["BK20211520"],"award-info":[{"award-number":["BK20211520"]}],"id":[{"id":"10.13039\/501100004608","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.1109\/tpami.2023.3261659","type":"journal-article","created":{"date-parts":[[2023,3,27]],"date-time":"2023-03-27T18:47:51Z","timestamp":1679942871000},"page":"10317-10330","source":"Crossref","is-referenced-by-count":62,"title":["Progressive Instance-Aware Feature Learning for Compositional Action Recognition"],"prefix":"10.1109","volume":"45","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0694-9458","authenticated-orcid":false,"given":"Rui","family":"Yan","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, Jiangsu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4831-9451","authenticated-orcid":false,"given":"Lingxi","family":"Xie","sequence":"additional","affiliation":[{"name":"Huawei Inc., Shenzhen, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4902-4663","authenticated-orcid":false,"given":"Xiangbo","family":"Shu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, Jiangsu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1549-3317","authenticated-orcid":false,"given":"Liyan","family":"Zhang","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, MIIT Key Laboratory of Pattern Analysis and Machine Intelligence, Collaborative Innovation Center of Novel Software Technology and Industrialization, Nanjing University of Aeronautics and Astronautics, Nanjing, Jiangsu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9008-222X","authenticated-orcid":false,"given":"Jinhui","family":"Tang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, Jiangsu, China"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1111\/j.1467-7687.2007.00569.x"},{"key":"ref57","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc Annu Conf Neural Inf Process Syst"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01025"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00089"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01283"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995347"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6990"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00378"},{"key":"ref52","first-page":"4967","article-title":"A simple neural network module for relational reasoning","author":"santoro","year":"2017","journal-title":"Proc Annu Conf Neural Inf Process Syst"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00113"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1989.1.2.270"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_49"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3034233"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00813"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_31"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00033"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00186"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.5244\/C.30.39"},{"key":"ref48","article-title":"Representation learning with contrastive predictive coding","author":"oord","year":"2018"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58580-8_19"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33765-9_15"},{"key":"ref41","first-page":"515","article-title":"Learning to decompose and disentangle representations for video prediction","author":"hsieh","year":"2018","journal-title":"Proc Annu Conf Neural Inf Process Syst"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01258-8_6"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v29i1.9605"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475472"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref9","article-title":"UCF101: A dataset of 101 human actions classes from videos in the wild","author":"soomro","year":"2012"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"ref6","first-page":"399","article-title":"Videos as space-time region graphs","author":"wang","year":"2018","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref40","first-page":"2863","article-title":"Action-conditional video prediction using deep networks in atari games","author":"oh","year":"2015","journal-title":"Proc Annu Conf Neural Inf Process Syst"},{"key":"ref35","first-page":"1","article-title":"Learning long-term visual dynamics with region proposal interaction networks","author":"qi","year":"2021","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_20"},{"key":"ref37","first-page":"843","article-title":"Unsupervised learning of video representations using LSTMs","author":"srivastava","year":"2015","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref36","article-title":"Video (language) modeling: A baseline for generative models of natural videos","author":"ranzato","year":"2014"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.59"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_7"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00710"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"ref2","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"2014","journal-title":"Proc Annu Conf Neural Inf Process Syst"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.622"},{"key":"ref39","first-page":"1","article-title":"Deep multi-scale video prediction beyond mean square error","author":"mathieu","year":"2015","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref38","first-page":"4414","article-title":"Unsupervised learning of disentangled representations from video","author":"denton","year":"2017","journal-title":"Proc Annu Conf Neural Inf Process Syst"},{"key":"ref24","article-title":"CATER: A diagnostic dataset for compositional actions and temporal reasoning","author":"girdhar","year":"2019","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00099"},{"key":"ref26","first-page":"1","article-title":"Measuring compositional generalization: A comprehensive method on realistic data","author":"keysers","year":"2020","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58574-7_46"},{"key":"ref20","article-title":"The kinetics human action video dataset","author":"kay","year":"2017"},{"key":"ref63","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","author":"finn","year":"2017","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00630"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00718"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00081"},{"key":"ref27","first-page":"2873","article-title":"Generalization without systematicity: On the compositional skills of sequence-to-sequence recurrent networks","author":"lake","year":"2018","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00269"},{"key":"ref60","first-page":"8026","article-title":"Pytorch: An imperative style, high-performance deep learning library","author":"paszke","year":"2019","journal-title":"Proc Annu Conf Neural Inf Process Syst"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01251"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10169863\/10082892.pdf?arnumber=10082892","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,17]],"date-time":"2023-07-17T17:42:53Z","timestamp":1689615773000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10082892\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8]]},"references-count":63,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2023.3261659","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,8]]}}}