{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T18:22:40Z","timestamp":1771698160744,"version":"3.50.1"},"reference-count":58,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,6]]},"DOI":"10.1109\/cvpr52688.2022.02031","type":"proceedings-article","created":{"date-parts":[[2022,9,27]],"date-time":"2022-09-27T15:56:41Z","timestamp":1664294201000},"page":"20951-20960","source":"Crossref","is-referenced-by-count":40,"title":["JRDB-Act: A Large-scale Dataset for Spatio-temporal Action, Social Group and Activity Detection"],"prefix":"10.1109","author":[{"given":"Mahsa","family":"Ehsanpour","sequence":"first","affiliation":[{"name":"The University of Adelaide"}]},{"given":"Fatemeh","family":"Saleh","sequence":"additional","affiliation":[{"name":"Samsung AI Center"}]},{"given":"Silvio","family":"Savarese","sequence":"additional","affiliation":[{"name":"Stan ford University"}]},{"given":"Ian","family":"Reid","sequence":"additional","affiliation":[{"name":"The University of Adelaide"}]},{"given":"Hamid","family":"Rezatofighi","sequence":"additional","affiliation":[{"name":"Monash University"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Youtube-8m: A large-scale video classification benchmark","author":"Abu-El-Haija","year":"2016","journal-title":"arXiv preprint"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00089"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33765-9_16"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.220"},{"key":"ref7","first-page":"1282","article-title":"What are they doing?: Collective activity classification using spatio-temporal relationship among people","author":"Choi","year":"2009","journal-title":"ICCVW"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995707"},{"key":"ref9","first-page":"720","article-title":"Scaling egocentric vision: The epic-kitchens dataset","author":"Damen","year":"2018","journal-title":"ECCV"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01228-1_47"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58558-7_35"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58545-7_11"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-014-0733-5"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00630"},{"key":"ref16","article-title":"A better baseline for ava","author":"Girdhar","year":"2018","journal-title":"arXiv preprint"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00033"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2007.70711"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.622"},{"key":"ref20","first-page":"6047","article-title":"Ava: A video dataset of spatio-temporally localized atomic visual actions","author":"Chunhui","year":"2018","journal-title":"CVPR"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00332"},{"key":"ref22","first-page":"1971","article-title":"A hierarchical deep temporal model for group activity recognition","author":"Mostafa","year":"2016","journal-title":"CVPR"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2016.10.018"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.396"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"ref26","article-title":"The kinetics human action video dataset","author":"Kay","year":"2017","journal-title":"arXiv preprint"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/iccv.2005.85"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"ref29","author":"Ang","year":"2020","journal-title":"The ava-kinetics localized human actions video dataset"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01231-1_19"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01341"},{"key":"ref32","article-title":"Who did what at where and when: simultaneous multi-person tracking and activity recognition","author":"Li","year":"2018","journal-title":"arXiv preprint"},{"key":"ref33","first-page":"10991","article-title":"Overcoming classifier im-balance for long-tail object detection with balanced group softmax","author":"Li","year":"2020","journal-title":"CVPR"},{"key":"ref34","author":"Marszalek","year":"2009","journal-title":"Actions in context. Citeseer"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3070543"},{"key":"ref36","first-page":"849","article-title":"On spectral clustering: Analysis and an algorithm","author":"Andrew","year":"2002","journal-title":"NIPS"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_39"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00075"},{"key":"ref40","first-page":"1","article-title":"Action mach a spatio-temporal maximum average correlation height filter for action recognition","author":"Mikel","year":"2008","journal-title":"CVPR"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6247801"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2004.1334462"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341635"},{"key":"ref44","first-page":"585","article-title":"Asynchronous temporal fields for action recognition","author":"Gunnar","year":"2017","journal-title":"CVPR"},{"key":"ref45","first-page":"510","article-title":"Hollywood in homes: Crowdsourcing data collection for activity understanding","author":"Gunnar","year":"2016","journal-title":"ECCV"},{"key":"ref46","article-title":"Two-stream convolutional networks for action recognition in videos","author":"Simonyan","year":"2014","journal-title":"arXiv preprint"},{"key":"ref47","article-title":"Ucf101: A dataset of 101 human actions classes from videos in the wild","author":"Soomro","year":"2012","journal-title":"arXiv preprint"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_20"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00130"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00037"},{"key":"ref51","first-page":"478","article-title":"Unsupervised deep embedding for clustering analysis","author":"Xie","year":"2016","journal-title":"ICML"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.617"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-017-1013-y"},{"key":"ref54","first-page":"2442","article-title":"Discriminative subvolume search for efficient action detection","author":"Yuan","year":"2009","journal-title":"CVPR"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299101"},{"key":"ref56","first-page":"1601","article-title":"Self-tuning spectral clustering","author":"Zelnik-Manor","year":"2005","journal-title":"NIPS"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00876"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_49"}],"event":{"name":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","location":"New Orleans, LA, USA","start":{"date-parts":[[2022,6,18]]},"end":{"date-parts":[[2022,6,24]]}},"container-title":["2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9878378\/9878366\/09879064.pdf?arnumber=9879064","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,23]],"date-time":"2024-01-23T20:52:33Z","timestamp":1706043153000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9879064\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6]]},"references-count":58,"URL":"https:\/\/doi.org\/10.1109\/cvpr52688.2022.02031","relation":{},"subject":[],"published":{"date-parts":[[2022,6]]}}}