{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T22:09:51Z","timestamp":1773180591355,"version":"3.50.1"},"reference-count":228,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2022M710393"],"award-info":[{"award-number":["2022M710393"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2023M733387"],"award-info":[{"award-number":["2023M733387"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2022TQ0035"],"award-info":[{"award-number":["2022TQ0035"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2023TQ0344"],"award-info":[{"award-number":["2023TQ0344"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1109\/tpami.2023.3330794","type":"journal-article","created":{"date-parts":[[2023,11,6]],"date-time":"2023-11-06T19:19:38Z","timestamp":1699298378000},"page":"2171-2190","source":"Crossref","is-referenced-by-count":51,"title":["Temporal Action Localization in the Deep Learning Era: A Survey"],"prefix":"10.1109","volume":"46","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9266-4685","authenticated-orcid":false,"given":"Binglu","family":"Wang","sequence":"first","affiliation":[{"name":"School of Information and Electronics, Beijing Institute of Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6974-7327","authenticated-orcid":false,"given":"Yongqiang","family":"Zhao","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi&#x2019;an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5922-877X","authenticated-orcid":false,"given":"Le","family":"Yang","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi&#x2019;an, China"}]},{"given":"Teng","family":"Long","sequence":"additional","affiliation":[{"name":"School of Information and Electronics, Beijing Institute of Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2924-946X","authenticated-orcid":false,"given":"Xuelong","family":"Li","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi&#x2019;an, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.119"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.678"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2018.2794265"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/s00464-017-5878-1"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2018.2815998"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2951680"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1360\/SSI-2020-0165"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2017.121"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1360\/SSI-2020-0340"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2017.10.001"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.4218\/etrij.2018-0520"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2017.00014"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.5244\/c.31.52"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00124"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00719"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01017"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01241"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_29"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_35"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00560"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58526-6_43"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475298"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3078324"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01808"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02202"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2986861"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3193611"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-022-07102-x"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2009.11.014"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2017.01.010"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01594-9"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.293"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123343"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.155"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.5244\/C.31.93"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.617"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12234"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_1"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00043"},{"key":"ref40","first-page":"5152","article-title":"Temporal Gaussian mixture layer for videos","volume-title":"Proc. Mach. Learn. Res.","volume":"97","author":"Piergiovanni"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58598-3_32"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00399"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5893"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00333"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3090167"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01348"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19830-4_29"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/tip.2022.3180925"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3237597"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6829"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00372"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00055"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00877"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00109"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16322"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16323"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.381"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00139"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58568-6_17"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01473-9"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2001.990935"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2007.70711"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2008.4587756"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2009.5459279"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126316"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6247808"},{"key":"ref67","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Krizhevsky"},{"key":"ref68","article-title":"Very deep convolutional networks for large-scale image recognition","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Simonyan"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00630"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2019.00224"},{"key":"ref73","article-title":"RGB stream is enough for temporal action detection","author":"Wang","year":"2021"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2921539"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-36718-3_40"},{"key":"ref76","article-title":"AFO-TAD: Anchor-free one-stage detector for temporal action detection","author":"Tang","year":"2019"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3016486"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01270-0_10"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3351044"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01575"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00706"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3089361"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00369"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58517-4_30"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.392"},{"key":"ref86","first-page":"1","article-title":"Mid-level fusion for end-to-end temporal activity detection in untrimmed video","volume-title":"Proc. Brit. Mach. Vis. Conf.","author":"Rahman"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413860"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i3.20212"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.3014555"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01326"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00630"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01320"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414253"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20080-9_7"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3195321"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01025"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.214"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_5"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1145\/3206025.3206029"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01340"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1145\/3552458.3556443"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3050067"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01351"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i1.19900"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.317"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.675"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2959977"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.3042077"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16363"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58604-1_8"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01327"},{"key":"ref112","article-title":"Activity graph transformer for temporal action localization","author":"Nawhal","year":"2021"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01938"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2018.8545487"},{"key":"ref115","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.02.085"},{"key":"ref116","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2018.2866370"},{"key":"ref117","first-page":"9923","article-title":"Low-fidelity end-to-end video encoder pre-training for temporal action localization","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Xu"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.211"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3044218"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.342"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00972"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46487-9_47"},{"key":"ref123","article-title":"A pursuit of temporal accuracy in general activity detection","author":"Xiong","year":"2017"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2868668"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20062-5_37"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1406.1078"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00898"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"ref132","article-title":"Marginalized average attentional network for weakly-supervised learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yuan"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58539-6_3"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6793"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00984"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3132058"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3178957"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01355"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_12"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3287208"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00356"},{"key":"ref142","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Simonyan"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01531-2"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_17"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00747"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240511"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00400"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413687"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i2.16256"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16280"},{"key":"ref151","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00611"},{"key":"ref152","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00750"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475261"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00790"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01335"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3076172"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1109\/tip.2022.3193752"},{"key":"ref158","article-title":"ACM-Net: Action context modeling network for weakly-supervised temporal action localization","author":"Qu","year":"2021"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1109\/tmm.2022.3213478"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107686"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3089355"},{"key":"ref162","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3073235"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00336"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01929"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00327"},{"key":"ref166","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108718"},{"key":"ref167","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3137649"},{"key":"ref168","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3174344"},{"key":"ref169","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02203"},{"key":"ref170","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01417"},{"key":"ref171","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00237"},{"key":"ref172","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01026"},{"key":"ref173","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01810"},{"key":"ref174","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25237"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019070"},{"key":"ref176","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00012"},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1109\/tip.2022.3185485"},{"key":"ref178","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.107831"},{"key":"ref179","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01937"},{"key":"ref180","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6760"},{"key":"ref181","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i3.20216"},{"key":"ref182","article-title":"Convex combination consistency between neighbors for weakly-supervised action localization","author":"Liu","year":"2022"},{"key":"ref183","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3163459"},{"key":"ref184","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00089"},{"key":"ref185","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3189662"},{"key":"ref186","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3089323"},{"key":"ref187","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00562"},{"key":"ref188","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6986"},{"key":"ref189","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2962815"},{"key":"ref190","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00984"},{"key":"ref191","article-title":"UCF101: A dataset of 101 human actions classes from videos in the wild","author":"Soomro","year":"2012"},{"key":"ref193","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3217368"},{"key":"ref194","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00876"},{"key":"ref195","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00269"},{"key":"ref196","article-title":"YouTube-8M: A large-scale video classification benchmark","author":"Abu-El-Haija","year":"2016"},{"key":"ref197","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00096"},{"key":"ref198","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2857768"},{"key":"ref199","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20080-9_33"},{"key":"ref200","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01416"},{"key":"ref201","article-title":"LLaMA: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"ref202","article-title":"BLIP-2: Bootstrapping language-image pre-training with frozen image encoders and large language models","author":"Li","year":"2023"},{"key":"ref203","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00717"},{"key":"ref204","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3099407"},{"key":"ref205","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00194"},{"key":"ref206","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00792"},{"key":"ref207","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20062-5_38"},{"key":"ref208","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01347"},{"key":"ref209","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19830-4_37"},{"key":"ref210","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_25"},{"key":"ref211","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00125"},{"key":"ref212","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58580-8_9"},{"key":"ref213","article-title":"Cross-attentional audio-visual fusion for weakly-supervised action localization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lee"},{"key":"ref214","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2021.3061289"},{"key":"ref215","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.563"},{"key":"ref216","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01082"},{"key":"ref217","doi-asserted-by":"publisher","DOI":"10.1145\/3532626"},{"key":"ref218","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00299"},{"key":"ref219","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_16"},{"key":"ref220","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.167"},{"key":"ref221","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref222","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00021"},{"key":"ref223","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00259"},{"key":"ref224","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00689"},{"key":"ref225","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2020.2975749"},{"key":"ref226","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-88313-5_13"},{"key":"ref227","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2015.23113"},{"key":"ref228","first-page":"1273","article-title":"Communication-efficient learning of deep networks from decentralized data","volume-title":"Proc. Artif. Intell. Statist.","author":"McMahan"},{"key":"ref229","doi-asserted-by":"publisher","DOI":"10.1002\/j.1538-7305.1948.tb01338.x"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10461350\/10310147.pdf?arnumber=10310147","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T17:45:55Z","timestamp":1725903955000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10310147\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4]]},"references-count":228,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2023.3330794","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,4]]}}}