{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T11:08:49Z","timestamp":1772881729364,"version":"3.50.1"},"reference-count":65,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2022,4,1]],"date-time":"2022-04-01T00:00:00Z","timestamp":1648771200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,4,1]],"date-time":"2022-04-01T00:00:00Z","timestamp":1648771200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,4,1]],"date-time":"2022-04-01T00:00:00Z","timestamp":1648771200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100002241","name":"Japan Science and Technology Agency (JST) Advanced Intelligence Project (AIP) Accelerated Program","doi-asserted-by":"publisher","award":["JPMJCR20U1"],"award-info":[{"award-number":["JPMJCR20U1"]}],"id":[{"id":"10.13039\/501100002241","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001863","name":"New Energy and Industrial Technology Development Organization","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001863","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001691","name":"Japan Society for the Promotion of Science (JSPS) KAKENHI","doi-asserted-by":"publisher","award":["JP20H04205"],"award-info":[{"award-number":["JP20H04205"]}],"id":[{"id":"10.13039\/501100001691","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Circuits Syst. Video Technol."],"published-print":{"date-parts":[[2022,4]]},"DOI":"10.1109\/tcsvt.2021.3081761","type":"journal-article","created":{"date-parts":[[2021,5,19]],"date-time":"2021-05-19T21:43:35Z","timestamp":1621460615000},"page":"2043-2056","source":"Crossref","is-referenced-by-count":8,"title":["Spatio-Temporal Perturbations for Video Attribution"],"prefix":"10.1109","volume":"32","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7085-3813","authenticated-orcid":false,"given":"Zhenqiang","family":"Li","sequence":"first","affiliation":[{"name":"Institute of Industrial Science, The University of Tokyo, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6557-7175","authenticated-orcid":false,"given":"Weimin","family":"Wang","sequence":"additional","affiliation":[{"name":"DUT-RU International School of Information Science and Engineering, Dalian University of Technology, Dalian, China"}]},{"given":"Zuoyue","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Computer Science, ETH Zurich, Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8067-6227","authenticated-orcid":false,"given":"Yifei","family":"Huang","sequence":"additional","affiliation":[{"name":"Institute of Industrial Science, The University of Tokyo, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0097-4537","authenticated-orcid":false,"given":"Yoichi","family":"Sato","sequence":"additional","affiliation":[{"name":"Institute of Industrial Science, The University of Tokyo, Tokyo, Japan"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939778"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00020"},{"key":"ref33","first-page":"9737","article-title":"A benchmark for interpretability methods in deep neural networks","author":"hooker","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst (NeurIPS)"},{"key":"ref32","first-page":"3145","article-title":"Learning important features through propagating activation differences","author":"shrikumar","year":"2017","journal-title":"Proc Int Conf Mach Learn (ICML)"},{"key":"ref31","article-title":"Striving for simplicity: The all convolutional net","author":"springenberg","year":"2014","journal-title":"arXiv 1412 6806"},{"key":"ref30","article-title":"Investigating the influence of noise and distractors on the interpretation of neural networks","author":"kindermans","year":"2016","journal-title":"arXiv 1611 07270"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2018.00097"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.319"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00970"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00505"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref62","article-title":"UCF101: A dataset of 101 human actions classes from videos in the wild","author":"soomro","year":"2012","journal-title":"arXiv 1212 0402"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00675"},{"key":"ref63","first-page":"720","article-title":"Scaling egocentric vision: The epic-kitchens dataset","author":"damen","year":"2018","journal-title":"Proc Eur Conf Comput Vis (ECCV)"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8803153"},{"key":"ref64","first-page":"853","article-title":"Why can&#x2019;t i dance in the mall? Learning to mitigate scene bias in action recognition","author":"choi","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst (NeurIPS)"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00156"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01231-1_32"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00116"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref1","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst (NeurIPS)"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00920"},{"key":"ref22","first-page":"3319","article-title":"Axiomatic attribution for deep networks","author":"sundararajan","year":"2017","journal-title":"Proc Int Conf Mach Learn (ICML)"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3054303"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0130140"},{"key":"ref23","article-title":"SmoothGrad: Removing noise by adding noise","author":"smilkov","year":"2017","journal-title":"arXiv 1706 03825"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.74"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-017-1059-x"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00524"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2599820"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref58","first-page":"2672","article-title":"Generative adversarial nets","author":"goodfellow","year":"2014","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref57","article-title":"Intriguing properties of neural networks","author":"szegedy","year":"2013","journal-title":"arXiv 1312 6199"},{"key":"ref56","article-title":"Adversarial examples in the physical world","author":"kurakin","year":"2016","journal-title":"arXiv 1607 02533"},{"key":"ref55","article-title":"Explaining and harnessing adversarial examples","author":"goodfellow","year":"2014","journal-title":"arXiv 1412 6572"},{"key":"ref54","first-page":"427","article-title":"Deep neural networks are easily fooled: High confidence predictions for unrecognizable images","author":"nguyen","year":"2015","journal-title":"Proc IEEE Conf Comput Vis Pattern Recognit (CVPR)"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.120"},{"key":"ref52","article-title":"IROF: A low resource evaluation metric for explanation methods","author":"rieger","year":"2020","journal-title":"arXiv 2003 08747"},{"key":"ref10","doi-asserted-by":"crossref","first-page":"1411","DOI":"10.1109\/TCSVT.2008.2002831","article-title":"A robust passage retrieval algorithm for video question answering","volume":"18","author":"wu","year":"2008","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2018.2883305"},{"key":"ref40","article-title":"Rise: Randomized input sampling for explanation of black-box models","author":"petsiuk","year":"2018","journal-title":"Proc Brit Mach Vis Conf (BMVC)"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2018.2870954"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2014.2308642"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2018.2870832"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.371"},{"key":"ref17","first-page":"1803","article-title":"How to explain individual classification decisions","volume":"11","author":"baehrens","year":"2010","journal-title":"J Mach Learn Res"},{"key":"ref18","article-title":"Deep inside convolutional networks: Visualising image classification models and saliency maps","author":"simonyan","year":"2013","journal-title":"arXiv 1312 6034"},{"key":"ref19","first-page":"1","article-title":"This looks like that: Deep learning for interpretable image recognition","volume":"32","author":"chen","year":"2019","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2018.2816960"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00911"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.83"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2018.2867286"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.622"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00688"},{"key":"ref46","first-page":"6967","article-title":"Real time image saliency for black box classifiers","author":"dabkowski","year":"2017","journal-title":"Proc Adv Neural Inf Process Syst (NeurIPS)"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00886"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-28954-6_16"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298872"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00931"},{"key":"ref41","first-page":"1","article-title":"Visualizing deep networks by optimizing with integrated gradients","author":"qi","year":"2019","journal-title":"Proc IEEE Conf Comput Vis Pattern Recognit Workshops (CVPRW)"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2014.2333151"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00304"}],"container-title":["IEEE Transactions on Circuits and Systems for Video Technology"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/76\/9749157\/09435317.pdf?arnumber=9435317","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,16]],"date-time":"2022-05-16T20:11:34Z","timestamp":1652731894000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9435317\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,4]]},"references-count":65,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tcsvt.2021.3081761","relation":{},"ISSN":["1051-8215","1558-2205"],"issn-type":[{"value":"1051-8215","type":"print"},{"value":"1558-2205","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,4]]}}}