{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T08:52:52Z","timestamp":1765356772885,"version":"3.37.3"},"reference-count":59,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Project of China","award":["2018AAA0101900"],"award-info":[{"award-number":["2018AAA0101900"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U19B2043","61976185"],"award-info":[{"award-number":["U19B2043","61976185"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004731","name":"Natural Science Foundation of Zhejiang Province","doi-asserted-by":"publisher","award":["LR19F020002","LZ17F020001"],"award-info":[{"award-number":["LR19F020002","LZ17F020001"]}],"id":[{"id":"10.13039\/501100004731","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Chinese Knowledge Center for Engineering Sciences, and Technology"},{"name":"Joint Research Program of ZJU"},{"name":"Hikvision Research Institute"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Multimedia"],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/tmm.2020.3025661","type":"journal-article","created":{"date-parts":[[2020,9,22]],"date-time":"2020-09-22T22:51:34Z","timestamp":1600815094000},"page":"3454-3466","source":"Crossref","is-referenced-by-count":13,"title":["Explore Video Clip Order With Self-Supervised and Curriculum Learning for Video Applications"],"prefix":"10.1109","volume":"23","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6142-9914","authenticated-orcid":false,"given":"Jun","family":"Xiao","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5678-4487","authenticated-orcid":false,"given":"Lin","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3404-1305","authenticated-orcid":false,"given":"Dejing","family":"Xu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1584-7290","authenticated-orcid":false,"given":"Chengjiang","family":"Long","sequence":"additional","affiliation":[]},{"given":"Jian","family":"Shao","sequence":"additional","affiliation":[]},{"given":"Shifeng","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5269-7821","authenticated-orcid":false,"given":"Shiliang","family":"Pu","sequence":"additional","affiliation":[]},{"given":"Yueting","family":"Zhuang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018545"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00413"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00675"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2017.2749159"},{"key":"ref31","first-page":"20","article-title":"Temporal segment networks: Towards good practices for deep action recognition","author":"wang","year":"0","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299059"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01267-0_47"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2558148"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.149"},{"key":"ref34","article-title":"Unsupervised representation learning by predicting image rotations","author":"gidaris","year":"0","journal-title":"Proc 6th Int Conf Learn Representations"},{"key":"ref28","first-page":"3468","article-title":"Spatiotemporal residual networks for video action recognition","author":"feichtenhofer","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.441"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.79"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/1291233.1291311"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01058"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/11744047_33"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.5244\/C.22.99"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6247806"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/VSPETS.2005.1570899"},{"key":"ref50","first-page":"8024","article-title":"PyTorch: An imperative style, high-performance deep learning library","author":"paszke","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"article-title":"Revisiting small batch training for deep neural networks","year":"2018","author":"masters","key":"ref51"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019127"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123427"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.751"},{"key":"ref54","first-page":"613","article-title":"Generating videos with scene dynamics","author":"vondrick","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2935678"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6840"},{"key":"ref11","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00685"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.622"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.167"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46466-4_5"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.96"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.278"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_32"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.607"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2017.2666540"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.590"},{"article-title":"UCF101: A dataset of 101 human actions classes from videos in the wild","year":"2012","author":"soomro","key":"ref49"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2013.2271746"},{"key":"ref46","article-title":"Self-paced curriculum learning","author":"jiang","year":"0","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1189"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.59"},{"key":"ref47","first-page":"1189","article-title":"Self-paced learning for latent variable models","author":"kumar","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00994"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00658"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299188"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"}],"container-title":["IEEE Transactions on Multimedia"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6046\/9296985\/09204376.pdf?arnumber=9204376","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:51:20Z","timestamp":1652194280000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9204376\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":59,"URL":"https:\/\/doi.org\/10.1109\/tmm.2020.3025661","relation":{},"ISSN":["1520-9210","1941-0077"],"issn-type":[{"type":"print","value":"1520-9210"},{"type":"electronic","value":"1941-0077"}],"subject":[],"published":{"date-parts":[[2021]]}}}