{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:43:12Z","timestamp":1767339792158,"version":"3.28.0"},"reference-count":34,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,3,1]],"date-time":"2020-03-01T00:00:00Z","timestamp":1583020800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,3,1]],"date-time":"2020-03-01T00:00:00Z","timestamp":1583020800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,3,1]],"date-time":"2020-03-01T00:00:00Z","timestamp":1583020800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,3]]},"DOI":"10.1109\/wacv45572.2020.9093278","type":"proceedings-article","created":{"date-parts":[[2020,5,15]],"date-time":"2020-05-15T03:41:09Z","timestamp":1589514069000},"page":"651-659","source":"Crossref","is-referenced-by-count":35,"title":["Temporal Contrastive Pretraining for Video Action Recognition"],"prefix":"10.1109","author":[{"given":"Guillaume","family":"LORRE","sequence":"first","affiliation":[]},{"given":"Jaonary","family":"RABARISOA","sequence":"additional","affiliation":[]},{"given":"Astrid","family":"ORCESI","sequence":"additional","affiliation":[]},{"given":"Samia","family":"AINOUZ","sequence":"additional","affiliation":[]},{"given":"Stephane","family":"CANU","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01058"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00840"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00413"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.319"},{"key":"ref34","article-title":"Visual dynamics: Probabilistic future frame synthesis via cross convolutional networks","volume":"abs 1607 2586","author":"xue","year":"2016","journal-title":"CoRR"},{"key":"ref10","article-title":"The kinetics human action video dataset","volume":"abs 1705 6950","author":"kay","year":"2017","journal-title":"CoRR"},{"key":"ref11","article-title":"Self-supervised video representation learning with space-time cubic puzzles","volume":"abs 1811 9795","author":"kim","year":"2018","journal-title":"CoRR"},{"key":"ref12","article-title":"Fast optical flow using dense inverse search","volume":"abs 1603 3590","author":"kroeger","year":"2016","journal-title":"CoRR"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.79"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.194"},{"key":"ref16","article-title":"Deep multiscale video prediction beyond mean square error","volume":"abs 1511 5440","author":"mathieu","year":"2015","journal-title":"CoRR"},{"key":"ref17","article-title":"Deep multiscale video prediction beyond mean square error","volume":"abs 1511 5440","author":"mathieu","year":"2016","journal-title":"CoRR"},{"key":"ref18","article-title":"Unsupervised learning using sequential verification for action recognition","volume":"abs 1603 8561","author":"misra","year":"2016","journal-title":"CoRR"},{"key":"ref19","article-title":"Spatio-temporal video autoencoder with differentiable memory","volume":"abs 1511 6309","author":"patraucean","year":"2015","journal-title":"CoRR"},{"key":"ref28","article-title":"Anticipating the future by watching unlabeled video","volume":"abs 1504 8023","author":"vondrick","year":"2015","journal-title":"CoRR"},{"key":"ref4","article-title":"Selfsupervised video representation learning with odd-one-out networks","volume":"abs 1611 6646","author":"fernando","year":"2016","journal-title":"CoRR"},{"key":"ref27","article-title":"Representation learning with contrastive predictive coding","volume":"abs 1807 3748","author":"van den oord","year":"2018","journal-title":"CoRR"},{"key":"ref3","article-title":"Learning representations by maximizing mutual information across views","volume":"abs 1906 910","author":"bachman","year":"2019","journal-title":"CoRR"},{"key":"ref6","article-title":"Deep residual learning for image recognition","volume":"abs 1512 3385","author":"he","year":"2015","journal-title":"CoRR"},{"key":"ref29","article-title":"Generating videos with scene dynamics","volume":"abs 1609 2612","author":"vondrick","year":"2016","journal-title":"CoRR"},{"key":"ref5","article-title":"Unsupervised learning for physical interaction through video prediction","volume":"abs 1605 7157","author":"finn","year":"2016","journal-title":"CoRR"},{"key":"ref8","article-title":"Self-supervised spatiotemporal feature learning by video geometric transformations","volume":"abs 1811 11387","author":"jing","year":"2018","journal-title":"CoRR"},{"key":"ref7","article-title":"Learning deep representations by mutual information estimation and maximization","author":"hjelm","year":"2019","journal-title":"International Conference on Learning Representations"},{"key":"ref2","article-title":"Stochastic variational video prediction","volume":"abs 1710 11252","author":"babaeizadeh","year":"2017","journal-title":"CoRR"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"ref1","article-title":"Video jigsaw: Unsupervised learning of spatiotemporal context for video action recognition","volume":"abs 1808 7507","author":"ahsan","year":"2018","journal-title":"CoRR"},{"key":"ref20","article-title":"Sdcnet: Video prediction using spatially-displaced convolution","volume":"abs 1811 684","author":"reda","year":"2018","journal-title":"CoRR"},{"key":"ref22","article-title":"Ucf101: A dataset of 101 human actions classes from videos in the wild","author":"soomro","year":"2012","journal-title":"CoRR"},{"key":"ref21","article-title":"Two-stream convolutional networks for action recognition in videos","volume":"abs 1406 2199","author":"simonyan","year":"2014","journal-title":"CoRR"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.5201\/ipol.2013.26"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00756"},{"key":"ref26","article-title":"C3D: generic features for video analysis","volume":"abs 1412 767","author":"tran","year":"2014","journal-title":"CoRR"},{"key":"ref25","article-title":"Contrastive multiview coding","volume":"abs 1906 5849","author":"tian","year":"2019","journal-title":"CoRR"}],"event":{"name":"2020 IEEE Winter Conference on Applications of Computer Vision (WACV)","start":{"date-parts":[[2020,3,1]]},"location":"Snowmass Village, CO, USA","end":{"date-parts":[[2020,3,5]]}},"container-title":["2020 IEEE Winter Conference on Applications of Computer Vision (WACV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9087828\/9093261\/09093278.pdf?arnumber=9093278","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,30]],"date-time":"2022-06-30T15:18:15Z","timestamp":1656602295000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9093278\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,3]]},"references-count":34,"URL":"https:\/\/doi.org\/10.1109\/wacv45572.2020.9093278","relation":{},"subject":[],"published":{"date-parts":[[2020,3]]}}}