{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T14:36:27Z","timestamp":1723300587822},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2023,5,5]],"date-time":"2023-05-05T00:00:00Z","timestamp":1683244800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,5,5]],"date-time":"2023-05-05T00:00:00Z","timestamp":1683244800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2023,10]]},"DOI":"10.1007\/s11760-023-02605-z","type":"journal-article","created":{"date-parts":[[2023,5,5]],"date-time":"2023-05-05T18:01:51Z","timestamp":1683309711000},"page":"3775-3782","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Self-supervised pretext task collaborative multi-view contrastive learning for video action recognition"],"prefix":"10.1007","volume":"17","author":[{"given":"Shuai","family":"Bi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhengping","family":"Hu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mengyao","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hehao","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jirui","family":"Di","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhe","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,5,5]]},"reference":[{"key":"2605_CR1","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 7794-7803 (2018)","DOI":"10.1109\/CVPR.2018.00813"},{"key":"2605_CR2","doi-asserted-by":"publisher","first-page":"1565","DOI":"10.1007\/s11760-020-01696-2","volume":"14","author":"El Mouatasim","year":"2020","unstructured":"Mouatasim, El.: A. Fast gradient descent algorithm for image classification with neural networks. Signal, Image Video Process 14, 1565\u20131572 (2020)","journal-title":"Signal, Image Video Process"},{"key":"2605_CR3","doi-asserted-by":"publisher","first-page":"599","DOI":"10.1007\/s11760-020-01781-6","volume":"15","author":"Q Huang","year":"2021","unstructured":"Huang, Q., Zhou, F., Qin, R., Zhao, Y.: View transform graph attention recurrent networks for skeleton-based action recognition. Signal, Image Video Process 15, 599\u2013606 (2021)","journal-title":"Signal, Image Video Process"},{"key":"2605_CR4","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky, A., Sutskever, L., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. Commun. ACM. 60, 84\u201390 (2017)","journal-title":"Commun. ACM."},{"key":"2605_CR5","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: action recognition? a new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 6299-6308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"2605_CR6","doi-asserted-by":"crossref","unstructured":"Noroozi, M., Favaro, P.: Unsupervised learning of visual representations by solving jigsaw puzzles. In: European Conference on Computer Vision. pp. 69-84 (2016)","DOI":"10.1007\/978-3-319-46466-4_5"},{"key":"2605_CR7","doi-asserted-by":"crossref","unstructured":"Kim, D., Cho, D., Kweon, IS.: Self-supervised video representation learning with space-time cubic puzzles. In: Proceedings of the AAAI Conference on Artificial Intelligence. pp. 8545-8552 (2019)","DOI":"10.1609\/aaai.v33i01.33018545"},{"key":"2605_CR8","doi-asserted-by":"crossref","unstructured":"Wu, Z., Xiong, Y., Yu, S., Lin, D.: Unsupervised feature learning via non-parametric instance discrimination. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 3733-3742 (2018)","DOI":"10.1109\/CVPR.2018.00393"},{"key":"2605_CR9","doi-asserted-by":"crossref","unstructured":"Misra, I., Maaten, L.: Self-supervised learning of pretext-invariant representations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 6707-6717 (2020)","DOI":"10.1109\/CVPR42600.2020.00674"},{"key":"2605_CR10","doi-asserted-by":"crossref","unstructured":"Luo, D., Liu, C., Zhou, Y., Yang, D., Ma, C.: Video cloze procedure for self-supervised spatiotemporal learning. In: Proceedings of the AAAI Conference on Artificial Intelligence. pp. 11701-11708 (2020)","DOI":"10.1609\/aaai.v34i07.6840"},{"key":"2605_CR11","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, AA.: Colorful image colorization. In: European Conference on Computer Vision. pp. 649-666 (2016)","DOI":"10.1007\/978-3-319-46487-9_40"},{"key":"2605_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 9729-9738 (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"2605_CR13","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning. pp. 1597-1607 (2020)"},{"key":"2605_CR14","doi-asserted-by":"crossref","unstructured":"Chen, X., He, K.: Exploring simple siamese representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 15750-15758 (2021)","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"2605_CR15","doi-asserted-by":"crossref","unstructured":"Lee, HY., Huang, JB., Singh, M., Yang, MH.: Unsupervised representation learning by sorting sequences. In: Proceedings of the IEEE International Conference on Computer Vision. pp. 667-676 (2017)","DOI":"10.1109\/ICCV.2017.79"},{"key":"2605_CR16","doi-asserted-by":"crossref","unstructured":"Yao, Y., Liu, C., Luo, D., Zhou, Y., Ye, Q.: Video playback rate perception for self-supervised spatiotemporal representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 6548-6557 (2020)","DOI":"10.1109\/CVPR42600.2020.00658"},{"key":"2605_CR17","doi-asserted-by":"crossref","unstructured":"Qian, R., Meng, T., Gong, B., Yang, MH., Wang, H., Belongie, S., Cui, Y.: Spatiotemporal contrastive video representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 6964-6974 (2021)","DOI":"10.1109\/CVPR46437.2021.00689"},{"key":"2605_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2022.103406","author":"I Dave","year":"2022","unstructured":"Dave, I., Gupta, R., Rizve, M.: Tclr: Temporal contrastive learning for video representation. Comput. Vis. Image Underst. (2022). https:\/\/doi.org\/10.1016\/j.cviu.2022.103406","journal-title":"Comput. Vis. Image Underst."},{"key":"2605_CR19","first-page":"5679","volume":"33","author":"T Han","year":"2020","unstructured":"Han, T., Xie, W., Zisserman, A.: Self-supervised co-training for video representation learning. Adv. Neural. Inf. Process. Syst. 33, 5679\u20135690 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2605_CR20","doi-asserted-by":"crossref","unstructured":"Tao, L., Wang, X., Yamasaki, T.: Self-supervised video representation learning using inter-intra contrastive framework. In: Proceedings of the 28th ACM International Conference on Multimedia. pp. 2193-2201 (2020)","DOI":"10.1145\/3394171.3413694"},{"key":"2605_CR21","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3d convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision. pp. 4489-4497 (2015)","DOI":"10.1109\/ICCV.2015.510"},{"key":"2605_CR22","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 770-778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"2605_CR23","unstructured":"Tran, D., Wang, H., Torresani, L., Ray, J., LeCun, Y., Paluri, M.: A closer look at spatiotemporal convolutions for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 6450-6459 (2016)"},{"key":"2605_CR24","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Crandall, D.: Hierarchically decoupled spatial-temporal contrast for self-supervised video representation learning. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision. pp. 3235-3245 (2022)","DOI":"10.1109\/WACV51458.2022.00105"},{"key":"2605_CR25","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3057833","author":"J Wang","year":"2021","unstructured":"Wang, J., Jiao, L., Bao, S., He, W., Liu, Y.: Self-supervised video representation learning by uncovering spatiotemporal statistics. IEEE Trans. Pattern Anal. Mach. Intell. (2021). https:\/\/doi.org\/10.1109\/TPAMI.2021.3057833","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2605_CR26","doi-asserted-by":"crossref","unstructured":"Liu, C., Yao, Y., Luo, D., Zhou, Y., Ye, Q.: Self-Supervised Motion Perception for Spatiotemporal Representation Learning. In: IEEE Transactions on Neural Networks and Learning Systems. pp. 1-15 (2022)","DOI":"10.1109\/TNNLS.2022.3160860"},{"key":"2605_CR27","doi-asserted-by":"crossref","unstructured":"Chen, P., Huang, D., He, D., Long, X., Zeng, R., Wen, S., Gan, C.: RSPNET: Relative speed perception for unsupervised video representation learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, 35. pp. 1045-1053 (2022)","DOI":"10.1609\/aaai.v35i2.16189"},{"key":"2605_CR28","doi-asserted-by":"publisher","DOI":"10.1007\/s11760-022-02357-2","author":"S Bi","year":"2022","unstructured":"Bi, S., Hu, Z., Zhao, M., Li, S., Sun, Z.: Spatiotemporal consistency enhancement self-supervised representation learning for action recognition. Signal, Image Video Process. (2022). https:\/\/doi.org\/10.1007\/s11760-022-02357-2","journal-title":"Signal, Image Video Process."},{"key":"2605_CR29","doi-asserted-by":"crossref","unstructured":"Guo, S., Xiong, Z., Zhong, Y., Wang, L., Guo, X., Han, B., Huang, W.: Cross-architecture self-supervised video representation Learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 19270-19279 (2022)","DOI":"10.1109\/CVPR52688.2022.01867"},{"key":"2605_CR30","doi-asserted-by":"publisher","first-page":"1978","DOI":"10.1109\/TIP.2022.3147032","volume":"31","author":"Y Liu","year":"2022","unstructured":"Liu, Y., Wang, K., Liu, L., Lan, H., Lin, L.: TCGL: temporal contrastive graph for self-supervised video representation learning. IEEE Trans. Image Process. 31, 1978\u20131993 (2022)","journal-title":"IEEE Trans. Image Process."}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-023-02605-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-023-02605-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-023-02605-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,17]],"date-time":"2023-08-17T11:19:58Z","timestamp":1692271198000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-023-02605-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,5]]},"references-count":30,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2023,10]]}},"alternative-id":["2605"],"URL":"https:\/\/doi.org\/10.1007\/s11760-023-02605-z","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"value":"1863-1703","type":"print"},{"value":"1863-1711","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,5,5]]},"assertion":[{"value":"10 February 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 February 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 April 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 May 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declared that they have no conflicts of interest to this work.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"All the experimental subjects in this study do not include any animals or people and do not violate ethics.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"All the authors agreed to publish the article.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}]}}