{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T18:44:03Z","timestamp":1774550643567,"version":"3.50.1"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031262920","type":"print"},{"value":"9783031262937","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-26293-7_40","type":"book-chapter","created":{"date-parts":[[2023,3,10]],"date-time":"2023-03-10T20:02:47Z","timestamp":1678478567000},"page":"679-695","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Compressed Vision for\u00a0Efficient Video Understanding"],"prefix":"10.1007","author":[{"given":"Olivia","family":"Wiles","sequence":"first","affiliation":[]},{"given":"Jo\u00e3o","family":"Carreira","sequence":"additional","affiliation":[]},{"given":"Iain","family":"Barr","sequence":"additional","affiliation":[]},{"given":"Andrew","family":"Zisserman","sequence":"additional","affiliation":[]},{"given":"Mateusz","family":"Malinowski","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,3,11]]},"reference":[{"key":"40_CR1","unstructured":"Gueguen, L., Sergeev, A., Kadlec, B., Liu, R., Yosinski, J.: Faster neural networks straight from JPEG. In: Advances in Neural Information Processing Systems (NeurIPS) (2018)"},{"key":"40_CR2","doi-asserted-by":"crossref","unstructured":"Ehrlich, M., Davis, L.S.: Deep residual learning in the jpeg transform domain. In: Proceedings of International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00358"},{"key":"40_CR3","doi-asserted-by":"crossref","unstructured":"Wu, C.Y., Zaheer, M., Hu, H., Manmatha, R., Smola, A.J., Kr\u00e4henb\u00fchl, P.: Compressed video action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6026\u20136035 (2018)","DOI":"10.1109\/CVPR.2018.00631"},{"key":"40_CR4","doi-asserted-by":"crossref","unstructured":"Xu, K., Qin, M., Sun, F., Wang, Y., Chen, Y.K., Ren, F.: Learning in the frequency domain. In: Proceedings of the Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.00181"},{"key":"40_CR5","doi-asserted-by":"crossref","unstructured":"Patrick, M., et al.: Space-time crop & attend: improving cross-modal video representation learning. In: International Conference on Computer Vision (ICCV) (2021)","DOI":"10.1109\/ICCV48922.2021.01039"},{"key":"40_CR6","unstructured":"Nash, C., et al.: Transframer: arbitrary frame prediction with generative models. arXiv preprint arXiv:2203.09494 (2022)"},{"key":"40_CR7","doi-asserted-by":"crossref","unstructured":"Oyallon, E., Belilovsky, E., Zagoruyko, S., Valko, M.: Compressing the input for CNNs with the first-order scattering transform. In: Proceedings of the European Conference on Computer Vision (ECCV) (2018)","DOI":"10.1007\/978-3-030-01240-3_19"},{"key":"40_CR8","unstructured":"Dubois, Y., Bloem-Reddy, B., Ullrich, K., Maddison, C.J.: Lossy compression for lossless prediction. In: Advances in Neural Information Processing Systems (NeurIPS) (2021)"},{"key":"40_CR9","unstructured":"Mnih, A., Gregor, K.: Neural variational inference and learning in belief networks. In: International Conference on Machine Learning, pp. 1791\u20131799. PMLR (2014)"},{"key":"40_CR10","unstructured":"Oord, A.V.D., Vinyals, O., Kavukcuoglu, K.: Neural discrete representation learning. arXiv preprint arXiv:1711.00937 (2017)"},{"key":"40_CR11","doi-asserted-by":"crossref","unstructured":"Esser, P., Rombach, R., Ommer, B.: Taming transformers for high-resolution image synthesis. arXiv preprint arXiv:2012.09841 (2020)","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"40_CR12","unstructured":"Ramesh, A., et al.: Zero-shot text-to-image generation. arXiv preprint arXiv:2102.12092 (2021)"},{"key":"40_CR13","unstructured":"Walker, J., Razavi, A., Oord, A.V.D.: Predicting video with VQVAE. arXiv preprint arXiv:2103.01950 (2021)"},{"key":"40_CR14","unstructured":"Yan, W., Zhang, Y., Abbeel, P., Srinivas, A.: VideoGPT: video generation using VQ-VAE and transformers. arXiv preprint arXiv:2104.10157 (2021)"},{"key":"40_CR15","doi-asserted-by":"crossref","unstructured":"Chai, L., Zhu, J.Y., Shechtman, E., Isola, P., Zhang, R.: Ensembling with deep generative views. In: Proceedings of the Conference on Computer Vision and Pattern Recognition (CVPR) (2021)","DOI":"10.1109\/CVPR46437.2021.01475"},{"key":"40_CR16","unstructured":"Jahanian, A., Chai, L., Isola, P.: On the \u201csteerability\u201d of generative adversarial networks. In: International Conference on Learning Representations (ICLR) (2020)"},{"key":"40_CR17","unstructured":"H\u00e4rk\u00f6nen, E., Hertzmann, A., Lehtinen, J., Paris, S.: GANspace: discovering interpretable GAN controls. In: Advances in Neural Information Processing Systems (NeurIPS) (2020)"},{"key":"40_CR18","unstructured":"DeVries, T., Taylor, G.W.: Dataset augmentation in feature space. arXiv preprint arXiv:1702.05538 (2017)"},{"key":"40_CR19","doi-asserted-by":"crossref","unstructured":"Bello, I., Zoph, B., Vaswani, A., Shlens, J., Le, Q.V.: Attention augmented convolutional networks. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3286\u20133295 (2019)","DOI":"10.1109\/ICCV.2019.00338"},{"key":"40_CR20","unstructured":"Bertasius, G., Wang, H., Torresani, L.: Is space-time attention all you need for video understanding? arXiv preprint arXiv:2102.05095 (2021)"},{"key":"40_CR21","doi-asserted-by":"crossref","unstructured":"Fan, H., et al.: Multiscale vision transformers. arXiv preprint arXiv:2104.11227 (2021)","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"40_CR22","doi-asserted-by":"crossref","unstructured":"Huang, Z., Wang, X., Huang, L., Huang, C., Wei, Y., Liu, W.: CCNet: criss-cross attention for semantic segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 603\u2013612 (2019)","DOI":"10.1109\/ICCV.2019.00069"},{"key":"40_CR23","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems (NeurIPS) (2017)"},{"key":"40_CR24","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7794\u20137803 (2018)","DOI":"10.1109\/CVPR.2018.00813"},{"key":"40_CR25","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo Vadis, action recognition? A new model and the kinetics dataset. In: Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"40_CR26","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: SlowFast networks for video recognition. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 6202\u20136211 (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"key":"40_CR27","doi-asserted-by":"crossref","unstructured":"Stroud, J., Ross, D., Sun, C., Deng, J., Sukthankar, R.: D3D: distilled 3D networks for video action recognition. In: The IEEE Winter Conference on Applications of Computer Vision, pp. 625\u2013634 (2020)","DOI":"10.1109\/WACV45572.2020.9093274"},{"key":"40_CR28","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3D convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4489\u20134497 (2015)","DOI":"10.1109\/ICCV.2015.510"},{"key":"40_CR29","unstructured":"Xie, S., Sun, C., Huang, J., Tu, Z., Murphy, K.: Rethinking spatiotemporal feature learning for video understanding. arXiv preprint arXiv:1712.04851 (2017)"},{"key":"40_CR30","unstructured":"Kay, W., et al.: The kinetics human action video dataset. arXiv preprint arXiv:1705.06950 (2017)"},{"key":"40_CR31","doi-asserted-by":"crossref","unstructured":"Gu, C., et al.: AVA: a video dataset of Spatio-temporally localized atomic visual actions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6047\u20136056 (2018)","DOI":"10.1109\/CVPR.2018.00633"},{"key":"40_CR32","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Jhuang, H., Garrote, E., Poggio, T., Serre, T.: HMDB: a large video database for human motion recognition. In: International Conference on Computer Vision (ICCV) (2011)","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"40_CR33","unstructured":"Sigurdsson, G.A., Gupta, A., Schmid, C., Farhadi, A., Alahari, K.: Charades-ego: a large-scale dataset of paired third and first person videos. arXiv preprint arXiv:1804.09626 (2018)"},{"key":"40_CR34","unstructured":"Soomro, K., Zamir, A.R., Shah, M.: UCF101: a dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012)"},{"key":"40_CR35","unstructured":"Tay, Y., et al.: Long range arena: a benchmark for efficient transformers. arXiv preprint arXiv:2011.04006 (2020)"},{"key":"40_CR36","unstructured":"Kitaev, N., Kaiser, \u0141., Levskaya, A.: Reformer: the efficient transformer. arXiv preprint arXiv:2001.04451 (2020)"},{"key":"40_CR37","unstructured":"Wang, S., Li, B., Khabsa, M., Fang, H., Ma, H.: Linformer: self-attention with linear complexity. arXiv preprint arXiv:2006.04768 (2020)"},{"key":"40_CR38","unstructured":"Zaheer, M., et al.: Big bird: transformers for longer sequences. arXiv preprint arXiv:2007.14062 (2020)"},{"key":"40_CR39","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.C.: MobileNetV2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"40_CR40","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (ICLR) (2020)"},{"key":"40_CR41","unstructured":"Carreira, J., Noland, E., Banki-Horvath, A., Hillier, C., Zisserman, A.: A short note about kinetics-600. arXiv preprint arXiv:1808.01340 (2018)"},{"key":"40_CR42","doi-asserted-by":"crossref","unstructured":"Tang, Y., et al.: COIN: a large-scale dataset for comprehensive instructional video analysis. In: Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00130"},{"issue":"11","key":"40_CR43","first-page":"120","volume":"25","author":"G Bradski","year":"2000","unstructured":"Bradski, G.: The openCV library. Dr. Dobb\u2019s J. Softw. Tools 25(11), 120\u2013123 (2000)","journal-title":"Dr. Dobb\u2019s J. Softw. Tools"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-26293-7_40","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,10]],"date-time":"2023-03-10T20:12:21Z","timestamp":1678479141000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-26293-7_40"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031262920","9783031262937"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-26293-7_40","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"11 March 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Macao","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 December 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 December 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.accv2022.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT Microsoft","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"836","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"277","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"33% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"For the ACCV 2022 workshops 25 papers have been accepted from 40 submissions","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}