{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:38:03Z","timestamp":1742913483573,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":40,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819949137"},{"type":"electronic","value":"9789819949144"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-981-99-4914-4_4","type":"book-chapter","created":{"date-parts":[[2023,7,31]],"date-time":"2023-07-31T21:02:35Z","timestamp":1690837355000},"page":"43-55","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Dynamic Circular Convolution for\u00a0Image Classification"],"prefix":"10.1007","author":[{"given":"Xuan-Thuy","family":"Vo","sequence":"first","affiliation":[]},{"given":"Duy-Linh","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Adri","family":"Priadana","sequence":"additional","affiliation":[]},{"given":"Kang-Hyun","family":"Jo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,8,1]]},"reference":[{"key":"4_CR1","unstructured":"Chi, L., Jiang, B., Mu, Y.: Fast Fourier convolution. In: Advances in Neural Information Processing Systems, vol. 33, pp. 4479\u20134488 (2020)"},{"key":"4_CR2","unstructured":"Chu, X., et al.: Conditional positional encodings for vision transformers. arXiv preprint arXiv:2102.10882 (2021)"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Dai, J., et al.: Deformable convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 764\u2013773 (2017)","DOI":"10.1109\/ICCV.2017.89"},{"key":"4_CR4","unstructured":"Dai, Z., Liu, H., Le, Q.V., Tan, M.: CoAtNet:: marrying convolution and attention for all data sizes. In: Advances in Neural Information Processing Systems, vol. 34, pp. 3965\u20133977 (2021)"},{"key":"4_CR5","unstructured":"Dosovitskiy, A., et al.: An image is worth 16 $$\\times $$ 16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"key":"4_CR6","unstructured":"Guibas, J., Mardani, M., Li, Z., Tao, A., Anandkumar, A., Catanzaro, B.: Efficient token mixing for transformers via adaptive Fourier neural operators. In: International Conference on Learning Representations (2021)"},{"key":"4_CR7","unstructured":"Han, K., Xiao, A., Wu, E., Guo, J., Xu, C., Wang, Y.: Transformer in transformer. In: Advances in Neural Information Processing Systems, vol. 34, pp. 15908\u201315919 (2021)"},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Heo, B., Yun, S., Han, D., Chun, S., Choe, J., Oh, S.J.: Rethinking spatial dimensions of vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11936\u201311945 (2021)","DOI":"10.1109\/ICCV48922.2021.01172"},{"key":"4_CR10","unstructured":"Howard, A.G., et al.: MobileNets: efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)"},{"key":"4_CR11","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Van Der Maaten, L., Weinberger, K.Q.: Densely connected convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4700\u20134708 (2017)","DOI":"10.1109\/CVPR.2017.243"},{"key":"4_CR12","unstructured":"Krizhevsky, A., Hinton, G., et al.: Learning multiple layers of features from tiny images (2009)"},{"issue":"6","key":"4_CR13","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. Commun. ACM 60(6), 84\u201390 (2017)","journal-title":"Commun. ACM"},{"key":"4_CR14","unstructured":"Li, J., et al.: Next-ViT: next generation vision transformer for efficient deployment in realistic industrial scenarios. arXiv preprint arXiv:2207.05501 (2022)"},{"key":"4_CR15","unstructured":"Li, Y., et al.: EfficientFormer: Vision transformers at MobileNet speed. In: Oh, A.H., Agarwal, A., Belgrave, D., Cho, K. (eds.) Advances in Neural Information Processing Systems (2022). https:\/\/openreview.net\/forum?id=NXHXoYMLIG"},{"key":"4_CR16","unstructured":"Li, Y., Zhang, K., Cao, J., Timofte, R., Van Gool, L.: LocalViT: bringing locality to vision transformers. arXiv preprint arXiv:2104.05707 (2021)"},{"key":"4_CR17","unstructured":"Liu, H., Dai, Z., So, D., Le, Q.V.: Pay attention to MLPs. In: Advances in Neural Information Processing Systems, vol. 34, pp. 9204\u20139215 (2021)"},{"key":"4_CR18","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer V2: scaling up capacity and resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12009\u201312019 (2022)","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"4_CR19","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"Liu, Z., Mao, H., Wu, C.Y., Feichtenhofer, C., Darrell, T., Xie, S.: A ConvNet for the 2020s. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11976\u201311986 (2022)","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"4_CR21","unstructured":"Mehta, S., Rastegari, M.: MobileViT: light-weight, general-purpose, and mobile-friendly vision transformer. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=vh-0sUt8HlG"},{"key":"4_CR22","unstructured":"Mehta, S., Rastegari, M.: Separable self-attention for mobile vision transformers. arXiv preprint arXiv:2206.02680 (2022)"},{"key":"4_CR23","unstructured":"Min, J., Zhao, Y., Luo, C., Cho, M.: Peripheral vision transformer. In: Oh, A.H., Agarwal, A., Belgrave, D., Cho, K. (eds.) Advances in Neural Information Processing Systems (2022). https:\/\/openreview.net\/forum?id=nE8IJLT7nW-"},{"key":"4_CR24","unstructured":"Rao, Y., et al.: HorNet: efficient high-order spatial interactions with recursive gated convolutions. In: Advances in Neural Information Processing Systems (NeurIPS) (2022)"},{"key":"4_CR25","unstructured":"Rao, Y., Zhao, W., Zhu, Z., Lu, J., Zhou, J.: Global filter networks for image classification. In: Advances in Neural Information Processing Systems, vol. 34, pp. 980\u2013993 (2021)"},{"issue":"3","key":"4_CR26","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: ImageNet large scale visual recognition challenge. Int. J. Comput. Vis. 115(3), 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vis."},{"key":"4_CR27","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.C.: MobileNetV2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"4_CR28","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"4_CR29","doi-asserted-by":"crossref","unstructured":"Suvorov, R., et al.: Resolution-robust large mask inpainting with Fourier convolutions. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 2149\u20132159 (2022)","DOI":"10.1109\/WACV51458.2022.00323"},{"key":"4_CR30","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z.: Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2818\u20132826 (2016)","DOI":"10.1109\/CVPR.2016.308"},{"key":"4_CR31","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, pp. 10347\u201310357. PMLR (2021)"},{"key":"4_CR32","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"4_CR33","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 568\u2013578 (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"issue":"3","key":"4_CR34","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1007\/s41095-022-0274-8","volume":"8","author":"W Wang","year":"2022","unstructured":"Wang, W., et al.: PVT v2: improved baselines with pyramid vision transformer. Comput. Vis. Media 8(3), 415\u2013424 (2022)","journal-title":"Comput. Vis. Media"},{"key":"4_CR35","doi-asserted-by":"publisher","unstructured":"Wightman, R.: PyTorch image models (2019). https:\/\/doi.org\/10.5281\/zenodo.4414861. https:\/\/github.com\/rwightman\/pytorch-image-models","DOI":"10.5281\/zenodo.4414861"},{"key":"4_CR36","unstructured":"Xu, Y., Zhang, Q., Zhang, J., Tao, D.: ViTAE: vision transformer advanced by exploring intrinsic inductive bias. In: Advances in Neural Information Processing Systems, vol. 34, pp. 28522\u201328535 (2021)"},{"key":"4_CR37","unstructured":"Yang, J., Li, C., Dai, X., Gao, J.: Focal modulation networks. In: Oh, A.H., Agarwal, A., Belgrave, D., Cho, K. (eds.) Advances in Neural Information Processing Systems (2022). https:\/\/openreview.net\/forum?id=ePhEbo039l"},{"key":"4_CR38","doi-asserted-by":"crossref","unstructured":"Yu, W., et al.: MetaFormer is actually what you need for vision. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10819\u201310829 (2022)","DOI":"10.1109\/CVPR52688.2022.01055"},{"key":"4_CR39","unstructured":"Yu, W., et al.: MetaFormer baselines for vision. arXiv preprint arXiv:2210.13452 (2022)"},{"key":"4_CR40","doi-asserted-by":"crossref","unstructured":"Yuan, L., et al.: Tokens-to-token ViT: training vision transformers from scratch on ImageNet. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 558\u2013567 (2021)","DOI":"10.1109\/ICCV48922.2021.00060"}],"container-title":["Communications in Computer and Information Science","Frontiers of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-4914-4_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,31]],"date-time":"2023-07-31T21:06:53Z","timestamp":1690837613000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-4914-4_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9789819949137","9789819949144"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-4914-4_4","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"1 August 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IW-FCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Frontiers of Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Yeozu","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 February 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 February 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iwfcv2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iwfcv2023.github.io\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}