{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T07:48:04Z","timestamp":1772783284643,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":80,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Zhongshan science and technology development project","award":["2020AG016"],"award-info":[{"award-number":["2020AG016"]}]},{"name":"Natural Science Foundation of China","award":["62171139"],"award-info":[{"award-number":["62171139"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612122","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:26:54Z","timestamp":1698391614000},"page":"2477-2486","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":14,"title":["PVG: Progressive Vision Graph for Vision Recognition"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1036-5076","authenticated-orcid":false,"given":"JiaFu","family":"Wu","sequence":"first","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0242-6481","authenticated-orcid":false,"given":"Jian","family":"Li","sequence":"additional","affiliation":[{"name":"Tencent Youtu Lab, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8891-6766","authenticated-orcid":false,"given":"Jiangning","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tencent Youtu Lab, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9204-5676","authenticated-orcid":false,"given":"Boshen","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tencent Youtu Lab, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2650-4146","authenticated-orcid":false,"given":"Mingmin","family":"Chi","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6592-8411","authenticated-orcid":false,"given":"Yabiao","family":"Wang","sequence":"additional","affiliation":[{"name":"Tencent Youtu Lab, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4216-8090","authenticated-orcid":false,"given":"Chengjie","family":"Wang","sequence":"additional","affiliation":[{"name":"Tencent Youtu Lab, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"international conference on machine learning. PMLR, 21--29","author":"Abu-El-Haija Sami","year":"2019","unstructured":"Sami Abu-El-Haija, Bryan Perozzi, Amol Kapoor, Nazanin Alipourfard, Kristina Lerman, Hrayr Harutyunyan, Greg Ver Steeg, and Aram Galstyan. 2019. Mixhop: Higher-order graph convolutional architectures via sparsified neighborhood mixing. In international conference on machine learning. PMLR, 21--29."},{"key":"e_1_3_2_1_2_1","volume-title":"Diffusion-convolutional neural networks. Advances in neural information processing systems","author":"Atwood James","year":"2016","unstructured":"James Atwood and Don Towsley. 2016. Diffusion-convolutional neural networks. Advances in neural information processing systems, Vol. 29 (2016)."},{"key":"e_1_3_2_1_3_1","volume-title":"Spectral networks and locally connected networks on graphs. arXiv preprint arXiv:1312.6203","author":"Bruna Joan","year":"2013","unstructured":"Joan Bruna, Wojciech Zaremba, Arthur Szlam, and Yann LeCun. 2013. Spectral networks and locally connected networks on graphs. arXiv preprint arXiv:1312.6203 (2013)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00698"},{"key":"e_1_3_2_1_5_1","volume-title":"Regionvit: Regional-to-local attention for vision transformers. arXiv preprint arXiv:2106.02689","author":"Chen Chun-Fu","year":"2021","unstructured":"Chun-Fu Chen, Rameswar Panda, and Quanfu Fan. 2021a. Regionvit: Regional-to-local attention for vision transformers. arXiv preprint arXiv:2106.02689 (2021)."},{"key":"e_1_3_2_1_6_1","volume-title":"Cyclemlp: A mlp-like architecture for dense prediction. arXiv preprint arXiv:2107.10224","author":"Chen Shoufa","year":"2021","unstructured":"Shoufa Chen, Enze Xie, Chongjian Ge, Runjian Chen, Ding Liang, and Ping Luo. 2021b. Cyclemlp: A mlp-like architecture for dense prediction. arXiv preprint arXiv:2107.10224 (2021)."},{"key":"e_1_3_2_1_7_1","volume-title":"Iterative deep graph learning for graph neural networks: Better and robust node embeddings. Advances in neural information processing systems","author":"Chen Yu","year":"2020","unstructured":"Yu Chen, Lingfei Wu, and Mohammed Zaki. 2020. Iterative deep graph learning for graph neural networks: Better and robust node embeddings. Advances in neural information processing systems, Vol. 33 (2020), 19314--19326."},{"key":"e_1_3_2_1_8_1","volume-title":"Graphflow: Exploiting conversation flow with graph neural networks for conversational machine comprehension. arXiv preprint arXiv:1908.00059","author":"Chen Yu","year":"2019","unstructured":"Yu Chen, Lingfei Wu, and Mohammed J Zaki. 2019b. Graphflow: Exploiting conversation flow with graph neural networks for conversational machine comprehension. arXiv preprint arXiv:1908.00059 (2019)."},{"key":"e_1_3_2_1_9_1","volume-title":"Reinforcement learning based graph-to-sequence model for natural question generation. arXiv preprint arXiv:1908.04942","author":"Chen Yu","year":"2019","unstructured":"Yu Chen, Lingfei Wu, and Mohammed J Zaki. 2019c. Reinforcement learning based graph-to-sequence model for natural question generation. arXiv preprint arXiv:1908.04942 (2019)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00532"},{"key":"e_1_3_2_1_11_1","first-page":"3965","article-title":"Coatnet: Marrying convolution and attention for all data sizes","volume":"34","author":"Dai Zihang","year":"2021","unstructured":"Zihang Dai, Hanxiao Liu, Quoc V Le, and Mingxing Tan. 2021. Coatnet: Marrying convolution and attention for all data sizes. Advances in Neural Information Processing Systems, Vol. 34 (2021), 3965--3977.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_12_1","volume-title":"Convolutional neural networks on graphs with fast localized spectral filtering. Advances in neural information processing systems","author":"Defferrard Micha\u00ebl","year":"2016","unstructured":"Micha\u00ebl Defferrard, Xavier Bresson, and Pierre Vandergheynst. 2016. Convolutional neural networks on graphs with fast localized spectral filtering. Advances in neural information processing systems, Vol. 29 (2016)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 11963--11975","author":"Ding Xiaohan","year":"2022","unstructured":"Xiaohan Ding, Xiangyu Zhang, Jungong Han, and Guiguang Ding. 2022. Scaling up your kernels to 31x31: Revisiting large kernel design in cnns. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 11963--11975."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01181"},{"key":"e_1_3_2_1_16_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_17_1","volume-title":"Container: Context aggregation network. arXiv preprint arXiv:2106.01401","author":"Gao Peng","year":"2021","unstructured":"Peng Gao, Jiasen Lu, Hongsheng Li, Roozbeh Mottaghi, and Aniruddha Kembhavi. 2021. Container: Context aggregation network. arXiv preprint arXiv:2106.01401 (2021)."},{"key":"e_1_3_2_1_18_1","volume-title":"Diffusion improves graph learning. Advances in neural information processing systems","author":"Gasteiger Johannes","year":"2019","unstructured":"Johannes Gasteiger, Stefan Wei\u00dfenberger, and Stephan G\u00fcnnemann. 2019. Diffusion improves graph learning. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_19_1","volume-title":"Digital selection and analogue amplification coexist in a cortex-inspired silicon circuit. nature","author":"Hahnloser Richard HR","year":"2000","unstructured":"Richard HR Hahnloser, Rahul Sarpeshkar, Misha A Mahowald, Rodney J Douglas, and H Sebastian Seung. 2000. Digital selection and analogue amplification coexist in a cortex-inspired silicon circuit. nature, Vol. 405, 6789 (2000), 947--951."},{"key":"e_1_3_2_1_20_1","volume-title":"Inductive representation learning on large graphs. Advances in neural information processing systems","author":"Hamilton Will","year":"2017","unstructured":"Will Hamilton, Zhitao Ying, and Jure Leskovec. 2017. Inductive representation learning on large graphs. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_21_1","volume-title":"Vision gnn: An image is worth graph of nodes. arXiv preprint arXiv:2206.00272","author":"Han Kai","year":"2022","unstructured":"Kai Han, Yunhe Wang, Jianyuan Guo, Yehui Tang, and Enhua Wu. 2022. Vision gnn: An image is worth graph of nodes. arXiv preprint arXiv:2206.00272 (2022)."},{"key":"e_1_3_2_1_22_1","volume-title":"Reference Twice: A Simple and Unified Baseline for Few-Shot Instance Segmentation. arXiv preprint arXiv:2301.01156","author":"Han Yue","year":"2023","unstructured":"Yue Han, Jiangning Zhang, Zhucun Xue, Chao Xu, Xintian Shen, Yabiao Wang, Chengjie Wang, Yong Liu, and Xiangtai Li. 2023. Reference Twice: A Simple and Unified Baseline for Few-Shot Instance Segmentation. arXiv preprint arXiv:2301.01156 (2023)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_24_1","volume-title":"Deep convolutional networks on graph-structured data. arXiv preprint arXiv:1506.05163","author":"Henaff Mikael","year":"2015","unstructured":"Mikael Henaff, Joan Bruna, and Yann LeCun. 2015. Deep convolutional networks on graph-structured data. arXiv preprint arXiv:1506.05163 (2015)."},{"key":"e_1_3_2_1_25_1","volume-title":"Bridging nonlinearities and stochastic regularizers with gaussian error linear units. CoRR, abs\/1606.08415","author":"Hendrycks Dan","year":"2016","unstructured":"Dan Hendrycks and Kevin Gimpel. 2016. Bridging nonlinearities and stochastic regularizers with gaussian error linear units. CoRR, abs\/1606.08415, Vol. 3 (2016)."},{"key":"e_1_3_2_1_26_1","volume-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861","author":"Howard Andrew G","year":"2017","unstructured":"Andrew G Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, and Hartwig Adam. 2017. Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58520-4_1"},{"key":"e_1_3_2_1_28_1","volume-title":"Adaptive sampling towards fast graph representation learning. Advances in neural information processing systems","author":"Huang Wenbing","year":"2018","unstructured":"Wenbing Huang, Tong Zhang, Yu Rong, and Junzhou Huang. 2018. Adaptive sampling towards fast graph representation learning. Advances in neural information processing systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01175"},{"key":"e_1_3_2_1_30_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and Max Welling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2018.2879624"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00936"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00520"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475372"},{"key":"e_1_3_2_1_37_1","volume-title":"Uniformer: Unified transformer for efficient spatiotemporal representation learning. arXiv preprint arXiv:2201.04676","author":"Li Kunchang","year":"2022","unstructured":"Kunchang Li, Yali Wang, Peng Gao, Guanglu Song, Yu Liu, Hongsheng Li, and Yu Qiao. 2022b. Uniformer: Unified transformer for efficient spatiotemporal representation learning. arXiv preprint arXiv:2201.04676 (2022)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11604"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11691"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00522"},{"key":"e_1_3_2_1_41_1","volume-title":"Faster, Accurate, and Domain Agnostic Semantic Segmentation via Semantic Flow. ArXiv","author":"Li Xiangtai","year":"2022","unstructured":"Xiangtai Li, Jiangning Zhang, Yibo Yang, Guangliang Cheng, Kuiyuan Yang, Yu Tong, and Dacheng Tao. 2022c. SFNet: Faster, Accurate, and Domain Agnostic Semantic Segmentation via Semantic Flow. ArXiv, Vol. abs\/2207.04415 (2022)."},{"key":"e_1_3_2_1_42_1","volume-title":"Localvit: Bringing locality to vision transformers. arXiv preprint arXiv:2104.05707","author":"Li Yawei","year":"2021","unstructured":"Yawei Li, Kai Zhang, Jiezhang Cao, Radu Timofte, and Luc Van Gool. 2021a. Localvit: Bringing locality to vision transformers. arXiv preprint arXiv:2104.05707 (2021)."},{"key":"e_1_3_2_1_43_1","volume-title":"Proceedings, Part V 13","author":"Lin Tsung-Yi","year":"2014","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C Lawrence Zitnick. 2014. Microsoft coco: Common objects in context. In Computer Vision--ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6--12, 2014, Proceedings, Part V 13. Springer, 740--755."},{"key":"e_1_3_2_1_44_1","volume-title":"Isometric propagation network for generalized zero-shot learning. arXiv preprint arXiv:2102.02038","author":"Liu Lu","year":"2021","unstructured":"Lu Liu, Tianyi Zhou, Guodong Long, Jing Jiang, Xuanyi Dong, and Chengqi Zhang. 2021b. Isometric propagation network for generalized zero-shot learning. arXiv preprint arXiv:2102.02038 (2021)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"e_1_3_2_1_48_1","volume-title":"International Conference on Machine Learning. PMLR, 6468--6478","author":"Luo Yadan","year":"2020","unstructured":"Yadan Luo, Zijian Wang, Zi Huang, and Mahsa Baktashmotlagh. 2020. Progressive graph learning for open-set domain adaptation. In International Conference on Machine Learning. PMLR, 6468--6478."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00846"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.576"},{"key":"e_1_3_2_1_51_1","volume-title":"International conference on machine learning. PMLR","author":"Niepert Mathias","year":"2016","unstructured":"Mathias Niepert, Mohamed Ahmed, and Konstantin Kutzkov. 2016. Learning convolutional neural networks for graphs. In International conference on machine learning. PMLR, 2014--2023."},{"key":"e_1_3_2_1_52_1","volume-title":"Dropedge: Towards deep graph convolutional networks on node classification. arXiv preprint arXiv:1907.10903","author":"Rong Yu","year":"2019","unstructured":"Yu Rong, Wenbing Huang, Tingyang Xu, and Junzhou Huang. 2019. Dropedge: Towards deep graph convolutional networks on node classification. arXiv preprint arXiv:1907.10903 (2019)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00531"},{"key":"e_1_3_2_1_54_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_56_1","volume-title":"International conference on machine learning. PMLR, 10347--10357","author":"Touvron Hugo","year":"2021","unstructured":"Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, and Herv\u00e9 J\u00e9gou. 2021. Training data-efficient image transformers & distillation through attention. In International conference on machine learning. PMLR, 10347--10357."},{"key":"e_1_3_2_1_57_1","volume-title":"Graph attention networks. arXiv preprint arXiv:1710.10903","author":"Petar Velivc","year":"2017","unstructured":"Petar Velivc kovi\u0107, Guillem Cucurull, Arantxa Casanova, Adriana Romero, Pietro Lio, and Yoshua Bengio. 2017. Graph attention networks. arXiv preprint arXiv:1710.10903 (2017)."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939753"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00297"},{"key":"e_1_3_2_1_60_1","volume-title":"CrossFormer: A Versatile Vision Transformer Hinging on Cross-scale Attention. arXiv preprint arXiv:2303.06908","author":"Wang Wenxiao","year":"2023","unstructured":"Wenxiao Wang, Wei Chen, Qibo Qiu, Long Chen, Boxi Wu, Binbin Lin, Xiaofei He, and Wei Liu. 2023. CrossFormer: A Versatile Vision Transformer Hinging on Cross-scale Attention. arXiv preprint arXiv:2303.06908 (2023)."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"e_1_3_2_1_62_1","volume-title":"Dynamic graph cnn for learning on point clouds. Acm Transactions On Graphics (tog)","author":"Wang Yue","year":"2019","unstructured":"Yue Wang, Yongbin Sun, Ziwei Liu, Sanjay E Sarma, Michael M Bronstein, and Justin M Solomon. 2019. Dynamic graph cnn for learning on point clouds. Acm Transactions On Graphics (tog), Vol. 38, 5 (2019), 1--12."},{"key":"e_1_3_2_1_63_1","volume-title":"Resnet strikes back: An improved training procedure in timm. arXiv preprint arXiv:2110.00476","author":"Wightman Ross","year":"2021","unstructured":"Ross Wightman, Hugo Touvron, and Herv\u00e9 J\u00e9gou. 2021. Resnet strikes back: An improved training procedure in timm. arXiv preprint arXiv:2110.00476 (2021)."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"e_1_3_2_1_65_1","volume-title":"Henghui Ding, Yibo Yang, Xia Li, Jiangning Zhang, Yunhai Tong, Xudong Jiang","author":"Wu Jianzong","year":"2023","unstructured":"Jianzong Wu, Xiangtai Li, Shilin Xu Haobo Yuan, Henghui Ding, Yibo Yang, Xia Li, Jiangning Zhang, Yunhai Tong, Xudong Jiang, Bernard Ghanem, et al. 2023. Towards Open Vocabulary Learning: A Survey. arXiv preprint arXiv:2306.15880 (2023)."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00910"},{"key":"e_1_3_2_1_67_1","volume-title":"International conference on machine learning. PMLR, 5453--5462","author":"Xu Keyulu","year":"2018","unstructured":"Keyulu Xu, Chengtao Li, Yonglong Tian, Tomohiro Sonobe, Ken-ichi Kawarabayashi, and Stefanie Jegelka. 2018. Representation learning on graphs with jumping knowledge networks. In International conference on machine learning. PMLR, 5453--5462."},{"key":"e_1_3_2_1_68_1","first-page":"28522","article-title":"Vitae: Vision transformer advanced by exploring intrinsic inductive bias","volume":"34","author":"Xu Yufei","year":"2021","unstructured":"Yufei Xu, Qiming Zhang, Jing Zhang, and Dacheng Tao. 2021. Vitae: Vision transformer advanced by exploring intrinsic inductive bias. Advances in Neural Information Processing Systems, Vol. 34 (2021), 28522--28535.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_69_1","volume-title":"Focal self-attention for local-global interactions in vision transformers. arXiv preprint arXiv:2107.00641","author":"Yang Jianwei","year":"2021","unstructured":"Jianwei Yang, Chunyuan Li, Pengchuan Zhang, Xiyang Dai, Bin Xiao, Lu Yuan, and Jianfeng Gao. 2021. Focal self-attention for local-global interactions in vision transformers. arXiv preprint arXiv:2107.00641 (2021)."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01340"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6964"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00060"},{"key":"e_1_3_2_1_73_1","volume-title":"Rethinking Mobile Block for Efficient Neural Models. ICCV","author":"Zhang Jiangning","year":"2023","unstructured":"Jiangning Zhang, Xiangtai Li, Jian Li, Liang Liu, Zhucun Xue, Boshen Zhang, Zhengkai Jiang, Tianxin Huang, Yabiao Wang, and Chengjie Wang. 2023. Rethinking Mobile Block for Efficient Neural Models. ICCV (2023)."},{"key":"e_1_3_2_1_74_1","volume-title":"Eatformer: Improving vision transformer inspired by evolutionary algorithm. arXiv preprint arXiv:2206.09325","author":"Zhang Jiangning","year":"2022","unstructured":"Jiangning Zhang, Xiangtai Li, Yabiao Wang, Chengjie Wang, Yibo Yang, Yong Liu, and Dacheng Tao. 2022. Eatformer: Improving vision transformer inspired by evolutionary algorithm. arXiv preprint arXiv:2206.09325 (2022)."},{"key":"e_1_3_2_1_75_1","first-page":"26674","article-title":"Analogous to evolutionary algorithm: Designing a unified sequence model","volume":"34","author":"Zhang Jiangning","year":"2021","unstructured":"Jiangning Zhang, Chao Xu, Jian Li, Wenzhou Chen, Yabiao Wang, Ying Tai, Shuo Chen, Chengjie Wang, Feiyue Huang, and Yong Liu. 2021. Analogous to evolutionary algorithm: Designing a unified sequence model. NeurIPS, Vol. 34 (2021), 26674--26688.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00378"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00276"},{"key":"e_1_3_2_1_78_1","volume-title":"Pairnorm: Tackling oversmoothing in gnns. arXiv preprint arXiv:1909.12223","author":"Zhao Lingxiao","year":"2019","unstructured":"Lingxiao Zhao and Leman Akoglu. 2019. Pairnorm: Tackling oversmoothing in gnns. arXiv preprint arXiv:1909.12223 (2019)."},{"key":"e_1_3_2_1_79_1","first-page":"16410","article-title":"Gradinit: Learning to initialize neural networks for stable and efficient training","volume":"34","author":"Zhu Chen","year":"2021","unstructured":"Chen Zhu, Renkun Ni, Zheng Xu, Kezhi Kong, W Ronny Huang, and Tom Goldstein. 2021. Gradinit: Learning to initialize neural networks for stable and efficient training. Advances in Neural Information Processing Systems, Vol. 34 (2021), 16410--16422.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_80_1","volume-title":"International conference on learning representations.","author":"Zhu Hao","year":"2021","unstructured":"Hao Zhu and Piotr Koniusz. 2021. Simple spectral graph convolution. In International conference on learning representations."}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612122","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612122","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T23:53:12Z","timestamp":1755820392000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612122"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":80,"alternative-id":["10.1145\/3581783.3612122","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612122","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}