{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:20:51Z","timestamp":1765340451232,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","funder":[{"DOI":"10.13039\/100007219","name":"Natural Science Foundation of Shanghai","doi-asserted-by":"publisher","award":["24ZR1490400"],"award-info":[{"award-number":["24ZR1490400"]}],"id":[{"id":"10.13039\/100007219","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62372117, 62402120, 62472102"],"award-info":[{"award-number":["62372117, 62402120, 62472102"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754916","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:47:18Z","timestamp":1761374838000},"page":"2968-2976","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Scaling Laws for Data-Efficient Visual Transfer Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-8006-9757","authenticated-orcid":false,"given":"Wenxuan","family":"Yang","sequence":"first","affiliation":[{"name":"College of Computer Science and Artificial Intelligence, Shanghai Key Laboratory of Intelligent Information Processing, Fudan University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8661-1704","authenticated-orcid":false,"given":"Qingqv","family":"Wei","sequence":"additional","affiliation":[{"name":"College of Computer Science and Artificial Intelligence, Shanghai Key Laboratory of Intelligent Information Processing, Fudan University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5577-5773","authenticated-orcid":false,"given":"Chenxi","family":"Ma","sequence":"additional","affiliation":[{"name":"College of Computer Science and Artificial Intelligence, Shanghai Key Laboratory of Intelligent Information Processing, Fudan University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7677-4772","authenticated-orcid":false,"given":"Weimin","family":"Tan","sequence":"additional","affiliation":[{"name":"College of Computer Science and Artificial Intelligence, Shanghai Key Laboratory of Intelligent Information Processing, Fudan University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0256-9682","authenticated-orcid":false,"given":"Bo","family":"Yan","sequence":"additional","affiliation":[{"name":"College of Computer Science and Artificial Intelligence, Shanghai Key Laboratory of Intelligent Information Processing, Fudan University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al., 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"International Conference on Machine Learning. PMLR, 265-279","author":"Aghajanyan Armen","year":"2023","unstructured":"Armen Aghajanyan, Lili Yu, Alexis Conneau, Wei-Ning Hsu, Karen Hambardzumyan, Susan Zhang, Stephen Roller, Naman Goyal, Omer Levy, and Luke Zettlemoyer. 2023. Scaling laws for generative mixed-modal language models. In International Conference on Machine Learning. PMLR, 265-279."},{"key":"e_1_3_2_1_3_1","first-page":"16406","article-title":"Getting vit in shape: Scaling laws for compute-optimal model design","volume":"36","author":"Alabdulmohsin Ibrahim M","year":"2023","unstructured":"Ibrahim M Alabdulmohsin, Xiaohua Zhai, Alexander Kolesnikov, and Lucas Beyer. 2023. Getting vit in shape: Scaling laws for compute-optimal model design. Advances in Neural Information Processing Systems, Vol. 36 (2023), 16406-16425.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01065"},{"key":"e_1_3_2_1_5_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems Vol. 33 (2020) 1877-1901."},{"key":"e_1_3_2_1_6_1","volume-title":"Distillation Scaling Laws. arXiv preprint arXiv:2502.08606","author":"Busbridge Dan","year":"2025","unstructured":"Dan Busbridge, Amitis Shidani, Floris Weers, Jason Ramapuram, Etai Littwin, and Russ Webb. 2025. Distillation Scaling Laws. arXiv preprint arXiv:2502.08606 (2025)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00489"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_9_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_10_1","first-page":"1","article-title":"Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity","volume":"23","author":"Fedus William","year":"2022","unstructured":"William Fedus, Barret Zoph, and Noam Shazeer. 2022. Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity. Journal of Machine Learning Research, Vol. 23, 120 (2022), 1-39.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_11_1","volume-title":"International Conference on Machine Learning. PMLR, 11117-11143","author":"Geiping Jonas","year":"2023","unstructured":"Jonas Geiping and Tom Goldstein. 2023. Cramming: Training a Language Model on a single GPU in one day. In International Conference on Machine Learning. PMLR, 11117-11143."},{"key":"e_1_3_2_1_12_1","volume-title":"Overview of the Transformer-based Models for NLP Tasks. In 2020 15th Conference on computer science and information systems (FedCSIS). IEEE, 179-183","author":"Gillioz Anthony","year":"2020","unstructured":"Anthony Gillioz, Jacky Casas, Elena Mugellini, and Omar Abou Khaled. 2020. Overview of the Transformer-based Models for NLP Tasks. In 2020 15th Conference on computer science and information systems (FedCSIS). IEEE, 179-183."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01453-z"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Kai Han Yunhe Wang Hanting Chen Xinghao Chen Jianyuan Guo Zhenhua Liu Yehui Tang An Xiao Chunjing Xu Yixing Xu et al. 2022. A survey on vision transformer. IEEE transactions on pattern analysis and machine intelligence Vol. 45 1 (2022) 87-110.","DOI":"10.1109\/TPAMI.2022.3152247"},{"key":"e_1_3_2_1_15_1","unstructured":"Tom Henighan Jared Kaplan Mor Katz Mark Chen Christopher Hesse Jacob Jackson Heewoo Jun Tom B Brown Prafulla Dhariwal Scott Gray et al. 2020. Scaling laws for autoregressive generative modeling. arXiv preprint arXiv:2010.14701 (2020)."},{"key":"e_1_3_2_1_16_1","volume-title":"Scaling laws for transfer. arXiv preprint arXiv:2102.01293","author":"Hernandez Danny","year":"2021","unstructured":"Danny Hernandez, Jared Kaplan, Tom Henighan, and Sam McCandlish. 2021. Scaling laws for transfer. arXiv preprint arXiv:2102.01293 (2021)."},{"key":"e_1_3_2_1_17_1","volume-title":"Yang Yang, and Yanqi Zhou.","author":"Hestness Joel","year":"2017","unstructured":"Joel Hestness, Sharan Narang, Newsha Ardalani, Gregory Diamos, Heewoo Jun, Hassan Kianinejad, Md Mostofa Ali Patwary, Yang Yang, and Yanqi Zhou. 2017. Deep learning scaling is predictable. Empirically. arXiv, Vol. 1712 (2017), 2."},{"key":"e_1_3_2_1_18_1","volume-title":"Lisa Anne Hendricks, Johannes Welbl, Aidan Clark, et al.","author":"Hoffmann Jordan","year":"2022","unstructured":"Jordan Hoffmann, Sebastian Borgeaud, Arthur Mensch, Elena Buchatskaya, Trevor Cai, Eliza Rutherford, Diego de Las Casas, Lisa Anne Hendricks, Johannes Welbl, Aidan Clark, et al., 2022. Training compute-optimal large language models. arXiv preprint arXiv:2203.15556 (2022)."},{"key":"e_1_3_2_1_19_1","volume-title":"International conference on machine learning. PMLR, 4904-4916","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc Le, Yun-Hsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling up visual and vision-language representation learning with noisy text supervision. In International conference on machine learning. PMLR, 4904-4916."},{"key":"e_1_3_2_1_20_1","volume-title":"Scaling laws for neural language models. arXiv preprint arXiv:2001.08361","author":"Kaplan Jared","year":"2020","unstructured":"Jared Kaplan, Sam McCandlish, Tom Henighan, Tom B Brown, Benjamin Chess, Rewon Child, Scott Gray, Alec Radford, Jeffrey Wu, and Dario Amodei. 2020. Scaling laws for neural language models. arXiv preprint arXiv:2001.08361 (2020)."},{"key":"e_1_3_2_1_21_1","volume-title":"Fahad Shahbaz Khan, and Mubarak Shah.","author":"Khan Salman","year":"2022","unstructured":"Salman Khan, Muzammal Naseer, Munawar Hayat, Syed Waqas Zamir, Fahad Shahbaz Khan, and Mubarak Shah. 2022. Transformers in vision: A survey. ACM computing surveys (CSUR), Vol. 54, 10s (2022), 1-41."},{"key":"e_1_3_2_1_22_1","unstructured":"Alex Krizhevsky Geoffrey Hinton et al. 2009. Learning multiple layers of features from tiny images. (2009)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Alina Kuznetsova Hassan Rom Neil Alldrin Jasper Uijlings Ivan Krasin Jordi Pont-Tuset Shahab Kamali Stefan Popov Matteo Malloci Alexander Kolesnikov et al. 2020. The open images dataset v4: Unified image classification object detection and visual relationship detection at scale. International journal of computer vision Vol. 128 7 (2020) 1956-1981.","DOI":"10.1007\/s11263-020-01316-z"},{"key":"e_1_3_2_1_24_1","volume-title":"A survey of transformers. AI open","author":"Lin Tianyang","year":"2022","unstructured":"Tianyang Lin, Yuxin Wang, Xiangyang Liu, and Xipeng Qiu. 2022. A survey of transformers. AI open, Vol. 3 (2022), 111-132."},{"key":"e_1_3_2_1_25_1","first-page":"740","volume-title":"Switzerland","author":"Lin Tsung-Yi","year":"2014","unstructured":"Tsung-Yi Lin, Michael Maire, Serge Belongie, James Hays, Pietro Perona, Deva Ramanan, Piotr Doll\u00e1r, and C Lawrence Zitnick. 2014. Microsoft coco: Common objects in context. In Computer vision-ECCV 2014: 13th European conference, zurich, Switzerland, September 6-12, 2014, proceedings, part v 13. Springer, 740-755."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compag.2020.105760"},{"key":"e_1_3_2_1_27_1","first-page":"72983","article-title":"Scaling open-vocabulary object detection","volume":"36","author":"Minderer Matthias","year":"2023","unstructured":"Matthias Minderer, Alexey Gritsenko, and Neil Houlsby. 2023. Scaling open-vocabulary object detection. Advances in Neural Information Processing Systems, Vol. 36 (2023), 72983-73007.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_28_1","first-page":"50358","article-title":"Scaling data-constrained language models","volume":"36","author":"Muennighoff Niklas","year":"2023","unstructured":"Niklas Muennighoff, Alexander Rush, Boaz Barak, Teven Le Scao, Nouamane Tazi, Aleksandra Piktus, Sampo Pyysalo, Thomas Wolf, and Colin A Raffel. 2023. Scaling data-constrained language models. Advances in Neural Information Processing Systems, Vol. 36 (2023), 50358-50376.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.3390\/info14040242"},{"key":"e_1_3_2_1_30_1","volume-title":"International conference on machine learning. PMLR, 5142-5151","author":"Phuong Mary","year":"2019","unstructured":"Mary Phuong and Christoph Lampert. 2019. Towards understanding knowledge distillation. In International conference on machine learning. PMLR, 5142-5151."},{"key":"e_1_3_2_1_31_1","volume-title":"Scaling laws for the few-shot adaptation of pre-trained image classifiers. arXiv preprint arXiv:2110.06990","author":"Prato Gabriele","year":"2021","unstructured":"Gabriele Prato, Simon Guiroy, Ethan Caballero, Irina Rish, and Sarath Chandar. 2021. Scaling laws for the few-shot adaptation of pre-trained image classifiers. arXiv preprint arXiv:2110.06990 (2021)."},{"key":"e_1_3_2_1_32_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever et al. 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_1_33_1","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. 2019. Language models are unsupervised multitask learners. OpenAI blog Vol. 1 8 (2019) 9."},{"key":"e_1_3_2_1_34_1","volume-title":"A constructive prediction of the generalization error across scales. arXiv preprint arXiv:1909.12673","author":"Rosenfeld Jonathan S","year":"2019","unstructured":"Jonathan S Rosenfeld, Amir Rosenfeld, Yonatan Belinkov, and Nir Shavit. 2019. A constructive prediction of the generalization error across scales. arXiv preprint arXiv:1909.12673 (2019)."},{"key":"e_1_3_2_1_35_1","first-page":"19523","article-title":"Beyond neural scaling laws: beating power law scaling via data pruning","volume":"35","author":"Sorscher Ben","year":"2022","unstructured":"Ben Sorscher, Robert Geirhos, Shashank Shekhar, Surya Ganguli, and Ari Morcos. 2022. Beyond neural scaling laws: beating power law scaling via data pruning. Advances in Neural Information Processing Systems, Vol. 35 (2022), 19523-19536.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.97"},{"key":"e_1_3_2_1_37_1","volume-title":"International conference on machine learning. PMLR, 6105-6114","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. In International conference on machine learning. PMLR, 6105-6114."},{"key":"e_1_3_2_1_38_1","volume-title":"William Fedus, Jinfeng Rao, Sharan Narang, Vinh Q Tran, Dani Yogatama, and Donald Metzler.","author":"Tay Yi","year":"2022","unstructured":"Yi Tay, Mostafa Dehghani, Samira Abnar, Hyung Won Chung, William Fedus, Jinfeng Rao, Sharan Narang, Vinh Q Tran, Dani Yogatama, and Donald Metzler. 2022. Scaling laws vs model architectures: How does inductive bias influence scaling? arXiv preprint arXiv:2207.10551 (2022)."},{"key":"e_1_3_2_1_39_1","volume-title":"Sharan Narang, Dani Yogatama, Ashish Vaswani, and Donald Metzler.","author":"Tay Yi","year":"2021","unstructured":"Yi Tay, Mostafa Dehghani, Jinfeng Rao, William Fedus, Samira Abnar, Hyung Won Chung, Sharan Narang, Dani Yogatama, Ashish Vaswani, and Donald Metzler. 2021. Scale efficiently: Insights from pre-training and fine-tuning transformers. arXiv preprint arXiv:2109.10686 (2021)."},{"key":"e_1_3_2_1_40_1","volume-title":"International conference on machine learning. PMLR, 10347-10357","author":"Touvron Hugo","year":"2021","unstructured":"Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, and Herv\u00e9 J\u00e9gou. 2021. Training data-efficient image transformers & distillation through attention. In International conference on machine learning. PMLR, 10347-10357."},{"key":"e_1_3_2_1_41_1","first-page":"46830","article-title":"Image captioners are scalable vision learners too","volume":"36","author":"Tschannen Michael","year":"2023","unstructured":"Michael Tschannen, Manoj Kumar, Andreas Steiner, Xiaohua Zhai, Neil Houlsby, and Lucas Beyer. 2023. Image captioners are scalable vision learners too. Advances in Neural Information Processing Systems, Vol. 36 (2023), 46830-46855.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_42_1","volume-title":"International Conference on Machine Learning. PMLR, 36193-36204","author":"Wang Peihao","year":"2023","unstructured":"Peihao Wang, Rameswar Panda, and Zhangyang Wang. 2023. Data efficient neural scaling law via model reusing. In International Conference on Machine Learning. PMLR, 36193-36204."},{"key":"e_1_3_2_1_43_1","volume-title":"Billion-scale semi-supervised learning for image classification. arXiv preprint arXiv:1905.00546","author":"Yalniz I Zeki","year":"2019","unstructured":"I Zeki Yalniz, Herv\u00e9 J\u00e9gou, Kan Chen, Manohar Paluri, and Dhruv Mahajan. 2019. Billion-scale semi-supervised learning for image classification. arXiv preprint arXiv:1905.00546 (2019)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01179"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754916","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:18:13Z","timestamp":1765340293000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754916"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":44,"alternative-id":["10.1145\/3746027.3754916","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754916","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}