{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T14:49:25Z","timestamp":1743086965035,"version":"3.40.3"},"publisher-location":"Cham","reference-count":45,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031730092"},{"type":"electronic","value":"9783031730108"}],"license":[{"start":{"date-parts":[[2024,11,10]],"date-time":"2024-11-10T00:00:00Z","timestamp":1731196800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,10]],"date-time":"2024-11-10T00:00:00Z","timestamp":1731196800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73010-8_2","type":"book-chapter","created":{"date-parts":[[2024,11,9]],"date-time":"2024-11-09T13:12:00Z","timestamp":1731157920000},"page":"18-34","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Efficient Training with\u00a0Denoised Neural Weights"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3912-097X","authenticated-orcid":false,"given":"Yifan","family":"Gong","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3882-5484","authenticated-orcid":false,"given":"Zheng","family":"Zhan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1240-4785","authenticated-orcid":false,"given":"Yanyu","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0179-467X","authenticated-orcid":false,"given":"Yerlan","family":"Idelbayev","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9662-4552","authenticated-orcid":false,"given":"Andrey","family":"Zharkov","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4958-601X","authenticated-orcid":false,"given":"Kfir","family":"Aberman","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3465-1592","authenticated-orcid":false,"given":"Sergey","family":"Tulyakov","sequence":"additional","affiliation":[]},{"given":"Yanzhi","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0511-7473","authenticated-orcid":false,"given":"Jian","family":"Ren","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,10]]},"reference":[{"key":"2_CR1","unstructured":"Bachlechner, T., Majumder, B.P., Mao, H., Cottrell, G., McAuley, J.: Rezero is all you need: Fast convergence at large depth. In: Uncertainty in Artificial Intelligence, pp. 1352\u20131361. PMLR (2021)"},{"key":"2_CR2","unstructured":"Bellec, G., Kappel, D., Maass, W., Legenstein, R.: Deep rewiring: training very sparse deep networks. arXiv preprint arXiv:1711.05136 (2017)"},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Brooks, T., Holynski, A., Efros, A.A.: InstructPix2Pix: learning to follow image editing instructions. arXiv preprint arXiv:2211.09800 (2022)","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"2_CR4","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR5","unstructured":"Chiang, W.L., et\u00a0al.: Vicuna: an open-source chatbot impressing GPT-4 with 90%* ChatGPT quality (2023). https:\/\/vicuna.lmsys.org. Accessed 14 Apr 2023"},{"key":"2_CR6","unstructured":"Cordonnier, J.B., Loukas, A., Jaggi, M.: On the relationship between self-attention and convolutional layers. arXiv preprint arXiv:1911.03584 (2019)"},{"key":"2_CR7","first-page":"19964","volume":"33","author":"S De","year":"2020","unstructured":"De, S., Smith, S.: Batch normalization biases residual blocks towards the identity function in deep networks. Adv. Neural. Inf. Process. Syst. 33, 19964\u201319975 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR8","unstructured":"Dettmers, T., Pagnoni, A., Holtzman, A., Zettlemoyer, L.: QLoRA: efficient finetuning of quantized LLMs. Adv. Neural. Inf. Process. Syst. 36 (2024)"},{"key":"2_CR9","unstructured":"Dettmers, T., Zettlemoyer, L.: Sparse networks from scratch: faster training without losing performance. arXiv preprint arXiv:1907.04840 (2019)"},{"key":"2_CR10","doi-asserted-by":"crossref","unstructured":"d\u2019Ascoli, S., Touvron, H., Leavitt, M.L., Morcos, A.S., Biroli, G., Sagun, L.: ConViT: improving vision transformers with soft convolutional inductive biases. In: International Conference on Machine Learning, pp. 2286\u20132296. PMLR (2021)","DOI":"10.1088\/1742-5468\/ac9830"},{"key":"2_CR11","doi-asserted-by":"crossref","unstructured":"Erko\u00e7, Z., Ma, F., Shan, Q., Nie\u00dfner, M., Dai, A.: HyperDiffusion: generating implicit neural fields with weight-space diffusion. arXiv preprint arXiv:2303.17015 (2023)","DOI":"10.1109\/ICCV51070.2023.01315"},{"key":"2_CR12","unstructured":"Evci, U., Gale, T., Menick, J., Castro, P.S., Elsen, E.: Rigging the lottery: making all tickets winners. In: International Conference on Machine Learning, pp. 2943\u20132952. PMLR (2020)"},{"key":"2_CR13","doi-asserted-by":"crossref","unstructured":"Geng, Z., et\u00a0al.: InstructDiffusion: a generalist modeling interface for vision tasks. arXiv preprint arXiv:2309.03895 (2023)","DOI":"10.1109\/CVPR52733.2024.01208"},{"key":"2_CR14","unstructured":"Gong, Y., et\u00a0al.: E2GAN: efficient training of efficient GANs for image-to-image translation. arXiv preprint arXiv:2401.06127 (2024)"},{"issue":"11","key":"2_CR15","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., et al.: Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020)","journal-title":"Commun. ACM"},{"key":"2_CR16","unstructured":"Hu, E.J., et al.: LoRA: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"2_CR17","unstructured":"Huang, X.S., Perez, F., Ba, J., Volkovs, M.: Improving transformer optimization through better initialization. In: International Conference on Machine Learning, pp. 4475\u20134483. PMLR (2020)"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Isola, P., Zhu, J.Y., Zhou, T., Efros, A.A.: Image-to-image translation with conditional adversarial networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1125\u20131134 (2017)","DOI":"10.1109\/CVPR.2017.632"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aila, T.: A style-based generator architecture for generative adversarial networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4401\u20134410 (2019)","DOI":"10.1109\/CVPR.2019.00453"},{"key":"2_CR20","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"2_CR21","doi-asserted-by":"crossref","unstructured":"Kong, Z., et\u00a0al.: Peeling the onion: hierarchical reduction of data redundancy for efficient vision transformer training. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 8360\u20138368 (2023)","DOI":"10.1609\/aaai.v37i7.26008"},{"key":"2_CR22","unstructured":"Lee, N., Ajanthan, T., Torr, P.H.: SNIP: single-shot network pruning based on connection sensitivity. arXiv preprint arXiv:1810.02340 (2018)"},{"key":"2_CR23","doi-asserted-by":"crossref","unstructured":"Mokady, R., Hertz, A., Aberman, K., Pritch, Y., Cohen-Or, D.: Null-text inversion for editing real images using guided diffusion models. arXiv preprint arXiv:2211.09794 (2022)","DOI":"10.1109\/CVPR52729.2023.00585"},{"key":"2_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1007\/978-3-030-58545-7_19","volume-title":"Computer Vision \u2013 ECCV 2020","author":"T Park","year":"2020","unstructured":"Park, T., Efros, A.A., Zhang, R., Zhu, J.-Y.: Contrastive learning for unpaired image-to-image translation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020, Part IX. LNCS, vol. 12354, pp. 319\u2013345. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58545-7_19"},{"key":"2_CR25","doi-asserted-by":"crossref","unstructured":"Parmar, G., Kumar\u00a0Singh, K., Zhang, R., Li, Y., Lu, J., Zhu, J.Y.: Zero-shot image-to-image translation. In: ACM SIGGRAPH 2023 Conference Proceedings, pp. 1\u201311 (2023)","DOI":"10.1145\/3588432.3591513"},{"key":"2_CR26","doi-asserted-by":"crossref","unstructured":"Parmar, G., Zhang, R., Zhu, J.Y.: On aliased resizing and surprising subtleties in GAN evaluation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11410\u201311420 (2022)","DOI":"10.1109\/CVPR52688.2022.01112"},{"key":"2_CR27","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"2_CR29","doi-asserted-by":"crossref","unstructured":"Ruiz, N., et al.: HyperDreamBooth: hypernetworks for fast personalization of text-to-image models. arXiv preprint arXiv:2307.06949 (2023)","DOI":"10.1109\/CVPR52733.2024.00624"},{"key":"2_CR30","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: International Conference on Machine Learning, pp. 2256\u20132265. PMLR (2015)"},{"key":"2_CR31","unstructured":"Song, Y., Ermon, S.: Generative modeling by estimating gradients of the data distribution. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"2_CR32","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)"},{"key":"2_CR33","first-page":"1796","volume":"33","author":"X Sun","year":"2020","unstructured":"Sun, X., et al.: Ultra-low precision 4-bit training of deep neural networks. Adv. Neural. Inf. Process. Syst. 33, 1796\u20131807 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR34","first-page":"6377","volume":"33","author":"H Tanaka","year":"2020","unstructured":"Tanaka, H., Kunin, D., Yamins, D.L., Ganguli, S.: Pruning neural networks without any data by iteratively conserving synaptic flow. Adv. Neural. Inf. Process. Syst. 33, 6377\u20136389 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR35","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural. Inf. Process. Syst. 30 (2017)"},{"key":"2_CR36","unstructured":"Venkataramani, S., et\u00a0al.: RaPiD: AI accelerator for ultra-low precision training and inference. In: 2021 ACM\/IEEE 48th Annual International Symposium on Computer Architecture (ISCA), pp. 153\u2013166. IEEE (2021)"},{"key":"2_CR37","unstructured":"Wang, C., Zhang, G., Grosse, R.: Picking winning tickets before training by preserving gradient flow. arXiv preprint arXiv:2002.07376 (2020)"},{"key":"2_CR38","unstructured":"Wang, K., et al.: Neural network diffusion (2024)"},{"key":"2_CR39","first-page":"20366","volume":"35","author":"Z Wang","year":"2022","unstructured":"Wang, Z., et al.: SparCL: sparse continual learning on the edge. Adv. Neural. Inf. Process. Syst. 35, 20366\u201320380 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR40","unstructured":"Wortsman, M., Dettmers, T., Zettlemoyer, L., Morcos, A., Farhadi, A., Schmidt, L.: Stable and low-precision training for large-scale vision-language models. Adv. Neural. Inf. Process. Syst. 36 (2024)"},{"key":"2_CR41","first-page":"20838","volume":"34","author":"G Yuan","year":"2021","unstructured":"Yuan, G., et al.: MEST: Accurate and fast memory-economic sparse training framework on the edge. Adv. Neural. Inf. Process. Syst. 34, 20838\u201320850 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR42","unstructured":"Zhang, H., Dauphin, Y.N., Ma, T.: Fixup initialization: residual learning without normalization. arXiv preprint arXiv:1901.09321 (2019)"},{"key":"2_CR43","doi-asserted-by":"crossref","unstructured":"Zhang, L., Agrawala, M.: Adding conditional control to text-to-image diffusion models (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"2_CR44","unstructured":"Zhao, S., et al.: Large scale image completion via co-modulated generative adversarial networks. arXiv preprint arXiv:2103.10428 (2021)"},{"key":"2_CR45","doi-asserted-by":"crossref","unstructured":"Zhu, J.Y., Park, T., Isola, P., Efros, A.A.: Unpaired image-to-image translation using cycle-consistent adversarial networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2223\u20132232 (2017)","DOI":"10.1109\/ICCV.2017.244"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73010-8_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,9]],"date-time":"2024-11-09T14:02:18Z","timestamp":1731160938000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73010-8_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,10]]},"ISBN":["9783031730092","9783031730108"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73010-8_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,10]]},"assertion":[{"value":"10 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}