{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:38:12Z","timestamp":1742913492201,"version":"3.40.3"},"publisher-location":"Cham","reference-count":63,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031736674"},{"type":"electronic","value":"9783031736681"}],"license":[{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73668-1_9","type":"book-chapter","created":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T02:03:09Z","timestamp":1733018589000},"page":"144-160","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["An Optimal Control View of\u00a0LoRA and\u00a0Binary Controller Design for\u00a0Vision Transformers"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5735-4454","authenticated-orcid":false,"given":"Chi","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4164-9474","authenticated-orcid":false,"given":"Jingpu","family":"Cheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3903-3737","authenticated-orcid":false,"given":"Qianxiao","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,1]]},"reference":[{"key":"9_CR1","doi-asserted-by":"crossref","unstructured":"Aghajanyan, A., Zettlemoyer, L., Gupta, S.: Intrinsic dimensionality explains the effectiveness of language model fine-tuning. arXiv preprint arXiv:2012.13255 (2020)","DOI":"10.18653\/v1\/2021.acl-long.568"},{"issue":"2","key":"9_CR2","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1137\/1022026","volume":"22","author":"Z Artstein","year":"1980","unstructured":"Artstein, Z.: Discrete and continuous bang-bang and facial spaces or: look for the extreme points. SIAM Rev. 22(2), 172\u2013185 (1980)","journal-title":"SIAM Rev."},{"key":"9_CR3","unstructured":"Ba, J.L., Kiros, J.R., Hinton, G.E.: Layer normalization. arXiv preprint arXiv:1607.06450 (2016)"},{"key":"9_CR4","doi-asserted-by":"crossref","unstructured":"Bellman, R., Glicksberg, I., Gross, O.: On the \u201cbang-bang\u201d control problem. Q. Appl. Math. 14(1), 11\u201318 (1956)","DOI":"10.1090\/qam\/78516"},{"key":"9_CR5","unstructured":"Benning, M., Celledoni, E., Ehrhardt, M.J., Owren, B., Sch\u00f6nlieb, C.B.: Deep learning as optimal control problems: models and numerical methods. arXiv preprint arXiv:1904.05657 (2019)"},{"key":"9_CR6","doi-asserted-by":"crossref","unstructured":"Bishop, R.C.D.R.H.: Modern control systems (2011)","DOI":"10.1016\/B978-0-08-096634-2.00034-7"},{"key":"9_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"446","DOI":"10.1007\/978-3-319-10599-4_29","volume-title":"Computer Vision \u2013 ECCV 2014","author":"L Bossard","year":"2014","unstructured":"Bossard, L., Guillaumin, M., Van Gool, L.: Food-101 \u2013 mining discriminative components with random forests. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8694, pp. 446\u2013461. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10599-4_29"},{"key":"9_CR8","unstructured":"Brock, A., De, S., Smith, S.L., Simonyan, K.: High-performance large-scale image recognition without normalization. In: International Conference on Machine Learning, pp. 1059\u20131071. PMLR (2021)"},{"key":"9_CR9","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown, T., et al.: Language models are few-shot learners. Adv. Neural. Inf. Process. Syst. 33, 1877\u20131901 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"9_CR10","doi-asserted-by":"crossref","unstructured":"Chang, B., Meng, L., Haber, E., Ruthotto, L., Begert, D., Holtham, E.: Reversible architectures for arbitrarily deep residual neural networks. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.11668"},{"key":"9_CR11","first-page":"16664","volume":"35","author":"S Chen","year":"2022","unstructured":"Chen, S., et al.: Adaptformer: adapting vision transformers for scalable visual recognition. Adv. Neural. Inf. Process. Syst. 35, 16664\u201316678 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"9_CR12","doi-asserted-by":"crossref","unstructured":"Chen, T., Zhang, Z., Ouyang, X., Liu, Z., Shen, Z., Wang, Z.: \u201cbnn-bn=?\u201d: training binary neural networks without batch normalization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4619\u20134629 (2021)","DOI":"10.1109\/CVPRW53098.2021.00520"},{"issue":"2","key":"9_CR13","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1002\/oca.4660030201","volume":"3","author":"FL Chernousko","year":"1982","unstructured":"Chernousko, F.L., Lyubushin, A.: Method of successive approximations for solution of optimal control problems. Optimal Control Appl. Methods 3(2), 101\u2013114 (1982)","journal-title":"Optimal Control Appl. Methods"},{"key":"9_CR14","unstructured":"Courbariaux, M., Bengio, Y., David, J.P.: Binaryconnect: training deep neural networks with binary weights during propagations. In: Advances in Neural Information Processing Systems, vol. 28 (2015)"},{"key":"9_CR15","unstructured":"Craig, J.J.: Introduction to Robotics. Pearson Education (2006)"},{"key":"9_CR16","unstructured":"Dettmers, T., Pagnoni, A., Holtzman, A., Zettlemoyer, L.: Qlora: efficient finetuning of quantized LLMs. arXiv preprint arXiv:2305.14314 (2023)"},{"key":"9_CR17","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"9_CR18","unstructured":"Franklin, G.F., Powell, J.D., Emami-Naeini, A., Powell, J.D.: Feedback Control of Dynamic Systems, vol.\u00a04. Prentice Hall, Upper Saddle River (2002)"},{"key":"9_CR19","unstructured":"Gelfand, I.M., Silverman, R.A., et\u00a0al.: Calculus of variations. Courier (2000)"},{"issue":"1","key":"9_CR20","doi-asserted-by":"publisher","DOI":"10.1088\/1361-6420\/aa9a90","volume":"34","author":"E Haber","year":"2017","unstructured":"Haber, E., Ruthotto, L.: Stable architectures for deep neural networks. Inverse Prob. 34(1), 014004 (2017)","journal-title":"Inverse Prob."},{"key":"9_CR21","unstructured":"Houlsby, N., et al.: Parameter-efficient transfer learning for NLP. In: International Conference on Machine Learning, pp. 2790\u20132799. PMLR (2019)"},{"key":"9_CR22","unstructured":"Hu, E.J., et al.: Lora: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"9_CR23","unstructured":"Hubara, I., Courbariaux, M., Soudry, D., El-Yaniv, R., Bengio, Y.: Binarized neural networks. In: Advances in Neural Information Processing Systems, vol. 29 (2016)"},{"key":"9_CR24","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. In: International Conference on Machine Learning, pp. 448\u2013456. PMLR (2015)"},{"key":"9_CR25","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"709","DOI":"10.1007\/978-3-031-19827-4_41","volume-title":"European Conference on Computer Vision","author":"M Jia","year":"2022","unstructured":"Jia, M., et al.: Visual prompt tuning. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13693, pp. 709\u2013727. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19827-4_41"},{"key":"9_CR26","first-page":"1022","volume":"34","author":"R Karimi Mahabadi","year":"2021","unstructured":"Karimi Mahabadi, R., Henderson, J., Ruder, S.: Compacter: efficient low-rank hypercomplex adapter layers. Adv. Neural. Inf. Process. Syst. 34, 1022\u20131035 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"9_CR27","doi-asserted-by":"crossref","unstructured":"Kerimkulov, B., \u0160i\u0161ka, D., Szpruch, L.: A modified MSA for stochastic control problems. Appl. Math. Optim. 1\u201320 (2021)","DOI":"10.1007\/s00245-021-09750-2"},{"key":"9_CR28","unstructured":"Kirk, D.E.: Optimal control theory: an introduction. Courier Corporation (2004)"},{"key":"9_CR29","unstructured":"Kone\u010dn\u1ef3, J., McMahan, B., Ramage, D.: Federated optimization: distributed optimization beyond the datacenter. arXiv preprint arXiv:1511.03575 (2015)"},{"key":"9_CR30","unstructured":"Krizhevsky, A., Hinton, G., et\u00a0al.: Learning multiple layers of features from tiny images (2009)"},{"key":"9_CR31","unstructured":"Kwakernaak, H., Sivan, R.: Linear Optimal Control Systems, vol.\u00a01. Wiley-Interscience, New York (1972)"},{"key":"9_CR32","doi-asserted-by":"crossref","unstructured":"Lester, B., Al-Rfou, R., Constant, N.: The power of scale for parameter-efficient prompt tuning. arXiv preprint arXiv:2104.08691 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"9_CR33","unstructured":"Li, F., Liu, B., Wang, X., Zhang, B., Yan, J.: Ternary weight networks. arXiv preprint arXiv:1605.04711 (2016)"},{"issue":"165","key":"9_CR34","first-page":"1","volume":"18","author":"Q Li","year":"2018","unstructured":"Li, Q., Chen, L., Tai, C., Weinan, E.: Maximum principle based algorithms for deep learning. J. Mach. Learn. Res. 18(165), 1\u201329 (2018)","journal-title":"J. Mach. Learn. Res."},{"key":"9_CR35","unstructured":"Li, Q., Hao, S.: An optimal control approach to deep learning and applications to discrete-weight neural networks. In: International Conference on Machine Learning, pp. 2985\u20132994. PMLR (2018)"},{"key":"9_CR36","doi-asserted-by":"crossref","unstructured":"Li, X.L., Liang, P.: Prefix-tuning: optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190 (2021)","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"9_CR37","doi-asserted-by":"crossref","unstructured":"Liberzon, D.: Calculus of Variations and Optimal Control Theory: A Concise Introduction. Princeton University Press, Princeton (2011)","DOI":"10.2307\/j.ctvcm4g0s"},{"key":"9_CR38","first-page":"7474","volume":"33","author":"M Lin","year":"2020","unstructured":"Lin, M., et al.: Rotated binary neural network. Adv. Neural. Inf. Process. Syst. 33, 7474\u20137485 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"9_CR39","doi-asserted-by":"crossref","unstructured":"Lin, Z., Madotto, A., Fung, P.: Exploring versatile generative language model via parameter-efficient transfer learning. arXiv preprint arXiv:2004.03829 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.41"},{"key":"9_CR40","unstructured":"Luo, G., et al.: Towards efficient visual adaption via structural re-parameterization. arXiv preprint arXiv:2302.08106 (2023)"},{"issue":"1","key":"9_CR41","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1016\/0041-5553(82)90160-4","volume":"22","author":"A Lyubushin","year":"1982","unstructured":"Lyubushin, A.: Modifications of the method of successive approximations for solving optimal control problems. USSR Comput. Math. Math. Phys. 22(1), 29\u201334 (1982)","journal-title":"USSR Comput. Math. Math. Phys."},{"key":"9_CR42","unstructured":"McMahan, B., Moore, E., Ramage, D., Hampson, S., y\u00a0Arcas, B.A.: Communication-efficient learning of deep networks from decentralized data. In: Artificial Intelligence and Statistics, pp. 1273\u20131282. PMLR (2017)"},{"key":"9_CR43","unstructured":"Netzer, Y., Wang, T., Coates, A., Bissacco, A., Wu, B., Ng, A.Y.: Reading digits in natural images with unsupervised feature learning (2011)"},{"key":"9_CR44","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-56393-0","volume-title":"Model-Reference Adaptive Control","author":"NT Nguyen","year":"2018","unstructured":"Nguyen, N.T.: Model-Reference Adaptive Control. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-56393-0"},{"key":"9_CR45","doi-asserted-by":"crossref","unstructured":"Pontryagin, L.S.: Mathematical Theory of Optimal Processes. Routledge, Milton Park (2018)","DOI":"10.1201\/9780203749319"},{"key":"9_CR46","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I., et\u00a0al.: Improving language understanding by generative pre-training (2018)"},{"issue":"8","key":"9_CR47","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., et al.: Language models are unsupervised multitask learners. OpenAI Blog 1(8), 9 (2019)","journal-title":"OpenAI Blog"},{"key":"9_CR48","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"525","DOI":"10.1007\/978-3-319-46493-0_32","volume-title":"Computer Vision \u2013 ECCV 2016","author":"M Rastegari","year":"2016","unstructured":"Rastegari, M., Ordonez, V., Redmon, J., Farhadi, A.: XNOR-Net: ImageNet classification using binary convolutional neural networks. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 525\u2013542. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_32"},{"key":"9_CR49","unstructured":"Rebuffi, S.A., Bilen, H., Vedaldi, A.: Learning multiple visual domains with residual adapters. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"9_CR50","doi-asserted-by":"crossref","unstructured":"Sohn, K., et al.: Visual prompt tuning for generative transfer learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19840\u201319851 (2023)","DOI":"10.1109\/CVPR52729.2023.01900"},{"issue":"2","key":"9_CR51","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1137\/0302013","volume":"2","author":"L Sonneborn","year":"1964","unstructured":"Sonneborn, L., Van Vleck, F.: The bang-bang principle for linear control systems. J. Soc. Ind. Appl. Math. Ser. A Control 2(2), 151\u2013159 (1964)","journal-title":"J. Soc. Ind. Appl. Math. Ser. A Control"},{"issue":"3","key":"9_CR52","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1109\/37.588098","volume":"17","author":"HJ Sussmann","year":"1997","unstructured":"Sussmann, H.J., Willems, J.C.: 300 years of optimal control: from the brachystochrone to the maximum principle. IEEE Control Syst. Mag. 17(3), 32\u201344 (1997)","journal-title":"IEEE Control Syst. Mag."},{"key":"9_CR53","unstructured":"Touvron, H., Vedaldi, A., Douze, M., J\u00e9gou, H.: Fixing the train-test resolution discrepancy. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"issue":"5","key":"9_CR54","first-page":"1","volume":"1","author":"E Weinan","year":"2017","unstructured":"Weinan, E.: A proposal on machine learning via dynamical systems. Commun. Math. Stat. 1(5), 1\u201311 (2017)","journal-title":"Commun. Math. Stat."},{"key":"9_CR55","unstructured":"Xu, Y., et al.: QA-LORA: quantization-aware low-rank adaptation of large language models. arXiv preprint arXiv:2309.14717 (2023)"},{"key":"9_CR56","unstructured":"Zeng, Y., Lee, K.: The expressive power of low-rank adaptation. arXiv preprint arXiv:2310.17513 (2023)"},{"key":"9_CR57","unstructured":"Zhang, C., Ekanut, S., Zhen, L., Li, Z.: Augmented multi-party computation against gradient leakage in federated learning. IEEE Trans. Big Data (2022)"},{"key":"9_CR58","unstructured":"Zhang, C., Jingpu, C., Xu, Y., Li, Q.: Parameter-efficient fine-tuning with controls. In: Forty-First International Conference on Machine Learning (2024)"},{"key":"9_CR59","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2021.103575","volume":"301","author":"C Zhang","year":"2021","unstructured":"Zhang, C., Li, Q.: Distributed optimization for degenerate loss functions arising from over-parameterization. Artif. Intell. 301, 103575 (2021)","journal-title":"Artif. Intell."},{"key":"9_CR60","doi-asserted-by":"crossref","unstructured":"Zhang, C., et al.: Generative gradient inversion via over-parameterized networks in federated learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5126\u20135135 (2023)","DOI":"10.1109\/ICCV51070.2023.00473"},{"key":"9_CR61","unstructured":"Zhang, D., Zhang, T., Lu, Y., Zhu, Z., Dong, B.: You only propagate once: accelerating adversarial training via maximal principle. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"9_CR62","unstructured":"Zhou, S., Wu, Y., Ni, Z., Zhou, X., Wen, H., Zou, Y.: Dorefa-net: training low bitwidth convolutional neural networks with low bitwidth gradients. arXiv preprint arXiv:1606.06160 (2016)"},{"key":"9_CR63","unstructured":"Zhu, C., Han, S., Mao, H., Dally, W.J.: Trained ternary quantization. arXiv preprint arXiv:1612.01064 (2016)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73668-1_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T02:07:28Z","timestamp":1733018848000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73668-1_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,1]]},"ISBN":["9783031736674","9783031736681"],"references-count":63,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73668-1_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,1]]},"assertion":[{"value":"1 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}