{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:05:01Z","timestamp":1750309501125,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,23]],"date-time":"2024-06-23T00:00:00Z","timestamp":1719100800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,23]]},"DOI":"10.1145\/3649329.3655664","type":"proceedings-article","created":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T19:27:22Z","timestamp":1731007642000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Zeroth-Order Optimization of Optical Neural Networks with Linear Combination Natural Gradient and Calibrated Model"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4831-9286","authenticated-orcid":false,"given":"Hiroshi","family":"Sawada","sequence":"first","affiliation":[{"name":"NTT Corporation, Soraku-gun, Kyoto, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5578-842X","authenticated-orcid":false,"given":"Kazuo","family":"Aoyama","sequence":"additional","affiliation":[{"name":"NTT Corporation, Soraku-gun, Kyoto, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0689-934X","authenticated-orcid":false,"given":"Kohei","family":"Ikeda","sequence":"additional","affiliation":[{"name":"NTT Corporation, Atsugi, Kanagawa, Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,11,7]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1162\/089976698300017746"},{"volume-title":"International Conference on Machine Learning. PMLR, 1120--1128","author":"Arjovsky M.","key":"e_1_3_2_1_2_1","unstructured":"M. Arjovsky, A. Shah, and Y. Bengio. 2016. Unitary evolution recurrent neural networks. In International Conference on Machine Learning. PMLR, 1120--1128."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-022-04714-0"},{"key":"e_1_3_2_1_4_1","first-page":"8982","article-title":"Amortized proximal optimization","volume":"35","author":"Bae J.","year":"2022","unstructured":"J. Bae, P. Vicol, J. Z. HaoChen, and R. B. Grosse. 2022. Amortized proximal optimization. Advances in Neural Information Processing Systems 35 (2022), 8982--8997.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_5_1","unstructured":"S. Bandyopadhyay A. Sludds S. Krastanov R. Hamerly N. Harris D. Bunandar M. Streshinsky M. Hochberg and D. Englund. 2022. Single chip photonic deep neural network with accelerated training. arXiv preprint arXiv:2208.01623 (2022)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/JLT.2022.3193658"},{"volume-title":"Pattern Recognition and Machine Learning","author":"Bishop C. M.","key":"e_1_3_2_1_7_1","unstructured":"C. M. Bishop. 2006. Pattern Recognition and Machine Learning. Springer."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-2764-0"},{"volume-title":"Neural Networks: Tricks of the Trade","author":"Bottou L.","key":"e_1_3_2_1_9_1","unstructured":"L. Bottou. 2012. Stochastic gradient descent tricks. In Neural Networks: Tricks of the Trade: Second Edition. Springer, 421--436."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1364\/OPTICA.3.001460"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1364\/OE.27.014009"},{"key":"e_1_3_2_1_12_1","unstructured":"I. Goodfellow Y. Bengio and A. Courville. 2016. Deep learning. MIT press."},{"key":"e_1_3_2_1_13_1","volume-title":"Proc. AAAI Conf. Artificial Intelligence","volume":"35","author":"Gu J.","unstructured":"J. Gu, C. Feng, Z. Zhao, Z. Ying, R. T. Chen, and D. Z. Pan. 2021. Efficient on-chip learning for optical neural networks through power-aware sparse zeroth-order optimization. In Proc. AAAI Conf. Artificial Intelligence, Vol. 35. 7583--7591."},{"volume-title":"Proc. 57th ACM\/IEEE Design Automation Conference (DAC). 1--6.","author":"Gu J.","key":"e_1_3_2_1_14_1","unstructured":"J. Gu, Z. Zhao, C. Feng, W. Li, R. T. Chen, and D. Z. Pan. 2020. FLOPS: Efficient on-chip learning for optical neural networks through stochastic zeroth-order optimization. In Proc. 57th ACM\/IEEE Design Automation Conference (DAC). 1--6."},{"key":"e_1_3_2_1_15_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma D. P.","year":"2014","unstructured":"D. P. Kingma and J. Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_16_1","unstructured":"F. Kunstner P. Hennig and L. Balles. 2019. Limitations of the empirical Fisher approximation for natural gradient descent. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"e_1_3_2_1_18_1","unstructured":"Y. LeCun and C. Cortes. 2010. MNIST handwritten digit database. http:\/\/yann.lecun.com\/exdb\/mnist"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2020.3003837"},{"volume-title":"Proc. AAAI Conf. Artificial Intelligence. 4528--4535","author":"Maduranga K. D. G.","key":"e_1_3_2_1_20_1","unstructured":"K. D. G. Maduranga, K. E. Helfrich, and Q. Ye. 2019. Complex unitary recurrent neural networks using scaled Cayley transform. In Proc. AAAI Conf. Artificial Intelligence. 4528--4535."},{"volume-title":"International Conference on Machine Learning. PMLR, 4264--4273","author":"Maheswaranathan N.","key":"e_1_3_2_1_21_1","unstructured":"N. Maheswaranathan, L. Metz, G. Tucker, D. Choi, and J. Sohl-Dickstein. 2019. Guided evolutionary strategies: Augmenting random search with surrogate gradients. In International Conference on Machine Learning. PMLR, 4264--4273."},{"volume-title":"Proc. ACM Conference on Foundations of Genetic Algorithms XIII. 150--162","author":"Malag\u00f2 L.","key":"e_1_3_2_1_22_1","unstructured":"L. Malag\u00f2 and G. Pistone. 2015. Information geometry of the Gaussian distribution in view of stochastic optimization. In Proc. ACM Conference on Foundations of Genetic Algorithms XIII. 150--162."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455862"},{"volume-title":"International Conference on Machine Learning. PMLR, 2408--2417","author":"Martens J.","key":"e_1_3_2_1_24_1","unstructured":"J. Martens and R. Grosse. 2015. Optimizing neural networks with Kronecker-factored approximate curvature. In International Conference on Machine Learning. PMLR, 2408--2417."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1364\/OE.22.003145"},{"key":"e_1_3_2_1_26_1","unstructured":"R. Pascanu and Y. Bengio. 2013. Revisiting natural gradient for deep networks. arXiv preprint arXiv:1301.3584 (2013)."},{"key":"e_1_3_2_1_27_1","unstructured":"A. Paszke S. Gross F. Massa A. Lerer J. Bradbury G. Chanan T. Killeen Z. Lin N. Gimelshein L. Antiga et al. 2019. PyTorch: An imperative style high-performance deep learning library. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.73.58"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1038\/nphoton.2017.93"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1021\/acsphotonics.1c00419"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","unstructured":"S. K. Vadlamani D. Englund and R. Hamerly. 2023. Transferable learning on analog hardware. Science Advances 9 28 (2023) eadh3436. 10.1126\/sciadv.adh3436","DOI":"10.1126\/sciadv.adh3436"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTQE.2019.2930455"},{"key":"e_1_3_2_1_33_1","volume-title":"Proc. AAAI Conf. Artificial Intelligence","volume":"34","author":"Zhao P.","unstructured":"P. Zhao, P.-Y. Chen, S. Wang, and X. Lin. 2020. Towards query-efficient black-box adversary with zeroth-order natural gradient descent. In Proc. AAAI Conf. Artificial Intelligence, Vol. 34. 6909--6916."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1021\/acsphotonics.9b01673"}],"event":{"name":"DAC '24: 61st ACM\/IEEE Design Automation Conference","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE-CEDA","SIGBED ACM Special Interest Group on Embedded Systems"],"location":"San Francisco CA USA","acronym":"DAC '24"},"container-title":["Proceedings of the 61st ACM\/IEEE Design Automation Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649329.3655664","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3649329.3655664","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:48Z","timestamp":1750295868000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649329.3655664"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,23]]},"references-count":34,"alternative-id":["10.1145\/3649329.3655664","10.1145\/3649329"],"URL":"https:\/\/doi.org\/10.1145\/3649329.3655664","relation":{},"subject":[],"published":{"date-parts":[[2024,6,23]]},"assertion":[{"value":"2024-11-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}