{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,3]],"date-time":"2026-07-03T16:59:32Z","timestamp":1783097972201,"version":"3.54.6"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030676605","type":"print"},{"value":"9783030676612","type":"electronic"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-67661-2_1","type":"book-chapter","created":{"date-parts":[[2021,2,24]],"date-time":"2021-02-24T07:10:26Z","timestamp":1614150626000},"page":"3-18","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["ADMMiRNN: Training RNN with Stable Convergence via an Efficient ADMM Approach"],"prefix":"10.1007","author":[{"given":"Yu","family":"Tang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhigang","family":"Kan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dequan","family":"Sun","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Linbo","family":"Qiao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jingjing","family":"Xiao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhiquan","family":"Lai","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dongsheng","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2021,2,25]]},"reference":[{"issue":"2","key":"1_CR1","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1109\/72.279181","volume":"5","author":"Y Bengio","year":"1994","unstructured":"Bengio, Y., Simard, P., Frasconi, P., et al.: Learning long-term dependencies with gradient descent is difficult. IEEE Trans. Neural Netw. 5(2), 157\u2013166 (1994)","journal-title":"IEEE Trans. Neural Netw."},{"key":"1_CR2","unstructured":"Boyd, S., Parikh, N., Chu, E., Peleato, B., Eckstein, J., et al.: Distributed optimization and statistical learning via the alternating direction method of multipliers. Found. Trends\u00ae Mach. Learn. 3(1), 1\u2013122 (2011)"},{"key":"1_CR3","unstructured":"Duchi, J., Hazan, E., Singer, Y.: Adaptive subgradient methods for online learning and stochastic optimization. J. Mach. Learn. Res. 12(Jul), 2121\u20132159 (2011)"},{"issue":"2","key":"1_CR4","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1207\/s15516709cog1402_1","volume":"14","author":"JL Elman","year":"1990","unstructured":"Elman, J.L.: Finding structure in time. Cogn. Sci. 14(2), 179\u2013211 (1990)","journal-title":"Cogn. Sci."},{"key":"1_CR5","doi-asserted-by":"crossref","unstructured":"Gabay, D.: Augmented Lagrangian methods: applications to the solution of boundary-value problems, chapter applications of the method of multipliers to variational inequalities, vol. 3, p. 4. North-Holland, Amsterdam (1983)","DOI":"10.1016\/S0168-2024(08)70034-1"},{"issue":"1","key":"1_CR6","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1016\/0898-1221(76)90003-1","volume":"2","author":"D Gabay","year":"1976","unstructured":"Gabay, D., Mercier, B.: A dual algorithm for the solution of nonlinear variational problems via finite element approximation. Comput. Math. Appl. 2(1), 17\u201340 (1976)","journal-title":"Comput. Math. Appl."},{"key":"1_CR7","doi-asserted-by":"crossref","unstructured":"Glowinski, R., Le Tallec, P.: Augmented Lagrangian and operator-splitting methods in nonlinear mechanics, vol. 9. SIAM (1989)","DOI":"10.1137\/1.9781611970838"},{"issue":"1","key":"1_CR8","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1137\/130905010","volume":"35","author":"D Goldfarb","year":"2014","unstructured":"Goldfarb, D., Qin, Z.: Robust low-rank tensor recovery: models and algorithms. SIAM J. Matrix Anal. Appl. 35(1), 225\u2013253 (2014)","journal-title":"SIAM J. Matrix Anal. Appl."},{"key":"1_CR9","unstructured":"Goodfellow, I., Bengio, Y., Courville, A.: Deep Learning. MIT Press (2016)"},{"key":"1_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"549","DOI":"10.1007\/978-3-540-74690-4_56","volume-title":"Artificial Neural Networks \u2013 ICANN 2007","author":"A Graves","year":"2007","unstructured":"Graves, A., Fern\u00e1ndez, S., Schmidhuber, J.: Multi-dimensional recurrent neural networks. In: de S\u00e1, J.M., Alexandre, L.A., Duch, W., Mandic, D. (eds.) ICANN 2007. LNCS, vol. 4668, pp. 549\u2013558. Springer, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-74690-4_56"},{"issue":"8","key":"1_CR11","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"1_CR12","unstructured":"Kingma, D., Ba, J.: Adam: a method for stochastic optimization. Comput. Sci. (2014)"},{"key":"1_CR13","doi-asserted-by":"crossref","unstructured":"Lai, S., Xu, L., Liu, K., Zhao, J.: Recurrent convolutional neural networks for text classification. In: AAAI, vol. 333, pp. 2267\u20132273 (2015)","DOI":"10.1609\/aaai.v29i1.9513"},{"issue":"7553","key":"1_CR14","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun, Y., Bengio, Y., Hinton, G.: Deep learning. Nature 521(7553), 436\u2013444 (2015)","journal-title":"Nature"},{"issue":"11","key":"1_CR15","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., Haffner, P., et al.: Gradient-based learning applied to document recognition. Proc. IEEE 86(11), 2278\u20132324 (1998)","journal-title":"Proc. IEEE"},{"key":"1_CR16","unstructured":"Monteiro, R.D., Svaiter, B.F.: Iteration-complexity of block-decomposition algorithms and the alternating minimization augmented Lagrangian method. Manuscript, School of Industrial and Systems Engineering, Georgia Institute of Technology, Atlanta, GA, pp. 30332\u20130205 (2010)"},{"key":"1_CR17","doi-asserted-by":"crossref","unstructured":"Nguyen, T.H., Cho, K., Grishman, R.: Joint event extraction via recurrent neural networks. In: Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 300\u2013309 (2016)","DOI":"10.18653\/v1\/N16-1034"},{"key":"1_CR18","unstructured":"Pascanu, R., Mikolov, T., Bengio, Y.: On the difficulty of training recurrent neural networks. In: International Conference on Machine Learning, pp. 1310\u20131318 (2013)"},{"issue":"1","key":"1_CR19","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1016\/S0893-6080(98)00116-6","volume":"12","author":"N Qian","year":"1999","unstructured":"Qian, N.: On the momentum term in gradient descent learning algorithms. Neural Netw. 12(1), 145\u2013151 (1999)","journal-title":"Neural Netw."},{"key":"1_CR20","doi-asserted-by":"crossref","unstructured":"Robbins, H., Monro, S.: A stochastic approximation method. Ann. Math. Stat. 400\u2013407 (1951)","DOI":"10.1214\/aoms\/1177729586"},{"issue":"5","key":"1_CR21","doi-asserted-by":"publisher","first-page":"877","DOI":"10.1137\/0314056","volume":"14","author":"RT Rockafellar","year":"1976","unstructured":"Rockafellar, R.T.: Monotone operators and the proximal point algorithm. SIAM J. Control Optim. 14(5), 877\u2013898 (1976)","journal-title":"SIAM J. Control Optim."},{"issue":"20","key":"1_CR22","doi-asserted-by":"publisher","first-page":"5380","DOI":"10.1109\/TSP.2018.2868269","volume":"66","author":"T Sun","year":"2018","unstructured":"Sun, T., Jiang, H., Cheng, L., Zhu, W.: Iteratively linearized reweighted alternating direction method of multipliers for a class of nonconvex problems. IEEE Trans. Signal Process. 66(20), 5380\u20135391 (2018)","journal-title":"IEEE Trans. Signal Process."},{"key":"1_CR23","unstructured":"Sutskever, I., Martens, J., Dahl, G., Hinton, G.: On the importance of initialization and momentum in deep learning. In: International Conference Machine Learning, pp. 1139\u20131147 (2013)"},{"key":"1_CR24","unstructured":"Taylor, G., Burmeister, R., Xu, Z., Singh, B., Patel, A., Goldstein, T.: Training neural networks without gradients: a scalable ADMM approach. In: International Conference on Machine Learning, pp. 2722\u20132731 (2016)"},{"key":"1_CR25","unstructured":"Tieleman, T., Hinton, G.: Lecture 6.5-rmsprop, coursera: neural networks for machine learning. University of Toronto, Technical Report (2012)"},{"key":"1_CR26","doi-asserted-by":"crossref","unstructured":"Wang, J., Yu, F., Chen, X., Zhao, L.: ADMM for efficient deep learning with global convergence. In: Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, pp. 111\u2013119 (2019)","DOI":"10.1145\/3292500.3330936"},{"key":"1_CR27","unstructured":"Wang, J., Zhao, L., Wu, L.: Multi-convex inequality-constrained alternating direction method of multipliers. arXiv preprint arXiv:1902.10882 (2019)"},{"key":"1_CR28","doi-asserted-by":"crossref","unstructured":"Zou, F., Shen, L., Jie, Z., Zhang, W., Liu, W.: A sufficient condition for convergences of ADAM and RMSPROP. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 11127\u201311135 (2019)","DOI":"10.1109\/CVPR.2019.01138"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-67661-2_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,23]],"date-time":"2025-02-23T23:04:18Z","timestamp":1740351858000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-67661-2_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030676605","9783030676612"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-67661-2_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"25 February 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ghent","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Belgium","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecmlpkdd2020.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"945","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"195","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"21% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4,5","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4,4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference took place virtually due to the COVID-19 pandemic","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}