{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,26]],"date-time":"2025-04-26T04:08:06Z","timestamp":1745640486241,"version":"3.40.4"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031873263","type":"print"},{"value":"9783031873270","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-87327-0_20","type":"book-chapter","created":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T16:07:59Z","timestamp":1745597279000},"page":"422-439","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Dynamic Multi-head Attention"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9503-9084","authenticated-orcid":false,"given":"Fernando Fradique","family":"Duarte","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0513-158X","authenticated-orcid":false,"given":"Nuno","family":"Lau","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7099-1247","authenticated-orcid":false,"given":"Artur","family":"Pereira","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4709-1718","authenticated-orcid":false,"given":"Lu\u00eds Paulo","family":"Reis","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,26]]},"reference":[{"key":"20_CR1","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. In: 3rd International Conference on Learning Representations, ICLR 2015. San Diego, CA, USA (2015)"},{"issue":"7","key":"20_CR2","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1145\/3448250","volume":"64","author":"Y Bengio","year":"2021","unstructured":"Bengio, Y., LeCun, Y., Hinton, G.E.: Deep learning for AI. Commun. ACM 64(7), 58\u201365 (2021)","journal-title":"Commun. ACM"},{"key":"20_CR3","unstructured":"Sermanet, P., Chintala, S., LeCun, Y.: Convolutional neural networks applied to house numbers digit classification. In: Proceedings of the 21st International Conference on Pattern Recognition, ICPR 2012, pp. 3288\u20133291. IEEE, Tsukuba, Japan (2012)"},{"key":"20_CR4","unstructured":"Srivastava, N., Mansimov, E., Salakhutdinov, R.: Unsupervised learning of video representations using LSTMs. In: Proceedings of the 32nd International Conference on Machine Learning, ICML 2015, pp. 843\u2013852. JMLR.org, Lille, France (2015)"},{"key":"20_CR5","unstructured":"Xu, K., et al.: Show, attend and tell: neural image caption generation with visual attention. In: Proceedings of the 32nd International Conference on Machine Learning, ICML 2015, pp. 2048\u20132057. JMLR.org, Lille, France (2015)"},{"key":"20_CR6","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Garnett (ed.) Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, pp. 5998\u20136008. Long Beach, CA, USA (2017)"},{"key":"20_CR7","doi-asserted-by":"crossref","unstructured":"Graves, A., Mohamed, A., Hinton, G.E.: Speech recognition with deep recurrent neural networks. In: IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2013, pp. 6645\u20136649. IEEE, Vancouver, BC, Canada (2013)","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"20_CR8","unstructured":"Humphrey, E.J., Bello, J.P., LeCun, Y.: Moving beyond feature design: deep architectures and automatic feature learning in music informatics. In: Proceedings of the 13th International Society for Music Information Retrieval Conference, ISMIR 2012, pp. 403\u2013408. FEUP Edi\u00e7\u00f5es, Porto, Portugal (2012)"},{"issue":"7540","key":"20_CR9","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","journal-title":"Nature"},{"key":"20_CR10","unstructured":"Zambaldi, V.F., et al.: Deep reinforcement learning with relational inductive biases. In: 7th International Conference on Learning Representations, ICLR 2019. New Orleans, LA, USA (2019)"},{"key":"20_CR11","unstructured":"Mott, A., Zoran, D., Chrzanowski, M., Wierstra, D., Rezende, D.J.: Towards interpretable reinforcement learning using attention augmented agents. In: Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, pp. 12329\u201312338. Vancouver, BC, Canada (2019)"},{"key":"20_CR12","unstructured":"Ha, D., Schmidhuber, J.: World models. arxiv.org\/abs\/1803.10122, Preprint (2018)"},{"key":"20_CR13","unstructured":"Sorokin, I., Seleznev, A., Pavlov, M., Fedorov, A., Ignateva, A.: Deep attention recurrent q-network. arxiv.org\/abs\/1512.01693, Preprint (2015)"},{"issue":"7587","key":"20_CR14","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of Go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"key":"20_CR15","doi-asserted-by":"crossref","unstructured":"Bengio, Y.: Practical recommendations for gradient-based training of deep architectures. CoRR (2012)","DOI":"10.1007\/978-3-642-35289-8_26"},{"key":"20_CR16","doi-asserted-by":"crossref","unstructured":"Duarte, F.F., Lau, N., Pereira, A., Reis, L.P.: Dynamically choosing the number of heads in multi-head attention. In: Proceedings of the 16th International Conference on Agents and Artificial Intelligence, ICAART 2024, pp. 358\u2013367. SCITEPRESS, Rome, Italy (2024)","DOI":"10.5220\/0012384500003636"},{"issue":"8","key":"20_CR17","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"20_CR18","unstructured":"Shi, X., Chen, Z., Wang, H., Yeung, D.-Y., Wong, W.-K., Woo, W.: Convolutional LSTM network: a machine learning approach for precipitation nowcasting. In: Advances in Neural Information Processing Systems 28: Annual Conference on Neural Information Processing Systems 2015, pp. 802\u2013810. Montreal, Quebec, Canada (2015)"},{"key":"20_CR19","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. In: Proceedings of the 32nd International Conference on Machine Learning, ICML 2015, pp. 448\u2013456. JMLR, Lille, France (2015)"},{"key":"20_CR20","unstructured":"Ba, L.J., Kiros, J.R., Hinton, G.E.: Layer normalization. arxiv.org\/abs\/1607.06450, Preprint (2016)"},{"key":"20_CR21","first-page":"523","volume":"61","author":"MC Machado","year":"2018","unstructured":"Machado, M.C., Bellemare, M.G., Talvitie, E., Veness, J., Hausknecht, M.J., Bowling, M.: Revisiting the arcade learning environment: evaluation protocols and open problems for general agents. Artif. Intell. 61, 523\u2013562 (2018)","journal-title":"Artif. Intell."},{"key":"20_CR22","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. In: Proceedings of the 33nd International Conference on Machine Learning, ICML 2016, pp. 1928\u20131937. JMLR, New York City, NY, USA (2016). Author, F.: Article title. Journal 2(5), 99\u2013110 (2016)"},{"key":"20_CR23","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA (2015)"},{"key":"20_CR24","unstructured":"Schulman, J., Moritz, P., Levine, S., Jordan, M.I., Abbeel, P.: High-dimensional continuous control using generalized advantage estimation. In: 4th International Conference on Learning Representations, ICLR 2016, San Juan, Puerto Rico (2016)"},{"key":"20_CR25","unstructured":"Brockman, G., et al.: OpenAI gym. CoRR (2016)"}],"container-title":["Lecture Notes in Computer Science","Agents and Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-87327-0_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T16:08:16Z","timestamp":1745597296000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-87327-0_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031873263","9783031873270"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-87327-0_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"26 April 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICAART","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Agents and Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Rome","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 February 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 February 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icaart2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icaart.scitevents.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}