{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,3]],"date-time":"2025-04-03T06:24:10Z","timestamp":1743661450716,"version":"3.40.3"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030322328"},{"type":"electronic","value":"9783030322335"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-32233-5_25","type":"book-chapter","created":{"date-parts":[[2019,10,1]],"date-time":"2019-10-01T22:04:51Z","timestamp":1569967491000},"page":"314-326","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Improving Multi-head Attention with Capsule Networks"],"prefix":"10.1007","author":[{"given":"Shuhao","family":"Gu","sequence":"first","affiliation":[]},{"given":"Yang","family":"Feng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,9,30]]},"reference":[{"key":"25_CR1","unstructured":"Ahmed, K., Keskar, N.S., Socher, R.: Weighted transformer network for machine translation. arXiv preprint arXiv:1711.02132 (2017)"},{"key":"25_CR2","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473 (2014)"},{"key":"25_CR3","doi-asserted-by":"crossref","unstructured":"Chen, K., Wang, R., Utiyama, M., Sumita, E., Zhao, T.: Syntax-directed attention for neural machine translation. In: Thirty-Second AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.11910"},{"key":"25_CR4","doi-asserted-by":"crossref","unstructured":"Cho, K., Van Merri\u00ebnboer, B., Gulcehre, C., Bahdanau, D., Bougares, F., Schwenk, H., Bengio, Y.: Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078 (2014)","DOI":"10.3115\/v1\/D14-1179"},{"key":"25_CR5","doi-asserted-by":"crossref","unstructured":"Collins, M., Koehn, P., Ku\u010derov\u00e1, I.: Clause restructuring for statistical machine translation. In: Proceedings of the 43rd Annual Meeting on Association for Computational Linguistics, pp. 531\u2013540. Association for Computational Linguistics (2005)","DOI":"10.3115\/1219840.1219906"},{"key":"25_CR6","unstructured":"Edunov, S., Ott, M., Gross, S.: (2017). https:\/\/github.com\/pytorch\/fairseq"},{"key":"25_CR7","unstructured":"Gehring, J., Auli, M., Grangier, D., Yarats, D., Dauphin, Y.N.: Convolutional sequence to sequence learning. arXiv preprint arXiv:1705.03122 (2017)"},{"key":"25_CR8","unstructured":"Gong, J., Qiu, X., Wang, S., Huang, X.: Information aggregation via dynamic routing for sequence encoding. arXiv preprint arXiv:1806.01501 (2018)"},{"key":"25_CR9","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"25_CR10","unstructured":"Hinton, G.E., Sabour, S., Frosst, N.: Matrix capsules with EM routing (2018)"},{"key":"25_CR11","unstructured":"Kalchbrenner, N., Blunsom, P.: Recurrent continuous translation models. In: Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing, pp. 1700\u20131709 (2013)"},{"key":"25_CR12","doi-asserted-by":"crossref","unstructured":"Li, J., Tu, Z., Yang, B., Lyu, M.R., Zhang, T.: Multi-head attention with disagreement regularization. arXiv preprint arXiv:1810.10183 (2018)","DOI":"10.18653\/v1\/D18-1317"},{"key":"25_CR13","doi-asserted-by":"crossref","unstructured":"Luong, M.-T., Pham, H., Manning, C.D.: Effective approaches to attention-based neural machine translation. arXiv preprint arXiv:1508.04025 (2015)","DOI":"10.18653\/v1\/D15-1166"},{"key":"25_CR14","unstructured":"Meng, F., Lu, Z., Li, H., Liu, Q.: Interactive attention for neural machine translation. arXiv preprint arXiv:1610.05011 (2016)"},{"key":"25_CR15","unstructured":"Meng, F., Zhang, J.: DTMT: a novel deep transition architecture for neural machine translation. arXiv preprint arXiv:1812.07807 (2018)"},{"key":"25_CR16","doi-asserted-by":"crossref","unstructured":"Mi, H., Wang, Z., Ittycheriah, A.: Supervised attentions for neural machine translation. arXiv preprint arXiv:1608.00112 (2016)","DOI":"10.18653\/v1\/D16-1249"},{"key":"25_CR17","doi-asserted-by":"crossref","unstructured":"Ott, M., Edunov, S., Grangier, D., Auli, M.: Scaling neural machine translation. arXiv preprint arXiv:1806.00187 (2018)","DOI":"10.18653\/v1\/W18-6301"},{"key":"25_CR18","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.-J.: BLEU: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting on Association for Computational Linguistics, pp. 311\u2013318. Association for Computational Linguistics (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"25_CR19","unstructured":"Sabour, S., Frosst, N., Hinton, G.E.: Dynamic routing between capsules. In: Advances in Neural Information Processing Systems, pp. 3856\u20133866 (2017)"},{"key":"25_CR20","doi-asserted-by":"crossref","unstructured":"Sennrich, R., Haddow, B., Birch, A.: Neural machine translation of rare words with subword units. arXiv preprint arXiv:1508.07909 (2015)","DOI":"10.18653\/v1\/P16-1162"},{"key":"25_CR21","doi-asserted-by":"crossref","unstructured":"Shaw, P., Uszkoreit, J., Vaswani, A.: Self-attention with relative position representations. arXiv preprint arXiv:1803.02155 (2018)","DOI":"10.18653\/v1\/N18-2074"},{"key":"25_CR22","doi-asserted-by":"crossref","unstructured":"Shen, T., Zhou, T., Long, G., Jiang, J., Pan, S., Zhang, C.: DiSAN: directional self-attention network for RNN\/CNN-free language understanding. In: Thirty-Second AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.11941"},{"key":"25_CR23","unstructured":"Shen, T., Zhou, T., Long, G., Jiang, J., Zhang, C.: Bi-directional block self-attention for fast and memory-efficient sequence modeling. arXiv preprint arXiv:1804.00857 (2018)"},{"key":"25_CR24","unstructured":"Sutskever, I., Vinyals, O., Le, Q.V.: Sequence to sequence learning with neural networks. In: Advances in Neural Information Processing Systems, pp. 3104\u20133112 (2014)"},{"key":"25_CR25","doi-asserted-by":"crossref","unstructured":"Tu, Z., Lu, Z., Liu, Y., Liu, X., Li, H.: Modeling coverage for neural machine translation. arXiv preprint arXiv:1601.04811 (2016)","DOI":"10.18653\/v1\/P16-1008"},{"key":"25_CR26","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"key":"25_CR27","unstructured":"Wang, M., Xie, J., Tan, Z., Su, J., Xiong, D., Bian, C.: Neural machine translation with decoding history enhanced attention. In: Proceedings of the 27th International Conference on Computational Linguistics, pp. 1464\u20131473 (2018)"},{"key":"25_CR28","doi-asserted-by":"crossref","unstructured":"Wang, M., Xie, J., Tan, Z., Su, J., et al.: Towards linear time neural machine translation with capsule networks. arXiv preprint arXiv:1811.00287 (2018)","DOI":"10.18653\/v1\/D19-1074"},{"key":"25_CR29","doi-asserted-by":"crossref","unstructured":"Yang, B., Tu, Z., Wong, D.F., Meng, F., Chao, L.S., Zhang, T.: Modeling localness for self-attention networks. arXiv preprint arXiv:1810.10182 (2018)","DOI":"10.18653\/v1\/D18-1475"},{"key":"25_CR30","doi-asserted-by":"crossref","unstructured":"Yang, M., Zhao, W., Ye, J., Lei, Z., Zhao, Z., Zhang, S.: Investigating capsule networks with dynamic routing for text classification. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 3110\u20133119 (2018)","DOI":"10.18653\/v1\/D18-1350"},{"key":"25_CR31","doi-asserted-by":"crossref","unstructured":"Zhang, N., Deng, S., Sun, Z., Chen, X., Zhang, W., Chen, H.: Attention-based capsule networks with dynamic routing for relation extraction. arXiv preprint arXiv:1812.11321 (2018)","DOI":"10.18653\/v1\/D18-1120"},{"key":"25_CR32","unstructured":"Zhang, Z., Wu, S., Liu, S., Li, M., Zhou, M., Chen, E.: Regularizing neural machine translation by target-bidirectional agreement. arXiv preprint arXiv:1808.04064 (2018)"},{"key":"25_CR33","doi-asserted-by":"crossref","unstructured":"Voita, E., Talbot, D., Moiseev, F., Sennrich, R., Titov, I.: Analyzing Multi-Head Self-Attention: Specialized Heads Do the Heavy Lifting, the Rest Can Be Pruned arXiv preprint arXiv:1905.09418 (2019)","DOI":"10.18653\/v1\/P19-1580"},{"key":"25_CR34","doi-asserted-by":"crossref","unstructured":"Domhan, T.: How much attention do you need? A granular analysis of neural machine translation architectures. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 1799\u20131808 (2018)","DOI":"10.18653\/v1\/P18-1167"},{"key":"25_CR35","doi-asserted-by":"crossref","unstructured":"Li, J., Yang, B., Dou, Z.-Y., Wang, X., Lyu, M.R., Tu, Z.: Information aggregation for multi-head attention with routing-by-agreement. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pp. 3566\u20133575 (2019)","DOI":"10.18653\/v1\/N19-1359"}],"container-title":["Lecture Notes in Computer Science","Natural Language Processing and Chinese Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-32233-5_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T18:06:48Z","timestamp":1710353208000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-32233-5_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030322328","9783030322335"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-32233-5_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"30 September 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"NLPCC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"CCF International Conference on Natural Language Processing and Chinese Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Dunhuang","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 October 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 October 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"nlpcc2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/tcci.ccf.org.cn\/conference\/2019\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"softconf","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"492","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"85","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"56","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"17% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}