{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,30]],"date-time":"2025-05-30T11:24:43Z","timestamp":1748604283841,"version":"3.37.3"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T00:00:00Z","timestamp":1713744000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T00:00:00Z","timestamp":1713744000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62076045"],"award-info":[{"award-number":["62076045"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"High-Level Talent Innovation Support Program","award":["2021RQ066"],"award-info":[{"award-number":["2021RQ066"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1007\/s13042-024-02156-w","type":"journal-article","created":{"date-parts":[[2024,4,22]],"date-time":"2024-04-22T09:02:25Z","timestamp":1713776545000},"page":"4435-4444","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Translation model based on discrete Fourier transform and Skipping Sub-Layer methods"],"prefix":"10.1007","volume":"15","author":[{"given":"Yuchen","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuxu","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhuoya","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chao","family":"Che","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhaoqian","family":"Zhong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,4,22]]},"reference":[{"key":"2156_CR1","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141 and Polosukhin I (2017) Attention is all you need. Adv Neural Inf Process Syst 30"},{"key":"2156_CR2","doi-asserted-by":"crossref","unstructured":"Bapna A, Chen MX, Firat O, Cao Y, and Wu Y (2018) Training deeper neural machine translation models with transparent attention. In: Empirical Methods in Natural Language Processing, pages 3028\u20133033","DOI":"10.18653\/v1\/D18-1338"},{"key":"2156_CR3","doi-asserted-by":"crossref","unstructured":"Wu L, Wang Y, Xia Y, Tian F, Gao F, Qin T, Lai J and Liu T-Y (2019) Depth growing for neural machine translation. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (ACL), pages 5558\u20135563","DOI":"10.18653\/v1\/P19-1558"},{"key":"2156_CR4","doi-asserted-by":"crossref","unstructured":"Wu L, Wang Y, Xia Y, Tian F, Gao F, Qin T, Lai J and Liu T-Y (2019) Improving deep transformer with depth-scaled initialization and merged attention. In: Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing, pages 898\u2013909, Hong Kong, China","DOI":"10.18653\/v1\/D19-1083"},{"key":"2156_CR5","unstructured":"Xu H, Liu Q, van Genabith J, Xiong D and Zhang J (2020) Lipschitz-constrained regularization of self-attention mechanism for machine translation. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pages 1348\u20131359"},{"key":"2156_CR6","unstructured":"Huang XS, Perez F, Ba J and Volkovs M (2020) A reinforcement learning approach. In Advances in Neural Information Processing Systems, Improving knowledge distillation with teacher assistant"},{"key":"2156_CR7","doi-asserted-by":"crossref","unstructured":"Clark K, Khandelwal U, Levy O and Manning CD (2019) What does BERT look at? An analysis of bert\u2019s attention. In: Linzen T, Chrupala G, Belinkov Y and Hupkes D (eds) Proceedings of the 2019 ACL Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP, BlackboxNLP@ACL 2019, Florence, Italy, August 1, 2019, pages 276\u2013286","DOI":"10.18653\/v1\/W19-4828"},{"key":"2156_CR8","first-page":"4593","volume":"2019","author":"EPI Tenney","year":"2019","unstructured":"Tenney EPI, Das D (2019) Bert rediscovers the classical NLP pipeline. Assoc Comput Linguist 2019:4593\u20134601","journal-title":"Assoc Comput Linguist"},{"key":"2156_CR9","unstructured":"Wu F, Fan A, Baevski A, Dauphin YN and Auli M (2019) Pay less attention with lightweight and dynamic convolutions. In: 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6\u20139, 2019"},{"key":"2156_CR10","unstructured":"So D, Le Q and Liang C (2019) The evolved transformer. In: International Conference on Machine Learning, pages 5877\u20135886. PMLR"},{"key":"2156_CR11","first-page":"224","volume":"33","author":"F Meng","year":"2019","unstructured":"Meng F, Zhang J (2019) Dtmt: a novel deep transition architecture for neural machine translation. Proc AAAI Conf Artif Intell 33:224\u2013231","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"2156_CR12","doi-asserted-by":"crossref","unstructured":"Chen K, Wang R, Utiyama M and Sumita E (2019) Neural machine translation with reordering embeddings. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pages 1787\u20131799","DOI":"10.18653\/v1\/P19-1174"},{"key":"2156_CR13","unstructured":"Liao B, Khadivi S and Hewavitharana S (2021) Back-translation for large-scale multilingual machine translation. arXiv preprint arXiv:2109.08712"},{"key":"2156_CR14","doi-asserted-by":"crossref","unstructured":"Abdulmumin I, Galadanci BS, Ahmad IS and Abdullahi RI (2021) Data selection as an alternative to quality estimation in self-learning for low resource neural machine translation. In: International Conference on Computational Science and Its Applications, pages 311\u2013326. Springer","DOI":"10.1007\/978-3-030-87013-3_24"},{"key":"2156_CR15","doi-asserted-by":"crossref","unstructured":"Shi Y, Wang Y, Wu C, Yeh C-F, Chan J, Zhang F, Le D and Seltzer M (2021) Emformer: efficient memory transformer based acoustic model for low latency streaming speech recognition. In: ICASSP 2021\u20142021 IEEE International Conference on Acoustics, Speech and Signal Processing, pages 6783\u20136787. IEEE","DOI":"10.1109\/ICASSP39728.2021.9414560"},{"issue":"04","key":"2156_CR16","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/BF02551274","volume":"2","author":"G Cybenko","year":"1989","unstructured":"Cybenko G (1989) Approximation by superpositions of a sigmoidal function. Math Control Signals Syst 2(04):303\u2013314","journal-title":"Math Control Signals Syst"},{"issue":"03","key":"2156_CR17","doi-asserted-by":"publisher","first-page":"930","DOI":"10.1109\/18.256500","volume":"39","author":"AR Barron","year":"1993","unstructured":"Barron AR (1993) Universal approximation bounds for superpositions of a sigmoidal function. IEEE Trans Inf Theory 39(03):930\u2013945","journal-title":"IEEE Trans Inf Theory"},{"key":"2156_CR18","doi-asserted-by":"crossref","unstructured":"Minami K-I, Nakajima H and Toyoshima T (1999) Real-time discrimination of ventricular tachyarrhythmia with Fourier-transform neural network. IEEE Trans Biomed Eng 179\u2013185","DOI":"10.1109\/10.740880"},{"issue":"04","key":"2156_CR19","doi-asserted-by":"publisher","first-page":"289","DOI":"10.4236\/jbise.2011.44039","volume":"4","author":"H Gothwal","year":"2011","unstructured":"Gothwal H, Kedawat S, Kumar R (2011) Cardiac arrhythmias detection in an ECG beat signal using fast Fourier transform and artificial neural network. J Biomed Sci Eng 4(04):289","journal-title":"J Biomed Sci Eng"},{"key":"2156_CR20","doi-asserted-by":"crossref","unstructured":"B\u00edla J, Mironovova M (2015) Fast Fourier transform for feature extraction and neural network for classification of electrocardiogram signals. In: Future Generation Communication Technology (FGCT 2015), Luton, United Kingdom, pages 1\u20136","DOI":"10.1109\/FGCT.2015.7300244"},{"issue":"06","key":"2156_CR21","doi-asserted-by":"publisher","first-page":"1213","DOI":"10.1007\/s10845-012-0657-2","volume":"24","author":"KWZ Zhang","year":"2013","unstructured":"Zhang KWZ, Wang Y (2013) Fault diagnosis and prognosis using wavelet packet decomposition, Fourier transform and artificial neural network. J Intell Manuf 24(06):1213\u20131227","journal-title":"J Intell Manuf"},{"key":"2156_CR22","unstructured":"Choromanski K, Likhosherstov V, Dohan D, Song X, Davis J, Sarl\u00f3s T, Belanger D, Colwell LJ and Weller A (2020) Masked language modeling for proteins via linearly scalable long-context transformers. Comput Res Repository"},{"key":"2156_CR23","first-page":"5492","volume":"33","author":"ND Goodman","year":"2020","unstructured":"Goodman ND, Tamkin A, Jurafsky D (2020) Language through a prism: a spectral approach for multiscale language representations. Adv Neural Inf Process Syst 33:5492\u20135504","journal-title":"Adv Neural Inf Process Syst"},{"key":"2156_CR24","unstructured":"Cohan A, Beltagy I, Peters ME (2020) Longformer: the long-document transformer. Adv Neural Inf Process Syst"},{"key":"2156_CR25","unstructured":"Zaheer M, Guruganesh G, Dubey KA, Ainslie J, Alberti C, Onta\u00f1\u00f3n S, Pham P, Ravula A, Wang Q, Yang L and Ahmed A (2020) Big bird: transformers for longer sequences. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pages 6516\u20136532"},{"key":"2156_CR26","unstructured":"Tay Y, Dehghani M, Abnar S, Shen Y, Bahri D, Pham P, Rao J, Yang L, Ruder S and Metzler D (2020) Long range arena: a benchmark for efficient transformers. Comput Res Repository"},{"key":"2156_CR27","first-page":"21665","volume":"33","author":"ND Goodman","year":"2020","unstructured":"Goodman ND, Tamkin A, Jurafsky D (2020) Fast transformers with clustered attention. Adv Neural Inf Process Syst 33:21665\u201321674","journal-title":"Adv Neural Inf Process Syst"},{"key":"2156_CR28","unstructured":"Choromanski K, Likhosherstov V, Dohan D, Song X, Gane A, Sarl\u00f3s T, Hawkins P, Davis J, Mohiuddin A, Kaiser L, Belanger D, Colwell LJ and Weller A. Rethinking attention with performers. Comput Res Repository (2020)"},{"key":"2156_CR29","doi-asserted-by":"crossref","unstructured":"Shaw P, Uszkoreit J and Vaswani A (2018) Self-attention with relative position representations. arXiv preprint arXiv:1803.02155","DOI":"10.18653\/v1\/N18-2074"},{"key":"2156_CR30","doi-asserted-by":"crossref","unstructured":"Lee-Thorp J, Ainslie J, Eckstein I and Ontanon S (2021) Fnet: Mixing tokens with Fourier transforms. arXiv preprint arXiv:2105.03824","DOI":"10.18653\/v1\/2022.naacl-main.319"},{"key":"2156_CR31","unstructured":"Schmidhuber J, Greff K, Srivastava RK (2016) Highway and residual networks learn unrolled iterative estimation. Comput Res Repository"},{"key":"2156_CR32","unstructured":"Fan A, Grave E and Joulin A (2020) Reducing transformer depth on demand with structured dropout. In: 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, pages 26\u201330"},{"key":"2156_CR33","first-page":"13217","volume":"35","author":"B Li","year":"2021","unstructured":"Li B, Wang Z, Liu H, Quan D, Xiao T, Zhang C, Zhu J (2021) Learning light-weight translation models from deep transformer. Proc AAAI Conf Artif Intell 35:13217\u201313225","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"2156_CR34","doi-asserted-by":"crossref","unstructured":"Papineni K, Roukos S, Ward T and Zhu W-J (2002) Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th annual meeting of the Association for Computational Linguistics, pages 311\u2013318","DOI":"10.3115\/1073083.1073135"},{"key":"2156_CR35","doi-asserted-by":"crossref","unstructured":"Shankar V (1991) A gigaflop performance algorithm for solving Maxwell\u2019s equations of electromagnetics. In: Computational Fluid Dynamics Conference, page 1578","DOI":"10.2514\/6.1991-1578"},{"key":"2156_CR36","doi-asserted-by":"crossref","unstructured":"Wei X, Yu H, Hu Y, Zhang Y, Weng R and Luo W (2020) Multiscale collaborative deep models for neural machine translation. In: Annual Meeting of the Association for Computational Linguistics, pages 414\u2013426","DOI":"10.18653\/v1\/2020.acl-main.40"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02156-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-024-02156-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02156-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,13]],"date-time":"2024-09-13T16:33:29Z","timestamp":1726245209000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-024-02156-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,22]]},"references-count":36,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2024,10]]}},"alternative-id":["2156"],"URL":"https:\/\/doi.org\/10.1007\/s13042-024-02156-w","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"type":"print","value":"1868-8071"},{"type":"electronic","value":"1868-808X"}],"subject":[],"published":{"date-parts":[[2024,4,22]]},"assertion":[{"value":"24 May 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 March 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 April 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}