{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T12:53:15Z","timestamp":1780318395331,"version":"3.54.1"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,1,7]],"date-time":"2025-01-07T00:00:00Z","timestamp":1736208000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,7]],"date-time":"2025-01-07T00:00:00Z","timestamp":1736208000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1007\/s00530-024-01614-3","type":"journal-article","created":{"date-parts":[[2025,1,7]],"date-time":"2025-01-07T02:36:02Z","timestamp":1736217362000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["UAPT: an underwater acoustic target recognition method based on pre-trained 
Transformer"],"prefix":"10.1007","volume":"31","author":[{"given":"Jun","family":"Tang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Enxue","family":"Ma","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yang","family":"Qu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wenbo","family":"Gao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuchen","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lin","family":"Gan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,1,7]]},"reference":[{"key":"1614_CR1","doi-asserted-by":"crossref","unstructured":"Kamal S, Mohammed S K, Pillai P R S, et al. Deep learning architectures for underwater target recognition[C]\/\/2013 Ocean Electronics (SYMPOL). IEEE, 2013: 48\u201354.","DOI":"10.1109\/SYMPOL.2013.6701911"},{"key":"1614_CR2","doi-asserted-by":"crossref","unstructured":"Ferguson E L, Ramakrishnan R, Williams S B, et al. Convolutional neural networks for passive monitoring of a shallow water environment using a single sensor[C]\/\/2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2017: 2657\u20132661.","DOI":"10.1109\/ICASSP.2017.7952638"},{"key":"1614_CR3","doi-asserted-by":"crossref","unstructured":"Valdenegro-Toro M. Improving sonar image patch matching via deep learning[C]\/\/2017 European Conference on Mobile Robots (ECMR). 
IEEE, 2017: 1\u20136.","DOI":"10.1109\/ECMR.2017.8098701"},{"key":"1614_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/LGRS.2020.3029584","volume":"19","author":"VS Doan","year":"2020","unstructured":"Doan, V.S., Huynh-The, T., Kim, D.S.: Underwater acoustic target classification based on dense convolutional neural network. IEEE Geosci. Remote Sens. Lett. 19, 1\u20135 (2020)","journal-title":"IEEE Geosci. Remote Sens. Lett."},{"key":"1614_CR5","doi-asserted-by":"crossref","unstructured":"Yue H, Zhang L, Wang D, et al. The classification of underwater acoustic targets based on deep learning methods[C]\/\/2017 2nd International Conference on Control, Automation and Artificial Intelligence (CAAI 2017). Atlantis Press, 2017: 526\u2013529.","DOI":"10.2991\/caai-17.2017.118"},{"issue":"5","key":"1614_CR6","doi-asserted-by":"publisher","first-page":"1104","DOI":"10.3390\/s19051104","volume":"19","author":"H Yang","year":"2019","unstructured":"Yang, H., Li, J., Shen, S., et al.: A deep convolutional neural network inspired by auditory perception for underwater acoustic target recognition. Sensors 19(5), 1104 (2019)","journal-title":"Sensors"},{"key":"1614_CR7","first-page":"1","volume":"19","author":"S Feng","year":"2022","unstructured":"Feng, S., Zhu, X.: A Transformer-Based Deep Learning Network for Underwater Acoustic Target Recognition. IEEE Geosci. Remote Sens. Lett. 19, 1\u20135 (2022)","journal-title":"IEEE Geosci. Remote Sens. Lett."},{"key":"1614_CR8","unstructured":"Vaswani A, Shazeer N, Parmar N, et al. Attention is all you need[J]. Advances in neural information processing systems, 2017, 30."},{"key":"1614_CR9","unstructured":"Hochreiter S, Bengio Y, Frasconi P, et al. Gradient flow in recurrent nets: the difficulty of learning long-term dependencies[J]. 2001."},{"key":"1614_CR10","unstructured":"Bahdanau D, Cho K, Bengio Y. Neural machine translation by jointly learning to align and translate[J]. 
arXiv preprint arXiv:1409.0473, 2014."},{"key":"1614_CR11","unstructured":"Sutskever I, Vinyals O, Le Q V. Sequence to sequence learning with neural networks[J]. Advances in neural information processing systems, 2014, 27."},{"issue":"10","key":"1614_CR12","doi-asserted-by":"publisher","first-page":"1872","DOI":"10.1007\/s11431-020-1647-3","volume":"63","author":"X Qiu","year":"2020","unstructured":"Qiu, X., Sun, T., Xu, Y., et al.: Pre-trained models for natural language processing: A survey. Sci. China Technol. Sci. 63(10), 1872\u20131897 (2020)","journal-title":"Sci. China Technol. Sci."},{"key":"1614_CR13","unstructured":"Parmar N, Vaswani A, Uszkoreit J, et al. Image transformer[C]\/\/International conference on machine learning. PMLR, 2018: 4055\u20134064."},{"key":"1614_CR14","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, et al. End-to-end object detection with transformers[C]\/\/European conference on computer vision. Cham: Springer International Publishing, 2020: 213\u2013229.","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"1614_CR15","doi-asserted-by":"crossref","unstructured":"Dong L, Xu S, Xu B. Speech-transformer: a no-recurrence sequence-to-sequence model for speech recognition[C]\/\/2018 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE, 2018: 5884\u20135888.","DOI":"10.1109\/ICASSP.2018.8462506"},{"key":"1614_CR16","doi-asserted-by":"crossref","unstructured":"Gulati A, Qin J, Chiu C C, et al. Conformer: Convolution-augmented transformer for speech recognition[J]. arXiv preprint arXiv:2005.08100, 2020.","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"1614_CR17","doi-asserted-by":"crossref","unstructured":"Chen X, Wu Y, Wang Z, et al. Developing real-time streaming transformer transducer for speech recognition on large-scale dataset[C]\/\/ICASSP 2021\u20132021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 
IEEE, 2021: 5904\u20135908.","DOI":"10.1109\/ICASSP39728.2021.9413535"},{"issue":"9","key":"1614_CR18","doi-asserted-by":"publisher","first-page":"1572","DOI":"10.1021\/acscentsci.9b00576","volume":"5","author":"P Schwaller","year":"2019","unstructured":"Schwaller, P., Laino, T., Gaudin, T., et al.: Molecular transformer: a model for uncertainty-calibrated chemical reaction prediction. ACS Cent. Sci. 5(9), 1572\u20131583 (2019)","journal-title":"ACS Cent. Sci."},{"issue":"15","key":"1614_CR19","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2016239118","volume":"118","author":"A Rives","year":"2021","unstructured":"Rives, A., Meier, J., Sercu, T., et al.: Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences. Proc. Natl. Acad. Sci. 118(15), e2016239118 (2021)","journal-title":"Proc. Natl. Acad. Sci."},{"issue":"10","key":"1614_CR20","doi-asserted-by":"publisher","first-page":"1428","DOI":"10.3390\/jmse10101428","volume":"10","author":"P Li","year":"2022","unstructured":"Li, P., Wu, J., Wang, Y., et al.: STM: spectrogram transformer model for underwater acoustic target recognition. J Mar Sci Eng 10(10), 1428 (2022)","journal-title":"J Mar Sci Eng"},{"key":"1614_CR21","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, et al. An image is worth 16x16 words: Transformers for image recognition at scale[J]. arXiv preprint arXiv:2010.11929, 2020."},{"key":"1614_CR22","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, et al. Swin transformer: Hierarchical vision transformer using shifted windows[C]\/\/Proceedings of the IEEE\/CVF international conference on computer vision. 2021: 10012\u201310022.","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1614_CR23","doi-asserted-by":"crossref","unstructured":"Zhu L, Wang X, Ke Z, et al. BiFormer: Vision Transformer with Bi-Level Routing Attention[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 
2023: 10323\u201310333.","DOI":"10.1109\/CVPR52729.2023.00995"},{"key":"1614_CR24","unstructured":"Child R, Gray S, Radford A, et al. Generating long sequences with sparse transformers. arXiv preprint arXiv:1904.10509, 2019."},{"issue":"1","key":"1614_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s40537-016-0043-6","volume":"3","author":"K Weiss","year":"2016","unstructured":"Weiss, K., Khoshgoftaar, T.M., Wang, D.D.: A survey of transfer learning. J Big data 3(1), 1\u201340 (2016)","journal-title":"J Big data"},{"issue":"1","key":"1614_CR26","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1109\/JPROC.2020.3004555","volume":"109","author":"F Zhuang","year":"2020","unstructured":"Zhuang, F., Qi, Z., Duan, K., et al.: A comprehensive survey on transfer learning. Proc. IEEE 109(1), 43\u201376 (2020)","journal-title":"Proc. IEEE"},{"issue":"2","key":"1614_CR27","doi-asserted-by":"publisher","first-page":"384","DOI":"10.3390\/jmse11020384","volume":"11","author":"X Luo","year":"2023","unstructured":"Luo, X., Chen, L., Zhou, H., et al.: A survey of underwater acoustic target recognition methods based on machine learning. J Mar Sci Eng 11(2), 384 (2023)","journal-title":"J Mar Sci Eng"},{"key":"1614_CR28","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2021.107989","volume":"178","author":"F Liu","year":"2021","unstructured":"Liu, F., Shen, T., Luo, Z., et al.: Underwater target recognition using convolutional recurrent neural networks with 3-D Mel-spectrogram and data augmentation. Appl. Acoust. 178, 107989 (2021)","journal-title":"Appl. Acoust."},{"key":"1614_CR29","doi-asserted-by":"crossref","unstructured":"Park D S, Chan W, Zhang Y, et al. Specaugment: A simple data augmentation method for automatic speech recognition[J]. 
arXiv preprint arXiv:1904.08779, 2019.","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"1614_CR30","volume":"30","author":"S Kamal","year":"2022","unstructured":"Kamal, S., Mujeeb, A., Supriya, M.H.: Generative adversarial learning for improved data efficiency in underwater target classification[J]. Eng Sci Technol Int J 30, 101043 (2022)","journal-title":"Eng Sci Technol Int J"},{"issue":"11","key":"1614_CR31","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., et al.: Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020)","journal-title":"Commun. ACM"},{"key":"1614_CR32","doi-asserted-by":"publisher","first-page":"63841","DOI":"10.1109\/ACCESS.2021.3075344","volume":"9","author":"X Luo","year":"2021","unstructured":"Luo, X., Feng, Y., Zhang, M.: An underwater acoustic target recognition method based on combined feature with automatic coding and reconstruction. Ieee Access 9, 63841\u201363854 (2021)","journal-title":"Ieee Access"},{"key":"1614_CR33","unstructured":"Kingma D P, Welling M. Auto-encoding variational bayes[J]. arXiv preprint arXiv:1312.6114, 2013."},{"key":"1614_CR34","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1613\/jair.731","volume":"12","author":"J Baxter","year":"2000","unstructured":"Baxter, J.: A model of inductive bias learning. J Artificial Intell Res 12, 149\u2013198 (2000)","journal-title":"J Artificial Intell Res"},{"key":"1614_CR35","doi-asserted-by":"crossref","unstructured":"Guzhov A, Raue F, Hees J, et al. Esresnet: Environmental sound classification based on visual domain models[C]\/\/2020 25th International Conference on Pattern Recognition (ICPR). 
IEEE, 2021: 4933\u20134940.","DOI":"10.1109\/ICPR48806.2021.9413035"},{"key":"1614_CR36","doi-asserted-by":"publisher","first-page":"321","DOI":"10.2478\/eletel-2014-0042","volume":"60","author":"G Gwardys","year":"2014","unstructured":"Gwardys, G., Grzywczak, D.: Deep image features in music information retrieval. Int J Electron Telecommun 60, 321\u2013326 (2014)","journal-title":"Int J Electron Telecommun"},{"key":"1614_CR37","unstructured":"Palanisamy K, Singhania D, Yao A. Rethinking CNN models for audio classification[J]. arXiv preprint arXiv:2007.11154, 2020."},{"key":"1614_CR38","doi-asserted-by":"crossref","unstructured":"Chollet F. Xception: Deep learning with depthwise separable convolutions[C]\/\/Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 1251\u20131258.","DOI":"10.1109\/CVPR.2017.195"},{"key":"1614_CR39","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., Deng, J., Su, H., et al.: Imagenet large scale visual recognition challenge. Int. J. Comput. Vision 115, 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vision"},{"key":"1614_CR40","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1016\/j.apacoust.2016.06.008","volume":"113","author":"D Santos-Dom\u00ednguez","year":"2016","unstructured":"Santos-Dom\u00ednguez, D., Torres-Guijarro, S., Cardenal-L\u00f3pez, A., et al.: ShipsEar: An underwater vessel noise database. Appl. Acoust. 113, 64\u201369 (2016)","journal-title":"Appl. Acoust."},{"key":"1614_CR41","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.115270","volume":"183","author":"M Irfan","year":"2021","unstructured":"Irfan, M., Jiangbin, Z., Ali, S., et al.: DeepShip: An underwater acoustic benchmark dataset and a separable convolution based autoencoder for classification. Expert Syst. Appl. 183, 115270 (2021)","journal-title":"Expert Syst. 
Appl."},{"key":"1614_CR42","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, et al. Deep residual learning for image recognition[C]\/\/Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"1614_CR43","doi-asserted-by":"crossref","unstructured":"He T, Zhang Z, Zhang H, et al. Bag of tricks for image classification with convolutional neural networks[C]\/\/Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2019: 558\u2013567.","DOI":"10.1109\/CVPR.2019.00065"},{"key":"1614_CR44","first-page":"450","volume":"13206","author":"B Bryan","year":"2024","unstructured":"Bryan, B., Hasan, M.J., Kannan, S., et al.: Adaptive path-planning for AUVs in dynamic underwater environments using sonar data. Artificial Intelligence for Security and Defence Applications II. SPIE 13206, 450\u2013461 (2024)","journal-title":"SPIE"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01614-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-024-01614-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01614-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,28]],"date-time":"2025-02-28T06:03:21Z","timestamp":1740722601000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-024-01614-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,7]]},"references-count":44,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,2]]}},"alternative-id":["1614"],"URL":"https:\/\/doi
.org\/10.1007\/s00530-024-01614-3","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-4253542\/v1","asserted-by":"object"}]},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1,7]]},"assertion":[{"value":"11 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 December 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 January 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"50"}}