{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T22:35:14Z","timestamp":1769553314846,"version":"3.49.0"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031463372","type":"print"},{"value":"9783031463389","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,11,5]],"date-time":"2023-11-05T00:00:00Z","timestamp":1699142400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,5]],"date-time":"2023-11-05T00:00:00Z","timestamp":1699142400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-46338-9_5","type":"book-chapter","created":{"date-parts":[[2023,11,4]],"date-time":"2023-11-04T17:02:05Z","timestamp":1699117325000},"page":"60-74","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Bird Species Recognition in\u00a0Soundscapes with\u00a0Self-supervised 
Pre-training"],"prefix":"10.1007","author":[{"given":"Hicham","family":"Bellafkir","sequence":"first","affiliation":[]},{"given":"Markus","family":"Vogelbacher","sequence":"additional","affiliation":[]},{"given":"Daniel","family":"Schneider","sequence":"additional","affiliation":[]},{"given":"Valeryia","family":"Kizik","sequence":"additional","affiliation":[]},{"given":"Markus","family":"M\u00fchling","sequence":"additional","affiliation":[]},{"given":"Bernd","family":"Freisleben","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,5]]},"reference":[{"key":"5_CR1","doi-asserted-by":"publisher","unstructured":"Cohen, Y., Nicholson, D.A., Sanchioni, A., Mallaber, E.K., Skidanova, V., Gardner, T.J.: Automated annotation of birdsong with a neural network that segments spectrograms. eLife 11, e63853 (2022). https:\/\/doi.org\/10.7554\/eLife.63853","DOI":"10.7554\/eLife.63853"},{"key":"5_CR2","unstructured":"Conde, M.V., Choi, U.: Few-shot long-tailed bird audio recognition. In: Proceedings of the Working Notes of CLEF 2022 - Conference and Labs of the Evaluation Forum, Bologna, Italy. CEUR Workshop Proceedings, vol. 3180, pp. 2036\u20132046. CEUR-WS.org (2022). http:\/\/ceur-ws.org\/Vol-3180\/paper-161.pdf"},{"key":"5_CR3","doi-asserted-by":"publisher","unstructured":"Dai, W., Dai, C., Qu, S., Li, J., Das, S.: Very deep convolutional neural networks for raw waveforms. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 421\u2013425 (2017). https:\/\/doi.org\/10.1109\/ICASSP.2017.7952190","DOI":"10.1109\/ICASSP.2017.7952190"},{"key":"5_CR4","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: 9th Int. Conference on Learning Representations (ICLR), Austria (2021). 
https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"key":"5_CR5","doi-asserted-by":"publisher","unstructured":"Gemmeke, J.F., et al.: Audio set: an ontology and human-labeled dataset for audio events. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 776\u2013780 (2017). https:\/\/doi.org\/10.1109\/ICASSP.2017.7952261","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"5_CR6","doi-asserted-by":"publisher","unstructured":"Gong, Y., Chung, Y., Glass, J.R.: AST: audio spectrogram transformer. In: Interspeech 2021, pp. 571\u2013575 (2021). https:\/\/doi.org\/10.21437\/Interspeech.2021-698","DOI":"10.21437\/Interspeech.2021-698"},{"issue":"10","key":"5_CR7","doi-asserted-by":"publisher","first-page":"10699","DOI":"10.1609\/aaai.v36i10.21315","volume":"36","author":"Y Gong","year":"2022","unstructured":"Gong, Y., Lai, C.I., Chung, Y.A., Glass, J.: Ssast: self-supervised audio spectrogram transformer. Proc. AAAI Conf. Artif. Intell. 36(10), 10699\u201310709 (2022). https:\/\/doi.org\/10.1609\/aaai.v36i10.21315","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"5_CR8","doi-asserted-by":"publisher","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"5_CR9","unstructured":"H\u00e9naff, O.J., et al.: Data-efficient image recognition with contrastive predictive coding. In: III, H.D., Singh, A. (eds.) Proceedings of the 37th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 119, pp. 4182\u20134192. PMLR (13\u201318 Jul 2020). https:\/\/proceedings.mlr.press\/v119\/henaff20a.html"},{"key":"5_CR10","unstructured":"Henkel, C., Pfeiffer, P., Singer, P.: Recognizing bird species in diverse soundscapes under weak supervision. 
In: Working Notes of CLEF 2021 - Conference and Labs of the Evaluation Forum, Bucharest, Romania. CEUR Workshop Proceedings, vol. 2936, pp. 1579\u20131586. CEUR-WS.org (2021). http:\/\/ceur-ws.org\/Vol-2936\/paper-134.pdf"},{"key":"5_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.ohx.2019.e00073","volume":"6","author":"AP Hill","year":"2019","unstructured":"Hill, A.P., Prince, P., Snaddon, J.L., Doncaster, C.P., Rogers, A.: Audiomoth: a low-cost acoustic device for monitoring biodiversity and the environment. HardwareX 6, e00073 (2019). https:\/\/doi.org\/10.1016\/j.ohx.2019.e00073","journal-title":"HardwareX"},{"key":"5_CR12","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1007\/978-3-031-17436-0_6","volume-title":"Networked Systems: 10th International Conference, NETYS 2022, Virtual Event, May 17\u201319, 2022, Proceedings","author":"J H\u00f6chst","year":"2022","unstructured":"H\u00f6chst, J., et al.: Bird@Edge: bird species recognition at\u00a0the\u00a0edge. In: Koulali, M.-A., Mezini, M. (eds.) Networked Systems: 10th International Conference, NETYS 2022, Virtual Event, May 17\u201319, 2022, Proceedings, pp. 69\u201386. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-17436-0_6"},{"key":"5_CR13","unstructured":"iNaturalist: A community for naturalists. https:\/\/www.inaturalist.org\/"},{"key":"5_CR14","unstructured":"Kahl, S., et al.: Overview of BirdCLEF 2020: bird sound recognition in complex acoustic environments. In: Working Notes of CLEF 2020 - Conference and Labs of the Evaluation Forum, Thessaloniki, Greece. CEUR Workshop Proceedings, vol. 2696. CEUR-WS.org (2020). http:\/\/ceur-ws.org\/Vol-2696\/paper_262.pdf"},{"key":"5_CR15","unstructured":"Kahl, S., et al.: Overview of BirdCLEF 2021: bird call identification in soundscape recordings. In: Working Notes of CLEF - Conference and Labs of the Evaluation Forum, Bucharest, Romania. CEUR Workshop Proceedings, vol. 2936, pp. 1437\u20131450. CEUR-WS.org (2021). 
http:\/\/ceur-ws.org\/Vol-2936\/paper-123.pdf"},{"key":"5_CR16","unstructured":"Kahl, S., et al.: Overview of BirdCLEF 2022: endangered bird species recognition in soundscape recordings. In: Proceedings of the Working Notes of CLEF 2022 - Conference and Labs of the Evaluation Forum, Bologna, Italy. CEUR Workshop Proceedings, vol. 3180, pp. 1929\u20131939. CEUR-WS.org (2022). http:\/\/ceur-ws.org\/Vol-3180\/paper-154.pdf"},{"key":"5_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.ecoinf.2021.101236","volume":"61","author":"S Kahl","year":"2021","unstructured":"Kahl, S., Wood, C.M., Eibl, M., Klinck, H.: Birdnet: a deep learning solution for avian diversity monitoring. Eco. Inform. 61, 101236 (2021). https:\/\/doi.org\/10.1016\/j.ecoinf.2021.101236","journal-title":"Eco. Inform."},{"key":"5_CR18","doi-asserted-by":"publisher","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: IEEE\/CVF International Conference on Computer Vision (ICCV), Montreal, QC, Canada, pp. 9992\u201310002. IEEE (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.00986","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"5_CR19","unstructured":"Martynov, E., Uematsu, Y.: Dealing with class imbalance in bird sound classification. In: Proceedings of the Working Notes of CLEF 2022 - Conference and Labs of the Evaluation Forum, Bologna, Italy. CEUR Workshop Proceedings, vol. 3180, pp. 2151\u20132158. CEUR-WS.org (2022). http:\/\/ceur-ws.org\/Vol-3180\/paper-170.pdf"},{"key":"5_CR20","doi-asserted-by":"publisher","DOI":"10.1016\/j.ecoinf.2022.101952","volume":"74","author":"F Michaud","year":"2023","unstructured":"Michaud, F., Sueur, J., Le Cesne, M., Haupert, S.: Unsupervised classification to improve the quality of a bird song recording dataset. Eco. Inform. 74, 101952 (2023). https:\/\/doi.org\/10.1016\/j.ecoinf.2022.101952","journal-title":"Eco. 
Inform."},{"key":"5_CR21","unstructured":"Miyaguchi, A., Yu, J., Cheungvivatpant, B., Dudley, D., Swain, A.: Motif mining and unsupervised representation learning for birdclef 2022. In: Proceedings of the Working Notes of CLEF 2022 - Conference and Labs of the Evaluation Forum, Bologna, Italy. CEUR Workshop Proceedings, vol. 3180, pp. 2159\u20132167. CEUR-WS.org (2022). http:\/\/ceur-ws.org\/Vol-3180\/paper-171.pdf"},{"key":"5_CR22","unstructured":"M\u00fchling, M., Franz, J., Korfhage, N., Freisleben, B.: Bird species recognition via neural architecture search. In: Working Notes of CLEF 2020 - Conference and Labs of the Evaluation Forum, Thessaloniki, Greece, September 22\u201325, 2020. CEUR Workshop Proceedings, vol. 2696. CEUR-WS.org (2020). http:\/\/ceur-ws.org\/Vol-2696\/paper_188.pdf"},{"key":"5_CR23","unstructured":"Murakami, N., Tanaka, H., Nishimori, M.: Birdcall identification using CNN and gradient boosting decision trees with weak and noisy supervision. In: Working Notes of CLEF 2021 - Conference and Labs of the Evaluation Forum, Bucharest, Romania. CEUR Workshop Proceedings, vol. 2936, pp. 1597\u20131608. CEUR-WS.org (2021). http:\/\/ceur-ws.org\/Vol-2936\/paper-136.pdf"},{"key":"5_CR24","unstructured":"Paszke, A., et al.: PyTorch: an imperative style, high-performance deep learning library. In: Advances in Neural Information Processing Systems 32, pp. 8024\u20138035. Curran Associates, Inc. (2019). http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"key":"5_CR25","doi-asserted-by":"publisher","unstructured":"Prashanth, H., Rao, M., Eledath, D., Ramasubramanian, V.: Trainable windows for sincnet architecture. EURASIP J. Audio Speech Music Process. 2023(1) (2023). https:\/\/doi.org\/10.1186\/s13636-023-00271-0","DOI":"10.1186\/s13636-023-00271-0"},{"key":"5_CR26","unstructured":"Puget, J.F.: STFT transformers for bird song recognition. 
In: Working Notes of CLEF 2021 - Conference and Labs of the Evaluation Forum, Bucharest, Romania. CEUR Workshop Proceedings, vol. 2936. CEUR-WS.org (2021). http:\/\/ceur-ws.org\/Vol-2936\/paper-137.pdf"},{"key":"5_CR27","unstructured":"Ryan, P., Takafuji, S., Yang, C., Wilson, N., McBride, C.: Using self-supervised learning of birdsong for downstream industrial audio classification. In: ICML Workshop on Self-supervision in Audio and Speech (2020). https:\/\/openreview.net\/forum?id=_P9LyJ5pMDb"},{"key":"5_CR28","unstructured":"Sampathkumar, A., Kowerko, D.: TUC media computing at BirdCLEF 2022: Strategies in identifying bird sounds in a complex acoustic environments. In: Proceedings of the Working Notes of CLEF 2022 - Conference and Labs of the Evaluation Forum, Bologna, Italy. CEUR Workshop Proceedings, vol. 3180, pp. 2189\u20132198. CEUR-WS.org (2022). http:\/\/ceur-ws.org\/Vol-3180\/paper-174.pdf"},{"key":"5_CR29","doi-asserted-by":"publisher","unstructured":"Sharma, G., Umapathy, K., Krishnan, S.: Trends in audio signal feature extraction methods. Appl. Acoust. 158, 107020 (2020). https:\/\/doi.org\/10.1016\/j.apacoust.2019.107020, https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0003682X19308795","DOI":"10.1016\/j.apacoust.2019.107020"},{"key":"5_CR30","doi-asserted-by":"publisher","unstructured":"Silva, D.F., Yeh, C.M., Zhu, Y., Batista, G.E.A.P.A., Keogh, E.J.: Fast similarity matrix profile for music analysis and exploration. IEEE Trans. Multim. 21(1), 29\u201338 (2019). https:\/\/doi.org\/10.1109\/TMM.2018.2849563","DOI":"10.1109\/TMM.2018.2849563"},{"key":"5_CR31","doi-asserted-by":"publisher","DOI":"10.7717\/peerj.13152","volume":"10","author":"D Stowell","year":"2022","unstructured":"Stowell, D.: Computational bioacoustics with deep learning: a review and roadmap. PeerJ 10, e13152 (2022). 
https:\/\/doi.org\/10.7717\/peerj.13152","journal-title":"PeerJ"},{"key":"5_CR32","unstructured":"Tan, M., Le, Q.V.: Efficientnet: rethinking model scaling for convolutional neural networks. In: Proceedings of the 36th International Conference on Machine Learning, (ICML) Long Beach, California, USA. Proceedings of Machine Learning Research, vol. 97, pp. 6105\u20136114. PMLR (2019). arXiv:1905.11946"},{"key":"5_CR33","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers and distillation through attention. In: Proceedings of the 38th International Conference on Machine Learning (ICML). Proceedings of Machine Learning Research, vol. 139, pp. 10347\u201310357. PMLR (2021). http:\/\/proceedings.mlr.press\/v139\/touvron21a.html"},{"key":"5_CR34","unstructured":"Turian, J., Schuller, B.W., Herremans, D., Kirchoff, K., Perera, P.G., Esling, P. (eds.): HEAR: Holistic Evaluation of Audio Representations (NeurIPS 2021 Competition), Proceedings of Machine Learning Research, vol. 166. PMLR (2022)"},{"key":"5_CR35","doi-asserted-by":"publisher","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017). https:\/\/doi.org\/10.5555\/3295222.3295349","DOI":"10.5555\/3295222.3295349"},{"key":"5_CR36","unstructured":"Xeno-canto: Sharing bird sounds from around the world. https:\/\/www.xeno-canto.org\/"},{"key":"5_CR37","doi-asserted-by":"publisher","unstructured":"Yang, Y., et al.: Torchaudio: building blocks for audio and speech processing. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Virtual and Singapore, pp. 6982\u20136986. IEEE (2022). https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9747236","DOI":"10.1109\/ICASSP43922.2022.9747236"},{"key":"5_CR38","unstructured":"Zeghidour, N., Teboul, O., de Chaumont Quitry, F., Tagliasacchi, M.: LEAF: a learnable frontend for audio classification. 
In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=jM76BCb6F9m"}],"container-title":["Communications in Computer and Information Science","Intelligent Systems and Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-46338-9_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,4]],"date-time":"2023-11-04T17:05:00Z","timestamp":1699117500000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-46338-9_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,5]]},"ISBN":["9783031463372","9783031463389"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-46338-9_5","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,11,5]]},"assertion":[{"value":"5 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ISPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Systems and Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hammamet","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tunisia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference 
Information"}},{"value":"11 May 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 May 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ispr22023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ispr2023.sciencesconf.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"129","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"44","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"34% - The value is computed by the equation \"Number 
of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}