{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T05:01:52Z","timestamp":1742965312757,"version":"3.40.3"},"publisher-location":"Cham","reference-count":37,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031708923"},{"type":"electronic","value":"9783031708930"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70893-0_19","type":"book-chapter","created":{"date-parts":[[2024,8,29]],"date-time":"2024-08-29T11:02:54Z","timestamp":1724929374000},"page":"260-272","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Leveraging Weakly Supervised and\u00a0Multiple Instance Learning for\u00a0Multi-label Classification of\u00a0Passive Acoustic Monitoring Data"],"prefix":"10.1007","author":[{"given":"Ilira","family":"Troshani","sequence":"first","affiliation":[]},{"given":"Thiago S.","family":"Gouv\u00eaa","sequence":"additional","affiliation":[]},{"given":"Daniel","family":"Sonntag","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,30]]},"reference":[{"issue":"1","key":"19_CR1","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1093\/biosci\/biy147.","volume":"69","author":"LSM Sugai","year":"2019","unstructured":"Sugai, L.S.M., Silva, T.S.F., Ribeiro, J.W., Llusia, D.: Terrestrial passive acoustic monitoring: review and perspectives. Bioscience 69(1), 15\u201325 (2019). https:\/\/doi.org\/10.1093\/biosci\/biy147. Accessed 2023-03-01","journal-title":"Bioscience"},{"issue":"1","key":"19_CR2","doi-asserted-by":"publisher","first-page":"792","DOI":"10.1038\/s41467-022-27980-y","volume":"13","author":"D Tuia","year":"2022","unstructured":"Tuia, D., et al.: Perspectives in machine learning for wildlife conservation. Nat. Commun. 13(1), 792 (2022)","journal-title":"Nat. Commun."},{"key":"19_CR3","doi-asserted-by":"publisher","unstructured":"Gouv\u00eaa, T.S., et al.: Interactive machine learning solutions for acoustic monitoring of animal wildlife in biosphere reserves. In: Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence, pp. 6405\u20136413. International Joint Conferences on Artificial Intelligence Organization, Macau, SAR, China (2023). https:\/\/doi.org\/10.24963\/ijcai.2023\/711, https:\/\/www.ijcai.org\/proceedings\/2023\/711. Accessed 16 Aug 2023","DOI":"10.24963\/ijcai.2023\/711"},{"key":"19_CR4","doi-asserted-by":"publisher","first-page":"13152","DOI":"10.7717\/peerj.13152","volume":"10","author":"D Stowell","year":"2022","unstructured":"Stowell, D.: Computational bioacoustics with deep learning: a review and roadmap. PeerJ 10, 13152 (2022). https:\/\/doi.org\/10.7717\/peerj.13152. Accessed 2023-08-01","journal-title":"PeerJ"},{"issue":"1763","key":"19_CR5","doi-asserted-by":"publisher","first-page":"20170386","DOI":"10.1098\/rstb.2017.0386","volume":"374","author":"EK Meineke","year":"2018","unstructured":"Meineke, E.K., Davies, T.J., Daru, B.H., Davis, C.C.: Biological collections for understanding biodiversity in the Anthropocene. Philos. Trans. Royal Soc. B: Biol. Sci. 374(1763), 20170386 (2018). https:\/\/doi.org\/10.1098\/rstb.2017.0386. Accessed 2023-08-01","journal-title":"Philos. Trans. Royal Soc. B: Biol. Sci."},{"issue":"5","key":"19_CR6","doi-asserted-by":"publisher","first-page":"590","DOI":"10.1080\/09524622.2019.1633567","volume":"29","author":"S Dena","year":"2020","unstructured":"Dena, S., Rebou\u00e7as, R., Augusto-Alves, G., Zornosa-Torres, C., Pontes, M.R., Toledo, L.F.: How much are we losing in not depositing anuran sound recordings in scientific collections? Bioacoustics 29(5), 590\u2013601 (2020). https:\/\/doi.org\/10.1080\/09524622.2019.1633567. Accessed 2023-08-01","journal-title":"Bioacoustics"},{"key":"19_CR7","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1016\/j.ecolind.2018.12.021","volume":"99","author":"LSM Sugai","year":"2019","unstructured":"Sugai, L.S.M., Llusia, D.: Bioacoustic time capsules: using acoustic monitoring to document biodiversity. Ecol. Ind. 99, 149\u2013152 (2019). https:\/\/doi.org\/10.1016\/j.ecolind.2018.12.021. Accessed 2023-08-01","journal-title":"Ecol. Ind."},{"key":"19_CR8","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"19_CR9","doi-asserted-by":"publisher","unstructured":"Simonyan, K., Zisserman, A.: Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv (2015). https:\/\/doi.org\/10.48550\/arXiv.1409.1556 . http:\/\/arxiv.org\/abs\/1409.1556. Accessed 02 Aug 2023","DOI":"10.48550\/arXiv.1409.1556"},{"key":"19_CR10","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Van Der\u00a0Maaten, L., Weinberger, K.Q.: Densely connected convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4700\u20134708 (2017)","DOI":"10.1109\/CVPR.2017.243"},{"key":"19_CR11","doi-asserted-by":"publisher","first-page":"101236","DOI":"10.1016\/j.ecoinf.2021.101236","volume":"61","author":"S Kahl","year":"2021","unstructured":"Kahl, S., Wood, C.M., Eibl, M., Klinck, H.: BirdNET: a deep learning solution for avian diversity monitoring. Eco. Inform. 61, 101236 (2021). https:\/\/doi.org\/10.1016\/j.ecoinf.2021.101236. Accessed 2023-05-12","journal-title":"Eco. Inform."},{"key":"19_CR12","doi-asserted-by":"crossref","unstructured":"Tzirakis, P., Shiarella, A., Ewers, R., Schuller, B.W.: Computer audition for continuous rainforest occupancy monitoring: the case of Bornean gibbons\u2019 call detection (2020)","DOI":"10.21437\/Interspeech.2020-2655"},{"key":"19_CR13","doi-asserted-by":"publisher","unstructured":"\u00c7ak\u0131r, E., Parascandolo, G., Heittola, T., Huttunen, H., Virtanen, T.: Convolutional recurrent neural networks for polyphonic sound event detection. IEEE\/ACM Trans. Audio Speech Lang. Process. 25(6), 1291\u20131303 (2017). https:\/\/doi.org\/10.1109\/TASLP.2017.2690575 . Conference Name: IEEE\/ACM Transactions on Audio, Speech, and Language Processing","DOI":"10.1109\/TASLP.2017.2690575"},{"key":"19_CR14","doi-asserted-by":"publisher","first-page":"113390","DOI":"10.1016\/j.eswa.2020.113390","volume":"152","author":"J Xie","year":"2020","unstructured":"Xie, J., Hu, K., Zhu, M., Guo, Y.: Bioacoustic signal classification in continuous recordings: syllable-segmentation vs sliding-window. Expert Syst. Appl. 152, 113390 (2020)","journal-title":"Expert Syst. Appl."},{"key":"19_CR15","doi-asserted-by":"publisher","first-page":"101688","DOI":"10.1016\/j.ecoinf.2022.101688","volume":"70","author":"E Dufourq","year":"2022","unstructured":"Dufourq, E., Batist, C., Foquet, R., Durbach, I.: Passive acoustic monitoring of animal populations with transfer learning. Eco. Inform. 70, 101688 (2022). https:\/\/doi.org\/10.1016\/j.ecoinf.2022.101688. Accessed 2023-09-19","journal-title":"Eco. Inform."},{"key":"19_CR16","unstructured":"Kath, H., Serafini, P.P., Campos, I.B., Gouvea, T., Sonntag, D.: Leveraging transfer learning and active learning for sound event detection in passive acoustic monitoring of wildlife. In: 3rd Annual AAAI Workshop on AI to Accelerate Science and Engineering. AAAI Workshop on AI to Accelerate Science and Engineering (AI2ASE-2024), Befindet Sich AAAI, February 26, Vancouver, BC, Canada (2024)"},{"key":"19_CR17","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"19_CR18","doi-asserted-by":"publisher","unstructured":"Wang, Y., Li, J., Metze, F.: A comparison of five multiple instance learning pooling functions for sound event detection with weak labeling. In: ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 31\u201335 (2019). https:\/\/doi.org\/10.1109\/ICASSP.2019.8682847. ISSN: 2379-190X","DOI":"10.1109\/ICASSP.2019.8682847"},{"key":"19_CR19","doi-asserted-by":"crossref","unstructured":"Hershey, S., et al.: CNN architectures for large-scale audio classification. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 131\u2013135. IEEE (2017)","DOI":"10.1109\/ICASSP.2017.7952132"},{"key":"19_CR20","unstructured":"Sprengel, E., Jaggi, M., Kilcher, Y., Hofmann, T.: Audio Based Bird Species Identification using Deep Learning Techniques. LifeCLEF 2016 (2016)"},{"key":"19_CR21","doi-asserted-by":"publisher","unstructured":"Gemmeke, J.F., et al.: Audio set: an ontology and human-labeled dataset for audio events. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 776\u2013780 (2017). https:\/\/doi.org\/10.1109\/ICASSP.2017.7952261","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"19_CR22","doi-asserted-by":"publisher","unstructured":"Kumar, A., Raj, B.: Audio event detection using weakly labeled data. In: Proceedings of the 24th ACM International Conference on Multimedia, pp. 1038\u20131047 (2016). https:\/\/doi.org\/10.1145\/2964284.2964310 . arXiv:1605.02401 [cs]. http:\/\/arxiv.org\/abs\/1605.02401. Accessed 13 Sept 2023","DOI":"10.1145\/2964284.2964310"},{"key":"19_CR23","doi-asserted-by":"publisher","unstructured":"Xu, Y., Kong, Q., Wang, W., Plumbley, M.D.: Large-scale weakly supervised audio classification using gated convolutional neural network. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 121\u2013125 (2018). https:\/\/doi.org\/10.1109\/ICASSP.2018.8461975. ISSN: 2379-190X","DOI":"10.1109\/ICASSP.2018.8461975"},{"key":"19_CR24","doi-asserted-by":"publisher","unstructured":"Miyazaki, K., Komatsu, T., Hayashi, T., Watanabe, S., Toda, T., Takeda, K.: Weakly-supervised sound event detection with self-attention. In: ICASSP 2020 \u2013 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 66\u201370 (2020). https:\/\/doi.org\/10.1109\/ICASSP40776.2020.9053609 . ISSN: 2379-190X","DOI":"10.1109\/ICASSP40776.2020.9053609"},{"key":"19_CR25","doi-asserted-by":"crossref","unstructured":"Xin, Y., Yang, D., Zou, Y.: Audio pyramid transformer with domain adaption for weakly supervised sound event detection and audio classification. In: Proceedings of the Interspeech 2022, pp. 1546\u20131550 (2022)","DOI":"10.21437\/Interspeech.2022-10057"},{"key":"19_CR26","doi-asserted-by":"publisher","unstructured":"Chen, S., et al.: BEATs: Audio Pre-Training with Acoustic Tokenizers. arXiv (2022). https:\/\/doi.org\/10.48550\/arXiv.2212.09058, http:\/\/arxiv.org\/abs\/2212.09058. Accessed 03 Aug 2023","DOI":"10.48550\/arXiv.2212.09058"},{"key":"19_CR27","doi-asserted-by":"crossref","unstructured":"Jiang, J.-J., et al.: Whistle detection and classification for whales based on convolutional neural networks. Appl. Acoust. 150, 169\u2013178 (2019)","DOI":"10.1016\/j.apacoust.2019.02.007"},{"issue":"5","key":"19_CR28","doi-asserted-by":"publisher","first-page":"859","DOI":"10.1038\/s41386-018-0303-6","volume":"44","author":"KR Coffey","year":"2019","unstructured":"Coffey, K.R., Marx, R.E., Neumaier, J.F.: Deepsqueak: a deep learning-based system for detection and analysis of ultrasonic vocalizations. Neuropsychopharmacology 44(5), 859\u2013868 (2019)","journal-title":"Neuropsychopharmacology"},{"key":"19_CR29","doi-asserted-by":"publisher","first-page":"63853","DOI":"10.7554\/eLife.63853","volume":"11","author":"Y Cohen","year":"2022","unstructured":"Cohen, Y., Nicholson, D.A., Sanchioni, A., Mallaber, E.K., Skidanova, V., Gardner, T.J.: Automated annotation of birdsong with a neural network that segments spectrograms. Elife 11, 63853 (2022)","journal-title":"Elife"},{"key":"19_CR30","doi-asserted-by":"crossref","unstructured":"Ca\u00f1as, J.S., et al.: A dataset for benchmarking neotropical anuran calls identification in passive acoustic monitoring. Sci. Data 10(1), 771 (2023)","DOI":"10.1038\/s41597-023-02666-2"},{"key":"19_CR31","unstructured":"Yang, Y.-Y., et al.: TorchAudio: building blocks for audio and speech processing. arXiv preprint arXiv:2110.15018 (2021)"},{"key":"19_CR32","doi-asserted-by":"publisher","unstructured":"Hershey, S., et al.: CNN Architectures for Large-Scale Audio Classification. arXiv (2017). https:\/\/doi.org\/10.48550\/arXiv.1609.09430 . http:\/\/arxiv.org\/abs\/1609.09430. Accessed 11 Aug 2023","DOI":"10.48550\/arXiv.1609.09430"},{"key":"19_CR33","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"19_CR34","doi-asserted-by":"crossref","unstructured":"Park, D.S., et al.: SpecAugment: a simple data augmentation method for automatic speech recognition. arXiv preprint arXiv:1904.08779 (2019)","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"19_CR35","unstructured":"Troshani, I., Gouvea, T., Sonntag, D.: Leveraging sound collections for animal species classification with weakly supervised learning. In: 3rd Annual AAAI Workshop on AI to Accelerate Science and Engineering. AAAI Workshop on AI to Accelerate Science and Engineering (AI2ASE-2024), AAAI, Vancouver, Canada (2024)"},{"key":"19_CR36","unstructured":"Paszke, A., et al.: Pytorch: an imperative style, high-performance deep learning library. In: Advances in Neural Information Processing Systems 32, pp. 8024\u20138035. Curran Associates, Inc., ??? (2019). http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"key":"19_CR37","unstructured":"Shah, A., Kumar, A., Hauptmann, A.G., Raj, B.: A closer look at weak label learning for audio events. CoRR arXiv:abs\/1804.09288 (2018)"}],"container-title":["Lecture Notes in Computer Science","KI 2024: Advances in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70893-0_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,29]],"date-time":"2024-08-29T11:06:59Z","timestamp":1724929619000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70893-0_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031708923","9783031708930"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70893-0_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"30 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"KI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"German Conference on Artificial Intelligence (K\u00fcnstliche Intelligenz)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"W\u00fcrzburg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"47","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ki2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.informatik.uni-wuerzburg.de\/ki24\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}