{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T09:50:47Z","timestamp":1774950647691,"version":"3.50.1"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783031353819","type":"print"},{"value":"9783031353826","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-35382-6_4","type":"book-chapter","created":{"date-parts":[[2023,6,21]],"date-time":"2023-06-21T12:02:39Z","timestamp":1687348959000},"page":"32-46","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Deep Learning-Based Music Instrument Recognition: Exploring Learned Feature Representations"],"prefix":"10.1007","author":[{"given":"Michael","family":"Taenzer","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stylianos I.","family":"Mimilakis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jakob","family":"Abe\u00dfer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,6,22]]},"reference":[{"issue":"8","key":"4_CR1","doi-asserted-by":"publisher","first-page":"1798","DOI":"10.1109\/TPAMI.2013.50","volume":"35","author":"Y Bengio","year":"2013","unstructured":"Bengio, Y., Courville, A., Vincent, P.: Representation learning: a review and new perspectives. IEEE Trans. Pattern Anal. Mach. Intell. 35(8), 1798\u20131828 (2013). https:\/\/doi.org\/10.1109\/TPAMI.2013.50","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"4_CR2","doi-asserted-by":"crossref","unstructured":"Drossos, K., Adavanne, S., Virtanen, T.: Automated audio captioning with recurrent neural networks. In: Proceedings of the IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), New Paltz, New York, USA (2017)","DOI":"10.1109\/WASPAA.2017.8170058"},{"key":"4_CR3","unstructured":"Engel, J., et al.: Neural audio synthesis of musical notes with WaveNet autoencoders. arXiv preprint arXiv:1704.01279 (2017)"},{"key":"4_CR4","unstructured":"Favory, X., Drossos, K., Virtanen, T., Serra, X.: Coala: co-aligned autoencoders for learning semantically enriched audio representations. arXiv preprint arXiv:2006.08386 (2020)"},{"key":"4_CR5","doi-asserted-by":"crossref","unstructured":"Font, F., Roma, G., Serra, X.: Freesound technical demo. In: Proceedings of the 21st ACM International Conference on Multimedia, New York, NY, USA, pp. 411\u2013412 (2013)","DOI":"10.1145\/2502081.2502245"},{"key":"4_CR6","doi-asserted-by":"crossref","unstructured":"Gemmeke, J.F., et al.: Audio set: an ontology and human-labeled dataset for audio events. In: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 776\u2013780 (2017)","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"4_CR7","unstructured":"Glorot, X., Bengio, Y.: Understanding the difficulty of training deep feedforward neural networks. In: Proceedings of the International Conference on Artificial Intelligence and Statistics (AISTATS 2010), pp. 249\u2013256 (2010)"},{"key":"4_CR8","unstructured":"Gomez, J., Abe\u00dfer, J., Cano, E.: Jazz solo instrument classification with convolutional neural networks, source separation, and transfer learning. In: Proceedings of the 19th International Society of Music Information Retrieval Conference (ISMIR), Paris, France, pp. 577\u2013584 (2018)"},{"key":"4_CR9","volume-title":"Deep Learning","author":"I Goodfellow","year":"2016","unstructured":"Goodfellow, I., Bengio, Y., Courville, A.: Deep Learning. MIT Press, Cambridge (2016)"},{"key":"4_CR10","unstructured":"Gururani, S., Sharma, M., Lerch, A.: An attention mechanism for musical instrument recognition. In: Proceedings of the 20th International Society for Music Information Retrieval Conference (ISMIR), Delft, The Netherlands, pp. 83\u201390 (2019)"},{"issue":"1","key":"4_CR11","doi-asserted-by":"publisher","first-page":"208","DOI":"10.1109\/TASLP.2016.2632307","volume":"25","author":"Y Han","year":"2017","unstructured":"Han, Y., Kim, J., Lee, K.: Deep convolutional neural networks for predominant instrument recognition in polyphonic music. IEEE\/ACM Trans. Audio Speech Lang. Process. (TASLP) 25(1), 208\u2013221 (2017)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process. (TASLP)"},{"key":"4_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. arXiv preprint arXiv:1512.03385 (2015)","DOI":"10.1109\/CVPR.2016.90"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"Hershey, S., et al.: CNN architectures for large-scale audio classification. In: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), New Orleans, LA, USA, pp. 131\u2013135 (2017)","DOI":"10.1109\/ICASSP.2017.7952132"},{"key":"4_CR14","unstructured":"Humphrey, E.J., Durand, S., Mcfee, B.: OpenMIC-2018: an open data-set for multiple instrument recognition. In: Proceedings of the 19th International Society for Music Information Retrieval Conference (ISMIR), Paris, France, pp. 438\u2013444 (2018)"},{"key":"4_CR15","unstructured":"Hung, Y.N., Yang, Y.H.: Frame-level instrument recognition by timbre and pitch. In: Proceedings of the 19th International Society for Music Information Retrieval Conference (ISMIR), Paris, France, pp. 135\u2013142 (2018)"},{"key":"4_CR16","doi-asserted-by":"publisher","unstructured":"Jansen, A., Gemmeke, J.F., Ellis, D.P.W., Liu, X., Lawrence, W., Freedman, D.: Large-scale audio event discovery in one million YouTube videos. In: Proceedings of the International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 786\u2013790 (2017). https:\/\/doi.org\/10.1109\/ICASSP.2017.7952263","DOI":"10.1109\/ICASSP.2017.7952263"},{"key":"4_CR17","doi-asserted-by":"publisher","first-page":"590","DOI":"10.14419\/ijet.v7i3.34.19388","volume":"7","author":"D Kim","year":"2018","unstructured":"Kim, D., Sung, T., Cho, S., Lee, G., Sohn, C.: A single predominant instrument recognition of polyphonic music using CNN-based timbre analysis. Int. J. Eng. Technol. (UAE) 7, 590\u2013593 (2018)","journal-title":"Int. J. Eng. Technol. (UAE)"},{"key":"4_CR18","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: Proceedings of the 3rd International Conference on Learning Representations (ICLR) (2015)"},{"key":"4_CR19","unstructured":"Li, P., Qian, J., Wang, T.: Automatic instrument recognition in polyphonic music using convolutional neural networks. arXiv preprint arXiv:1511.05520 (2015)"},{"key":"4_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"80","DOI":"10.1007\/978-3-030-43859-3_6","volume-title":"Artificial Intelligence in Music, Sound, Art and Design","author":"X Li","year":"2020","unstructured":"Li, X., Wang, K., Soraghan, J., Ren, J.: Fusion of Hilbert-Huang transform and deep convolutional neural network for predominant musical instruments recognition. In: Romero, J., Ek\u00e1rt, A., Martins, T., Correia, J. (eds.) EvoMUSART 2020. LNCS, vol. 12103, pp. 80\u201389. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-43859-3_6"},{"key":"4_CR21","unstructured":"Long, M., Cao, Y., Wang, J., Jordan, M.I.: Learning transferable features with deep adaptation networks. In: Proceedings of the 32nd International Conference on Machine Learning (ICML), Lille, France, vol. 37, pp. 97\u2013105 (2015)"},{"key":"4_CR22","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"429","DOI":"10.1007\/978-3-030-43887-6_35","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"SI Mimilakis","year":"2020","unstructured":"Mimilakis, S.I., Weiss, C., Arifi-M\u00fcller, V., Abe\u00dfer, J., M\u00fcller, M.: Cross-version singing voice detection in opera recordings: challenges for supervised learning. In: Cellier, P., Driessens, K. (eds.) ECML PKDD 2019. CCIS, vol. 1168, pp. 429\u2013436. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-43887-6_35"},{"key":"4_CR23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-74048-3","volume-title":"Information Retrieval for Music and Motion","author":"M M\u00fcller","year":"2007","unstructured":"M\u00fcller, M.: Information Retrieval for Music and Motion. Springer, Heidelberg (2007)"},{"key":"4_CR24","unstructured":"Nair, V., Hinton, G.E.: Rectified linear units improve restricted Boltzmann machines. In: Proceedings of the 27th International Conference on International Conference on Machine Learning (ICML), pp. 807\u2013814. Omnipress, Madison (2010)"},{"key":"4_CR25","doi-asserted-by":"crossref","unstructured":"Oquab, M., Bottou, L., Laptev, I., Sivic, J.: Learning and transferring mid-level image representations using convolutional neural networks. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1717\u20131724 (2014)","DOI":"10.1109\/CVPR.2014.222"},{"key":"4_CR26","unstructured":"Park, T., Lee, T.: Musical instrument sound classification with deep convolutional neural network using feature fusion approach. arXiv preprint arXiv:1512.07370 (2015)"},{"key":"4_CR27","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.: GloVe: global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"issue":"8","key":"4_CR28","doi-asserted-by":"publisher","first-page":"1307","DOI":"10.1109\/TASLP.2018.2825440","volume":"26","author":"Z Rafii","year":"2018","unstructured":"Rafii, Z., Liutkus, A., St\u00f6ter, F.R., Mimilakis, S.I., FitzGerald, D., Pardo, B.: An overview of lead and accompaniment separation in music. IEEE\/ACM Trans. Audio Speech Lang. Process. 26(8), 1307\u20131335 (2018)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"4_CR29","unstructured":"Scheirer, E.D.: Music-listening systems. Ph.D. thesis, Massachusetts Institute of Technology (2000)"},{"key":"4_CR30","unstructured":"Smaragdis, P.: Redundancy reduction for computational audition, a unifying approach. Ph.D. thesis, Massachusetts Institute of Technology (2001)"},{"key":"4_CR31","unstructured":"Taenzer, M., Abe\u00dfer, J., Mimilakis, S.I., Wei\u00df, C., M\u00fcller, M., Lukashevich, H.: Investigating CNN-based instrument family recognition for western classical music recordings. In: Proceedings of the 20th International Society for Music Information Retrieval Conference (ISMIR), Delft, The Netherlands, pp. 612\u2013619 (2019)"},{"key":"4_CR32","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Guyon, I., et al. (eds.) Proceedings of the 30th International Conference Advances in Neural Information Processing Systems (NeurIPS), pp. 5998\u20136008. Curran Associates, Inc. (2017)"},{"key":"4_CR33","doi-asserted-by":"crossref","unstructured":"Vincent, P., Larochelle, H., Bengio, Y., Manzagol, P.A.: Extracting and composing robust features with denoising autoencoders. In: Proceedings of the 25th International Conference on Machine Learning (ICML), Helsinki, Finland, pp. 1096\u20131103. ACM (2008)","DOI":"10.1145\/1390156.1390294"},{"key":"4_CR34","unstructured":"Watcharasupat, K., Gururani, S., Lerch, A.: Visual attention for musical instrument recognition. arXiv preprint arXiv:2006.09640 (2020)"},{"key":"4_CR35","unstructured":"Wu, Y., He, K.: Group normalization. arXiv preprint arXiv:1803.08494 (2018)"}],"container-title":["Lecture Notes in Computer Science","Music in the AI Era"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-35382-6_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,21]],"date-time":"2023-06-21T12:03:01Z","timestamp":1687348981000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-35382-6_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031353819","9783031353826"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-35382-6_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"22 June 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CMMR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Computer Music Multidisciplinary Research","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 November 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 November 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cmmr2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.cmmr2021.gttm.jp\/committee\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easychair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"48","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"50% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}