{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,15]],"date-time":"2026-01-15T07:18:30Z","timestamp":1768461510426,"version":"3.49.0"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2013,7,12]],"date-time":"2013-07-12T00:00:00Z","timestamp":1373587200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J Intell Inf Syst"],"published-print":{"date-parts":[[2013,12]]},"DOI":"10.1007\/s10844-013-0248-5","type":"journal-article","created":{"date-parts":[[2013,7,11]],"date-time":"2013-07-11T07:13:07Z","timestamp":1373526787000},"page":"461-481","source":"Crossref","is-referenced-by-count":89,"title":["Feature learning and deep architectures: new directions for music informatics"],"prefix":"10.1007","volume":"41","author":[{"given":"Eric J.","family":"Humphrey","sequence":"first","affiliation":[]},{"given":"Juan P.","family":"Bello","sequence":"additional","affiliation":[]},{"given":"Yann","family":"LeCun","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2013,7,12]]},"reference":[{"key":"248_CR1","unstructured":"And\u00e9n, J., & Mallat, S. (2011). Multiscale scattering for audio classification. In Proc. 12th Int. Conf. on Music Information Retrieval (ISMIR)."},{"issue":"5","key":"248_CR2","doi-asserted-by":"crossref","first-page":"1035","DOI":"10.1109\/TSA.2005.851998","volume":"13","author":"JP Bello","year":"2005","unstructured":"Bello, J.P., Daudet, L., Abdallah, S., Duxbury, C., Davies, M., Sandler, M. (2005). A tutorial on onset detection in music signals. IEEE Transactions on Audio, Speech and Language Processing, 13(5), 1035\u20131047.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"issue":"1","key":"248_CR3","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2200000006","volume":"2","author":"Y Bengio","year":"2009","unstructured":"Bengio, Y. (2009). Learning deep architectures for AI. Foundations and Trends in Machine Learning, 2(1), 1\u2013127.","journal-title":"Foundations and Trends in Machine Learning"},{"key":"248_CR4","unstructured":"Bengio, Y., Courville, A.C., Vincent, P. (2012). Unsupervised feature learning and deep learning: a review and new perspectives. arXiv:1206.5538 ."},{"key":"248_CR5","doi-asserted-by":"crossref","unstructured":"Bengio, Y., & LeCun, Y. (2007). Scaling learning algorithms towards AI. In Large-Scale Kernel Machines (Vol.\u00a034).","DOI":"10.7551\/mitpress\/7496.003.0016"},{"issue":"2","key":"248_CR6","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1162\/014892604323112257","volume":"28","author":"A Berenzweig","year":"2004","unstructured":"Berenzweig, A., Logan, B., Ellis, D.P., Whitman, B. (2004). A large-scale evaluation of acoustic and subjective music-similarity measures. Computer Music Journal, 28(2), 63\u201376.","journal-title":"Computer Music Journal"},{"key":"248_CR7","unstructured":"Bergstra, J., Breuleux, O., Bastien, F., Lamblin, P., Pascanu, R., Desjardins, G., Turian, J., Warde-Farley, D., Bengio, Y. (2010). Theano: A CPU and GPU math expression compiler. In Proc. of the Python for Scientific computing conf. (SciPy)."},{"key":"248_CR8","unstructured":"Bertin-Mahieux, T., & Ellis, D.P.W. (2012). Large-scale cover song recognition using the 2D fourier transform magnitude. In Proc. 13th Int. Conf. on Music Information Retrieval (ISMIR) (pp.\u00a0241\u2013246)."},{"key":"248_CR9","unstructured":"Bishop, C. (2006). Pattern recognition and machine learning. Springer."},{"key":"248_CR10","doi-asserted-by":"crossref","unstructured":"Cabral, G., & Pachet, F. (2006). Recognizing chords with EDS: Part One. Computer Music Modeling and Retrieval (pp.\u00a0185\u2013195).","DOI":"10.1007\/11751069_17"},{"issue":"4","key":"248_CR11","doi-asserted-by":"crossref","first-page":"668","DOI":"10.1109\/JPROC.2008.916370","volume":"96","author":"M Casey","year":"2008","unstructured":"Casey, M., Veltkamp, R., Goto, M., Leman, M., Rhodes, C., Slaney, M. (2008). Content-based music information retrieval: current directions and future challenges. Proceedings of the IEEE, 96(4), 668\u2013696.","journal-title":"Proceedings of the IEEE"},{"key":"248_CR12","unstructured":"Cho, T., & Bello, J.P. (2011). A feature smoothing method for chord recognition using recurrence plots. In Proc. 12th Int. Conf. on Music Information Retrieval (ISMIR)."},{"issue":"2","key":"248_CR13","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1080\/09298215.2011.576318","volume":"40","author":"P Chordia","year":"2011","unstructured":"Chordia, P., Sastry, A., Sent\u00fcrk, S. (2011). Predictive tabla modelling using variable-length markov and hidden markov models. Journal of New Music Research, 40(2), 105\u2013118.","journal-title":"Journal of New Music Research"},{"key":"248_CR14","unstructured":"Collobert, R., Kavukcuoglu, K., Farabet, C. (2011). Torch7: A matlab-like environment for machine learning. In BigLearn, NIPS Workshop."},{"key":"248_CR15","unstructured":"Dannenberg, R. (1984). An on-line algorithm for real-time accompaniment. In Proc. Int. Computer Music Conf. (pp.\u00a0193\u2013198)."},{"issue":"4","key":"248_CR16","doi-asserted-by":"crossref","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S Davis","year":"1980","unstructured":"Davis, S., & Mermelstein, P. (1980). Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Transactions on Acoustics, Speech and Signal Processing, 28(4), 357\u2013366.","journal-title":"IEEE Transactions on Acoustics, Speech and Signal Processing"},{"key":"248_CR17","unstructured":"Dieleman, S., Brakel, P., Schrauwen, B. (2011). Audio-based music classification with a pretrained convolutional network. In Proc. 12th Int. Conf. on Music Information Retrieval (ISMIR)."},{"issue":"1","key":"248_CR18","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1080\/09298210701653310","volume":"36","author":"S Dixon","year":"2007","unstructured":"Dixon, S. (2007). Evaluation of the audio beat tracking system Beatroot. Journal of New Music Research, 36(1), 39\u201350.","journal-title":"Journal of New Music Research"},{"issue":"2\u20133","key":"248_CR19","first-page":"177","volume":"6","author":"W Edward","year":"1994","unstructured":"Edward, W., & Kolen, J.F. (1994). Resonance and the perception of musical meter. Connection Science, 6(2\u20133), 177\u2013208.","journal-title":"Connection Science"},{"key":"248_CR20","unstructured":"Flexer, A., Schnitzer, D., Schlueter, J. (2012). A MIREX meta-analysis of hubness in audio music similarity. In Proc. 13th Int. Conf. on Music Information Retrieval (ISMIR) (pp.\u00a0175\u2013180)."},{"key":"248_CR21","unstructured":"Fujishima, T. (1999). Realtime chord recognition of musical sound: a system using common lisp music. In Proc. int. computer music conf."},{"key":"248_CR22","unstructured":"Goto, M., & Muraoka, Y. (1995). A real-time beat tracking system for audio signals. In Proc. int. computer music conf. (pp.\u00a0171\u2013174)."},{"issue":"6","key":"248_CR23","doi-asserted-by":"crossref","first-page":"1688","DOI":"10.1109\/TASL.2010.2096216","volume":"19","author":"P Grosche","year":"2011","unstructured":"Grosche, P., & M\u00fcller, M. (2011). Extracting predominant local pulse information from music recordings. IEEE Transactions on Audio, Speech and Language Processing, 19(6), 1688\u20131701.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"key":"248_CR24","doi-asserted-by":"crossref","unstructured":"Hadsell, R., Chopra, S., LeCun, Y. (2006). Dimensionality reduction by learning an invariant mapping. In Proc. Computer Vision and Pattern Recognition conf. (CVPR). IEEE Press.","DOI":"10.1109\/CVPR.2006.100"},{"key":"248_CR25","unstructured":"Hamel, P., Wood, S., Eck, D. (2009). Automatic identification of instrument classes in polyphonic and poly-instrument audio. In Proc. 10th Int. Conf. on Music Information Retrieval (ISMIR)."},{"key":"248_CR26","author":"G Hinton","year":"2012","unstructured":"Hinton, G., Deng, L., Yu, D., Dahl, G., Mohamed, A.r., Jaitly, N., Senior, A., Vanhoucke, V., Nguyen, P., Sainath, T., Kingsbury, B. (2012). Deep neural networks for acoustic modeling in speech recognition. IEEE Signal Processing Magazine. doi: 10.1109\/MSP.2012.2205597 .","journal-title":"IEEE Signal Processing Magazine"},{"issue":"7","key":"248_CR27","doi-asserted-by":"crossref","first-page":"1527","DOI":"10.1162\/neco.2006.18.7.1527","volume":"18","author":"GE Hinton","year":"2006","unstructured":"Hinton, G.E., Osindero, S., Teh, Y. (2006). A fast learning algorithm for deep belief nets. Neural Computation, 18(7), 1527\u20131554.","journal-title":"Neural Computation"},{"key":"248_CR28","doi-asserted-by":"crossref","unstructured":"Humphrey, E.J., & Bello, J.P. (2012). Rethinking automatic chord recognition with convolutional neural networks. In Proc. Int. Conf. on Machine Learning and Applications.","DOI":"10.1109\/ICMLA.2012.220"},{"key":"248_CR29","unstructured":"Humphrey, E.J., Bello, J.P., LeCun, Y. (2012). Moving beyond feature design: Deep architectures and automatic feature learning in music informatics. In Proc. 13th Int. Conf. on Music Information Retrieval (ISMIR)."},{"key":"248_CR30","unstructured":"Humphrey, E.J., Glennon, A.P., Bello, J.P. (2010). Non-linear semantic embedding for organizing large instrument sample libraries. In Proc. ICMLA."},{"key":"248_CR31","doi-asserted-by":"crossref","unstructured":"Klapuri, A., & Davy, M. (2006). Signal processing methods for music transcription. Springer.","DOI":"10.1007\/0-387-32845-9"},{"key":"248_CR32","unstructured":"Le, Q., Monga, R., Devin, M., Corrado, G., Chen, K., Ranzato, M., Dean, J., Ng, A. (2012). Building high-level features using large scale unsupervised learning. In Proc. Int. Conf. on Machine Learning (ICML)."},{"key":"248_CR33","unstructured":"Le, Q.V., Ngiam, J., Chen, Z., Chia, D., Koh, P.W., Ng, A.Y. (2010). Tiled convolutional neural networks. In Advances in Neural Information Processing Systems (Vol.\u00a023)."},{"issue":"6","key":"248_CR34","doi-asserted-by":"crossref","first-page":"1631","DOI":"10.1162\/neco.2008.04-07-510","volume":"20","author":"N Roux Le","year":"2008","unstructured":"Le\u00a0Roux, N., & Bengio, Y. (2008). Representational power of restricted Boltzmann machines and deep belief networks. Neural Computation, 20(6), 1631\u20131649.","journal-title":"Neural Computation"},{"key":"248_CR35","doi-asserted-by":"crossref","unstructured":"LeCun, Y. (2012). Learning invariant feature hierarchies. In Computer vision\u2013ECCV 2012. Workshops and demonstrations (pp.\u00a0496\u2013505). Springer.","DOI":"10.1007\/978-3-642-33863-2_51"},{"key":"248_CR36","unstructured":"LeCun, Y., Chopra, S., Hadsell, R., Ranzato, M., Huang, F. (2006). A tutorial on energy-based learning. Predicting Structured Data."},{"key":"248_CR37","unstructured":"Leveau, P., Sodoyer, D., Daudet, L. (2007). Automatic instrument recognition in a polyphonic mixture using sparse representations. In Proc. 8th Int. Conf. on Music Information Retrieval (ISMIR)."},{"key":"248_CR38","doi-asserted-by":"crossref","unstructured":"Levy, M., Noland, K., Sandler, M. (2007). A comparison of timbral and harmonic music segmentation algorithms. In 2007 IEEE Int. Conf. on Acoustics, Speech and Signal Processing (ICASSP) (Vol.\u00a04, pp.\u00a01433\u20131436). IEEE.","DOI":"10.1109\/ICASSP.2007.367349"},{"issue":"3","key":"248_CR39","doi-asserted-by":"crossref","first-page":"383","DOI":"10.1109\/TMM.2009.2012913","volume":"11","author":"M Levy","year":"2009","unstructured":"Levy, M., & Sandler, M. (2009). Music information retrieval using social tags and audio. IEEE Transactions on Multimedia, 11(3), 383\u2013395.","journal-title":"IEEE Transactions on Multimedia"},{"issue":"9","key":"248_CR40","doi-asserted-by":"crossref","first-page":"2390","DOI":"10.1162\/NECO_a_00011","volume":"22","author":"R Lyon","year":"2010","unstructured":"Lyon, R., Rehn, M., Bengio, S., Walters, T., Chechik, G. (2010). Sound retrieval and ranking using sparse auditory representations. Neural computation, 22(9), 2390\u20132416.","journal-title":"Neural computation"},{"key":"248_CR41","unstructured":"Mandel, M., & Ellis, D. (2005). Song-level features and support vector machines for music classification. In Proc. 6th Int. Conf. on Music Information Retrieval (ISMIR)."},{"issue":"6","key":"248_CR42","doi-asserted-by":"crossref","first-page":"1280","DOI":"10.1109\/TASL.2009.2032947","volume":"18","author":"M Mauch","year":"2010","unstructured":"Mauch, M., & Dixon, S. (2010). Simultaneous estimation of chords and musical context from audio. IEEE Transactions on Audio, Speech and Language Processing, 18(6), 1280\u20131289.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"key":"248_CR43","unstructured":"McFee, B., & Lanckriet, G. (2012). Hypergraph models of playlist dialects. In Proc. 13th Int. Conf. on Music Information Retrieval (ISMIR)."},{"issue":"6","key":"248_CR44","doi-asserted-by":"crossref","first-page":"1088","DOI":"10.1109\/JSTSP.2011.2112333","volume":"5","author":"M M\u00fcller","year":"2011","unstructured":"M\u00fcller, M., Ellis, D., Klapuri, A., Richard, G. (2011). Signal processing for music analysis. Journal Selected Topics in Signal Processing, 5(6), 1088\u20131110.","journal-title":"Journal Selected Topics in Signal Processing"},{"key":"248_CR45","unstructured":"M\u00fcller, M., & Ewert, S. (2011). Chroma Toolbox: MATLAB implementations for extracting variants of chroma-based audio features. In Proc. 12th Int. Conf. on Music Information Retrieval (ISMIR). Miami, USA."},{"key":"248_CR46","unstructured":"Nam, J., Ngiam, J., Lee, H., Slaney, M. (2011). A classification-based polyphonic piano transcription approach using learned feature representations. In Proc. 12th Int. Conf. on Music Information Retrieval (ISMIR)."},{"issue":"1","key":"248_CR47","doi-asserted-by":"crossref","first-page":"588","DOI":"10.1121\/1.421129","volume":"103","author":"ED Scheirer","year":"1998","unstructured":"Scheirer, E.D. (1998). Tempo and beat analysis of acoustic musical signals. Journal of the Acoustical Society of America, 103(1), 588\u2013601.","journal-title":"Journal of the Acoustical Society of America"},{"key":"248_CR48","unstructured":"Schmidt, E.M., & Kim, Y.E. (2011). Modeling the acoustic structure of musical emotion with deep belief networks. In Proc. neural information processing systems."},{"key":"248_CR49","unstructured":"Sheh, A., & Ellis, D.P.W. (2003). Chord segmentation and recognition using em-trained hidden markov models. In Proc. 4th Int. Conf. on Music Information Retrieval (ISMIR)."},{"issue":"2","key":"248_CR50","doi-asserted-by":"crossref","first-page":"12","DOI":"10.1109\/MMUL.2011.34","volume":"18","author":"M Slaney","year":"2011","unstructured":"Slaney, M. (2011). Web-scale multimedia analysis: does content matter? IEEE Multimedia, 18(2), 12\u201315.","journal-title":"IEEE Multimedia"},{"key":"248_CR51","doi-asserted-by":"crossref","unstructured":"Sumi, K., Arai, M., Fujishima, T., Hashimoto, S. (2012). A music retrieval system using chroma and pitch features based on conditional random fields. In 2012 IEEE Int. Conf. on Acoustics, Speech and Signal Processing (ICASSP) (pp.\u00a01997\u20132000). IEEE.","DOI":"10.1109\/ICASSP.2012.6288299"},{"key":"248_CR52","unstructured":"Zils, A., & Pachet, F. (2004). Automatic extraction of music descriptors from acoustic signals using EDS. In Proc. AES."}],"container-title":["Journal of Intelligent Information Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10844-013-0248-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10844-013-0248-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10844-013-0248-5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,14]],"date-time":"2024-05-14T10:35:19Z","timestamp":1715682919000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10844-013-0248-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,7,12]]},"references-count":52,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2013,12]]}},"alternative-id":["248"],"URL":"https:\/\/doi.org\/10.1007\/s10844-013-0248-5","relation":{},"ISSN":["0925-9902","1573-7675"],"issn-type":[{"value":"0925-9902","type":"print"},{"value":"1573-7675","type":"electronic"}],"subject":[],"published":{"date-parts":[[2013,7,12]]}}}