{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,15]],"date-time":"2026-05-15T20:03:01Z","timestamp":1778875381936,"version":"3.51.4"},"reference-count":504,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2020,2,3]],"date-time":"2020-02-03T00:00:00Z","timestamp":1580688000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,2,3]],"date-time":"2020-02-03T00:00:00Z","timestamp":1580688000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2021,12]]},"DOI":"10.1007\/s10772-020-09681-3","type":"journal-article","created":{"date-parts":[[2020,2,3]],"date-time":"2020-02-03T20:03:41Z","timestamp":1580760221000},"page":"913-955","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":26,"title":["Pattern analysis based acoustic signal processing: a survey of the state-of-art"],"prefix":"10.1007","volume":"24","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1804-8590","authenticated-orcid":false,"given":"Jyotismita","family":"Chaki","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,2,3]]},"reference":[{"issue":"5","key":"9681_CR1","first-page":"1415","volume":"28","author":"AA Abdulsalam","year":"2017","unstructured":"Abdulsalam, A. A. (2017). Audio classification based on content features. Journal of College of Education for Women, 28(5), 1415\u20131423.","journal-title":"Journal of College of Education for Women"},{"key":"9681_CR2","doi-asserted-by":"crossref","unstructured":"Adavanne, S., Drossos, K., \u00c7akir, E., & Virtanen, T. (2017a). Stacked convolutional and recurrent neural networks for bird audio detection. In\u00a02017 25th European signal processing conference (EUSIPCO)\u00a0(pp. 1729\u20131733). Kos: IEEE.","DOI":"10.23919\/EUSIPCO.2017.8081505"},{"key":"9681_CR3","unstructured":"Adavanne, S., Parascandolo, G., Pertil\u00e4, P., Heittola, T., & Virtanen, T. (2017b). Sound event detection in multichannel audio using spatial and harmonic features. http:\/\/arxiv.org\/abs\/1706.02293."},{"key":"9681_CR4","unstructured":"Adavanne, S., & Virtanen, T. (2017). A report on sound event detection with different binaural features. http:\/\/arxiv.org\/abs\/1710.02997."},{"key":"9681_CR5","doi-asserted-by":"crossref","first-page":"122613","DOI":"10.1016\/j.physa.2019.122613","volume":"537","author":"S Ahmad","year":"2020","unstructured":"Ahmad, S., Agrawal, S., Joshi, S., Taran, S., Bajaj, V., Demir, F., et al. (2020). Environmental sound classification using optimum allocation sampling based empirical mode decomposition. Physica A: Statistical Mechanics and its Applications, 537, 122613.","journal-title":"Physica A: Statistical Mechanics and its Applications"},{"key":"9681_CR6","unstructured":"Al Maathidi, M. M. (2017).\u00a0Optimal feature selection and machine learning for high-level audio classification-a random forests approach.\u00a0Doctoral dissertation, University of Salford."},{"key":"9681_CR7","doi-asserted-by":"crossref","unstructured":"Alam, M. J., Kenny, P., Bhattacharya, G., & Stafylakis, T. (2015). Development of CRIM system for the automatic speaker verification spoofing and countermeasures challenge 2015. In\u00a0Sixteenth annual conference of the international speech communication association.","DOI":"10.21437\/Interspeech.2015-469"},{"key":"9681_CR8","doi-asserted-by":"crossref","unstructured":"AlHanai, T. W., & Ghassemi, M. M. (2017). Predicting latent narrative mood using audio and physiologic data. In\u00a0Thirty-first AAAI conference on artificial intelligence.","DOI":"10.1609\/aaai.v31i1.10625"},{"key":"9681_CR9","doi-asserted-by":"crossref","unstructured":"Al-Hussaini, I., Humayun, A. I., Alam, S., Foysal, S. I., Al Masud, A., Mahmud, A., Chowdhury, R. I., Ibtehaz, N., Zaman, S. U., Hyder, R., & Chowdhury, S. S. (2018). Predictive real-time beat tracking from music for embedded application. In\u00a02018 IEEE Conference on multimedia information processing and retrieval (MIPR)\u00a0(pp. 297\u2013300). Miami: IEEE.","DOI":"10.1109\/MIPR.2018.00068"},{"key":"9681_CR10","doi-asserted-by":"crossref","unstructured":"Ali, M., Mosa, A.H., Al Machot, F., & Kyamakya, K. (2018a). Emotion recognition involving physiological and speech signals: A comprehensive review. In\u00a0Recent advances in nonlinear dynamics and synchronization\u00a0(pp. 287\u2013302). Cham: Springer.","DOI":"10.1007\/978-3-319-58996-1_13"},{"issue":"6","key":"9681_CR11","doi-asserted-by":"crossref","first-page":"13","DOI":"10.1007\/s00521-016-2501-7","volume":"29","author":"H Ali","year":"2018","unstructured":"Ali, H., Tran, S. N., Benetos, E., & Garcez, A. S. D. A. (2018b). Speaker recognition with hybrid features from a deep belief network. Neural Computing and Applications, 29(6), 13\u201319.","journal-title":"Neural Computing and Applications"},{"issue":"5","key":"9681_CR12","doi-asserted-by":"crossref","first-page":"143","DOI":"10.3390\/app6050143","volume":"6","author":"F Al\u00edas","year":"2016","unstructured":"Al\u00edas, F., Socor\u00f3, J. C., & Sevillano, X. (2016). A review of physical and perceptual feature extraction techniques for speech, music and environmental sounds. Applied Sciences, 6(5), 143\u2013186.","journal-title":"Applied Sciences"},{"key":"9681_CR13","unstructured":"Aljanaki, A., & Soleymani, M. (2018). A data-driven approach to mid-level perceptual musical feature modeling. http:\/\/arxiv.org\/abs\/1806.04903."},{"issue":"6","key":"9681_CR14","doi-asserted-by":"crossref","first-page":"1858","DOI":"10.3390\/s18061858","volume":"18","author":"N Almaadeed","year":"2018","unstructured":"Almaadeed, N., Asim, M., Al-Maadeed, S., Bouridane, A., & Beghdadi, A. (2018). Automatic detection and classification of audio events for road surveillance applications. Sensors, 18(6), 1858.","journal-title":"Sensors"},{"key":"9681_CR15","doi-asserted-by":"crossref","unstructured":"Al-Maathidi, M. M., & Li, F. F. (2015). Audio content feature selection and classification a random forests and decision tree approach. In\u00a02015 IEEE International conference on progress in informatics and computing (PIC)\u00a0(pp. 108\u2013112). Nanjing: IEEE.","DOI":"10.1109\/PIC.2015.7489819"},{"key":"9681_CR16","doi-asserted-by":"crossref","first-page":"6961","DOI":"10.1109\/ACCESS.2017.2696056","volume":"6","author":"A Al-Nasheri","year":"2017","unstructured":"Al-Nasheri, A., Muhammad, G., Alsulaiman, M., Ali, Z., Malki, K. H., Mesallam, T. A., et al. (2017). Voice pathology detection and classification using auto-correlation and entropy features in different frequency regions. IEEE Access, 6, 6961\u20136974.","journal-title":"IEEE Access"},{"key":"9681_CR17","unstructured":"Al-Noori, A., Li, F. F., & Duncan, P. J. (2016). Robustness of speaker recognition from noisy speech samples and mismatched languages. In\u00a0Audio engineering society convention 140. Audio Engineering Society."},{"key":"9681_CR18","unstructured":"Alsaadan, H. (2017).\u00a0Adaptive audio classification framework for in-vehicle environment with dynamic noise characteristics.\u00a0Doctoral dissertation, South Dakota State University."},{"key":"9681_CR19","doi-asserted-by":"crossref","unstructured":"Al-Shoshan, A. I. (2016). A classification of an audio signal using the wold-cramer decomposition. In\u00a0Advanced computer and communication engineering technology\u00a0(pp. 473\u2013479). Cham: Springer.","DOI":"10.1007\/978-3-319-24584-3_40"},{"issue":"4","key":"9681_CR20","doi-asserted-by":"crossref","first-page":"784","DOI":"10.1109\/TASLP.2016.2526779","volume":"24","author":"KT Andersen","year":"2016","unstructured":"Andersen, K. T., & Moonen, M. (2016). Adaptive time-frequency analysis for noise reduction in an audio filter bank with low delay. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 24(4), 784\u2013795.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9681_CR21","doi-asserted-by":"crossref","unstructured":"Apopei, V. (2015). Detection dangerous events in environmental sounds-a preliminary evaluation. In\u00a02015 International conference on speech technology and human-computer dialogue (SpeD)\u00a0(pp. 1\u20135). Bucharest: IEEE.","DOI":"10.1109\/SPED.2015.7343104"},{"issue":"2","key":"9681_CR22","doi-asserted-by":"crossref","first-page":"278","DOI":"10.1109\/TASLP.2014.2387388","volume":"23","author":"V Arora","year":"2015","unstructured":"Arora, V., & Behera, L. (2015). Multiple F0 estimation and source clustering of polyphonic music audio using PLCA and HMRFs. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 23(2), 278\u2013287.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"6","key":"9681_CR23","doi-asserted-by":"crossref","first-page":"777","DOI":"10.1049\/iet-spr.2016.0607","volume":"12","author":"M Arumugam","year":"2018","unstructured":"Arumugam, M., & Kaliappan, M. (2018). Feature selection based on MBFOA for audio signal classification under consideration of Gaussian white noise. IET Signal Processing, 12(6), 777\u2013785.","journal-title":"IET Signal Processing"},{"key":"9681_CR24","doi-asserted-by":"crossref","unstructured":"Aryafar, K., & Shokoufandeh, A. (2014). Multimodal music and lyrics fusion classifier for artist identification. In\u00a02014 13th international conference on machine learning and applications\u00a0(pp. 506\u2013509). Detroit: IEEE.","DOI":"10.1109\/ICMLA.2014.88"},{"key":"9681_CR25","doi-asserted-by":"crossref","unstructured":"Ashraf, M., Guohua, G., Wang, X., & Ahmad, F. (2018). Integration of speech\/music discrimination and mood classification with audio feature extraction. In\u00a02018 International conference on frontiers of information technology (FIT)\u00a0(pp. 224\u2013229). Islamabad: IEEE.","DOI":"10.1109\/FIT.2018.00046"},{"issue":"2","key":"9681_CR26","doi-asserted-by":"crossref","first-page":"629","DOI":"10.1016\/j.jestch.2018.10.008","volume":"22","author":"A Awad","year":"2019","unstructured":"Awad, A. (2019). Impulse noise reduction in audio signal through multi-stage technique. Engineering Science and Technology, an International Journal, 22(2), 629\u2013636.","journal-title":"Engineering Science and Technology, an International Journal"},{"key":"9681_CR27","doi-asserted-by":"crossref","unstructured":"Awasthi, D., & Madhe, S. (2015). Analysis of encrypted ECG signal in steganography using wavelet transforms. In\u00a02015 2nd international conference on electronics and communication systems (ICECS)\u00a0(pp. 718\u2013723). Coimbatore: IEEE.","DOI":"10.1109\/ECS.2015.7125005"},{"key":"9681_CR28","unstructured":"Aydo\u011fmu\u015f, H. (2018).\u00a0Multimode microwave sensors for microdroplet and single-cell detection.\u00a0Doctoral dissertation, Bilkent University."},{"key":"9681_CR29","first-page":"4","volume":"11","author":"JH Bach","year":"2017","unstructured":"Bach, J. H., Kollmeier, B., & Anem\u00fcller, J. (2017). Matching pursuit analysis of auditory receptive fields\u2019 spectro-temporal properties. Frontiers in Systems Neuroscience, 11, 4.","journal-title":"Frontiers in Systems Neuroscience"},{"key":"9681_CR30","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-319-50204-5","volume-title":"Speech coding: With code-excited linear prediction","author":"T B\u00e4ckstr\u00f6m","year":"2017","unstructured":"B\u00e4ckstr\u00f6m, T. (2017). Speech coding: With code-excited linear prediction. Berlin: Springer."},{"key":"9681_CR31","doi-asserted-by":"crossref","unstructured":"Badino, L., Canevari, C., Fadiga, L., & Metta, G. (2014). An auto-encoder based approach to unsupervised learning of subword units. In\u00a02014 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 7634\u20137638). Florence: IEEE.","DOI":"10.1109\/ICASSP.2014.6855085"},{"key":"9681_CR32","unstructured":"Bae, S.H., Choi, I., & Kim, N.S. (2016). Acoustic scene classification using parallel combination of LSTM and CNN. In\u00a0Proceedings of the detection and classification of acoustic scenes and events 2016 workshop (DCASE2016)\u00a0(pp. 11\u201315)."},{"key":"9681_CR33","unstructured":"Bahuleyan, H. (2018). Music genre classification using machine learning techniques.\u00a0http:\/\/arxiv.org\/abs\/1804.01149."},{"issue":"1","key":"9681_CR34","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1088\/1741-2560\/5\/1\/003","volume":"5","author":"O Bai","year":"2007","unstructured":"Bai, O., Lin, P., Vorbach, S., Floeter, M. K., Hattori, N., & Hallett, M. (2007). A high performance sensorimotor beta rhythm-based brain\u2013computer interface associated with human natural motor behavior. Journal of Neural Engineering, 5(1), 24\u201335.","journal-title":"Journal of Neural Engineering"},{"key":"9681_CR35","unstructured":"Baker, M., Cox, A., Paumgarten, M., & Govil, A. (2017). Directional audio technique."},{"key":"9681_CR36","doi-asserted-by":"crossref","unstructured":"Banerjee, A., Ghosh, A., Palit, S., & Ballester, M.A.F. (2018). A novel approach to string instrument recognition. In\u00a0International conference on image and signal processing\u00a0(pp. 165\u2013175). Cham: Springer.","DOI":"10.1007\/978-3-319-94211-7_19"},{"issue":"12","key":"9681_CR37","doi-asserted-by":"crossref","first-page":"2377","DOI":"10.1109\/TASLP.2016.2602546","volume":"24","author":"T Barker","year":"2016","unstructured":"Barker, T., & Virtanen, T. (2016). Blind separation of audio mixtures through nonnegative tensor factorization of modulation spectrograms. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 24(12), 2377\u20132389.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9681_CR38","doi-asserted-by":"crossref","unstructured":"Baum, E., Harper, M., Alicea, R., & Ordonez, C. (2018). Sound identification for fire-fighting mobile robots. In\u00a02018 Second IEEE international conference on robotic computing (IRC)\u00a0(pp. 79\u201386). Laguna Hills: IEEE.","DOI":"10.1109\/IRC.2018.00020"},{"key":"9681_CR39","doi-asserted-by":"crossref","unstructured":"Beauregard, G.T., Harish, M., & Wyse, L. (2015). Single pass spectrogram inversion. In\u00a02015 IEEE international conference on digital signal processing (DSP)\u00a0(pp. 427\u2013431). Singapore: IEEE.","DOI":"10.1109\/ICDSP.2015.7251907"},{"key":"9681_CR40","unstructured":"Becker, S., Ackermann, M., Lapuschkin, S., M\u00fcller, K. R., & Samek, W. (2018). Interpreting and explaining deep neural networks for classification of audio signals.\u00a0http:\/\/arxiv.org\/abs\/1807.03418."},{"key":"9681_CR41","doi-asserted-by":"crossref","unstructured":"Bhakre, S. K., & Bang, A. (2016). Emotion recognition on the basis of audio signal using naive bayes classifier. In\u00a02016 International conference on advances in computing, communications and informatics (ICACCI)\u00a0(pp. 2363\u20132367). Jaipur: IEEE.","DOI":"10.1109\/ICACCI.2016.7732408"},{"issue":"2","key":"9681_CR42","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1515\/aoa-2017-0024","volume":"42","author":"DG Bhalke","year":"2017","unstructured":"Bhalke, D. G., Rajesh, B., & Bormane, D. S. (2017). Automatic genre classification using fractional fourier transform based mel frequency cepstral coefficient and timbral features. Archives of Acoustics, 42(2), 213\u2013222.","journal-title":"Archives of Acoustics"},{"issue":"3","key":"9681_CR43","doi-asserted-by":"crossref","first-page":"425","DOI":"10.1007\/s10844-015-0360-9","volume":"46","author":"DG Bhalke","year":"2016","unstructured":"Bhalke, D. G., Rao, C. R., & Bormane, D. S. (2016). Automatic musical instrument classification using fractional fourier transform based-MFCC features and counter propagation neural network. Journal of Intelligent Information Systems, 46(3), 425\u2013446.","journal-title":"Journal of Intelligent Information Systems"},{"key":"9681_CR44","doi-asserted-by":"crossref","first-page":"635","DOI":"10.1016\/j.procs.2015.02.112","volume":"46","author":"J Bhaskar","year":"2015","unstructured":"Bhaskar, J., Sruthi, K., & Nedungadi, P. (2015). Hybrid approach for emotion classification of audio conversation based on text and speech mining. Procedia Computer Science, 46, 635\u2013643.","journal-title":"Procedia Computer Science"},{"key":"9681_CR45","doi-asserted-by":"crossref","unstructured":"Bhatia, R., Srivastava, S., Bhatia, V., & Singh, M. (2018). Analysis of audio features for music representation. In\u00a02018 7th international conference on reliability, infocom technologies and optimization (trends and future directions)(ICRITO)\u00a0(pp. 261\u2013266). Noida: IEEE.","DOI":"10.1109\/ICRITO.2018.8748783"},{"key":"9681_CR46","unstructured":"Bhattacharjee, M., Prasanna, S. R. M., & Guha, P. (2018). Time-frequency audio features for speech-music classification.\u00a0http:\/\/arxiv.org\/abs\/1811.01222."},{"issue":"3","key":"9681_CR47","doi-asserted-by":"crossref","first-page":"244","DOI":"10.3397\/1\/376442","volume":"65","author":"Y Bi","year":"2017","unstructured":"Bi, Y., Reid, T., & Davies, P. (2017). An exploratory study on proposed new sounds for future products. Noise Control Engineering Journal, 65(3), 244\u2013260.","journal-title":"Noise Control Engineering Journal"},{"key":"9681_CR48","doi-asserted-by":"crossref","unstructured":"Bietti, A., Bach, F., & Cont, A. (2015). An online EM algorithm in hidden (semi-) Markov models for audio segmentation and clustering. In\u00a02015 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 1881\u20131885). Brisbane: IEEE.","DOI":"10.1109\/ICASSP.2015.7178297"},{"key":"9681_CR49","doi-asserted-by":"crossref","unstructured":"Bisot, V., Serizel, R., Essid, S., & Richard, G. (2016). Acoustic scene classification with matrix factorization for unsupervised feature learning. In\u00a02016 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 6445\u20136449). Shanghai: IEEE.","DOI":"10.1109\/ICASSP.2016.7472918"},{"key":"9681_CR50","unstructured":"Bittner, R. M., Salamon, J., Bosch, J. J., & Bello, J. P. (2017). Pitch contours as a mid-level representation for music informatics. In\u00a0Audio engineering society conference: 2017 AES international conference on semantic audio. Audio Engineering Society."},{"key":"9681_CR51","doi-asserted-by":"crossref","unstructured":"Black, M., Katsamanis, A., Lee, C. C., Lammert, A. C., Baucom, B. R., Christensen, A., Georgiou, P. G., & Narayanan, S. S. (2010). Automatic classification of married couples\u2019 behavior using audio features. In\u00a0Eleventh annual conference of the international speech communication association.","DOI":"10.21437\/Interspeech.2010-574"},{"key":"9681_CR52","doi-asserted-by":"crossref","unstructured":"B\u00f6ck, S., Korzeniowski, F., Schl\u00fcter, J., Krebs, F., & Widmer, G. (2016a). Madmom: A new python audio and music signal processing library. In\u00a0Proceedings of the 24th ACM international conference on Multimedia\u00a0(pp. 1174\u20131178). Amsterdam: ACM.","DOI":"10.1145\/2964284.2973795"},{"key":"9681_CR53","unstructured":"B\u00f6ck, S., Krebs, F., & Widmer, G. (2016b). Joint beat and downbeat tracking with recurrent neural networks. In\u00a0ISMIR\u00a0(pp. 255\u2013261)."},{"key":"9681_CR54","doi-asserted-by":"crossref","unstructured":"Bohak, C., & Marolt, M. (2016). Probabilistic segmentation of folk music recordings.\u00a0Mathematical problems in engineering,\u00a02016.","DOI":"10.1155\/2016\/8297987"},{"issue":"2","key":"9681_CR55","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1007\/s10772-014-9257-1","volume":"18","author":"P Borde","year":"2015","unstructured":"Borde, P., Varpe, A., Manza, R., & Yannawar, P. (2015). Recognition of isolated words using Zernike and MFCC features for audio visual speech recognition. International Journal of Speech Technology, 18(2), 167\u2013175.","journal-title":"International Journal of Speech Technology"},{"issue":"4","key":"9681_CR56","first-page":"918","volume":"9","author":"A Borg","year":"2015","unstructured":"Borg, A., & Micallef, P. (2015). A non-parametric based mapping algorithm for use in audio fingerprinting. World Academy of Science, Engineering and Technology, International Journal of Computer, Electrical, Automation, Control and Information Engineering, 9(4), 918\u2013921.","journal-title":"World Academy of Science, Engineering and Technology, International Journal of Computer, Electrical, Automation, Control and Information Engineering"},{"key":"9681_CR57","doi-asserted-by":"crossref","first-page":"296","DOI":"10.1016\/j.compbiomed.2017.11.005","volume":"100","author":"MD Bugdol","year":"2018","unstructured":"Bugdol, M. D., Bugdol, M. N., Lipowicz, A. M., Mitas, A. W., Bienkowska, M. J., & Wijata, A. M. (2018). Prediction of menarcheal status of girls using voice features. Computers in Biology and Medicine, 100, 296\u2013304.","journal-title":"Computers in Biology and Medicine"},{"issue":"7\/8","key":"9681_CR58","first-page":"724","volume":"52","author":"JJ Burred","year":"2004","unstructured":"Burred, J. J., & Lerch, A. (2004). Hierarchical automatic audio signal classification. Journal of the Audio Engineering Society, 52(7\/8), 724\u2013739.","journal-title":"Journal of the Audio Engineering Society"},{"issue":"4","key":"9681_CR59","doi-asserted-by":"crossref","first-page":"21","DOI":"10.3390\/machines5040021","volume":"5","author":"W Caesarendra","year":"2017","unstructured":"Caesarendra, W., & Tjahjowidodo, T. (2017). A review of feature extraction methods in vibration-based condition monitoring and its application for degradation trend estimation of low-speed slew bearing. Machines, 5(4), 21.","journal-title":"Machines"},{"key":"9681_CR60","doi-asserted-by":"crossref","unstructured":"Caetano, M., Saitis, C., & Siedenburg, K. (2019). Audio content descriptors of timbre. In\u00a0Timbre: Acoustics, perception, and cognition\u00a0(pp. 297\u2013333). Cham: Springer.","DOI":"10.1007\/978-3-030-14832-4_11"},{"key":"9681_CR61","doi-asserted-by":"crossref","unstructured":"Camarena-Ibarrola, A., Luque, F., & Chavez, E. (2017). Speaker identification through spectral entropy analysis. In\u00a02017 IEEE international autumn meeting on power, electronics and computing (ROPEC)\u00a0(pp. 1\u20136). Ixtapa: IEEE.","DOI":"10.1109\/ROPEC.2017.8261607"},{"issue":"3\u20134","key":"9681_CR62","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1163\/22134468-00002085","volume":"5","author":"D Cameron","year":"2017","unstructured":"Cameron, D., Potter, K., Wiggins, G., & Pearce, M. (2017). Perception of rhythmic similarity is asymmetrical, and is influenced by musical training, expressive performance, and musical context. Timing & Time Perception, 5(3\u20134), 211\u2013227.","journal-title":"Timing & Time Perception"},{"key":"9681_CR63","unstructured":"Canadas-Quesada, F. J., Vera-Candeas, P., Ruiz-Reyes, N., Munoz-Montoro, A., & Bris-Penalver, F. J. (2016). A method to separate musical percussive sounds using chroma spectral flatness.\u00a0In SIGNAL 2016 editors, p. 51."},{"issue":"20","key":"9681_CR64","doi-asserted-by":"crossref","first-page":"29021","DOI":"10.1007\/s11042-018-6295-8","volume":"78","author":"J Cao","year":"2019","unstructured":"Cao, J., Cao, M., Wang, J., Yin, C., Wang, D., & Vidal, P. P. (2019). Urban noise recognition with convolutional neural network. Multimedia Tools and Applications, 78(20), 29021\u201329041.","journal-title":"Multimedia Tools and Applications"},{"issue":"12","key":"9681_CR65","doi-asserted-by":"crossref","first-page":"2422","DOI":"10.1109\/TASLP.2015.2481179","volume":"23","author":"MA Carlin","year":"2015","unstructured":"Carlin, M. A., & Elhilali, M. (2015). A framework for speech activity detection using adaptive auditory receptive fields. IEEE\/ACM Transactions on Audio, Speech and Language Processing, 23(12), 2422\u20132433.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing"},{"issue":"3","key":"9681_CR66","doi-asserted-by":"crossref","first-page":"63","DOI":"10.1145\/3322240","volume":"52","author":"S Chandrakala","year":"2019","unstructured":"Chandrakala, S., & Jayalakshmi, S. L. (2019a). Environmental audio scene and sound event recognition for autonomous surveillance: A survey and comparative studies. ACM Computing Surveys (CSUR), 52(3), 63.","journal-title":"ACM Computing Surveys (CSUR)"},{"key":"9681_CR67","doi-asserted-by":"publisher","DOI":"10.1145\/3322240","author":"S Chandrakala","year":"2019","unstructured":"Chandrakala, S., & Jayalakshmi, S. L. (2019b). Environmental audio scene and sound event recognition for autonomous surveillance: A survey and comparative studies. ACM Computing Surveys (CSUR). https:\/\/doi.org\/10.1145\/3322240.","journal-title":"ACM Computing Surveys (CSUR)"},{"key":"9681_CR68","doi-asserted-by":"crossref","unstructured":"Chatterjee, A., & Yasmin, G. (2019). Human emotion recognition from speech in audio physical features. In\u00a0Applications of computing, automation and wireless systems in electrical engineering\u00a0(pp. 817\u2013824). Singapore: Springer.","DOI":"10.1007\/978-981-13-6772-4_70"},{"issue":"4","key":"9681_CR69","doi-asserted-by":"crossref","first-page":"1073","DOI":"10.1109\/JBHI.2015.2425932","volume":"20","author":"M Cheffena","year":"2015","unstructured":"Cheffena, M. (2015). Fall detection using smartphone audio features. IEEE Journal of Biomedical and Health Informatics, 20(4), 1073\u20131080.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"key":"9681_CR70","doi-asserted-by":"crossref","first-page":"240","DOI":"10.1016\/j.autcon.2017.06.005","volume":"81","author":"CF Cheng","year":"2017","unstructured":"Cheng, C. F., Rashidi, A., Davenport, M. A., & Anderson, D. V. (2017). Activity analysis of construction equipment using audio signals and support vector machines. Automation in Construction, 81, 240\u2013253.","journal-title":"Automation in Construction"},{"key":"9681_CR71","doi-asserted-by":"crossref","unstructured":"Cho, J., Pappagari, R., Kulkarni, P., Villalba, J., Carmiel, Y., & Dehak, N. (2019). Deep neural networks for emotion recognition combining audio and transcripts.\u00a0http:\/\/arxiv.org\/abs\/1911.00432.","DOI":"10.21437\/Interspeech.2018-2466"},{"key":"9681_CR73","unstructured":"Chourdakis, E., Ward, L., Paradis, M., & Reiss, J.D. (2019). Modelling experts\u2019 decisions on assigning narrative importances of objects in a radio drama mix."},{"key":"9681_CR74","doi-asserted-by":"crossref","unstructured":"Chouvardas, S., Muma, M., Hamaidi, K., Theodoridis, S., & Zoubir, A. M. (2015). Distributed robust labeling of audio sources in heterogeneous wireless sensor networks. In\u00a02015 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 5783\u20135787). Brisbane: IEEE.","DOI":"10.1109\/ICASSP.2015.7179080"},{"key":"9681_CR75","doi-asserted-by":"crossref","unstructured":"Chrupa\u0142a, G., Gelderloos, L., & Alishahi, A. (2017). Representations of language in a model of visually grounded speech signal.\u00a0http:\/\/arxiv.org\/abs\/1702.01991.","DOI":"10.18653\/v1\/P17-1057"},{"issue":"1","key":"9681_CR76","doi-asserted-by":"crossref","first-page":"122","DOI":"10.1017\/S1355771814000533","volume":"20","author":"N Collins","year":"2015","unstructured":"Collins, N. (2015). The UbuWeb electronic music corpus: An MIR investigation of a historical database. Organised Sound, 20(1), 122\u2013134.","journal-title":"Organised Sound"},{"key":"9681_CR77","doi-asserted-by":"crossref","unstructured":"Colonna, J. G., Gama, J., & Nakamura, E. F. (2016). How to correctly evaluate an automatic bioacoustics classification method. In\u00a0Conference of the Spanish association for artificial intelligence\u00a0(pp. 37\u201347). Cham: Springer.","DOI":"10.1007\/978-3-319-44636-3_4"},{"key":"9681_CR78","doi-asserted-by":"crossref","first-page":"190","DOI":"10.1016\/j.eswa.2016.04.008","volume":"60","author":"DC Corr\u00eaa","year":"2016","unstructured":"Corr\u00eaa, D. C., & Rodrigues, F. A. (2016). A survey on symbolic data-based music genre classification. Expert Systems with Applications, 60, 190\u2013210.","journal-title":"Expert Systems with Applications"},{"key":"9681_CR79","unstructured":"Correya, A. A., Hennequin, R., & Arcos, M. (2018). Large-scale cover song detection in digital music libraries using metadata, lyrics and audio features.\u00a0http:\/\/arxiv.org\/abs\/1808.10351."},{"key":"9681_CR80","unstructured":"Cuccovillo, L., & Aichroth, P. (2017). Increasing the temporal resolution of ENF analysis via harmonic distortion. In\u00a0Audio engineering society conference: 2017 AES international conference on audio forensics. Audio Engineering Society."},{"key":"9681_CR81","doi-asserted-by":"crossref","unstructured":"Cummins, N., Amiriparian, S., Hagerer, G., Batliner, A., Steidl, S., & Schuller, B. W. (2017). An image-based deep spectrum feature representation for the recognition of emotional speech. In\u00a0Proceedings of the 25th ACM international conference on Multimedia\u00a0(pp. 478\u2013484). Mountain View: ACM.","DOI":"10.1145\/3123266.3123371"},{"issue":"9","key":"9681_CR82","doi-asserted-by":"crossref","first-page":"2917","DOI":"10.1109\/JSEN.2017.2670232","volume":"17","author":"L Cz\u00fani","year":"2017","unstructured":"Cz\u00fani, L., & Varga, P. Z. (2017). Time domain audio features for chainsaw noise detection using WSNs. IEEE Sensors Journal, 17(9), 2917\u20132924.","journal-title":"IEEE Sensors Journal"},{"key":"9681_CR83","unstructured":"Dalir, A., Beheshti, A. A., & Masoom, M. H. (2018). Classification of vehicles based on audio signals using quadratic discriminant analysis and high energy feature vectors.\u00a0http:\/\/arxiv.org\/abs\/1804.01212."},{"key":"9681_CR84","doi-asserted-by":"crossref","unstructured":"Dandashi, A., & AlJaam, J. (2017). A survey on audio content-based classification. In\u00a02017 International conference on computational science and computational intelligence (CSCI)\u00a0(pp. 408\u2013413). Las Vegas: IEEE.","DOI":"10.1109\/CSCI.2017.69"},{"key":"9681_CR85","doi-asserted-by":"crossref","unstructured":"Dandawate, Y. H., Kumari, P., & Bidkar, A. (2015). Indian instrumental music: Raga analysis and classification. In\u00a02015 1st international conference on next generation computing technologies (NGCT)\u00a0(pp. 725\u2013729). Dehradun: IEEE.","DOI":"10.1109\/NGCT.2015.7375216"},{"key":"9681_CR86","doi-asserted-by":"crossref","first-page":"231","DOI":"10.1016\/j.asoc.2014.11.016","volume":"27","author":"K Daqrouq","year":"2015","unstructured":"Daqrouq, K., & Tutunji, T. A. (2015). Speaker identification using vowels features through a combined method of formants, wavelets, and neural network classifiers. Applied Soft Computing, 27, 231\u2013239.","journal-title":"Applied Soft Computing"},{"key":"9681_CR87","doi-asserted-by":"crossref","unstructured":"Darji, M. C., Patel, N. M., & Shah, Z. H. (2015). Extraction of video songs from movies using audio features. In\u00a02015 International symposium on advanced computing and communication (ISACC)\u00a0(pp. 60\u201364). Silchar: IEEE.","DOI":"10.1109\/ISACC.2015.7377316"},{"issue":"6","key":"9681_CR88","first-page":"1","volume":"1","author":"N Dave","year":"2013","unstructured":"Dave, N. (2013). Feature extraction methods LPC, PLP and MFCC in speech recognition. International Journal for Advance Research in Engineering and Technology, 1(6), 1\u20134.","journal-title":"International Journal for Advance Research in Engineering and Technology"},{"issue":"4","key":"9681_CR89","doi-asserted-by":"crossref","first-page":"1221","DOI":"10.1145\/3197517.3201371","volume":"37","author":"A Davis","year":"2018","unstructured":"Davis, A., & Agrawala, M. (2018). Visual rhythm and beat. ACM Transactions on Graphics, 37(4), 1221\u201312211.","journal-title":"ACM Transactions on Graphics"},{"key":"9681_CR90","unstructured":"Demirel, E., Bozkurt, B., & Serra, X. (2018). Automatic Makam recognition using chroma features. In Holzapfel, A., & Pikrakis, A. (eds.) Proceedings of the 8th international workshop on folk music analysis; 2018 Jun 26-29; Thessaloniki, Greece (pp. 19\u201324). Greece: Aristotle University of Thessaloniki."},{"key":"9681_CR91","unstructured":"Demirel, E., Bozkurt, B., & Serra, X. (2019). Automatic chord-scale recognition using harmonic pitch class profiles. In\u00a0Barbancho, I., Tard\u00f3n, L. J., Peinado, A., Barbancho, A. M. (eds.), Proceedings of the 16th sound & music computing conference; 2019 May 28\u201331; M\u00e1laga, Spain.[M\u00e1laga]: SMC; 2019. Sound & Music Computing Conference."},{"key":"9681_CR92","doi-asserted-by":"crossref","unstructured":"Devi, A., & ShivaKumar, K. B. (2016). Novel audio steganography technique for ECG signals in point of care systems (NASTPOCS). In\u00a02016 IEEE international conference on cloud computing in emerging markets (CCEM)\u00a0(pp. 101\u2013106). Bangalore: IEEE.","DOI":"10.1109\/CCEM.2016.026"},{"key":"9681_CR93","volume-title":"Intelligent speech signal processing","year":"2019","unstructured":"Dey, N. (Ed.). (2019). Intelligent speech signal processing. New York: Academic Press."},{"key":"9681_CR94","volume-title":"Classification and clustering in biomedical signal processing","year":"2016","unstructured":"Dey, N., & Ashour, A. (Eds.). (2016). Classification and clustering in biomedical signal processing. Hershey: IGI global."},{"key":"9681_CR95","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-319-73059-2","volume-title":"Direction of arrival estimation and localization of multi-speech sources","author":"N Dey","year":"2018","unstructured":"Dey, N., & Ashour, A. S. (2018). Direction of arrival estimation and localization of multi-speech sources. Berlin: Springer International Publishing."},{"key":"9681_CR96","volume-title":"Classification in BioApps: Automation of decision making","year":"2017","unstructured":"Dey, N., Ashour, A. S., & Borra, S. (Eds.). (2017). Classification in BioApps: Automation of decision making (Vol. 26). Berlin: Springer."},{"key":"9681_CR97","doi-asserted-by":"crossref","unstructured":"Dimaunahan, E. D., Ballado, A. H., Cruz, F. R. G., & Cruz, J. C. D. (2017). MFCC and VQ voice recognition based ATM security for the visually disabled. In\u00a02017IEEE 9th international conference on humanoid, nanotechnology, information technology, communication and control, environment and management (HNICEM)\u00a0(pp. 1\u20135). Manila: IEEE.","DOI":"10.1109\/HNICEM.2017.8269516"},{"key":"9681_CR98","doi-asserted-by":"crossref","unstructured":"Diment, A., Cakir, E., Heittola, T., & Virtanen, T. (2015). Automatic recognition of environmental sound events using all-pole group delay features. In\u00a02015 23rd European signal processing conference (EUSIPCO)\u00a0(pp. 729\u2013733). Nice: IEEE.","DOI":"10.1109\/EUSIPCO.2015.7362479"},{"key":"9681_CR99","doi-asserted-by":"publisher","DOI":"10.1177\/0020720918787456","author":"F Djebbar","year":"2017","unstructured":"Djebbar, F., & Ayad, B. (2017). Energy and entropy based features for WAV audio steganalysis. Journal of Information Hiding and Multimedia Signal Processing. https:\/\/doi.org\/10.1177\/0020720918787456.","journal-title":"Journal of Information Hiding and Multimedia Signal Processing"},{"issue":"1","key":"9681_CR100","doi-asserted-by":"crossref","first-page":"190","DOI":"10.12928\/telkomnika.v15i1.4581","volume":"15","author":"J Doherty","year":"2017","unstructured":"Doherty, J., Curran, K., & McKevitt, P. (2017). Streaming audio using MPEG-7 audio spectrum envelope to enable self-similarity within polyphonic audio. Telkomnika, 15(1), 190.","journal-title":"Telkomnika"},{"key":"9681_CR101","doi-asserted-by":"crossref","unstructured":"Dominguez-Morales, J. P., Jimenez-Fernandez, A., Rios-Navarro, A., Cerezuela-Escudero, E., Gutierrez-Galan, D., Dominguez-Morales, M. J., & Jimenez-Moreno, G. (2016). Multilayer spiking neural network for audio samples classification using SpiNNaker. In\u00a0International conference on artificial neural networks\u00a0(pp. 45\u201353). Cham: Springer.","DOI":"10.1007\/978-3-319-44778-0_6"},{"key":"9681_CR102","doi-asserted-by":"crossref","unstructured":"Draa, I. C., Tayeb, J., Niar, S., & Grislin, E. (2015). Application sequence prediction for energy consumption reduction in mobile systems. In\u00a02015 IEEE International conference on computer and information technology; ubiquitous computing and communications; dependable, autonomic and secure computing; pervasive intelligence and computing\u00a0(pp. 23\u201330). Liverpool: IEEE.","DOI":"10.1109\/CIT\/IUCC\/DASC\/PICOM.2015.7"},{"key":"9681_CR103","doi-asserted-by":"crossref","unstructured":"Dubey, H., Sangwan, A., & Hansen, J.H. (2018a). Robust speaker clustering using mixtures of von mises-fisher distributions for naturalistic audio streams.\u00a0http:\/\/arxiv.org\/abs\/1808.06045.","DOI":"10.21437\/Interspeech.2018-50"},{"issue":"11","key":"9681_CR104","doi-asserted-by":"crossref","first-page":"2056","DOI":"10.1109\/TASLP.2018.2848698","volume":"26","author":"H Dubey","year":"2018","unstructured":"Dubey, H., Sangwan, A., & Hansen, J. H. (2018b). Leveraging frequency-dependent kernel and DIP-based clustering for Robust speech activity detection in naturalistic audio streams. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 26(11), 2056\u20132071.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9681_CR105","doi-asserted-by":"crossref","unstructured":"Durand, S., Bello, J. P., David, B., & Richard, G. (2016). Feature adapted convolutional neural networks for downbeat tracking. In\u00a02016 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 296\u2013300). Shanghai: IEEE.","DOI":"10.1109\/ICASSP.2016.7471684"},{"key":"9681_CR106","doi-asserted-by":"crossref","unstructured":"Elhilali, M., 2019. Modulation representations for speech and music. In\u00a0Timbre: Acoustics, perception, and cognition\u00a0(pp. 335\u2013359). Cham: Springer.","DOI":"10.1007\/978-3-030-14832-4_12"},{"issue":"8","key":"9681_CR107","doi-asserted-by":"crossref","first-page":"2191","DOI":"10.1002\/jum.14916","volume":"38","author":"K Elzaafarany","year":"2019","unstructured":"Elzaafarany, K., Aly, M. H., Kumar, G., & Nakhmani, A. (2019). Cerebral artery vasospasm detection using transcranial Doppler signal analysis. Journal of Ultrasound in Medicine, 38(8), 2191\u20132202.","journal-title":"Journal of Ultrasound in Medicine"},{"key":"9681_CR108","doi-asserted-by":"publisher","DOI":"10.2316\/P.2016.832-031","author":"T Emoto","year":"2016","unstructured":"Emoto, T., Abeyratne, U. R., Shono, T., Nonaka, R., Jinnouchi, O., Kawata, I., et al. (2016). Auditory image model for the characterisation of obstructive sleep apnoea. Screening. https:\/\/doi.org\/10.2316\/P.2016.832-031.","journal-title":"Screening"},{"issue":"1","key":"9681_CR109","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1080\/09298215.2014.929706","volume":"44","author":"TM Esparza","year":"2015","unstructured":"Esparza, T. M., Bello, J. P., & Humphrey, E. J. (2015). From genre classification to rhythm similarity: Computational and musicological insights. Journal of New Music Research, 44(1), 39\u201357.","journal-title":"Journal of New Music Research"},{"key":"9681_CR110","volume-title":"Real-time speech and music classification by large audio feature space extraction","author":"F Eyben","year":"2015","unstructured":"Eyben, F. (2015). Real-time speech and music classification by large audio feature space extraction. Berlin: Springer."},{"key":"9681_CR111","doi-asserted-by":"crossref","unstructured":"Font, R., Esp\u00edn, J. M., & Cano, M. J. (2017). Experimental analysis of features for replay attack detection-results on the ASVspoof 2017 challenge. In\u00a0Interspeech\u00a0(pp. 7\u201311).","DOI":"10.21437\/Interspeech.2017-450"},{"issue":"1\/2","key":"9681_CR112","doi-asserted-by":"crossref","first-page":"63","DOI":"10.17743\/jaes.2015.0006","volume":"63","author":"J Francombe","year":"2015","unstructured":"Francombe, J., Mason, R., Dewhirst, M., & Bech, S. (2015). A model of distraction in an audio-on-audio interference situation with music program material. Journal of the Audio Engineering Society, 63(1\/2), 63\u201377.","journal-title":"Journal of the Audio Engineering Society"},{"issue":"1","key":"9681_CR113","first-page":"6340","volume":"18","author":"M Freitag","year":"2017","unstructured":"Freitag, M., Amiriparian, S., Pugachevskiy, S., Cummins, N., & Schuller, B. (2017). audeep: Unsupervised learning of representations from audio with deep recurrent neural networks. The Journal of Machine Learning Research, 18(1), 6340\u20136344.","journal-title":"The Journal of Machine Learning Research"},{"key":"9681_CR114","unstructured":"Friberg, A., Schoonderwaldt, E., Hedblad, A., Fabiani, M., & Elowsson, A. (2014). Using perceptually defined music features in music information retrieval.\u00a0http:\/\/arxiv.org\/abs\/1403.7923."},{"key":"9681_CR115","doi-asserted-by":"crossref","unstructured":"Fujino, T., & Yoshida, T. (2017). A consideration of mechanism of audio signa deterioration caused by propagation noise between audio equipment. In\u00a02017 Asia-Pacific international symposium on electromagnetic compatibility (APEMC)\u00a0(pp. 155\u2013157). South Korea: IEEE.","DOI":"10.1109\/APEMC.2017.7975450"},{"key":"9681_CR116","doi-asserted-by":"crossref","unstructured":"Garc\u00eda, M.A., & Dest\u00e9fanis, E.A. (2017). Deep neural networks for shimmer approximation in synthesized audio signal. In\u00a0Argentine congress of computer science\u00a0(pp. 3\u201312). Cham: Springer.","DOI":"10.1007\/978-3-319-75214-3_1"},{"issue":"5","key":"9681_CR117","doi-asserted-by":"crossref","first-page":"1086","DOI":"10.1109\/TPAMI.2017.2648793","volume":"40","author":"ID Gebru","year":"2017","unstructured":"Gebru, I. D., Ba, S., Li, X., & Horaud, R. (2017). Audio-visual speaker diarization based on spatiotemporal bayesian fusion. IEEE Transactions on Pattern Analysis and Machine Intelligence, 40(5), 1086\u20131099.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"9681_CR118","doi-asserted-by":"crossref","unstructured":"Gemmeke, J. F., Ellis, D. P., Freedman, D., Jansen, A., Lawrence, W., Moore, R. C., Plakal, M., & Ritter, M. (2017). Audio set: An ontology and human-labeled dataset for audio events. In\u00a02017 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 776\u2013780). New Orleans: IEEE.","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"9681_CR119","unstructured":"Gencoglu, O., Virtanen, T., & Huttunen, H. (2014). Recognition of acoustic events using deep neural networks. In\u00a02014 22nd European signal processing conference (EUSIPCO)\u00a0(pp. 506\u2013510). Lisbon: IEEE"},{"key":"9681_CR120","doi-asserted-by":"crossref","unstructured":"George, J., & Jhunjhunwala, A. (2015). Scalable and robust audio fingerprinting method tolerable to time-stretching. In\u00a02015 IEEE International conference on digital signal processing (DSP)\u00a0(pp. 436\u2013440). Singapore: IEEE.","DOI":"10.1109\/ICDSP.2015.7251909"},{"key":"9681_CR121","unstructured":"Gergen, S., & Martin, R. (2016). Estimating source dominated microphone clusters in ad-hoc microphone arrays by fuzzy clustering in the feature space. In\u00a0Speech communication; 12. ITG symposium\u00a0(pp. 1\u20135). VDE."},{"key":"9681_CR122","unstructured":"Gerhard, D. (2000). Audio signal classification: An overview.\u00a0Canadian Artificial Intelligence, pp.4\u20136."},{"key":"9681_CR123","doi-asserted-by":"crossref","unstructured":"Ghaemmaghami, H., Dean, D., Kalantari, S., Sridharan, S., & Fookes, C. (2015). Complete-linkage clustering for voice activity detection in audio and visual speech.","DOI":"10.21437\/Interspeech.2015-444"},{"key":"9681_CR124","doi-asserted-by":"crossref","unstructured":"Ghasemzadeh, H., & Arjmandi, M. K. (2014). Reversed-Mel cepstrum based audio steganalysis. In\u00a02014 4th International conference on computer and knowledge engineering (ICCKE)\u00a0(pp. 679\u2013684). Mashhad: IEEE.","DOI":"10.1109\/ICCKE.2014.6993347"},{"key":"9681_CR125","unstructured":"Ghodasara, V., Waldekar, S., Paul, D., & Saha, G. (2016). Acoustic scene classification using block based MFCC features.\u00a0Detection and classification of acoustic scenes and events."},{"issue":"1","key":"9681_CR126","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1504\/IJCISTUDIES.2015.069831","volume":"4","author":"A Ghosal","year":"2015","unstructured":"Ghosal, A., Chakraborty, R., Dhara, B. C., & Saha, S. K. (2015). Perceptual feature-based song genre classification using RANSAC. International Journal of Computational Intelligence Studies, 4(1), 31\u201349.","journal-title":"International Journal of Computational Intelligence Studies"},{"issue":"12","key":"9681_CR127","doi-asserted-by":"crossref","first-page":"e0144610","DOI":"10.1371\/journal.pone.0144610","volume":"10","author":"T Giannakopoulos","year":"2015","unstructured":"Giannakopoulos, T. (2015). Pyaudioanalysis: An open-source python library for audio signal analysis. PLoS ONE, 10(12), e0144610.","journal-title":"PLoS ONE"},{"key":"9681_CR128","doi-asserted-by":"crossref","unstructured":"Giannakopoulos, T., & Perantonis, S. (2019). Recognizing the quality of urban sound recordings using hand-crafted and deep audio features. In\u00a0Proceedings of the 12th ACM international conference on pervasive technologies related to assistive environments\u00a0(pp. 323\u2013324). Rhodes: ACM.","DOI":"10.1145\/3316782.3322739"},{"issue":"5","key":"9681_CR481","first-page":"6","volume":"5","author":"GK Girisha","year":"2016","unstructured":"Girisha, G. K., & Pinjare, S. L. (2016). Performance analysis of adaptive filters for noise cancellation in audio signal for hearing aid application. IJSR, 5(5), 6\u2013319.","journal-title":"IJSR"},{"issue":"11","key":"9681_CR129","doi-asserted-by":"crossref","first-page":"1885","DOI":"10.1109\/TASLP.2016.2554283","volume":"24","author":"A Gkiokas","year":"2016","unstructured":"Gkiokas, A., Katsouros, V., Carayannis, G., Gkiokas, A., Katsouros, V., & Carayannis, G. (2016). Towards multi-purpose spectral rhythm features: An application to dance style, meter and tempo estimation. IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP), 24(11), 1885\u20131896.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP)"},{"key":"9681_CR130","unstructured":"Godfrey, H. (2016). Basic signal processing with MATLAB."},{"key":"9681_CR131","doi-asserted-by":"crossref","unstructured":"Goehring, T., Yang, X., Monaghan, J. J., & Bleeck, S. (2016). Speech enhancement for hearing-impaired listeners using deep neural networks with auditory-model based features. In\u00a02016 24th European signal processing conference (EUSIPCO)\u00a0(pp. 2300\u20132304). Budapest: IEEE.","DOI":"10.1109\/EUSIPCO.2016.7760659"},{"key":"9681_CR132","doi-asserted-by":"crossref","unstructured":"Grais, E. M., & Plumbley, M. D. (2017). Single channel audio source separation using convolutional denoising autoencoders. In\u00a02017 IEEE global conference on signal and information processing (GlobalSIP)\u00a0(pp. 1265\u20131269). Montreal: IEEE.","DOI":"10.1109\/GlobalSIP.2017.8309164"},{"key":"9681_CR133","doi-asserted-by":"crossref","unstructured":"Grama, L., Buhu\u015f, E. R., & Rusu, C. (2017). Acoustic classification using linear predictive coding for wildlife detection systems. In\u00a02017 International symposium on signals, circuits and systems (ISSCS)\u00a0(pp. 1\u20134). Iasi: IEEE.","DOI":"10.1109\/ISSCS.2017.8034944"},{"key":"9681_CR134","doi-asserted-by":"crossref","unstructured":"Grama, L., & Rusu, C. (2017). Audio signal classification using linear predictive coding and random forests. In\u00a02017 International conference on speech technology and human-computer dialogue (SpeD)\u00a0(pp. 1\u20139). Bucharest: IEEE.","DOI":"10.1109\/SPED.2017.7990431"},{"key":"9681_CR135","doi-asserted-by":"crossref","unstructured":"Grekow, J. (2015). Audio features dedicated to the detection of four basic emotions. In\u00a0IFIP international conference on computer information systems and industrial management\u00a0(pp. 583\u2013591). Cham: Springer.","DOI":"10.1007\/978-3-319-24369-6_49"},{"key":"9681_CR136","doi-asserted-by":"crossref","unstructured":"Grekow, J. (2017). Audio features dedicated to the detection of arousal and valence in music recordings. In\u00a02017 IEEE international conference on innovations in intelligent systems and applications (INISTA)\u00a0(pp. 40\u201344). Gdynia: IEEE.","DOI":"10.1109\/INISTA.2017.8001129"},{"key":"9681_CR137","doi-asserted-by":"crossref","unstructured":"Grzywczak, D., & Gwardys, G. (2014). Audio features in music information retrieval. In\u00a0International conference on active media technology\u00a0(pp. 187\u2013199). Cham: Springer.","DOI":"10.1007\/978-3-319-09912-5_16"},{"key":"9681_CR138","doi-asserted-by":"crossref","unstructured":"Guan, H., Liu, Z., Wang, L., Dang, J., & Yu, R. (2017). Speech emotion recognition considering local dynamic features. In\u00a0International seminar on speech production\u00a0(pp. 14\u201323). Cham: Springer.","DOI":"10.1007\/978-3-030-00126-1_2"},{"key":"9681_CR139","unstructured":"Gulhane, S. R., Badhe, S. S., & Shirbahadurkar, S. D. (2018). Cepstral (MFCC) feature and spectral (Timbral) features analysis for musical instrument sounds. In\u00a02018 IEEE global conference on wireless computing and networking (GCWCN)\u00a0(pp. 109\u2013113). Lonavala: IEEE."},{"issue":"2","key":"9681_CR140","first-page":"2278","volume":"17","author":"S Gupta","year":"2015","unstructured":"Gupta, S., & Dhanda, N. (2015). Audio steganography using discrete wavelet transformation (DWT) & discrete cosine transformation (DCT). IOSR Journal of Computer Engineering, 17(2), 2278\u20132661.","journal-title":"IOSR Journal of Computer Engineering"},{"key":"9681_CR141","doi-asserted-by":"crossref","unstructured":"Guzman-Zavaleta, Z. J., Feregrino-Uribe, C., Menendez-Ortiz, A., & Garcia-Hernandez, J. J. (2014). A robust audio fingerprinting method using spectrograms saliency maps. In\u00a0The 9th international conference for internet technology and secured transactions (ICITST-2014)\u00a0(pp. 47\u201352). London: IEEE.","DOI":"10.1109\/ICITST.2014.7038773"},{"issue":"4","key":"9681_CR142","doi-asserted-by":"crossref","first-page":"321","DOI":"10.2478\/eletel-2014-0042","volume":"60","author":"G Gwardys","year":"2014","unstructured":"Gwardys, G., & Grzywczak, D. (2014). Deep image features in music information retrieval. International Journal of Electronics and Telecommunications, 60(4), 321\u2013326.","journal-title":"International Journal of Electronics and Telecommunications"},{"key":"9681_CR143","doi-asserted-by":"crossref","unstructured":"Han, B. J., & Hwang, E. (2009). Environmental sound classification based on feature collaboration. In\u00a02009 IEEE international conference on multimedia and expo\u00a0(pp. 542\u2013545). New York: IEEE.","DOI":"10.1109\/ICME.2009.5202553"},{"issue":"1","key":"9681_CR144","doi-asserted-by":"crossref","first-page":"208","DOI":"10.1109\/TASLP.2016.2632307","volume":"25","author":"Y Han","year":"2017","unstructured":"Han, Y., Kim, J., Lee, K., Han, Y., Kim, J., & Lee, K. (2017). Deep convolutional neural networks for predominant instrument recognition in polyphonic music. IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP), 25(1), 208\u2013221.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP)"},{"issue":"12","key":"9681_CR145","doi-asserted-by":"crossref","first-page":"15431","DOI":"10.1007\/s11042-017-5123-x","volume":"77","author":"C Han","year":"2018","unstructured":"Han, C., Xue, R., Zhang, R., & Wang, X. (2018). A new audio steganalysis method based on linear prediction. Multimedia Tools and Applications, 77(12), 15431\u201315455.","journal-title":"Multimedia Tools and Applications"},{"key":"9681_CR146","doi-asserted-by":"crossref","first-page":"159","DOI":"10.1016\/j.jecp.2017.01.006","volume":"159","author":"EE Hannon","year":"2017","unstructured":"Hannon, E. E., Schachner, A., & Nave-Blodgett, J. E. (2017). Babies know bad dancing when they see it: Older but not younger infants discriminate between synchronous and asynchronous audiovisual musical displays. Journal of Experimental Child Psychology, 159, 159\u2013174.","journal-title":"Journal of Experimental Child Psychology"},{"key":"9681_CR147","first-page":"196","volume":"31","author":"NF Hassan","year":"2018","unstructured":"Hassan, N. F., & Alden, S. Q. S. (2018). Gender classification based on audio features. Al-Ma\u2019mon College Journal, 31, 196\u2013213.","journal-title":"Al-Ma\u2019mon College Journal"},{"key":"9681_CR148","doi-asserted-by":"crossref","unstructured":"Helmrich, C. R., Markovi\u0107, G., & Edler, B. (2014). Improved low-delay MDCT-based coding of both stationary and transient audio signals. In\u00a02014 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 6954\u20136958). Florence: IEEE.","DOI":"10.1109\/ICASSP.2014.6854948"},{"key":"9681_CR149","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1016\/j.compbiomed.2017.03.011","volume":"84","author":"J Heo","year":"2017","unstructured":"Heo, J., Baek, H. J., Hong, S., Chang, M. H., Lee, J. S., & Park, K. S. (2017). Music and natural sounds in an auditory steady-state response) based brain\u2013computer interface to increase user acceptance. Computers in Biology and Medicine, 84, 45\u201352.","journal-title":"Computers in Biology and Medicine"},{"key":"9681_CR150","unstructured":"Herberger, T., Tost, T., & Engel, T. (2018). Bellevue Investments & Co Kgaa GmbH.\u00a0System and method for controlled dynamics adaptation for musical content. U.S. Patent 9,991,861."},{"issue":"1","key":"9681_CR151","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1076\/jnmr.32.1.3.16798","volume":"32","author":"P Herrera-Boyer","year":"2003","unstructured":"Herrera-Boyer, P., Peeters, G., & Dubnov, S. (2003). Automatic classification of musical instrument sounds. Journal of New Music Research, 32(1), 3\u201321.","journal-title":"Journal of New Music Research"},{"key":"9681_CR152","doi-asserted-by":"crossref","unstructured":"Hershey, S., Chaudhuri, S., Ellis, D. P., Gemmeke, J. F., Jansen, A., Moore, R. C., Plakal, M., Platt, D., Saurous, R. A., Seybold, B., & Slaney, M. (2017). CNN architectures for large-scale audio classification. In\u00a02017 IEEE international conference on acoustics, speech and signal processing (icassp)\u00a0(pp. 131\u2013135). New Orleans: IEEE.","DOI":"10.1109\/ICASSP.2017.7952132"},{"key":"9681_CR153","doi-asserted-by":"crossref","unstructured":"Hershey, J. R., Chen, Z., Le Roux, J., & Watanabe, S. (2016). Deep clustering: Discriminative embeddings for segmentation and separation. In\u00a02016 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 31\u201335). Shanghai: IEEE.","DOI":"10.1109\/ICASSP.2016.7471631"},{"key":"9681_CR154","doi-asserted-by":"crossref","unstructured":"Heshi, R., Suma, S. M., Koolagudi, S. G., Bhandari, S., & Rao, K. S. (2016). Rhythm and timbre analysis for carnatic music processing. In\u00a0Proceedings of 3rd international conference on advanced computing, networking and informatics\u00a0(pp. 603\u2013609). New Delhi: Springer.","DOI":"10.1007\/978-81-322-2538-6_62"},{"issue":"1","key":"9681_CR155","doi-asserted-by":"crossref","first-page":"2266","DOI":"10.1038\/s41598-018-20732-3","volume":"8","author":"S Hoefle","year":"2018","unstructured":"Hoefle, S., Engel, A., Basilio, R., Alluri, V., Toiviainen, P., Cagy, M., et al. (2018). Identifying musical pieces from fMRI data using encoding and decoding models. Scientific Reports, 8(1), 2266\u20132278.","journal-title":"Scientific Reports"},{"issue":"12","key":"9681_CR156","doi-asserted-by":"crossref","first-page":"980","DOI":"10.17743\/jaes.2015.0087","volume":"63","author":"P Hoffmann","year":"2016","unstructured":"Hoffmann, P., & Kostek, B. (2016). Bass enhancement settings in portable devices based on music genre recognition. Journal of the Audio Engineering Society, 63(12), 980\u2013989.","journal-title":"Journal of the Audio Engineering Society"},{"issue":"11","key":"9681_CR157","doi-asserted-by":"crossref","first-page":"44","DOI":"10.1109\/MCOM.2018.1700577","volume":"56","author":"MS Hossain","year":"2018","unstructured":"Hossain, M. S., & Muhammad, G. (2018). Environment classification for urban big data using deep learning. IEEE Communications Magazine, 56(11), 44\u201350.","journal-title":"IEEE Communications Magazine"},{"key":"9681_CR158","doi-asserted-by":"crossref","unstructured":"Hossain, N., & Naznin, M. (2018). Sensing emotion from voice jitter. In\u00a0Proceedings of the 16th ACM conference on embedded networked sensor systems\u00a0(pp. 359\u2013360). Shenzhen: ACM.","DOI":"10.1145\/3274783.3275182"},{"issue":"2","key":"9681_CR159","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1002\/asi.23649","volume":"68","author":"X Hu","year":"2017","unstructured":"Hu, X., Choi, K., & Downie, J. S. (2017). A framework for evaluating multimodal music mood classification. Journal of the Association for Information Science and Technology, 68(2), 273\u2013285.","journal-title":"Journal of the Association for Information Science and Technology"},{"issue":"3","key":"9681_CR160","doi-asserted-by":"crossref","first-page":"1138","DOI":"10.1016\/j.patcog.2013.06.010","volume":"47","author":"P Hu","year":"2014","unstructured":"Hu, P., Liu, W., Jiang, W., & Yang, Z. (2014). Latent topic model for audio retrieval. Pattern Recognition, 47(3), 1138\u20131143.","journal-title":"Pattern Recognition"},{"key":"9681_CR161","unstructured":"Huang, J., Child, R., Rao, V., Liu, H., Satheesh, S., & Coates, A. (2016). Active learning for speech recognition: The power of gradients.\u00a0http:\/\/arxiv.org\/abs\/1612.03226."},{"key":"9681_CR162","doi-asserted-by":"crossref","unstructured":"Huang, L., & Pun, C. M. (2019). Audio replay spoof attack detection using segment-based hybrid feature and densenet-LSTM network. In\u00a0ICASSP 2019-2019 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 2567\u20132571). Brighton: IEEE.","DOI":"10.1109\/ICASSP.2019.8682573"},{"key":"9681_CR163","doi-asserted-by":"crossref","unstructured":"Huang, Z., Weng, C., Li, K., Cheng, Y. C., & Lee, C. H. (2014). Deep learning vector quantization for acoustic information retrieval. In\u00a02014 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 1350\u20131354). Florence: IEEE.","DOI":"10.1109\/ICASSP.2014.6853817"},{"key":"9681_CR164","doi-asserted-by":"crossref","unstructured":"Hyder, R., Ghaffarzadegan, S., Feng, Z., Hansen, J.H., & Hasan, T. (2017). Acoustic scene classification using a CNN-supervector system trained with auditory and spectrogram image features. In\u00a0INTERSPEECH\u00a0(pp. 3073\u20133077).","DOI":"10.21437\/Interspeech.2017-431"},{"key":"9681_CR165","doi-asserted-by":"crossref","unstructured":"Isik, Y., Roux, J.L., Chen, Z., Watanabe, S., & Hershey, J.R., 2016. Single-channel multi-speaker separation using deep clustering.\u00a0http:\/\/arxiv.org\/abs\/1607.02173.","DOI":"10.21437\/Interspeech.2016-1176"},{"key":"9681_CR166","doi-asserted-by":"crossref","unstructured":"Islam, M. T., Shaan, M. N., Easha, E. J., Minhaz, A. T., Shahnaz, C., & Fattah, S. A. (2017). Enhancement of noisy speech based on decision-directed Wiener approach in perceptual wavelet packet domain. In\u00a0TENCON 2017-2017 IEEE region 10 conference\u00a0(pp. 2666\u20132671). Penang: IEEE.","DOI":"10.1109\/TENCON.2017.8228313"},{"key":"9681_CR167","doi-asserted-by":"crossref","unstructured":"Jack, R.H., Stockman, T., & McPherson, A. (2016). Effect of latency on performer interaction and subjective quality assessment of a digital musical instrument. In\u00a0Proceedings of the audio mostly 2016\u00a0(pp. 116\u2013123). Norrk\u00f6ping: ACM.","DOI":"10.1145\/2986416.2986428"},{"key":"9681_CR168","doi-asserted-by":"crossref","unstructured":"Jalil, M., Butt, F. A., & Malik, A. (2013). Short-time energy, magnitude, zero crossing rate and autocorrelation measurement for discriminating voiced and unvoiced segments of speech signals. In\u00a02013 The international conference on technological advances in electrical, electronics and computer engineering (TAEECE)\u00a0(pp. 208\u2013212). Konya: IEEE.","DOI":"10.1109\/TAEECE.2013.6557272"},{"issue":"3","key":"9681_CR169","first-page":"308","volume":"11","author":"N Jamil","year":"2015","unstructured":"Jamil, N., Ramli, M. I., & Seman, N. (2015). Sentence boundary detection without speech recognition: A case of an under-resourced language. Journal of Electrical Systems, 11(3), 308\u2013318.","journal-title":"Journal of Electrical Systems"},{"key":"9681_CR170","doi-asserted-by":"crossref","unstructured":"Jansen, A., Plakal, M., Pandya, R., Ellis, D. P., Hershey, S., Liu, J., Moore, R. C., & Saurous, R. A. (2018). Unsupervised learning of semantic audio representations. In\u00a02018 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 126\u2013130). Calgary: IEEE.","DOI":"10.1109\/ICASSP.2018.8461684"},{"key":"9681_CR171","unstructured":"Jarina, R., O\u2019Connor, N., Marlow, S., & Murphy, N. (2002). Rhythm detection for speech-music discrimination in mpeg compressed domain. In\u00a02002 14th international conference on digital signal processing proceedings. DSP 2002 (Cat. No. 02TH8628)\u00a0(pp. 129\u2013132). Santorini: IEEE."},{"issue":"10","key":"9681_CR172","doi-asserted-by":"crossref","first-page":"1831","DOI":"10.1109\/LGRS.2014.2311819","volume":"11","author":"RJ Javier","year":"2014","unstructured":"Javier, R. J., & Kim, Y. (2014). Application of linear predictive coding for human activity classification based on micro-Doppler signatures. IEEE Geoscience and Remote Sensing Letters, 11(10), 1831\u20131834.","journal-title":"IEEE Geoscience and Remote Sensing Letters"},{"issue":"3","key":"9681_CR173","first-page":"907","volume":"11","author":"T Jayasankar","year":"2017","unstructured":"Jayasankar, T., Vinothkumar, K., & Vijayaselvi, A. (2017). Automatic gender identification in speech recognition by genetic algorithm. Applied Mathematics, 11(3), 907\u2013913.","journal-title":"Applied Mathematics"},{"key":"9681_CR174","doi-asserted-by":"crossref","unstructured":"Jleed, H., & Bouchard, M. (2017). Acoustic environment classification using discrete hartley transform features. In\u00a02017 IEEE 30th Canadian conference on electrical and computer engineering (CCECE)\u00a0(pp. 1\u20134). Windsor: IEEE.","DOI":"10.1109\/CCECE.2017.7946646"},{"key":"9681_CR175","doi-asserted-by":"crossref","first-page":"174","DOI":"10.1016\/j.procs.2017.10.019","volume":"116","author":"AG Jondya","year":"2017","unstructured":"Jondya, A. G., & Iswanto, B. H. (2017). Indonesian\u2019s traditional music clustering based on audio features. Procedia Computer Science, 116, 174\u2013181.","journal-title":"Procedia Computer Science"},{"key":"9681_CR176","doi-asserted-by":"crossref","unstructured":"Jorr\u00edn-Prieto, J., Vaquero, C., & Garc\u00eda, P. (2016). Analysis of the impact of the audio database characteristics in the accuracy of a speaker clustering system. In\u00a0Odyssey\u00a0(pp. 393\u2013399).","DOI":"10.21437\/Odyssey.2016-57"},{"issue":"9","key":"9681_CR177","doi-asserted-by":"crossref","first-page":"1509","DOI":"10.1109\/TASLP.2015.2438549","volume":"23","author":"A Juki\u0107","year":"2015","unstructured":"Juki\u0107, A., van Waterschoot, T., Gerkmann, T., & Doclo, S. (2015). Multi-channel linear prediction-based speech dereverberation with sparse priors. IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP), 23(9), 1509\u20131520.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP)"},{"key":"9681_CR178","unstructured":"Jumelle, M., & Sakmeche, T. (2018). Speaker clustering with neural networks and audio processing.\u00a0http:\/\/arxiv.org\/abs\/1803.08276."},{"key":"9681_CR179","doi-asserted-by":"crossref","unstructured":"Kacprzak, S., Chwie\u0107ko, B., & Zi\u00f3\u0142ko, B. (2017). Speech\/music discrimination for analysis of radio stations. In\u00a02017 International conference on systems, signals and image processing (IWSSIP)\u00a0(pp. 1\u20134). Poznan: IEEE.","DOI":"10.1109\/IWSSIP.2017.7965606"},{"issue":"1","key":"9681_CR180","first-page":"39","volume":"1","author":"M Kalamani","year":"2015","unstructured":"Kalamani, M., Valarmathy, D. S., & Anith, S. (2015). Hybrid speech segmentation algorithm for continuous speech recognition. International Journal on Applications of Information and Communication Engineering, 1(1), 39\u201346.","journal-title":"International Journal on Applications of Information and Communication Engineering"},{"issue":"2","key":"9681_CR181","doi-asserted-by":"crossref","first-page":"2223","DOI":"10.1007\/s11042-015-3181-5","volume":"76","author":"I Kapsouras","year":"2017","unstructured":"Kapsouras, I., Tefas, A., Nikolaidis, N., Peeters, G., Benaroya, L., & Pitas, I. (2017). Multimodal speaker clustering in full length movies. Multimedia Tools and Applications, 76(2), 2223\u20132242.","journal-title":"Multimedia Tools and Applications"},{"key":"9681_CR182","doi-asserted-by":"crossref","unstructured":"Karaa, W. B. A., Ashour, A. S., Sassi, D. B., Roy, P., Kausar, N., & Dey, N. (2016). Medline text mining: an enhancement genetic algorithm based approach for document clustering. In\u00a0Applications of intelligent optimization in biology and medicine\u00a0(pp. 267\u2013287). Cham: Springer.","DOI":"10.1007\/978-3-319-21212-8_12"},{"issue":"4","key":"9681_CR183","first-page":"33","volume":"9","author":"K Karthikeyan","year":"2018","unstructured":"Karthikeyan, K., & Mala, D. R. (2018). Content based audio classification using artificial neural network techniques. International Journal of Computer Engineering & Technology, 9(4), 33\u201348.","journal-title":"International Journal of Computer Engineering & Technology"},{"key":"9681_CR184","doi-asserted-by":"crossref","unstructured":"Kartikay, A., Ganesan, H., & Ladwani, V.M. (2016). Classification of music into moods using musical features. In\u00a02016 International conference on inventive computation technologies (ICICT)\u00a0(Vol. 3, pp. 1\u20135). Coimbatore: IEEE.","DOI":"10.1109\/INVENTIVE.2016.7830197"},{"key":"9681_CR185","unstructured":"Kaur, K., & Jain, N. (2015). Feature extraction and classification for automatic speaker recognition system\u2014A review.\u00a0International Journal of Advanced Research in Computer Science and Software Engineering,\u00a05."},{"issue":"5","key":"9681_CR186","first-page":"289","volume":"4","author":"G Kaur","year":"2015","unstructured":"Kaur, G., Singh, D., & Kaur, G. (2015). A survey on speech recognition algorithms. International Journal of Emerging Research in Management and Technology, 4(5), 289\u2013298.","journal-title":"International Journal of Emerging Research in Management and Technology"},{"key":"9681_CR187","unstructured":"Kelkar, T., & Jensenius, A. R. (2017). Exploring melody and motion features in \u201csound-tracings\u201d. In\u00a0Proceedings of the SMC conferences\u00a0(pp. 98\u2013103). Aalto University."},{"issue":"15","key":"9681_CR188","doi-asserted-by":"crossref","first-page":"5973","DOI":"10.1007\/s11042-014-1902-9","volume":"74","author":"M Khalil","year":"2015","unstructured":"Khalil, M., & Adib, A. (2015). Informed audio watermarking based on adaptive carrier modulation. Multimedia Tools and Applications, 74(15), 5973\u20135993.","journal-title":"Multimedia Tools and Applications"},{"key":"9681_CR189","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1016\/j.dsp.2015.09.005","volume":"48","author":"BK Khonglah","year":"2016","unstructured":"Khonglah, B. K., & Prasanna, S. M. (2016). Speech\/music classification using speech-specific features. Digital Signal Processing, 48, 71\u201383.","journal-title":"Digital Signal Processing"},{"key":"9681_CR190","doi-asserted-by":"crossref","first-page":"57","DOI":"10.1016\/j.ins.2013.04.014","volume":"243","author":"P Khunarsal","year":"2013","unstructured":"Khunarsal, P., Lursinsap, C., & Raicharoen, T. (2013). Very short time environmental sound classification based on spectrogram pattern matching. Information Sciences, 243, 57\u201374.","journal-title":"Information Sciences"},{"key":"9681_CR191","doi-asserted-by":"crossref","unstructured":"Kiktova, E., Lojka, M., Pleva, M., Juhar, J., & Cizmar, A. (2015). Gun type recognition from gunshot audio recordings. In\u00a03rd international workshop on biometrics and forensics (IWBF 2015)\u00a0(pp. 1\u20136). Gjovik: IEEE.","DOI":"10.1109\/IWBF.2015.7110240"},{"key":"9681_CR192","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvoice.2019.09.009","author":"GH Kim","year":"2019","unstructured":"Kim, G. H., Bae, I. H., Park, H. J., & Lee, Y. W. (2019). Comparison of cepstral analysis based on voiced-segment extraction and voice tasks for discriminating dysphonic and normophonic Korean speakers. Journal of Voice. https:\/\/doi.org\/10.1016\/j.jvoice.2019.09.009.","journal-title":"Journal of Voice"},{"key":"9681_CR193","unstructured":"Kim, K., Baijal, A., Ko, B.S., Lee, S., Hwang, I., & Kim, Y. (2015). Speech music discrimination using an ensemble of biased classifiers. In\u00a0Audio engineering society convention, Vol. 139. Audio Engineering Society."},{"key":"9681_CR194","volume-title":"MPEG-7 audio and beyond: Audio content indexing and retrieval","author":"HG Kim","year":"2006","unstructured":"Kim, H. G., Moreau, N., & Sikora, T. (2006). MPEG-7 audio and beyond: Audio content indexing and retrieval. New York: Wiley."},{"issue":"3","key":"9681_CR195","first-page":"1246","volume":"7","author":"D Kim","year":"2017","unstructured":"Kim, D., Van Ho, P., & Lim, Y. (2017). A new recognition method for visualizing music emotion. International Journal of Electrical and Computer Engineering, 7(3), 1246\u20131254.","journal-title":"International Journal of Electrical and Computer Engineering"},{"issue":"4","key":"9681_CR196","doi-asserted-by":"crossref","first-page":"2225","DOI":"10.1121\/1.4950680","volume":"139","author":"N Kirch","year":"2016","unstructured":"Kirch, N., & Zhu, N. (2016). A discourse on the effectiveness of digital filters at removing noise from audio. The Journal of the Acoustical Society of America, 139(4), 2225.","journal-title":"The Journal of the Acoustical Society of America"},{"key":"9681_CR197","doi-asserted-by":"crossref","unstructured":"Kiska, T., Galaz, Z., Zvoncak, V., Mucha, J., Mekyska, J., & Smekal, Z. (2018). Music information retrieval techniques for determining the place of origin of a music interpretation. In\u00a02018 10th international congress on ultra modern telecommunications and control systems and workshops (ICUMT)\u00a0(pp. 1\u20135). Moscow: IEEE.","DOI":"10.1109\/ICUMT.2018.8631268"},{"key":"9681_CR198","doi-asserted-by":"crossref","unstructured":"Knees, P., & Schedl, M. (2016). Basic methods of audio signal processing. In\u00a0Music similarity and retrieval\u00a0(pp. 33\u201350). Berlin: Springer.","DOI":"10.1007\/978-3-662-49722-7_2"},{"key":"9681_CR199","doi-asserted-by":"crossref","unstructured":"Korvel, G., & Kostek, B. (2017). Examining feature vector for phoneme recognition. In\u00a02017 IEEE international symposium on signal processing and information technology (ISSPIT)\u00a0(pp. 394\u2013398). Bilbao: IEEE.","DOI":"10.1109\/ISSPIT.2017.8388675"},{"key":"9681_CR200","doi-asserted-by":"crossref","unstructured":"Kotti, M., & Stylianou, Y. (2017). Effective emotion recognition in movie audio tracks. In\u00a02017 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 5120\u20135124). New Orleans: IEEE.","DOI":"10.1109\/ICASSP.2017.7953132"},{"key":"9681_CR201","doi-asserted-by":"crossref","unstructured":"Koutini, K., Eghbal-zadeh, H., Dorfer, M., & Widmer, G. (2019). The receptive field as a regularizer in deep convolutional neural networks for acoustic scene classification. In\u00a02019 27th European signal processing conference (EUSIPCO)\u00a0(pp. 1\u20135). A Coruna: IEEE.","DOI":"10.23919\/EUSIPCO.2019.8902732"},{"key":"9681_CR202","doi-asserted-by":"crossref","unstructured":"Koutras, P., Zlatintsi, A., Iosif, E., Katsamanis, A., Maragos, P., & Potamianos, A. (2015). Predicting audio-visual salient events based on visual, audio and text modalities for movie summarization. In\u00a02015 IEEE international conference on image processing (ICIP)\u00a0(pp. 4361\u20134365). Quebec City: IEEE.","DOI":"10.1109\/ICIP.2015.7351630"},{"key":"9681_CR203","doi-asserted-by":"crossref","unstructured":"Kraljevi\u0107, L., Russo, M., Mlikota, M., & \u0160ari\u0107, M. (2017). Cochlea-based features for music emotion classification. In\u00a014th international conference on signal processing and multimedia applications.","DOI":"10.5220\/0006466900640068"},{"key":"9681_CR204","doi-asserted-by":"crossref","first-page":"105","DOI":"10.1016\/j.sigpro.2016.06.020","volume":"130","author":"T Kronvall","year":"2017","unstructured":"Kronvall, T., Juhlin, M., Sw\u00e4rd, J., Adalbj\u00f6rnsson, S. I., & Jakobsson, A. (2017). Sparse modeling of chroma features. Signal Processing, 130, 105\u2013117.","journal-title":"Signal Processing"},{"issue":"3","key":"9681_CR205","first-page":"72","volume":"24","author":"VA Kulyukin","year":"2016","unstructured":"Kulyukin, V. A., & Reka, S. K. (2016). Toward sustainable electronic beehive monitoring: Algorithms for omnidirectional bee counting from images and harmonic analysis of buzzing signals. Engineering Letters, 24(3), 72\u201382.","journal-title":"Engineering Letters"},{"key":"9681_CR206","doi-asserted-by":"crossref","unstructured":"Kumar, A., & Florencio, D. (2016). Speech enhancement in multiple-noise conditions using deep neural networks.\u00a0http:\/\/arxiv.org\/abs\/1605.02427.","DOI":"10.21437\/Interspeech.2016-88"},{"key":"9681_CR207","doi-asserted-by":"crossref","unstructured":"Kumar, A., & Raj, B. (2016). Audio event detection using weakly labeled data. In\u00a0Proceedings of the 24th ACM international conference on Multimedia\u00a0(pp. 1038\u20131047). Amsterdam: ACM.","DOI":"10.1145\/2964284.2964310"},{"issue":"2","key":"9681_CR208","doi-asserted-by":"crossref","first-page":"995","DOI":"10.1007\/s11042-012-1108-y","volume":"73","author":"K Kusama","year":"2014","unstructured":"Kusama, K., & Itoh, T. (2014). Abstract picture generation and zooming user interface for intuitive music browsing. Multimedia Tools and Applications, 73(2), 995\u20131010.","journal-title":"Multimedia Tools and Applications"},{"key":"9681_CR209","unstructured":"Kwon, T., Jeong, D., & Nam, J. (2017). Audio-to-score alignment of piano music using RNN-based automatic music transcription.\u00a0http:\/\/arxiv.org\/abs\/1711.04480."},{"key":"9681_CR210","doi-asserted-by":"crossref","unstructured":"Lampropoulos, A. S., & Tsihrintzis, G. A. (2012). Evaluation of MPEG-7 descriptors for speech emotional recognition. In\u00a02012 Eighth international conference on intelligent information hiding and multimedia signal processing\u00a0(pp. 98\u2013101). Piraeus: IEEE.","DOI":"10.1109\/IIH-MSP.2012.29"},{"key":"9681_CR211","doi-asserted-by":"crossref","unstructured":"Lane, N. D., Georgiev, P., & Qendro, L. (2015). DeepEar: Robust smartphone audio sensing in unconstrained acoustic environments using deep learning. In\u00a0Proceedings of the 2015 ACM international joint conference on pervasive and ubiquitous computing\u00a0(pp. 283\u2013294). Osaka: ACM.","DOI":"10.1145\/2750858.2804262"},{"key":"9681_CR212","doi-asserted-by":"crossref","unstructured":"Lartillot, O., & Grandjean, D. (2019). Tempo and metrical analysis by tracking multiple metrical levels using autocorrelation.","DOI":"10.3390\/app9235121"},{"key":"9681_CR213","doi-asserted-by":"crossref","unstructured":"Lavrentyeva, G., Novoselov, S., Malykh, E., Kozlov, A., Kudashev, O., & Shchemelinin, V. (2017). Audio-replay attack detection countermeasures. In\u00a0International conference on speech and computer\u00a0(pp. 171\u2013181). Cham: Springer.","DOI":"10.1007\/978-3-319-66429-3_16"},{"key":"9681_CR214","doi-asserted-by":"crossref","unstructured":"Lazaro, A., Sarno, R., Andre, R.J., & Mahardika, M.N. (2017). Music tempo classification using audio spectrum centroid, audio spectrum flatness, and audio spectrum spread based on MPEG-7 audio features. In\u00a02017 3rd international conference on science in information technology (ICSITech)\u00a0(pp. 41\u201346). Bandung: IEEE.","DOI":"10.1109\/ICSITech.2017.8257083"},{"issue":"9","key":"9681_CR215","doi-asserted-by":"crossref","first-page":"1751","DOI":"10.1109\/TASLP.2017.2716178","volume":"25","author":"T Le Cornu","year":"2017","unstructured":"Le Cornu, T., & Milner, B. (2017). Generating intelligible audio speech from visual speech. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 25(9), 1751\u20131761.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"4","key":"9681_CR216","doi-asserted-by":"crossref","first-page":"549","DOI":"10.3390\/s16040549","volume":"16","author":"J Lee","year":"2016","unstructured":"Lee, J., Choi, H., Park, D., Chung, Y., Kim, H. Y., & Yoon, S. (2016). Fault detection and diagnosis of railway point machines by sound analysis. Sensors, 16(4), 549\u2013650.","journal-title":"Sensors"},{"key":"9681_CR217","unstructured":"Lee, K., Junokas, M. J., Amanzadeh, M., & Garnett, G. E. (2015a). Exploratory analysis on expressions in two different 4\/4 beat patterns. In\u00a0ICMC."},{"key":"9681_CR218","unstructured":"Lee, J., Kim, T., Park, J., & Nam, J. (2017). Raw waveform-based audio classification using sample-level CNN architectures.\u00a0http:\/\/arxiv.org\/abs\/1712.00866."},{"key":"9681_CR219","doi-asserted-by":"crossref","unstructured":"Lee, J., Shin, S., Jang, D., Jang, S.J., & Yoon, K. (2015b). Music recommendation system based on usage history and automatic genre classification. In\u00a02015 IEEE international conference on consumer electronics (ICCE)\u00a0(pp. 134\u2013135). Las Vegas: IEEE.","DOI":"10.1109\/ICCE.2015.7066352"},{"issue":"8","key":"9681_CR220","first-page":"1","volume":"20","author":"L Lei","year":"2018","unstructured":"Lei, L., & She, K. (2018). Identity vector extraction by perceptual wavelet packet entropy and convolutional neural network for voice authentication. Entropy, 20(8), 1\u201315.","journal-title":"Entropy"},{"issue":"2","key":"9681_CR221","doi-asserted-by":"crossref","first-page":"318","DOI":"10.1109\/TASL.2007.910781","volume":"16","author":"M Levy","year":"2008","unstructured":"Levy, M., & Sandler, M. (2008). Structural segmentation of musical audio by constrained clustering. IEEE Transactions on Audio, Speech and Language Processing, 16(2), 318\u2013326.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"issue":"3","key":"9681_CR222","doi-asserted-by":"crossref","first-page":"639","DOI":"10.1166\/jmihi.2017.2082","volume":"7","author":"Z Li","year":"2017","unstructured":"Li, Z., Dey, N., Ashour, A. S., Cao, L., Wang, Y., Wang, D., et al. (2017a). Convolutional neural network based clustering and manifold learning method for diabetic plantar pressure imaging dataset. Journal of Medical Imaging and Health Informatics, 7(3), 639\u2013652.","journal-title":"Journal of Medical Imaging and Health Informatics"},{"key":"9681_CR223","doi-asserted-by":"crossref","unstructured":"Li, M., Miao, Z., & Ma, C. (2015). Feature extraction with convolutional restricted boltzmann machine for audio classification. In\u00a02015 3rd IAPR Asian conference on pattern recognition (ACPR)\u00a0(pp. 791\u2013795). Kuala Lumpur: IEEE.","DOI":"10.1109\/ACPR.2015.7486611"},{"issue":"1","key":"9681_CR224","doi-asserted-by":"crossref","first-page":"26","DOI":"10.1186\/s13636-017-0123-3","volume":"2017","author":"W Li","year":"2017","unstructured":"Li, W., Wang, G., & Li, K. (2017b). Clustering algorithm for audio signals based on the sequential Psim matrix and Tabu search. EURASIP Journal on Audio, Speech, and Music Processing, 2017(1), 26\u201334.","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"issue":"3","key":"9681_CR225","doi-asserted-by":"crossref","first-page":"266","DOI":"10.1049\/iet-spr.2014.0388","volume":"10","author":"R Li","year":"2016","unstructured":"Li, R., Xu, S., & Yang, H. (2016). Spread spectrum audio watermarking based on perceptual characteristic aware extraction. IET Signal Processing, 10(3), 266\u2013273.","journal-title":"IET Signal Processing"},{"key":"9681_CR226","first-page":"57","volume":"5","author":"S Liang","year":"2014","unstructured":"Liang, S., & Fan, X. (2014). Audio content classification method research based on two-step strategy. International Journal of Advanced Computer Science and Applications (IJACSA), 5, 57\u201362.","journal-title":"International Journal of Advanced Computer Science and Applications (IJACSA)"},{"key":"9681_CR227","unstructured":"Lidy, T. (2015). Spectral convolutional neural network for music classification.\u00a0In Music information retrieval evaluation eX-change (MIREX), Malaga, Spain."},{"key":"9681_CR228","unstructured":"Lidy, T., & Schindler, A. (2016). CQT-based convolutional neural networks for audio scene classification. In\u00a0Proceedings of the detection and classification of acoustic scenes and events 2016 workshop (DCASE2016)\u00a0(Vol. 90, pp. 1032\u20131048). DCASE2016 challenge."},{"key":"9681_CR230","doi-asserted-by":"crossref","unstructured":"Lim, W., Jang, D., & Lee, T. (2016). Speech emotion recognition using convolutional and recurrent neural networks. In\u00a02016 Asia-Pacific signal and information processing association annual summit and conference (APSIPA)\u00a0(pp. 1\u20134). Jeju: IEEE.","DOI":"10.1109\/APSIPA.2016.7820699"},{"issue":"6","key":"9681_CR231","first-page":"2748","volume":"12","author":"M Lim","year":"2018","unstructured":"Lim, M., Lee, D., Park, H., Kang, Y., Oh, J., Park, J. S., et al. (2018). Convolutional neural network based audio event classification. KSII Transactions on Internet & Information Systems, 12(6), 2748\u20132760.","journal-title":"KSII Transactions on Internet & Information Systems"},{"issue":"1","key":"9681_CR232","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1186\/1743-0003-11-18","volume":"11","author":"YP Lin","year":"2014","unstructured":"Lin, Y. P., Duann, J. R., Feng, W., Chen, J. H., & Jung, T. P. (2014). Revealing spatio-spectral electroencephalographic dynamics of musical mode and tempo perception by independent component analysis. Journal of Neuroengineering and Rehabilitation, 11(1), 18.","journal-title":"Journal of Neuroengineering and Rehabilitation"},{"key":"9681_CR233","doi-asserted-by":"crossref","unstructured":"Lin, X., & Kang, X. (2017). Supervised audio tampering detection using an autoregressive model. In\u00a02017 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 2142\u20132146). New Orleans: IEEE.","DOI":"10.1109\/ICASSP.2017.7952535"},{"issue":"3","key":"9681_CR234","doi-asserted-by":"crossref","first-page":"594","DOI":"10.1109\/TASLP.2016.2519146","volume":"24","author":"X Liu","year":"2016","unstructured":"Liu, X., & Bao, C. (2016). Audio bandwidth extension based on ensemble echo state networks with temporal evolution. IEEE\/ACM Transactions on Audio, Speech and Language Processing, 24(3), 594\u2013607.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing"},{"key":"9681_CR235","doi-asserted-by":"crossref","first-page":"761","DOI":"10.1016\/j.sigpro.2015.01.001","volume":"120","author":"Y Liu","year":"2016","unstructured":"Liu, Y., Feng, X., & Zhou, Z. (2016). Multimodal video classification with stacked contractive autoencoders. Signal Processing, 120, 761\u2013766.","journal-title":"Signal Processing"},{"key":"9681_CR236","doi-asserted-by":"crossref","unstructured":"Liu, Z., & Lu, W. (2017). Fast copy-move detection of digital audio. In\u00a02017 IEEE second international conference on data science in cyberspace (DSC)\u00a0(pp. 625\u2013629). Shenzhen: IEEE.","DOI":"10.1109\/DSC.2017.11"},{"key":"9681_CR237","doi-asserted-by":"crossref","unstructured":"Liu, X., Tian, W., Yin, H., & He, L. (2018). Automatic detection of nasal leak in cleft palate speech based on an improved group delay method. In\u00a02018 International symposium on communication engineering & computer science (CECS 2018). Atlantis Press.","DOI":"10.2991\/cecs-18.2018.65"},{"key":"9681_CR238","unstructured":"L\u00f3pez-Serrano, P., Dittmar, C., & M\u00fcller, M. (2017). Mid-level audio features based on cascaded harmonic-residual-percussive separation. In\u00a0Audio engineering society conference: 2017 AES international conference on semantic audio. Audio Engineering Society."},{"issue":"1","key":"9681_CR239","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1186\/s13636-018-0138-4","volume":"2018","author":"V Lostanlen","year":"2018","unstructured":"Lostanlen, V., Lafay, G., And\u00e9n, J., & Lagrange, M. (2018). Relevance-based quantization of scattering features for unsupervised mining of environmental audio. EURASIP Journal on Audio, Speech, and Music Processing, 2018(1), 15\u201324.","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"key":"9681_CR240","doi-asserted-by":"crossref","unstructured":"Loweimi, E., Barker, J., & Hain, T. (2018). Exploring the use of group delay for generalised vts based noise compensation. In\u00a02018 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 4824\u20134828). Calgary: IEEE.","DOI":"10.1109\/ICASSP.2018.8462595"},{"issue":"7","key":"9681_CR241","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1109\/TSA.2002.804546","volume":"10","author":"L Lu","year":"2002","unstructured":"Lu, L., Zhang, H. J., & Jiang, H. (2002). Content analysis for audio classification and segmentation. IEEE Transactions on Speech and Audio Processing, 10(7), 504\u2013516.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"issue":"1","key":"9681_CR242","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1016\/j.csl.2014.04.001","volume":"30","author":"J Lude\u00f1a-Choez","year":"2015","unstructured":"Lude\u00f1a-Choez, J., & Gallardo-Antol\u00edn, A. (2015). Feature extraction based on the high-pass filtering of audio signals for acoustic event classification. Computer Speech & Language, 30(1), 32\u201342.","journal-title":"Computer Speech & Language"},{"key":"9681_CR243","unstructured":"Lukasik, E., Yang, C., & Kurzawski, L. (2016). Temporal envelope for audio classification. In\u00a0Audio engineering society convention 140. Audio Engineering Society."},{"key":"9681_CR244","doi-asserted-by":"crossref","unstructured":"Lukic, Y., Vogt, C., D\u00fcrr, O., & Stadelmann, T. (2016). Speaker identification and clustering using convolutional neural networks. In\u00a02016 IEEE 26th international workshop on machine learning for signal processing (MLSP)\u00a0(pp. 1\u20136). Vietri sul Mare: IEEE.","DOI":"10.1109\/MLSP.2016.7738816"},{"key":"9681_CR245","doi-asserted-by":"crossref","unstructured":"Luo, Y., Chen, Z., Hershey, J.R., Le Roux, J., & Mesgarani, N. (2017a). Deep clustering and conventional networks for music separation: Stronger together. In\u00a02017 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 61\u201365). New Orleans: IEEE.","DOI":"10.1109\/ICASSP.2017.7952118"},{"key":"9681_CR246","doi-asserted-by":"crossref","unstructured":"Luo, X., Jiang, J., Zhu, J., & Dou, Y. (2017b). Parallel algorithm design for audio feature extraction. In\u00a02017 5th international conference on machinery, materials and computing technology (ICMMCT 2017). Atlantis Press.","DOI":"10.2991\/icmmct-17.2017.229"},{"issue":"9","key":"9681_CR247","doi-asserted-by":"crossref","first-page":"2179","DOI":"10.1109\/TIFS.2018.2812185","volume":"13","author":"D Luo","year":"2018","unstructured":"Luo, D., Korus, P., & Huang, J. (2018). Band energy difference for source attribution in audio forensics. IEEE Transactions on Information Forensics and Security, 13(9), 2179\u20132189.","journal-title":"IEEE Transactions on Information Forensics and Security"},{"issue":"5","key":"9681_CR248","doi-asserted-by":"crossref","first-page":"688","DOI":"10.1109\/LSP.2016.2549600","volume":"23","author":"D Luo","year":"2016","unstructured":"Luo, D., Sun, M., & Huang, J. (2016). Audio postprocessing detection based on amplitude cooccurrence vector feature. IEEE Signal Processing Letters, 23(5), 688\u2013692.","journal-title":"IEEE Signal Processing Letters"},{"key":"9681_CR249","doi-asserted-by":"crossref","first-page":"85","DOI":"10.1016\/j.dsp.2014.11.003","volume":"37","author":"D Luo","year":"2015","unstructured":"Luo, D., Yang, R., & Huang, J. (2015). Identification of AMR decompressed audio. Digital Signal Processing, 37, 85\u201391.","journal-title":"Digital Signal Processing"},{"issue":"5","key":"9681_CR250","doi-asserted-by":"crossref","first-page":"1","DOI":"10.3390\/s16050717","volume":"16","author":"J Luque","year":"2016","unstructured":"Luque, J., Larios, D., Personal, E., Barbancho, J., & Le\u00f3n, C. (2016). Evaluation of MPEG-7-based audio descriptors for animal voice recognition over wireless acoustic sensor networks. Sensors, 16(5), 1\u201322.","journal-title":"Sensors"},{"key":"9681_CR251","doi-asserted-by":"crossref","first-page":"248","DOI":"10.1016\/j.eswa.2017.11.016","volume":"95","author":"A Luque","year":"2018","unstructured":"Luque, A., Romero-Lemos, J., Carrasco, A., & Barbancho, J. (2018). Non-sequential automatic classification of anuran sounds for the estimation of climate-change indicators. Expert Systems with Applications, 95, 248\u2013260.","journal-title":"Expert Systems with Applications"},{"key":"9681_CR252","unstructured":"Lykartsis, A., & Lerch, A. (2015). Beat histogram features for rhythm-based musical genre classification using multiple novelty functions. In\u00a0Proceedings of the 16th ISMIR Conference\u00a0(pp. 434-440)."},{"key":"9681_CR253","unstructured":"Lykartsis, A., & Weinzierl, S. (2016). Rhythm description for music and speech using the beat histogram with multiple novelty functions: First results."},{"key":"9681_CR254","unstructured":"Lykartsis, A., Wu, C.W., & Lerch, A. (2015). Beat histogram features from NMF-based novelty functions for music classification. In\u00a0ISMIR\u00a0(pp. 434\u2013440)."},{"key":"9681_CR255","doi-asserted-by":"crossref","unstructured":"Ma, M., Ramabhadran, B., Emond, J., Rosenberg, A., & Biadsy, F. (2019). Comparison of data augmentation and adaptation strategies for code-switched automatic speech recognition. In\u00a0ICASSP 2019-2019 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 6081\u20136085). Brighton: IEEE.","DOI":"10.1109\/ICASSP.2019.8682824"},{"key":"9681_CR256","doi-asserted-by":"crossref","unstructured":"Ma, X., Yang, H., Chen, Q., Huang, D., & Wang, Y. (2016a). Depaudionet: An efficient deep model for audio based depression classification. In\u00a0Proceedings of the 6th international workshop on audio\/visual emotion challenge\u00a0(pp. 35\u201342). Amsterdam: ACM.","DOI":"10.1145\/2988257.2988267"},{"key":"9681_CR257","doi-asserted-by":"crossref","first-page":"9733","DOI":"10.1109\/ACCESS.2016.2646458","volume":"4","author":"Z Ma","year":"2016","unstructured":"Ma, Z., Yu, H., Tan, Z. H., & Guo, J. (2016b). Text-independent speaker identification using the histogram transform model. IEEE Access, 4, 9733\u20139739.","journal-title":"IEEE Access"},{"issue":"1","key":"9681_CR258","doi-asserted-by":"crossref","first-page":"17","DOI":"10.1007\/s10772-014-9243-7","volume":"18","author":"SR Madikeri","year":"2015","unstructured":"Madikeri, S. R., Talambedu, A., & Murthy, H. A. (2015). Modified group delay feature based total variability space modelling for speaker recognition. International Journal of Speech Technology, 18(1), 17\u201323.","journal-title":"International Journal of Speech Technology"},{"key":"9681_CR259","unstructured":"Magare, M., & Dahake, R. (2016). Audio based music classification based on genre and emotion using Gaussian process.\u00a0International Journal of Advanced Research in Computer and Communication Engineering."},{"issue":"6","key":"9681_CR260","first-page":"49","volume":"15","author":"P Mahana","year":"2015","unstructured":"Mahana, P., & Singh, G. (2015). Comparative analysis of machine learning algorithms for audio signals classification. International Journal of Computer Science and Network Security (IJCSNS), 15(6), 49.","journal-title":"International Journal of Computer Science and Network Security (IJCSNS)"},{"key":"9681_CR261","doi-asserted-by":"crossref","unstructured":"Mahardhika, F., Warnars, H. L. H. S., & Heryadi, Y. (2018). Indonesian\u2019s dangdut music classification based on audio features. In\u00a02018 Indonesian association for pattern recognition international conference (INAPR)\u00a0(pp. 99\u2013103). Jakarta: IEEE.","DOI":"10.1109\/INAPR.2018.8627046"},{"key":"9681_CR262","first-page":"17","volume":"13","author":"O Marshall","year":"2019","unstructured":"Marshall, O. (2019). Jitter: Clocking as audible media. International Journal of Communication, 13, 17.","journal-title":"International Journal of Communication"},{"issue":"1","key":"9681_CR263","doi-asserted-by":"crossref","first-page":"53","DOI":"10.5121\/ijsc.2015.6105","volume":"6","author":"AD Mayvan","year":"2015","unstructured":"Mayvan, A. D., Beheshti, S. A., & Masoom, M. H. (2015). Classification of vehicles based on audio signals using quadratic discriminant analysis and high energy feature vectors. International Journal on Soft Computing, 6(1), 53.","journal-title":"International Journal on Soft Computing"},{"key":"9681_CR264","unstructured":"McAdams, S., & Siedenburg, K. (2019). Perception and cognition of musical timbre.\u00a0In Foundations in music psychology: Theory and research."},{"key":"9681_CR265","doi-asserted-by":"crossref","unstructured":"McFee, B., Raffel, C., Liang, D., Ellis, D. P., McVicar, M., Battenberg, E., & Nieto, O. (2015). librosa: Audio and music signal analysis in python. In\u00a0Proceedings of the 14th python in science conference\u00a0(Vol. 8).","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"9681_CR266","unstructured":"McPherson, A.P., & Morreale, F. (2017). Technology and community in toolkits for musical interface design. CHI."},{"key":"9681_CR267","doi-asserted-by":"crossref","unstructured":"Medhat, F., Chesmore, D., & Robinson, J. (2017). Masked conditional neural networks for audio classification. In\u00a0International conference on artificial neural networks\u00a0(pp. 349\u2013358). Cham: Springer.","DOI":"10.1007\/978-3-319-68612-7_40"},{"key":"9681_CR268","doi-asserted-by":"crossref","unstructured":"Meng, X., Li, C., & Tian, L. (2018). Detecting audio splicing forgery algorithm based on local noise level estimation. In\u00a02018 5th international conference on systems and informatics (ICSAI)\u00a0(pp. 861\u2013865). China: IEEE.","DOI":"10.1109\/ICSAI.2018.8599318"},{"key":"9681_CR269","doi-asserted-by":"crossref","unstructured":"Mesaros, A., Heittola, T., Dikmen, O., & Virtanen, T. (2015). Sound event detection in real life recordings using coupled matrix factorization of spectral representations and class activity annotations. In\u00a02015 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 151\u2013155). Brisbane: IEEE.","DOI":"10.1109\/ICASSP.2015.7177950"},{"key":"9681_CR270","doi-asserted-by":"crossref","unstructured":"Meudt, S., & Schwenker, F. (2014). Enhanced autocorrelation in real world emotion recognition. In\u00a0Proceedings of the 16th international conference on multimodal interaction\u00a0(pp. 502\u2013507). Istanbul: ACM.","DOI":"10.1145\/2663204.2666276"},{"key":"9681_CR271","doi-asserted-by":"crossref","unstructured":"Miano, T. (2018).\u00a0Hear and see: End-to-end sound classification and visualization of classified sounds\u00a0(No. e27280v1). PeerJ Preprints.","DOI":"10.7287\/peerj.preprints.27280v1"},{"key":"9681_CR272","doi-asserted-by":"publisher","DOI":"10.1155\/2018\/5209207","author":"D Min","year":"2018","unstructured":"Min, D., Park, B., & Park, J. (2018). Artificial engine sound synthesis method for modification of the acoustic characteristics of electric vehicles. Shock and Vibration. https:\/\/doi.org\/10.1155\/2018\/5209207.","journal-title":"Shock and Vibration"},{"issue":"9","key":"9681_CR273","first-page":"69","volume":"39","author":"L Mingming","year":"2016","unstructured":"Mingming, L., Hui, Z., & Qinghong, S. H. E. N. (2016). Realization of audio fingerprint based on power spectrum feature. Electronic Measurement Technology, 39(9), 69\u201372.","journal-title":"Electronic Measurement Technology"},{"issue":"4","key":"9681_CR274","first-page":"1","volume":"1","author":"SR Mishra","year":"2012","unstructured":"Mishra, S. R., Somani, S. B., Deshmukh, P., & Soni, D. (2012). EEG signal processing and classification of sensorimoter rhythm-based BCI. International Journal of Engineering Research and Technology, 1(4), 1\u20134.","journal-title":"International Journal of Engineering Research and Technology"},{"key":"9681_CR275","unstructured":"Mitrovic, D., Zeppelzauer, M., & Breiteneder, C. (2006). Discrimination and retrieval of animal sounds. In\u00a02006 12th international multi-media modelling conference\u00a0(p. 5). Beijing: IEEE."},{"key":"9681_CR276","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2017.2724515","author":"S Mo","year":"2017","unstructured":"Mo, S., & Niu, J. (2017). A novel method based on OMPGW method for feature extraction in automatic music mood classification. IEEE Transactions on Affective Computing. https:\/\/doi.org\/10.1109\/TAFFC.2017.2724515.","journal-title":"IEEE Transactions on Affective Computing"},{"issue":"2\u20133","key":"9681_CR277","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1007\/s10994-016-5570-z","volume":"104","author":"DC Mocanu","year":"2016","unstructured":"Mocanu, D. C., Mocanu, E., Nguyen, P. H., Gibescu, M., & Liotta, A. (2016). A topological insight into restricted boltzmann machines. Machine Learning, 104(2\u20133), 243\u2013270.","journal-title":"Machine Learning"},{"key":"9681_CR278","unstructured":"Moffat, D., Ronan, D., & Reiss, J. D. (2015). An evaluation of audio feature extraction toolboxes."},{"issue":"1","key":"9681_CR279","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1007\/s11554-018-0788-6","volume":"15","author":"R Molina","year":"2018","unstructured":"Molina, R., Gazzano, J. D., Rincon, F., Gil-Costa, V., Barba, J., Petrino, R., et al. (2018). Heterogeneous SoC-based acceleration of MPEG-7 compliance image retrieval process. Journal of Real-Time Image Processing, 15(1), 161\u2013172.","journal-title":"Journal of Real-Time Image Processing"},{"issue":"1","key":"9681_CR280","doi-asserted-by":"crossref","first-page":"184","DOI":"10.1109\/JBHI.2018.2800741","volume":"23","author":"J Monge-Alvarez","year":"2018","unstructured":"Monge-Alvarez, J., Hoyos-Barcel\u00f3, C., Lesso, P., & Casaseca-de-la-Higuera, P. (2018). Robust detection of audio-cough events using local Hu moments. IEEE Journal of Biomedical and Health Informatics, 23(1), 184\u2013196.","journal-title":"IEEE Journal of Biomedical and Health Informatics"},{"key":"9681_CR281","doi-asserted-by":"crossref","unstructured":"Muhammad, G., Alotaibi, Y. A., Alsulaiman, M. & Huda, M. N. (2010). Environment recognition using selected MPEG-7 audio features and mel-frequency cepstral coefficients. In\u00a02010 Fifth international conference on digital telecommunications\u00a0(pp. 11\u201316). Athens: IEEE.","DOI":"10.1109\/ICDT.2010.10"},{"key":"9681_CR282","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.bspc.2014.02.001","volume":"11","author":"G Muhammad","year":"2014","unstructured":"Muhammad, G., & Melhem, M. (2014). Pathological voice detection and binary classification using MPEG-7 audio features. Biomedical Signal Processing and Control, 11, 1\u20139.","journal-title":"Biomedical Signal Processing and Control"},{"issue":"21","key":"9681_CR283","doi-asserted-by":"crossref","first-page":"27997","DOI":"10.1007\/s11042-018-5993-6","volume":"77","author":"H Mukherjee","year":"2018","unstructured":"Mukherjee, H., Obaidullah, S. M., Phadikar, S., & Roy, K. (2018a). MISNA-A musical instrument segregation system from noisy audio with LPCC-S features and extreme learning. Multimedia Tools and Applications, 77(21), 27997\u201328022.","journal-title":"Multimedia Tools and Applications"},{"issue":"4","key":"9681_CR284","doi-asserted-by":"crossref","first-page":"753","DOI":"10.1007\/s10772-018-9525-6","volume":"21","author":"H Mukherjee","year":"2018","unstructured":"Mukherjee, H., Obaidullah, S. M., Santosh, K. C., Phadikar, S., & Roy, K. (2018b). Line spectral frequency-based features and extreme learning machine for voice activity detection from audio signal. International Journal of Speech Technology, 21(4), 753\u2013760.","journal-title":"International Journal of Speech Technology"},{"key":"9681_CR285","doi-asserted-by":"crossref","unstructured":"Mun, S., Shon, S., Kim, W., Han, D. K., & Ko, H. (2017). Deep neural network based learning and transferring mid-level audio features for acoustic scene classification. In\u00a02017 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 796\u2013800). New Orleans: IEEE.","DOI":"10.1109\/ICASSP.2017.7952265"},{"key":"9681_CR286","doi-asserted-by":"crossref","first-page":"77","DOI":"10.1016\/j.eswa.2018.04.005","volume":"106","author":"YS Murthy","year":"2018","unstructured":"Murthy, Y. S., & Koolagudi, S. G. (2018). Classification of vocal and non-vocal segments in audio clips using genetic algorithm based feature selection (GAFS). Expert Systems with Applications, 106, 77\u201391.","journal-title":"Expert Systems with Applications"},{"key":"9681_CR287","doi-asserted-by":"crossref","unstructured":"Nagathil, A., Schlattmann, J.W., Neumann, K., & Martin, R. (2017). A feature-based linear regression model for predicting perceptual ratings of music by cochlear implant listeners. In\u00a02017 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 346\u2013350). New Orleans: IEEE.","DOI":"10.1109\/ICASSP.2017.7952175"},{"key":"9681_CR288","doi-asserted-by":"crossref","unstructured":"Nagavi, T. C., & Bhajantri, N. U. (2017). A new approach to query by humming based on modulated frequency features. In\u00a02017 International conference on wireless communications, signal processing and networking (WiSPNET)\u00a0(pp. 1675\u20131679). Chennai: IEEE.","DOI":"10.1109\/WiSPNET.2017.8300046"},{"issue":"1","key":"9681_CR289","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.eij.2015.05.004","volume":"17","author":"NJ Nalini","year":"2016","unstructured":"Nalini, N. J., & Palanivel, S. (2016). Music emotion recognition: The combined evidence of MFCC and residual phase. Egyptian Informatics Journal, 17(1), 1\u201310.","journal-title":"Egyptian Informatics Journal"},{"issue":"1","key":"9681_CR290","doi-asserted-by":"crossref","first-page":"65","DOI":"10.1504\/IJIM.2015.070024","volume":"1","author":"D Nandi","year":"2015","unstructured":"Nandi, D., Ashour, A. S., Samanta, S., Chakraborty, S., Salem, M. A., & Dey, N. (2015). Principal component analysis in medical image processing: a study. International Journal of Image Mining, 1(1), 65\u201386.","journal-title":"International Journal of Image Mining"},{"key":"9681_CR291","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1016\/j.patrec.2017.01.013","volume":"88","author":"L Nanni","year":"2017","unstructured":"Nanni, L., Costa, Y. M., Lucio, D. R., Silla, C. N., Jr., & Brahnam, S. (2017). Combining visual and acoustic features for audio classification tasks. Pattern Recognition Letters, 88, 49\u201356.","journal-title":"Pattern Recognition Letters"},{"issue":"4","key":"9681_CR292","doi-asserted-by":"crossref","first-page":"941","DOI":"10.1007\/s10772-018-9524-7","volume":"21","author":"MA Nasr","year":"2018","unstructured":"Nasr, M. A., Abd-Elnaby, M., El-Fishawy, A. S., El-Rabaie, S., & El-Samie, F. E. A. (2018). Speaker identification based on normalized pitch frequency and mel frequency cepstral coefficients. International Journal of Speech Technology, 21(4), 941\u2013951.","journal-title":"International Journal of Speech Technology"},{"key":"9681_CR293","doi-asserted-by":"crossref","unstructured":"Nath, S. S., Mishra, G., Kar, J., Chakraborty, S., & Dey, N. (2014). A survey of image classification methods and techniques. In\u00a02014 International conference on control, instrumentation, communication and computational technologies (ICCICCT)\u00a0(pp. 554\u2013557). Kanyakumari: IEEE.","DOI":"10.1109\/ICCICCT.2014.6993023"},{"key":"9681_CR294","doi-asserted-by":"crossref","unstructured":"Nawasalkar, R. K., Thakare, V. M., Jambhekar, N. D., & Butey, P. K. (2015). Performance analysis of different audio with raga Yaman. In\u00a02015 1st international conference on next generation computing technologies (NGCT)\u00a0(pp. 929\u2013931). Dehradun: IEEE.","DOI":"10.1109\/NGCT.2015.7375256"},{"issue":"1","key":"9681_CR295","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1097\/MAO.0000000000001250","volume":"38","author":"JS Nemer","year":"2017","unstructured":"Nemer, J. S., Kohlberg, G. D., Mancuso, D. M., Griffin, B. M., Certo, M. V., Chen, S. Y., et al. (2017). Reduction of the harmonic series influences musical enjoyment with cochlear implants. Otology & Neurotology, 38(1), 31\u201337.","journal-title":"Otology & Neurotology"},{"key":"9681_CR296","doi-asserted-by":"crossref","unstructured":"Niu, L., Saiki, S., & Nakamura, M. (2017). Integrating environmental sensing and BLE-based location for improving daily activity recognition in OPH. In\u00a0Proceedings of the 19th international conference on information integration and web-based applications & services\u00a0(pp. 330\u2013337). Salzburg: ACM.","DOI":"10.1145\/3151759.3151791"},{"issue":"12","key":"9681_CR297","doi-asserted-by":"crossref","first-page":"443","DOI":"10.3390\/app6120443","volume":"6","author":"J Noda","year":"2016","unstructured":"Noda, J., Travieso, C., & S\u00e1nchez-Rodr\u00edguez, D. (2016). Automatic taxonomic classification of fish based on their acoustic signals. Applied Sciences, 6(12), 443.","journal-title":"Applied Sciences"},{"issue":"4","key":"9681_CR298","doi-asserted-by":"crossref","first-page":"722","DOI":"10.1007\/s10489-014-0629-7","volume":"42","author":"K Noda","year":"2015","unstructured":"Noda, K., Yamaguchi, Y., Nakadai, K., Okuno, H. G., & Ogata, T. (2015). Audio-visual speech recognition using deep learning. Applied Intelligence, 42(4), 722\u2013737.","journal-title":"Applied Intelligence"},{"key":"9681_CR299","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1016\/j.bspc.2015.12.009","volume":"27","author":"R Nonaka","year":"2016","unstructured":"Nonaka, R., Emoto, T., Abeyratne, U. R., Jinnouchi, O., Kawata, I., Ohnishi, H., et al. (2016). Automatic snore sound extraction from sleep sound recordings via auditory image modeling. Biomedical Signal Processing and Control, 27, 7\u201314.","journal-title":"Biomedical Signal Processing and Control"},{"key":"9681_CR300","doi-asserted-by":"crossref","unstructured":"Nousias, S., Lakoumentas, J., Lalos, A., Kikidis, D., Moustakas, K., Votis, K., & Tzovaras, D. (2016). Monitoring asthma medication adherence through content based audio classification. In\u00a02016 IEEE symposium series on computational intelligence (SSCI)\u00a0(pp. 1\u20135). Athens: IEEE.","DOI":"10.1109\/SSCI.2016.7849898"},{"issue":"5","key":"9681_CR301","doi-asserted-by":"crossref","first-page":"358","DOI":"10.17743\/jaes.2015.0025","volume":"63","author":"S Ntalampiras","year":"2015","unstructured":"Ntalampiras, S. (2015). Audio pattern recognition of baby crying sound events. Journal of the Audio Engineering Society, 63(5), 358\u2013369.","journal-title":"Journal of the Audio Engineering Society"},{"key":"9681_CR302","doi-asserted-by":"crossref","unstructured":"Ntalampiras, S. (2018). On acoustic monitoring of farm environments. In\u00a0International symposium on signal processing and intelligent recognition systems\u00a0(pp. 53\u201363). Singapore: Springer.","DOI":"10.1007\/978-981-13-5758-9_5"},{"issue":"2","key":"9681_CR303","doi-asserted-by":"crossref","first-page":"21","DOI":"10.1162\/COMJ_a_00412","volume":"41","author":"C\u00d3 Nuan\u00e1in","year":"2017","unstructured":"Nuan\u00e1in, C. \u00d3., Herrera, P., & Jord\u00e1, S. (2017). Rhythmic concatenative synthesis for electronic music: Techniques, implementation, and evaluation. Computer Music Journal, 41(2), 21\u201337.","journal-title":"Computer Music Journal"},{"key":"9681_CR304","doi-asserted-by":"crossref","unstructured":"Obermayer, A. (2016). Glossary of literary terms-S.\u00a0Otago German Studies,\u00a02.","DOI":"10.11157\/ogs-vol2id61"},{"key":"9681_CR305","doi-asserted-by":"crossref","unstructured":"Oletic, D., Bilas, V., Magno, M., Felber, N., & Benini, L. (2016). Low-power multichannel spectro-temporal feature extraction circuit for audio pattern wake-up. In\u00a02016 Design, automation & test in Europe conference & exhibition (DATE)\u00a0(pp. 355\u2013360). Dresden: IEEE.","DOI":"10.3850\/9783981537079_0295"},{"key":"9681_CR306","doi-asserted-by":"crossref","unstructured":"Olteanu, E., Miu, D. O., Drosu, A., Segarceanu, S., Suciu, G., & Gavat, I. (2019). Fusion of speech techniques for automatic environmental sound recognition. In\u00a02019 International conference on speech technology and human-computer dialogue (SpeD)\u00a0(pp. 1\u20138). Timisoara: IEEE.","DOI":"10.1109\/SPED.2019.8906574"},{"key":"9681_CR307","unstructured":"Oo, M. M., & Oo, L. L. (2019). Acoustic scene classification by using combination of MODWPT and spectral features."},{"issue":"13","key":"9681_CR308","doi-asserted-by":"crossref","first-page":"5858","DOI":"10.1016\/j.eswa.2014.03.026","volume":"41","author":"CS Ooi","year":"2014","unstructured":"Ooi, C. S., Seng, K. P., Ang, L. M., & Chew, L. W. (2014). A new approach of audio emotion recognition. Expert Systems with Applications, 41(13), 5858\u20135869.","journal-title":"Expert Systems with Applications"},{"key":"9681_CR309","unstructured":"Oord, A. V. D., Dieleman, S., Zen, H., Simonyan, K., Vinyals, O., Graves, A., Kalchbrenner, N., Senior, A., & Kavukcuoglu, K. (2016). Wavenet: A generative model for raw audio.\u00a0http:\/\/arxiv.org\/abs\/1609.03499."},{"key":"9681_CR310","unstructured":"Oramas, S., Nieto, O., Barbieri, F., & Serra, X. (2017). Multi-label music genre classification from audio, text, and images using deep features.\u00a0http:\/\/arxiv.org\/abs\/1707.04916."},{"key":"9681_CR311","doi-asserted-by":"crossref","unstructured":"Ortolani, F. (2019). A comparative study on using phased or timed arrays in audio surveillance applications. In\u00a02019 IEEE 39th international conference on electronics and nanotechnology (ELNANO)\u00a0(pp. 808\u2013812). Kyiv: IEEE.","DOI":"10.1109\/ELNANO.2019.8783876"},{"key":"9681_CR312","doi-asserted-by":"crossref","unstructured":"Owens, A., & Efros, A. A. (2018). Audio-visual scene analysis with self-supervised multisensory features. In\u00a0Proceedings of the European conference on computer vision (ECCV)\u00a0(pp. 631\u2013648).","DOI":"10.1007\/978-3-030-01231-1_39"},{"key":"9681_CR313","doi-asserted-by":"crossref","first-page":"137","DOI":"10.1016\/j.procs.2017.06.020","volume":"111","author":"I Ozer","year":"2017","unstructured":"Ozer, I., Ozer, Z., & Findik, O. (2017). Lanczos kernel based spectrogram image features for sound classification. Procedia Computer Science, 111, 137\u2013144.","journal-title":"Procedia Computer Science"},{"key":"9681_CR314","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.apacoust.2018.02.009","volume":"136","author":"T \u00d6zseven","year":"2018","unstructured":"\u00d6zseven, T., & D\u00fc\u011fenci, M. (2018). SPeech ACoustic (SPAC): A novel tool for speech feature extraction and classification. Applied Acoustics, 136, 1\u20138.","journal-title":"Applied Acoustics"},{"key":"9681_CR315","doi-asserted-by":"crossref","unstructured":"Padilla, P., Knights, F., Ruiz, A.T., & Tidhar, D. (2017). Identification and evolution of musical style I: Hierarchical transition networks and their modular structure. In\u00a0International conference on mathematics and computation in music\u00a0(pp. 259\u2013278). Cham: Springer.","DOI":"10.1007\/978-3-319-71827-9_20"},{"issue":"2","key":"9681_CR316","first-page":"311","volume":"5","author":"HK Palo","year":"2015","unstructured":"Palo, H. K., & Mohanty, M. N. (2015). Classification of emotional speech of children using probabilistic neural network. International Journal of Electrical and Computer Engineering, 5(2), 311\u2013317.","journal-title":"International Journal of Electrical and Computer Engineering"},{"key":"9681_CR317","doi-asserted-by":"crossref","unstructured":"Palo, H. K., & Mohanty, M. N. (2017). Wavelet based feature combination for recognition of emotions.\u00a0Ain Shams Engineering Journal.","DOI":"10.1016\/j.asej.2016.11.001"},{"key":"9681_CR318","doi-asserted-by":"crossref","unstructured":"Palo, H. K., & Sagar, S. (2018). Comparison of neural network models for speech emotion recognition. In\u00a02018 2nd international conference on data science and business analytics (ICDSBA)\u00a0(pp. 127\u2013131). Changsha: IEEE.","DOI":"10.1109\/ICDSBA.2018.00030"},{"key":"9681_CR319","unstructured":"Panda, R., Malheiro, R. M., & Paiva, R. P. (2018). Novel audio features for music emotion recognition.\u00a0IEEE Transactions on Affective Computing."},{"key":"9681_CR320","doi-asserted-by":"crossref","unstructured":"Parascandolo, G., Huttunen, H., & Virtanen, T. (2016). Recurrent neural networks for polyphonic sound event detection in real life recordings. In\u00a02016 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 6440\u20136444). Shanghai: IEEE.","DOI":"10.1109\/ICASSP.2016.7472917"},{"key":"9681_CR321","doi-asserted-by":"crossref","unstructured":"Parekh, S., Font, F., & Serra, X. (2016). Improving audio retrieval through loudness profile categorization. In\u00a02016 IEEE international symposium on multimedia (ISM)\u00a0(pp. 565\u2013568). San Jose: IEEE.","DOI":"10.1109\/ISM.2016.0123"},{"key":"9681_CR322","first-page":"1059","volume":"17","author":"W Paszkowski","year":"2017","unstructured":"Paszkowski, W., & Loska, A. (2017). The use of data mining methods for the psychoacoustic assessment of noise in urban environment. International Multidisciplinary Scientific GeoConference: SGEM: Surveying Geology & mining Ecology Management, 17, 1059\u20131066.","journal-title":"International Multidisciplinary Scientific GeoConference: SGEM: Surveying Geology & mining Ecology Management"},{"key":"9681_CR323","doi-asserted-by":"crossref","unstructured":"Patil, S. R., & Machale, S. J. (2020). Indian musical instrument recognition using Gaussian mixture model. In\u00a0Techno-societal 2018\u00a0(pp. 51\u201357). Cham: Springer.","DOI":"10.1007\/978-3-030-16962-6_6"},{"key":"9681_CR324","doi-asserted-by":"crossref","unstructured":"Patil, N. M., & Nemade, M. U. (2019a). Content-based audio classification and retrieval using segmentation, feature extraction and neural network approach. In\u00a0Advances in computer communication and computational sciences\u00a0(pp. 263\u2013281). Singapore: Springer.","DOI":"10.1007\/978-981-13-6861-5_23"},{"key":"9681_CR325","doi-asserted-by":"crossref","unstructured":"Patil, N. M., & Nemade, M. U. (2019b). Content-based audio classification and retrieval using segmentation, feature extraction and neural network.\u00a0Advances in computer communication and computational sciences: Proceedings of IC4S 2018, p. 263.","DOI":"10.1007\/978-981-13-6861-5_23"},{"issue":"4","key":"9681_CR326","doi-asserted-by":"crossref","first-page":"1890","DOI":"10.1121\/1.414456","volume":"98","author":"RD Patterson","year":"1995","unstructured":"Patterson, R. D., Allerhand, M. H., & Giguere, C. (1995). Time-domain modeling of peripheral auditory processing: A modular architecture and a software platform. The Journal of the Acoustical Society of America, 98(4), 1890\u20131894.","journal-title":"The Journal of the Acoustical Society of America"},{"issue":"1","key":"9681_CR327","doi-asserted-by":"crossref","first-page":"067215","DOI":"10.1155\/2007\/67215","volume":"2007","author":"G Peeters","year":"2006","unstructured":"Peeters, G. (2006). Template-based estimation of time-varying tempo. EURASIP Journal on Advances in Signal Processing, 2007(1), 067215.","journal-title":"EURASIP Journal on Advances in Signal Processing"},{"key":"9681_CR328","unstructured":"Peeters, G., McAdams, S., & Herrera, P. (2000). Instrument sound description in the context of MPEG-7."},{"issue":"6","key":"9681_CR329","doi-asserted-by":"crossref","first-page":"1278","DOI":"10.1109\/TASLP.2017.2690564","volume":"25","author":"H Phan","year":"2017","unstructured":"Phan, H., Hertel, L., Maass, M., Koch, P., Mazur, R., & Mertins, A. (2017a). Improved audio scene classification based on label-tree embeddings and convolutional neural networks. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 25(6), 1278\u20131290.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9681_CR330","doi-asserted-by":"crossref","unstructured":"Phan, H., Hertel, L., Maass, M., & Mertins, A. (2016). Robust audio event recognition with 1-max pooling convolutional neural networks.\u00a0http:\/\/arxiv.org\/abs\/1604.06338.","DOI":"10.21437\/Interspeech.2016-123"},{"key":"9681_CR331","doi-asserted-by":"crossref","unstructured":"Phan, H., Koch, P., Katzberg, F., Maass, M., Mazur, R., McLoughlin, I., & Mertins, A. (2017b). What makes audio event detection harder than classification?. In\u00a02017 25th European signal processing conference (EUSIPCO)\u00a0(pp. 2739\u20132743). Kos: IEEE.","DOI":"10.23919\/EUSIPCO.2017.8081709"},{"issue":"3","key":"9681_CR332","doi-asserted-by":"crossref","first-page":"e0193345","DOI":"10.1371\/journal.pone.0193345","volume":"13","author":"YF Phillips","year":"2018","unstructured":"Phillips, Y. F., Towsey, M., & Roe, P. (2018). Revealing the ecological content of long-duration audio-recordings of the environment through clustering and visualisation. PLoS ONE, 13(3), e0193345.","journal-title":"PLoS ONE"},{"key":"9681_CR333","doi-asserted-by":"crossref","unstructured":"Picart, B., Brognaux, S., & Dupont, S. (2015). Analysis and automatic recognition of human beatbox sounds: A comparative study. In\u00a02015 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 4255\u20134259). Brisbane: IEEE.","DOI":"10.1109\/ICASSP.2015.7178773"},{"key":"9681_CR334","doi-asserted-by":"crossref","unstructured":"Piczak, K. J. (2015a). Environmental sound classification with convolutional neural networks. In\u00a02015 IEEE 25th international workshop on machine learning for signal processing (MLSP)\u00a0(pp. 1\u20136). Boston: IEEE.","DOI":"10.1109\/MLSP.2015.7324337"},{"key":"9681_CR335","doi-asserted-by":"crossref","unstructured":"Piczak, K. J. (2015b). ESC: Dataset for environmental sound classification. In\u00a0Proceedings of the 23rd ACM international conference on Multimedia\u00a0(pp. 1015\u20131018). Australia: ACM.","DOI":"10.1145\/2733373.2806390"},{"issue":"1","key":"9681_CR336","doi-asserted-by":"crossref","first-page":"160","DOI":"10.3390\/s18010160","volume":"18","author":"IM Pires","year":"2018","unstructured":"Pires, I. M., Santos, R., Pombo, N., Garcia, N. M., Fl\u00f3rez-Revuelta, F., Spinsante, S., et al. (2018). Recognition of activities of daily living based on environmental analyses using audio fingerprinting techniques: A systematic review. Sensors, 18(1), 160\u2013182.","journal-title":"Sensors"},{"key":"9681_CR337","doi-asserted-by":"crossref","unstructured":"Pishdadian, F., Pardo, B., & Liutkus, A. (2017). A multi-resolution approach to common fate-based audio separation. In\u00a02017 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 566\u2013570). New Orleans: IEEE.","DOI":"10.1109\/ICASSP.2017.7952219"},{"key":"9681_CR338","doi-asserted-by":"crossref","unstructured":"Pokorny, F. B., Schuller, B. W., Marschik, P. B., Brueckner, R., Nystr\u00f6m, P., Cummins, N., B\u00f6lte, S., Einspieler, C., & Falck-Ytter, T. (2017). Earlier identification of children with autism spectrum disorder: An automatic vocalisation-based approach. In\u00a0INTERSPEECH\u00a0(pp. 309\u2013313).","DOI":"10.21437\/Interspeech.2017-1007"},{"key":"9681_CR339","doi-asserted-by":"crossref","unstructured":"Pons, J., & Serra, X. (2019). Randomly weighted CNNs for (music) audio classification. In\u00a0ICASSP 2019-2019 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 336\u2013340). Brighton: IEEE.","DOI":"10.1109\/ICASSP.2019.8682912"},{"key":"9681_CR340","doi-asserted-by":"crossref","unstructured":"Pop, G.P. (2017). Discriminate animal sounds using TESPAR analysis. In\u00a0International conference on advancements of medicine and health care through technology\u00a0(pp. 185\u2013188). Cluj-Napoca, Cham: Springer.","DOI":"10.1007\/978-3-319-52875-5_41"},{"key":"9681_CR341","doi-asserted-by":"crossref","first-page":"50","DOI":"10.1016\/j.neucom.2015.01.095","volume":"174","author":"S Poria","year":"2016","unstructured":"Poria, S., Cambria, E., Howard, N., Huang, G. B., & Hussain, A. (2016). Fusing audio, visual and textual clues for sentiment analysis from multimodal content. Neurocomputing, 174, 50\u201359.","journal-title":"Neurocomputing"},{"key":"9681_CR342","doi-asserted-by":"crossref","unstructured":"Poria, S., Hussain, A., & Cambria, E. (2018). combining textual clues with audio-visual information for multimodal sentiment analysis. In\u00a0Multimodal sentiment analysis\u00a0(pp. 153\u2013178). Cham: Springer.","DOI":"10.1007\/978-3-319-95020-4_7"},{"key":"9681_CR343","unstructured":"Prego, T. D. M., de Lima, A. A., Zambrano-L\u00f3pez, R., & Netto, S. L. (2015). Blind estimators for reverberation time and direct-to-reverberant energy ratio using subband speech decomposition. In\u00a02015 IEEE workshop on applications of signal processing to audio and acoustics (WASPAA)\u00a0(pp. 1\u20135). New Paltz: IEEE."},{"key":"9681_CR344","volume-title":"Auditory signal processing: Physiology, psychoacoustics, and models","year":"2006","unstructured":"Pressnitzer, D., de Cheveigne, A., McAdams, S., & Collet, L. (Eds.). (2006). Auditory signal processing: Physiology, psychoacoustics, and models. Berlin: Springer Science & Business Media."},{"key":"9681_CR345","doi-asserted-by":"crossref","unstructured":"Qiu-Yu, Z., Yang-Wei, L., Yi-Bo, H., Peng-Fei, X., & Zhong-Ping, Y. (2014). Perceptual hashing algorithm for speech content identification based on spectrum entropy in compressed domain.\u00a0International Journal on Smart Sensing & Intelligent Systems,\u00a07(1).","DOI":"10.21307\/ijssis-2017-656"},{"issue":"3","key":"9681_CR346","first-page":"1720","volume":"8","author":"FH Rachman","year":"2018","unstructured":"Rachman, F. H., Sarno, R., & Fatichah, C. (2018). Music emotion classification based on lyrics-audio using corpus based emotion. International Journal of Electrical and Computer Engineering, 8(3), 1720.","journal-title":"International Journal of Electrical and Computer Engineering"},{"issue":"4","key":"9681_CR347","doi-asserted-by":"crossref","first-page":"336","DOI":"10.4236\/jsip.2011.24048","volume":"2","author":"M Radmard","year":"2011","unstructured":"Radmard, M., Hadavi, M., & Nayebi, M. M. (2011). A new method of voiced\/unvoiced classification based on clustering. Journal of Signal and Information Processing, 2(4), 336\u2013347.","journal-title":"Journal of Signal and Information Processing"},{"issue":"1","key":"9681_CR348","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1007\/s10772-017-9397-1","volume":"20","author":"R Rajan","year":"2017","unstructured":"Rajan, R., Misra, M., & Murthy, H. A. (2017). Melody extraction from music using modified group delay functions. International Journal of Speech Technology, 20(1), 185\u2013204.","journal-title":"International Journal of Speech Technology"},{"key":"9681_CR349","doi-asserted-by":"crossref","unstructured":"Rajanna, A. R., Aryafar, K., Shokoufandeh, A., & Ptucha, R. (2015). Deep neural networks: A case study for music genre classification. In\u00a02015 IEEE 14th international conference on machine learning and applications (ICMLA)\u00a0(pp. 655\u2013660). Miami: IEEE.","DOI":"10.1109\/ICMLA.2015.160"},{"issue":"3","key":"9681_CR350","doi-asserted-by":"crossref","first-page":"551","DOI":"10.1007\/s10772-016-9347-3","volume":"19","author":"B Rajesh","year":"2016","unstructured":"Rajesh, B., & Bhalke, D. G. (2016). Automatic genre classification of Indian Tamil and western music using fractional MFCC. International Journal of Speech Technology, 19(3), 551\u2013563.","journal-title":"International Journal of Speech Technology"},{"issue":"1","key":"9681_CR351","first-page":"142","volume":"23","author":"A Rakotomamonjy","year":"2014","unstructured":"Rakotomamonjy, A., & Gasso, G. (2014). Histogram of gradients of time\u2013frequency representations for audio scene classification. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 23(1), 142\u2013153.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9681_CR352","unstructured":"Ramirez, M. A. M., Benetos, E., & Reiss, J. D. (2019). A general-purpose deep learning approach to model time-varying audio effects.\u00a0http:\/\/arxiv.org\/abs\/1905.06148."},{"key":"9681_CR353","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1016\/j.procs.2017.10.017","volume":"116","author":"ZE Rasjid","year":"2017","unstructured":"Rasjid, Z. E., & Setiawan, R. (2017). Performance comparison and optimization of text document classification using k-nn and na\u00efve bayes classification techniques. Procedia Computer Science, 116, 107\u2013112.","journal-title":"Procedia Computer Science"},{"key":"9681_CR354","unstructured":"Rawlinson, H., Segal, N., & Fiala, J. (2015). Meyda: An audio feature extraction library for the web audio api. In\u00a0The 1st web audio conference (WAC). Paris, Fr."},{"key":"9681_CR355","doi-asserted-by":"crossref","unstructured":"Ren, J., Mao, D., Wang, Z., & Gao, C. (2009). The effect of packet delay on VOIP speech quality: failure of Hurst method. In\u00a02009 WRI world congress on computer science and information engineering\u00a0(pp. 230\u2013234). Los Angeles: IEEE.","DOI":"10.1109\/CSIE.2009.618"},{"key":"9681_CR356","doi-asserted-by":"crossref","unstructured":"Ren, Y., & Wu, Y. (2014). Convolutional deep belief networks for feature extraction of EEG signal. In\u00a02014 International joint conference on neural networks (IJCNN)\u00a0(pp. 2850\u20132853). Beijing: IEEE.","DOI":"10.1109\/IJCNN.2014.6889383"},{"issue":"3","key":"9681_CR357","doi-asserted-by":"crossref","first-page":"236","DOI":"10.1109\/TAFFC.2015.2427836","volume":"6","author":"JM Ren","year":"2015","unstructured":"Ren, J. M., Wu, M. J., & Jang, J. S. R. (2015). Automatic music mood classification based on timbre and modulation features. IEEE Transactions on Affective Computing, 6(3), 236\u2013246.","journal-title":"IEEE Transactions on Affective Computing"},{"key":"9681_CR358","doi-asserted-by":"crossref","unstructured":"Renjith, S., & Manju, K. G. (2017). Speech based emotion recognition in Tamil and Telugu using LPCC and hurst parameters\u2014A comparitive study using KNN and ANN classifiers. In\u00a02017 International conference on circuit, power and computing technologies (ICCPCT)\u00a0(pp. 1\u20136). Kollam: IEEE.","DOI":"10.1109\/ICCPCT.2017.8074220"},{"key":"9681_CR359","unstructured":"Rida, I. (2018). Feature extraction for temporal signal recognition: An overview.\u00a0http:\/\/arxiv.org\/abs\/1812.01780."},{"key":"9681_CR360","doi-asserted-by":"crossref","unstructured":"Ridoean, J. A., Sarno, R., Sunaryo, D., & Wijaya, D. R. (2017). Music mood classification using audio power and audio harmonicity based on MPEG-7 audio features and support vector machine. In\u00a02017 3rd International conference on science in information technology (ICSITech)\u00a0(pp. 72\u201376). Bandung: IEEE.","DOI":"10.1109\/ICSITech.2017.8257088"},{"key":"9681_CR361","doi-asserted-by":"crossref","first-page":"234","DOI":"10.1016\/j.ins.2014.02.017","volume":"277","author":"AM Rinaldi","year":"2014","unstructured":"Rinaldi, A. M. (2014). A multimedia ontology model based on linguistic properties and audio-visual features. Information Sciences, 277, 234\u2013246.","journal-title":"Information Sciences"},{"key":"9681_CR362","unstructured":"Robertson, S., Penn, G., & Wang, Y. (2019). Exploring spectro-temporal features in end-to-end convolutional neural networks.\u00a0http:\/\/arxiv.org\/abs\/1901.00072."},{"key":"9681_CR363","doi-asserted-by":"crossref","unstructured":"Rocha, B. M., Mendes, L., Chouvarda, I., Carvalho, P., & Paiva, R. P. (2018). Detection of cough and adventitious respiratory sounds in audio recordings by internal sound analysis. In\u00a0Precision Medicine Powered by pHealth and Connected Health\u00a0(pp. 51\u201355). Singapore: Springer.","DOI":"10.1007\/978-981-10-7419-6_9"},{"key":"9681_CR364","doi-asserted-by":"crossref","unstructured":"Rocha, B. M., Mendes, L., Couceiro, R., Henriques, J., Carvalho, P., & Paiva, R. P. (2017). Detection of explosive cough events in audio recordings by internal sound analysis. In\u00a02017 39th Annual international conference of the IEEE engineering in medicine and biology society (EMBC)\u00a0(pp. 2761\u20132764). Seogwipo: IEEE.","DOI":"10.1109\/EMBC.2017.8037429"},{"key":"9681_CR365","unstructured":"Roma, G., Xamb\u00f3, A., Green, O., & Tremblay, P.A. (2018). A javascript library for flexible visualization of audio descriptors. In\u00a0Proceedings of the 4th web audio conference."},{"key":"9681_CR366","unstructured":"Ronan, D., Gunes, H., Moffat, D., & Reiss, J.D. (2015). Automatic subgrouping of multitrack audio."},{"key":"9681_CR367","doi-asserted-by":"crossref","unstructured":"Rong, F. (2016). Audio classification method based on machine learning. In\u00a02016 International conference on intelligent transportation, big data & smart city (ICITBS)\u00a0(pp. 81\u201384). Changsha: IEEE.","DOI":"10.1109\/ICITBS.2016.98"},{"key":"9681_CR368","doi-asserted-by":"crossref","first-page":"162","DOI":"10.1016\/j.cnsns.2018.07.008","volume":"67","author":"T Roy","year":"2019","unstructured":"Roy, T., Marwala, T., & Chakraverty, S. (2019). Precise detection of speech endpoints dynamically: A wavelet convolution based approach. Communications in Nonlinear Science and Numerical Simulation, 67, 162\u2013175.","journal-title":"Communications in Nonlinear Science and Numerical Simulation"},{"key":"9681_CR369","doi-asserted-by":"crossref","unstructured":"Rubin, J., Abreu, R., Ganguli, A., Nelaturi, S., Matei, I., & Sricharan, K. (2016). Classifying heart sound recordings using deep convolutional neural networks and mel-frequency cepstral coefficients. In\u00a02016 Computing in cardiology conference (CinC)\u00a0(pp. 813\u2013816). Vancouver: IEEE.","DOI":"10.22489\/CinC.2016.236-175"},{"key":"9681_CR370","doi-asserted-by":"crossref","unstructured":"Saggese, A., Strisciuglio, N., Vento, M., & Petkov, N. (2016). Time-frequency analysis for audio event detection in real scenarios. In\u00a02016 13th IEEE international conference on advanced video and signal based surveillance (AVSS)\u00a0(pp. 438\u2013443). Colorado Springs: IEEE.","DOI":"10.1109\/AVSS.2016.7738082"},{"key":"9681_CR371","doi-asserted-by":"crossref","unstructured":"Sailor, H. B., & Patil, H. A. (2016). Filterbank learning using convolutional restricted Boltzmann machine for speech recognition. In\u00a02016 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 5895\u20135899). Shanghai: IEEE.","DOI":"10.1109\/ICASSP.2016.7472808"},{"key":"9681_CR372","doi-asserted-by":"crossref","unstructured":"Saki, F., & Kehtarnavaz, N. (2014). Background noise classification using random forest tree classifier for cochlear implant applications. In\u00a02014 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 3591\u20133595). Florence: IEEE.","DOI":"10.1109\/ICASSP.2014.6854270"},{"key":"9681_CR373","doi-asserted-by":"crossref","unstructured":"Saki, F., Sehgal, A., Panahi, I., & Kehtarnavaz, N. (2016). Smartphone-based real-time classification of noise signals using subband features and random forest classifier. In\u00a02016 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 2204\u20132208). Shanghai: IEEE.","DOI":"10.1109\/ICASSP.2016.7472068"},{"issue":"3","key":"9681_CR374","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1109\/LSP.2017.2657381","volume":"24","author":"J Salamon","year":"2017","unstructured":"Salamon, J., & Bello, J. P. (2017). Deep convolutional neural networks and data augmentation for environmental sound classification. IEEE Signal Processing Letters, 24(3), 279\u2013283.","journal-title":"IEEE Signal Processing Letters"},{"key":"9681_CR375","doi-asserted-by":"crossref","unstructured":"Salishev, S., Klotchkov, I., & Barabanov, A. (2017). Microphone array post-filter in frequency domain for speech recognition using short-time log-spectral amplitude estimator and spectral harmonic\/noise classifier. In\u00a0International conference on speech and computer\u00a0(pp. 525\u2013534). Cham: Springer.","DOI":"10.1007\/978-3-319-66429-3_52"},{"issue":"2","key":"9681_CR376","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1007\/s10772-019-09606-9","volume":"22","author":"KC Santosh","year":"2019","unstructured":"Santosh, K. C., Borra, S., Joshi, A., & Dey, N. (2019). Preface: Special section: Advances in speech, music and audio signal processing (Articles 1\u201313). International Journal of Speech Technology, 22(2), 293\u2013294.","journal-title":"International Journal of Speech Technology"},{"issue":"1","key":"9681_CR377","doi-asserted-by":"crossref","first-page":"115","DOI":"10.1007\/s11042-014-2274-x","volume":"75","author":"N Sarafianos","year":"2016","unstructured":"Sarafianos, N., Giannakopoulos, T., & Petridis, S. (2016). Audio-visual speaker diarization using fisher linear semi-discriminant analysis. Multimedia Tools and Applications, 75(1), 115\u2013130.","journal-title":"Multimedia Tools and Applications"},{"issue":"9","key":"9681_CR378","first-page":"6660","volume":"13","author":"VM Sardar","year":"2018","unstructured":"Sardar, V. M., & Shirbahadurkar, S. D. (2018). Speaker identification of whispering sound using selected audio descriptors. International Journal of Applied Engineering Research, 13(9), 6660\u20136666.","journal-title":"International Journal of Applied Engineering Research"},{"issue":"4","key":"9681_CR379","doi-asserted-by":"crossref","first-page":"778","DOI":"10.1109\/TASLP.2014.2303296","volume":"22","author":"R Sarikaya","year":"2014","unstructured":"Sarikaya, R., Hinton, G. E., & Deoras, A. (2014). Application of deep belief networks for natural language understanding. IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP), 22(4), 778\u2013784.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP)"},{"key":"9681_CR380","doi-asserted-by":"crossref","unstructured":"Sarkar, R., Biswas, N., & Chakraborty, S. (2018). Music genre classification using frequency domain features. In\u00a02018 Fifth international conference on emerging applications of information technology (EAIT)\u00a0(pp. 1\u20134). Kolkata: IEEE.","DOI":"10.1109\/EAIT.2018.8470441"},{"issue":"05","key":"9681_CR381","doi-asserted-by":"crossref","first-page":"1850016","DOI":"10.1142\/S0218213018500161","volume":"27","author":"R Sarno","year":"2018","unstructured":"Sarno, R., Ridoean, J. A., Sunaryono, D., & Wijaya, D. R. (2018). Classification of music mood using MPEG-7 audio features and SVM with confidence interval. International Journal on Artificial Intelligence Tools, 27(05), 1850016.","journal-title":"International Journal on Artificial Intelligence Tools"},{"issue":"5","key":"9681_CR382","doi-asserted-by":"crossref","first-page":"32","DOI":"10.5120\/ijca2017913533","volume":"163","author":"M Sarode","year":"2017","unstructured":"Sarode, M., & Bhalke, D. G. (2017). Automatic music mood recognition using support vector regression. International Journal of Computers and Applications, 163(5), 32\u201335.","journal-title":"International Journal of Computers and Applications"},{"key":"9681_CR383","unstructured":"Sarroff, A. M., & Casey, M. A. (2014). Musical audio synthesis using autoencoding neural nets. In\u00a0ICMC."},{"issue":"5","key":"9681_CR384","doi-asserted-by":"crossref","first-page":"557","DOI":"10.1016\/j.jvoice.2017.01.006","volume":"31","author":"C Sauder","year":"2017","unstructured":"Sauder, C., Bretl, M., & Eadie, T. (2017). Predicting voice disorder status from smoothed measures of cepstral peak prominence using praat and analysis of dysphonia in speech and voice (ADSV). Journal of Voice, 31(5), 557\u2013566.","journal-title":"Journal of Voice"},{"issue":"1","key":"9681_CR385","doi-asserted-by":"crossref","first-page":"125","DOI":"10.1007\/s12559-016-9439-z","volume":"9","author":"S Scardapane","year":"2017","unstructured":"Scardapane, S., & Uncini, A. (2017). Semi-supervised echo state networks for audio classification. Cognitive Computation, 9(1), 125\u2013135.","journal-title":"Cognitive Computation"},{"key":"9681_CR386","unstructured":"Scaringella, N., & Zoia, G. (2004). A real-time beat tracker for unrestricted audio signals.\u00a0In Proc. of SMC,\u00a04."},{"key":"9681_CR387","doi-asserted-by":"crossref","unstructured":"Scarpiniti, M., Scardapane, S., Comminiello, D., & Uncini, A. (2020). Music genre classification using stacked auto-encoders. In\u00a0Neural approaches to dynamics of signal exchanges\u00a0(pp. 11\u201319). Singapore: Springer.","DOI":"10.1007\/978-981-13-8950-4_2"},{"issue":"1017\/9781316676","key":"9681_CR388","first-page":"006","volume":"10","author":"KR Scherer","year":"2017","unstructured":"Scherer, K. R., Schuller, B. W., & Elkins, A. (2017). Computational analysis of vocal expression of affect: Trends and challenges. Social Signal Processing. DOI, 10(1017\/9781316676202), 006.","journal-title":"Social Signal Processing. DOI"},{"key":"9681_CR389","doi-asserted-by":"crossref","unstructured":"Schindler, A., & Rauber, A. (2015). An audio-visual approach to music genre classification through affective color features. In\u00a0European conference on information retrieval\u00a0(pp. 61\u201367). Cham: Springer.","DOI":"10.1007\/978-3-319-16354-3_8"},{"key":"9681_CR390","doi-asserted-by":"crossref","unstructured":"Schmitt, M., Ringeval, F., & Schuller, B. W. (2016). At the border of acoustics and linguistics: Bag-of-audio-words for the recognition of emotions in speech. In\u00a0Interspeech\u00a0(pp. 495\u2013499). San Francisco.","DOI":"10.21437\/Interspeech.2016-1124"},{"issue":"12","key":"9681_CR391","doi-asserted-by":"crossref","first-page":"2198","DOI":"10.1109\/TASLP.2015.2467964","volume":"23","author":"J Schr\u00f6der","year":"2015","unstructured":"Schr\u00f6der, J., Goetze, S., & Anem\u00fcller, J. (2015). Spectro-temporal Gabor filterbank features for acoustic event detection. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 23(12), 2198\u20132208.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9681_CR392","doi-asserted-by":"crossref","first-page":"42","DOI":"10.1016\/j.specom.2015.12.008","volume":"81","author":"J Sebastian","year":"2016","unstructured":"Sebastian, J., Kumar, M., & Murthy, H. A. (2016). An analysis of the high resolution property of group delay function with applications to audio signal processing. Speech Communication, 81, 42\u201353.","journal-title":"Speech Communication"},{"key":"9681_CR393","doi-asserted-by":"crossref","unstructured":"Sell, G., & Clark, P. (2014). Music tonality features for speech\/music discrimination. In\u00a02014 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 2489\u20132493). Florence: IEEE.","DOI":"10.1109\/ICASSP.2014.6854048"},{"key":"9681_CR394","doi-asserted-by":"crossref","unstructured":"Sen, S., Dutta, A., & Dey, N. (2019a). Audio indexing. In\u00a0Audio processing and speech recognition\u00a0(pp. 1\u201311). Singapore: Springer.","DOI":"10.1007\/978-981-13-6098-5_1"},{"key":"9681_CR395","doi-asserted-by":"crossref","unstructured":"Sen, S., Dutta, A., & Dey, N. (2019b). Speech processing and recognition system. In\u00a0Audio processing and speech recognition\u00a0(pp. 13\u201343). Singapore: Springer.","DOI":"10.1007\/978-981-13-6098-5_2"},{"key":"9681_CR396","doi-asserted-by":"crossref","DOI":"10.1007\/978-981-13-6098-5","volume-title":"Audio Processing and Speech Recognition: Concepts, Techniques and Research Overviews","author":"S Sen","year":"2019","unstructured":"Sen, S., Dutta, A., & Dey, N. (2019c). Audio Processing and Speech Recognition: Concepts, Techniques and Research Overviews. Berlin: Springer."},{"issue":"3","key":"9681_CR397","first-page":"23","volume":"4","author":"ENW Senevirathna","year":"2015","unstructured":"Senevirathna, E. N. W., & Jayaratne, L. (2015). Audio music monitoring: Analyzing current techniques for song recognition and identification. GSTF Journal on Computing (JoC), 4(3), 23\u201334.","journal-title":"GSTF Journal on Computing (JoC)"},{"issue":"6","key":"9681_CR398","first-page":"436","volume":"36","author":"JS Seo","year":"2017","unstructured":"Seo, J. S., Kim, J., & Park, J. (2017). An investigation of chroma n-gram selection for cover song search. The Journal of the Acoustical Society of Korea, 36(6), 436\u2013441.","journal-title":"The Journal of the Acoustical Society of Korea"},{"issue":"1","key":"9681_CR399","doi-asserted-by":"crossref","first-page":"58","DOI":"10.1080\/09298215.2014.916723","volume":"44","author":"NH Sephus","year":"2015","unstructured":"Sephus, N. H., Lanterman, A. D., & Anderson, D. V. (2015). Modulation spectral features: In pursuit of invariant representations of music with application to unsupervised source identification. Journal of New Music Research, 44(1), 58\u201370.","journal-title":"Journal of New Music Research"},{"key":"9681_CR400","doi-asserted-by":"crossref","unstructured":"Serizel, R., Bisot, V., Essid, S., & Richard, G. (2018). Acoustic features for environmental sound analysis. In\u00a0Computational analysis of sound scenes and events\u00a0(pp. 71\u2013101). Cham: Springer.","DOI":"10.1007\/978-3-319-63450-0_4"},{"issue":"3","key":"9681_CR401","doi-asserted-by":"crossref","first-page":"1481","DOI":"10.15680\/ijircce.2015.0303012","volume":"3","author":"S Shafee","year":"2015","unstructured":"Shafee, S., & Anuradha, B. (2015). Isolated Telugu speech recognition using MFCC and gamma tone features by radial basis networks in noisy environment. International Journal of Innovative Research in Computer and Communication Engineering (IJIRCCE), 3(3), 1481\u20131488.","journal-title":"International Journal of Innovative Research in Computer and Communication Engineering (IJIRCCE)"},{"key":"9681_CR402","doi-asserted-by":"crossref","unstructured":"Shakya, A., Gurung, B., Thapa, M.S., Rai, M., & Joshi, B. (2017). Music classification based on genre and mood. In\u00a0International conference on computational intelligence, communications, and business analytics\u00a0(pp. 168\u2013183). Singapore: Springer.","DOI":"10.1007\/978-981-10-6430-2_14"},{"key":"9681_CR403","doi-asserted-by":"crossref","first-page":"164","DOI":"10.1016\/j.conb.2014.01.011","volume":"25","author":"S Shamma","year":"2014","unstructured":"Shamma, S., & Fritz, J. (2014). Adaptive auditory computations. Current Opinion in Neurobiology, 25, 164\u2013168.","journal-title":"Current Opinion in Neurobiology"},{"key":"9681_CR404","doi-asserted-by":"crossref","first-page":"22","DOI":"10.1016\/j.neucom.2016.03.020","volume":"200","author":"RV Sharan","year":"2016","unstructured":"Sharan, R. V., & Moir, T. J. (2016). An overview of applications and advancements in automatic sound recognition. Neurocomputing, 200, 22\u201334.","journal-title":"Neurocomputing"},{"key":"9681_CR405","doi-asserted-by":"crossref","unstructured":"Sharma, S., Fulzele, P., & Sreedevi, I. (2018). Novel hybrid model for music genre classification based on support vector machine. In\u00a02018 IEEE symposium on computer applications & industrial electronics (ISCAIE)\u00a0(pp. 395\u2013400). Penang: IEEE.","DOI":"10.1109\/ISCAIE.2018.8405505"},{"key":"9681_CR406","doi-asserted-by":"crossref","unstructured":"Sharma, U., Maheshkar, S., & Mishra, A.N. (2015). Study of robust feature extraction techniques for speech recognition system. In\u00a02015 International conference on futuristic trends on computational analysis and knowledge management (ABLAZE)\u00a0(pp. 654\u2013658). Noida: IEEE.","DOI":"10.1109\/ABLAZE.2015.7154944"},{"key":"9681_CR407","doi-asserted-by":"crossref","unstructured":"Sharma, R., Murthy, Y. S., & Koolagudi, S. G. (2016). Audio songs classification based on music patterns. In\u00a0Proceedings of the second international conference on computer and communication technologies\u00a0(pp. 157\u2013166). New Delhi: Springer.","DOI":"10.1007\/978-81-322-2526-3_17"},{"issue":"1","key":"9681_CR408","doi-asserted-by":"crossref","first-page":"297","DOI":"10.1007\/s11042-014-2292-8","volume":"75","author":"K Shirahama","year":"2016","unstructured":"Shirahama, K., & Grzegorzek, M. (2016). Towards large-scale multimedia retrieval enriched by knowledge about human interpretation. Multimedia Tools and Applications, 75(1), 297\u2013331.","journal-title":"Multimedia Tools and Applications"},{"key":"9681_CR409","unstructured":"Siegler, M. A., Jain, U., Raj, B., & Stern, R. M. (1997). Automatic segmentation, classification and clustering of broadcast news audio. In\u00a0Proc. DARPA speech recognition workshop\u00a0(Vol. 1997)."},{"key":"9681_CR410","doi-asserted-by":"crossref","unstructured":"Singh, I., & Koolagudi, S. G. (2017). Classification of Punjabi folk musical instruments based on acoustic features. In\u00a0Proceedings of the international conference on data engineering and communication technology\u00a0(pp. 445\u2013454). Singapore: Springer.","DOI":"10.1007\/978-981-10-1675-2_44"},{"key":"9681_CR411","unstructured":"Smith, D., Cheng, E., & Burnett, I. S. (2010). Musical onset detection using MPEG-7 audio descriptors. In\u00a0Proceedings of the 20th international congress on acoustics (ICA), Sydney, Australia,\u00a0Vol. 2327, pp. 1014\u20131020."},{"key":"9681_CR412","unstructured":"Sonnleitner, R., Arzt, A., & Widmer, G. (2016). Landmark-based audio fingerprinting for DJ mix monitoring. In\u00a0ISMIR\u00a0(pp. 185\u2013191)."},{"key":"9681_CR413","doi-asserted-by":"crossref","unstructured":"Spanias, A. (2015). Advances in speech and audio processing and coding. In\u00a02015 6th international conference on information, intelligence, systems and applications (IISA)\u00a0(pp. 1\u20132). Corfu: IEEE.","DOI":"10.1109\/IISA.2015.7388064"},{"key":"9681_CR414","unstructured":"Stenzel, H., & Jackson, P. J. (2018). Perceptual thresholds of audio-visual spatial coherence for a variety of audio-visual objects. In\u00a0Audio engineering society conference: 2018 AES international conference on audio for virtual and augmented reality. Audio Engineering Society."},{"issue":"10","key":"9681_CR415","doi-asserted-by":"crossref","first-page":"1733","DOI":"10.1109\/TMM.2015.2428998","volume":"17","author":"D Stowell","year":"2015","unstructured":"Stowell, D., Giannoulis, D., Benetos, E., Lagrange, M., & Plumbley, M. D. (2015). Detection and classification of acoustic scenes and events. IEEE Transactions on Multimedia, 17(10), 1733\u20131746.","journal-title":"IEEE Transactions on Multimedia"},{"key":"9681_CR416","doi-asserted-by":"crossref","unstructured":"Strisciuglio, N., Vento, M., & Petkov, N. (2015). Bio-inspired filters for audio analysis. In\u00a0International workshop on brain-inspired computing\u00a0(pp. 101\u2013115). Cham: Springer.","DOI":"10.1007\/978-3-319-50862-7_8"},{"issue":"5","key":"9681_CR417","doi-asserted-by":"crossref","first-page":"571","DOI":"10.1525\/mp.2016.33.5.571","volume":"33","author":"J Stupacher","year":"2016","unstructured":"Stupacher, J., Hove, M. J., & Janata, P. (2016). Audio features underlying perceived groove and sensorimotor synchronization in music. Music Perception: An Interdisciplinary Journal, 33(5), 571\u2013589.","journal-title":"Music Perception: An Interdisciplinary Journal"},{"key":"9681_CR418","doi-asserted-by":"crossref","unstructured":"Subramaniam, A., Patel, V., Mishra, A., Balasubramanian, P., & Mittal, A. (2016). Bi-modal first impressions recognition using temporally ordered deep audio and stochastic visual features. In\u00a0European conference on computer vision\u00a0(pp. 337\u2013348). Cham: Springer.","DOI":"10.1007\/978-3-319-49409-8_27"},{"issue":"1","key":"9681_CR419","first-page":"486","volume":"7","author":"M Sudarma","year":"2017","unstructured":"Sudarma, M., & Harsemadi, I. G. (2017). Design and analysis system of KNN and ID3 algorithm for music classification based on mood feature extraction. International Journal of Electrical and Computer Engineering, 7(1), 486.","journal-title":"International Journal of Electrical and Computer Engineering"},{"issue":"1","key":"9681_CR420","doi-asserted-by":"crossref","first-page":"126","DOI":"10.1186\/1687-6180-2014-126","volume":"2014","author":"Y Suh","year":"2014","unstructured":"Suh, Y., & Kim, H. (2014). Discriminative likelihood score weighting based on acoustic-phonetic classification for speaker identification. EURASIP Journal on Advances in Signal Processing, 2014(1), 126.","journal-title":"EURASIP Journal on Advances in Signal Processing"},{"issue":"1","key":"9681_CR421","doi-asserted-by":"crossref","first-page":"253","DOI":"10.12928\/telkomnika.v17i1.11608","volume":"17","author":"L Sumarno","year":"2019","unstructured":"Sumarno, L., & Adi, K. (2019). The influence of sampling frequency on tone recognition of musical instruments. TELKOMNIKA, 17(1), 253\u2013260.","journal-title":"TELKOMNIKA"},{"key":"9681_CR422","doi-asserted-by":"crossref","unstructured":"Sur\u00eds, D., Duarte, A., Salvador, A., Torres, J., & Gir\u00f3-i-Nieto, X. (2018). Cross-modal embeddings for video and audio retrieval. In\u00a0Proceedings of the European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-030-11018-5_62"},{"key":"9681_CR423","unstructured":"Ta, K. (2016). Speaker recognition system usi stress Co."},{"issue":"6","key":"9681_CR424","doi-asserted-by":"crossref","first-page":"451","DOI":"10.17743\/jaes.2015.0056","volume":"63","author":"RG T\u00e1vora","year":"2015","unstructured":"T\u00e1vora, R. G., & Nascimento, F. A. (2015). Detecting replicas within audio evidence using an adaptive audio fingerprinting scheme. Journal of the Audio Engineering Society, 63(6), 451\u2013462.","journal-title":"Journal of the Audio Engineering Society"},{"key":"9681_CR425","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1016\/j.procs.2017.11.004","volume":"121","author":"JP Teixeira","year":"2017","unstructured":"Teixeira, J. P., Fernandes, P. O., & Alves, N. (2017). Vocal acoustic analysis-classification of dysphonic voices with artificial neural networks. Procedia Computer Science, 121, 19\u201326.","journal-title":"Procedia Computer Science"},{"key":"9681_CR426","doi-asserted-by":"crossref","first-page":"114","DOI":"10.1016\/j.apacoust.2012.12.004","volume":"77","author":"T Thaler","year":"2014","unstructured":"Thaler, T., Poto\u010dnik, P., Bric, I., & Govekar, E. (2014). Chatter detection in band sawing based on discriminant analysis of sound features. Applied Acoustics, 77, 114\u2013121.","journal-title":"Applied Acoustics"},{"key":"9681_CR427","doi-asserted-by":"crossref","unstructured":"Tharwat, A., Gaber, T., Awad, Y. M., Dey, N., & Hassanien, A. E. (2016). Plants identification using feature fusion technique and bagging classifier. In\u00a0The 1st international conference on advanced intelligent system and informatics (AISI2015), November 28\u201330, 2015, Beni Suef, Egypt\u00a0(pp. 461\u2013471). Cham: Springer.","DOI":"10.1007\/978-3-319-26690-9_41"},{"issue":"11","key":"9681_CR428","doi-asserted-by":"crossref","first-page":"1","DOI":"10.5815\/ijitcs.2014.11.01","volume":"6","author":"T Theodorou","year":"2014","unstructured":"Theodorou, T., Mporas, I., & Fakotakis, N. (2014). An overview of automatic audio segmentation. International Journal of Information Technology and Computer Science (IJITCS), 6(11), 1.","journal-title":"International Journal of Information Technology and Computer Science (IJITCS)"},{"key":"9681_CR429","doi-asserted-by":"publisher","DOI":"10.1007\/s12652-017-0653-7","author":"SS Therese","year":"2017","unstructured":"Therese, S. S., & Lingam, C. (2017). A linear visual assessment tendency based clustering with power normalized cepstral coefficients for audio signal recognition system. Journal of Ambient Intelligence and Humanized Computing. https:\/\/doi.org\/10.1007\/s12652-017-0653-7.","journal-title":"Journal of Ambient Intelligence and Humanized Computing"},{"issue":"7","key":"9681_CR430","doi-asserted-by":"crossref","first-page":"2837","DOI":"10.1007\/s00034-017-0689-0","volume":"37","author":"R Thirumuru","year":"2018","unstructured":"Thirumuru, R., & Vuppala, A. K. (2018). Automatic detection of retroflex approximants in a continuous Tamil speech. Circuits, Systems, and Signal Processing, 37(7), 2837\u20132851.","journal-title":"Circuits, Systems, and Signal Processing"},{"key":"9681_CR431","doi-asserted-by":"crossref","unstructured":"Thiruvengatanadhan, R., Dhanalakshmi, P., & Palanivel, S. (2015). GMM based indexing and retrieval of music using MFCC and MPEG-7 features. In\u00a0Emerging ICT for bridging the future-proceedings of the 49th annual convention of the Computer Society of India (CSI) Vol. 1\u00a0(pp. 363\u2013370). Cham: Springer.","DOI":"10.1007\/978-3-319-13728-5_41"},{"key":"9681_CR432","doi-asserted-by":"crossref","unstructured":"Thomas, M., Murthy, Y. S., & Koolagudi, S. G. (2016). Detection of largest possible repeated patterns in indian audio songs using spectral features. In\u00a02016 IEEE Canadian conference on electrical and computer engineering (CCECE)\u00a0(pp. 1\u20135). Vancouver: IEEE.","DOI":"10.1109\/CCECE.2016.7726863"},{"key":"9681_CR433","doi-asserted-by":"crossref","unstructured":"Tian, M., & SANDLER, M. (2016). Music structural segmentation across genres with Gammatone features.","DOI":"10.1145\/2950066"},{"key":"9681_CR434","doi-asserted-by":"crossref","unstructured":"Torcoli, M., Freke-Morin, A., Paulus, J., Simon, C., & Shirley, B. (2019). Background ducking to produce esthetically pleasing audio for TV with clear speech. In\u00a0Audio Engineering Society convention 146. Audio Engineering Society.","DOI":"10.17743\/jaes.2019.0052"},{"key":"9681_CR435","unstructured":"Tralie, C. J., & Harer, J. (2017). Mobius beats: The twisted spaces of sliding window audio novelty functions with rhythmic subdivisions.\u00a0In 18th International Society for music information retrieval (ismir), late breaking session."},{"key":"9681_CR436","doi-asserted-by":"crossref","unstructured":"Trochidis, K., & Lui, S. (2015). Modeling affective responses to music using audio signal analysis and physiology. In\u00a0International symposium on computer music multidisciplinary research\u00a0(pp. 346\u2013357). Cham: Springer.","DOI":"10.1007\/978-3-319-46282-0_22"},{"key":"9681_CR437","doi-asserted-by":"crossref","unstructured":"Tu, W., Yang, Y., Du, B., Yang, W., Zhang, X., & Zheng, J. (2019). RNN-based signal classification for hybrid audio data compression.\u00a0Computing, pp.1\u201315.","DOI":"10.1007\/s00607-019-00713-8"},{"key":"9681_CR438","doi-asserted-by":"crossref","unstructured":"Twomey, R., & McCrea, M. (2017). Transforming the commonplace through machine perception: light field synthesis and audio feature extraction in the rover project. In\u00a0ACM SIGGRAPH 2017 art gallery\u00a0(pp. 400\u2013408). Los Angeles: ACM.","DOI":"10.1145\/3072940.3072967"},{"key":"9681_CR439","doi-asserted-by":"crossref","unstructured":"Upadhya, S. S., Cheeran, A. N., & Nirmal, J. H. (2017). Statistical comparison of jitter and shimmer voice features for healthy and Parkinson affected persons. In\u00a02017 second international conference on electrical, computer and communication technologies (ICECCT)\u00a0(pp. 1\u20136). Coimbatore: IEEE.","DOI":"10.1109\/ICECCT.2017.8117853"},{"issue":"7","key":"9681_CR440","doi-asserted-by":"crossref","first-page":"2679","DOI":"10.1016\/j.jfranklin.2015.04.001","volume":"352","author":"A Upadhyay","year":"2015","unstructured":"Upadhyay, A., & Pachori, R. B. (2015). Instantaneous voiced\/non-voiced detection in speech signals based on variational mode decomposition. Journal of the Franklin Institute, 352(7), 2679\u20132707.","journal-title":"Journal of the Franklin Institute"},{"key":"9681_CR441","unstructured":"Urbano, J., Bogdanov, D., Boyer, H., G\u00f3mez Guti\u00e9rrez, E., & Serra, X. (2014). What is the effect of audio quality on the robustness of MFCCs and chroma features?. In\u00a0Proceedings of the 15th conference of the international society for music information retrieval (ISMIR 2014); 2014 Oct 27-31; Taipei (pp. 573\u2013578). Taiwan: International Society for Music Information Retrieval."},{"issue":"5","key":"9681_CR442","first-page":"3511","volume":"8","author":"B Uzkent","year":"2012","unstructured":"Uzkent, B., Barkana, B. D., & Cevikalp, H. (2012). Non-speech environmental sound classification using SVMs with a new set of features. International Journal of Innovative Computing, Information and Control, 8(5), 3511\u20133524.","journal-title":"International Journal of Innovative Computing, Information and Control"},{"key":"9681_CR443","doi-asserted-by":"crossref","unstructured":"Valada, A., Spinello, L., & Burgard, W. (2018). Deep feature learning for acoustics-based terrain classification. In\u00a0Robotics research\u00a0(pp. 21\u201337). Cham: Springer.","DOI":"10.1007\/978-3-319-60916-4_2"},{"key":"9681_CR444","unstructured":"Valero, X., & Al\u00edas, F. (2012). Classification of audio scenes using narrow-band autocorrelation features. In\u00a02012 Proceedings of the 20th European signal processing conference (EUSIPCO). Bucharest: IEEE."},{"key":"9681_CR445","unstructured":"V\u00e4lim\u00e4ki, V. (2017). Analysis of audio signals."},{"key":"9681_CR446","unstructured":"van de Water, L. F. (2017).\u00a0Assessing stress at the workplace: An explorative study on measuring emotion using unobtrusive sensor techniques.\u00a0Master\u2019s thesis."},{"key":"9681_CR447","doi-asserted-by":"crossref","unstructured":"V\u00e1squez-Correa, J. C., Orozco-Arroyave, J. R., Arias-Londo\u00f1o, J. D., Vargas-Bonilla, J. F., & N\u00f6th, E. (2016). Non-linear dynamics characterization from wavelet packet transform for automatic recognition of emotional speech. In\u00a0Recent advances in nonlinear speech processing\u00a0(pp. 199\u2013207). Cham: Springer.","DOI":"10.1007\/978-3-319-28109-4_20"},{"issue":"3","key":"9681_CR448","doi-asserted-by":"crossref","first-page":"191","DOI":"10.1080\/09298215.2018.1458885","volume":"47","author":"G Velarde","year":"2018","unstructured":"Velarde, G., Cancino Chac\u00f3n, C., Meredith, D., Weyde, T., & Grachten, M. (2018). Convolution-based classification of audio and symbolic representations of music. Journal of New Music Research, 47(3), 191\u2013205.","journal-title":"Journal of New Music Research"},{"issue":"2","key":"9681_CR449","first-page":"67","volume":"2","author":"M Velayatipour","year":"2014","unstructured":"Velayatipour, M., & Mosleh, M. (2014). A review on speech-music discrimination methods. International Journal of Computer Science and Network Solution, 2(2), 67\u201378.","journal-title":"International Journal of Computer Science and Network Solution"},{"key":"9681_CR450","unstructured":"Verma, P., & Smith, J.O. (2018). Neural style transfer for audio spectograms.\u00a0http:\/\/arxiv.org\/abs\/1801.01589."},{"key":"9681_CR451","doi-asserted-by":"crossref","unstructured":"Vrysis, L., Tsipas, N., Dimoulas, C., & Papanikolaou, G. (2015). Mobile audio intelligence: From real time segmentation to crowd sourced semantics. In\u00a0Proceedings of the audio mostly 2015 on interaction with sound\u00a0(p. 37). Thessaloniki: ACM.","DOI":"10.1145\/2814895.2814906"},{"issue":"12","key":"9681_CR452","doi-asserted-by":"crossref","first-page":"1042","DOI":"10.17743\/jaes.2016.0051","volume":"64","author":"L Vrysis","year":"2016","unstructured":"Vrysis, L., Tsipas, N., Dimoulas, C., & Papanikolaou, G. (2016). Crowdsourcing audio semantics by means of hybrid bimodal segmentation with hierarchical classification. Journal of the Audio Engineering Society, 64(12), 1042\u20131054.","journal-title":"Journal of the Audio Engineering Society"},{"key":"9681_CR453","unstructured":"Vrysis, L., Tsipas, N., Dimoulas, C., & Papanikolaou, G. (2017). Extending Temporal Feature Integration for Semantic Audio Analysis. In\u00a0Audio Engineering Society convention 142. Audio Engineering Society."},{"key":"9681_CR454","doi-asserted-by":"crossref","first-page":"71","DOI":"10.1016\/j.dsp.2017.12.012","volume":"75","author":"S Waldekar","year":"2018","unstructured":"Waldekar, S., & Saha, G. (2018a). Classification of audio scenes with novel features in a fused system framework. Digital Signal Processing, 75, 71\u201382.","journal-title":"Digital Signal Processing"},{"key":"9681_CR455","doi-asserted-by":"crossref","unstructured":"Waldekar, S., & Saha, G. (2018b). Wavelet-based audio features for acoustic scene classification.\u00a0Tech. Rep., DCASE2018 challenge.","DOI":"10.21437\/Interspeech.2018-2083"},{"key":"9681_CR456","doi-asserted-by":"crossref","unstructured":"Wang, Y., & Hu, W. (2018). Speech emotion recognition based on improved MFCC. In\u00a0Proceedings of the 2nd international conference on computer science and application engineering\u00a0(p. 88). Hohhot: ACM.","DOI":"10.1145\/3207677.3278037"},{"issue":"4","key":"9681_CR457","doi-asserted-by":"crossref","first-page":"842","DOI":"10.1166\/jmihi.2018.2310","volume":"8","author":"C Wang","year":"2018","unstructured":"Wang, C., Li, Z., Dey, N., Li, Z., Ashour, A. S., Fong, S. J., et al. (2018). Histogram of oriented gradient based plantar pressure image feature extraction and classification employing fuzzy support vector machine. Journal of Medical Imaging and Health Informatics, 8(4), 842\u2013854.","journal-title":"Journal of Medical Imaging and Health Informatics"},{"issue":"2","key":"9681_CR458","doi-asserted-by":"crossref","first-page":"607","DOI":"10.1109\/TASE.2013.2285131","volume":"11","author":"JC Wang","year":"2013","unstructured":"Wang, J. C., Lin, C. H., Chen, B. W., & Tsai, M. K. (2013). Gabor-based nonuniform scale-frequency map for environmental sound classification in home automation. IEEE Transactions on Automation Science and Engineering, 11(2), 607\u2013613.","journal-title":"IEEE Transactions on Automation Science and Engineering"},{"issue":"5","key":"9681_CR459","first-page":"34","volume":"37","author":"H Wang","year":"2015","unstructured":"Wang, H., Liu, Z., & Song, Y. (2015). Analysis on wavelength components in pantograph-catenary contact force of electric railway based on multiple EEMD. Journal of the China Railway Society, 37(5), 34\u201341.","journal-title":"Journal of the China Railway Society"},{"key":"9681_CR460","doi-asserted-by":"crossref","unstructured":"Wang, Y., Neves, L., & Metze, F. (2016). Audio-based multimedia event detection using deep recurrent neural networks. In\u00a02016 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 2742\u20132746). Shanghai: IEEE.","DOI":"10.1109\/ICASSP.2016.7472176"},{"key":"9681_CR461","doi-asserted-by":"crossref","unstructured":"Wang, Y., Rawat, S., & Metze, F. (2014). Exploring audio semantic concepts for event-based video retrieval. In\u00a02014 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 1360\u20131364). Florence: IEEE.","DOI":"10.1109\/ICASSP.2014.6853819"},{"issue":"4","key":"9681_CR462","doi-asserted-by":"crossref","first-page":"282","DOI":"10.2174\/1574893614666190304125221","volume":"14","author":"Y Wang","year":"2019","unstructured":"Wang, Y., Shi, F., Cao, L., Dey, N., Wu, Q., Ashour, A. S., et al. (2019). Morphological segmentation analysis and texture-based support vector machines classification on mice liver fibrosis microscopic images. Current Bioinformatics, 14(4), 282\u2013294.","journal-title":"Current Bioinformatics"},{"key":"9681_CR463","unstructured":"Wang, J. C., Wang, J. F., He, K. W., & Hsu, C. S. (2006). Environmental sound classification using hybrid SVM\/KNN classifier and MPEG-7 audio low-level descriptor. In\u00a0The 2006 IEEE international joint conference on neural network proceedings\u00a0(pp. 1731\u20131735). Canada: IEEE."},{"key":"9681_CR464","doi-asserted-by":"crossref","unstructured":"Wang, K. C., Yang, Y. M., & Yang, Y. R. (2017). Speech\/music discrimination using hybrid-based feature extraction for audio data indexing. In\u00a02017 international conference on system science and engineering (ICSSE)\u00a0(pp. 515\u2013519). Ho Chi Minh City: IEEE.","DOI":"10.1109\/ICSSE.2017.8030927"},{"key":"9681_CR465","unstructured":"Wei\u00df, C. (2017).\u00a0Computational methods for tonality-based style analysis of classical music audio recordings. Doctoral dissertation, Technische Universit\u00e4t Ilmenau."},{"key":"9681_CR466","first-page":"33","volume":"32","author":"C Wei\u00df","year":"2015","unstructured":"Wei\u00df, C., & Schaab, M. (2015). On the Impact of key detection performance for identifying classical music styles. Work, 32, 33.","journal-title":"Work"},{"issue":"2","key":"9681_CR467","doi-asserted-by":"crossref","first-page":"265","DOI":"10.1007\/s10844-017-0459-2","volume":"50","author":"A Wieczorkowska","year":"2018","unstructured":"Wieczorkowska, A., Kubera, E., S\u0142owik, T., & Skrzypiec, K. (2018). Spectral features for audio based vehicle and engine classification. Journal of Intelligent Information Systems, 50(2), 265\u2013290.","journal-title":"Journal of Intelligent Information Systems"},{"issue":"7\/8","key":"9681_CR468","doi-asserted-by":"crossref","first-page":"466","DOI":"10.17743\/jaes.2016.0029","volume":"64","author":"A Wilson","year":"2016","unstructured":"Wilson, A., & Fazenda, B. (2016). Variation in multitrack mixes: analysis of low-level audio signal features. Journal of the Audio Engineering Society, 64(7\/8), 466\u2013473.","journal-title":"Journal of the Audio Engineering Society"},{"key":"9681_CR469","doi-asserted-by":"crossref","unstructured":"Witkowski, M., Kacprzak, S., Zelasko, P., Kowalczyk, K., & Galka, J. (2017). Audio replay attack detection using high-frequency features. In\u00a0INTERSPEECH\u00a0(pp. 27\u201331).","DOI":"10.21437\/Interspeech.2017-776"},{"key":"9681_CR470","doi-asserted-by":"crossref","unstructured":"Won, M., Alsaadan, H., & Eun, Y. (2017). Adaptive audio classification for smartphone in noisy car environment. In\u00a0Proceedings of the 25th ACM international conference on Multimedia\u00a0(pp. 1672\u20131679). Mountain View: ACM.","DOI":"10.1145\/3123266.3123397"},{"key":"9681_CR471","doi-asserted-by":"crossref","unstructured":"Wu, Y., & Lee, T. (2018). Reducing model complexity for DNN based large-scale audio classification. In\u00a02018 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 331\u2013335). Calgary: IEEE.","DOI":"10.1109\/ICASSP.2018.8462168"},{"key":"9681_CR472","doi-asserted-by":"crossref","unstructured":"Wu, C. W., & Vinton, M. (2017). Blind bandwidth extension using k-means and support vector regression. In\u00a02017 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 721\u2013725). New Orleans: IEEE.","DOI":"10.1109\/ICASSP.2017.7952250"},{"key":"9681_CR473","unstructured":"Wyse, L. (2017). Audio spectrogram representations for processing with convolutional neural networks.\u00a0http:\/\/arxiv.org\/abs\/1706.09559."},{"key":"9681_CR475","doi-asserted-by":"crossref","unstructured":"Xiao, X., Zhao, S., Zhong, X., Jones, D. L., Chng, E. S., & Li, H. (2015). A learning-based approach to direction of arrival estimation in noisy and reverberant environments. In\u00a02015 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 2814\u20132818). Brisbane: IEEE.","DOI":"10.1109\/ICASSP.2015.7178484"},{"key":"9681_CR476","unstructured":"Xie, J., Towsey, M., Truskinger, A., Eichinski, P., Zhang, J., & Roe, P. (2015). Acoustic classification of australian anurans using syllable features. In\u00a02015 IEEE tenth international conference on intelligent sensors, sensor networks and information processing (ISSNIP)\u00a0(pp. 1\u20136). Singapore: IEEE."},{"key":"9681_CR477","doi-asserted-by":"crossref","first-page":"20","DOI":"10.1016\/j.eswa.2019.01.085","volume":"126","author":"J Xie","year":"2019","unstructured":"Xie, J., & Zhu, M. (2019). Investigation of acoustic and visual features for acoustic scene classification. Expert Systems with Applications, 126, 20\u201329.","journal-title":"Expert Systems with Applications"},{"key":"9681_CR478","doi-asserted-by":"crossref","unstructured":"Xu, Y., Kong, Q., Wang, W., & Plumbley, M. D. (2018). Large-scale weakly supervised audio classification using gated convolutional neural network. In\u00a02018 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 121\u2013125). Calgary: IEEE.","DOI":"10.1109\/ICASSP.2018.8461975"},{"issue":"5","key":"9681_CR479","doi-asserted-by":"crossref","first-page":"978","DOI":"10.1109\/TASLP.2016.2537203","volume":"24","author":"H Xu","year":"2016","unstructured":"Xu, H., & Ou, Z. (2016). Scalable discovery of audio fingerprint motifs in broadcast streams with determinantal point process based motif clustering. IEEE\/ACM Transactions on Audio, Speech and Language Processing, 24(5), 978\u2013989.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing"},{"key":"9681_CR480","doi-asserted-by":"crossref","first-page":"385","DOI":"10.1016\/j.measurement.2016.05.073","volume":"91","author":"X Xu","year":"2016","unstructured":"Xu, X., Zhao, M., Lin, J., & Lei, Y. (2016). Envelope harmonic-to-noise ratio for periodic impulses detection and its application to bearing diagnosis. Measurement, 91, 385\u2013397.","journal-title":"Measurement"},{"key":"9681_CR482","doi-asserted-by":"crossref","unstructured":"Yadati, K., Liem, C., Larson, M., & Hanjalic, A. (2017). On the automatic identification of music for common activities. In\u00a0Proceedings of the 2017 ACM on international conference on multimedia retrieval\u00a0(pp. 192\u2013200). Bucharest: ACM.","DOI":"10.1145\/3078971.3078997"},{"key":"9681_CR483","doi-asserted-by":"crossref","unstructured":"Yamada, M., Doeda, O., Matsuo, A., Hara, Y., & Mine, K. (2017). A rhythm practice support system with annotation-free real-time onset detection. In\u00a02017 International conference on advanced informatics, concepts, theory, and applications (ICAICTA)\u00a0(pp. 1\u20136). Denpasar: IEEE.","DOI":"10.1109\/ICAICTA.2017.8090995"},{"issue":"1","key":"9681_CR484","doi-asserted-by":"crossref","first-page":"56","DOI":"10.3813\/AAA.919033","volume":"103","author":"L Yang","year":"2017","unstructured":"Yang, L., & Chen, K. (2017). Performance comparison of two types of auditory perceptual features in robust underwater target classification. Acta Acustica United with Acustica, 103(1), 56\u201366.","journal-title":"Acta Acustica United with Acustica"},{"issue":"11","key":"9681_CR485","doi-asserted-by":"crossref","first-page":"3101","DOI":"10.1007\/s00500-015-1994-9","volume":"21","author":"J Yang","year":"2017","unstructured":"Yang, J., Deng, J., Li, S., & Hao, Y. (2017a). Improved traffic detection with support vector machine based on restricted Boltzmann machine. Soft Computing, 21(11), 3101\u20133112.","journal-title":"Soft Computing"},{"issue":"1","key":"9681_CR486","doi-asserted-by":"crossref","first-page":"9","DOI":"10.1186\/s13636-016-0086-9","volume":"2016","author":"XK Yang","year":"2016","unstructured":"Yang, X. K., He, L., Qu, D., Zhang, W. Q., & Johnson, M. T. (2016a). Semi-supervised feature selection for audio classification based on constraint compensated Laplacian score. EURASIP Journal on Audio, Speech, and Music Processing, 2016(1), 9\u201318.","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"key":"9681_CR487","doi-asserted-by":"crossref","unstructured":"Yang, L., Jiang, D., He, L., Pei, E., Oveneke, M. C., & Sahli, H. (2016b). Decision tree based depression classification from audio video and language information. In\u00a0Proceedings of the 6th international workshop on audio\/visual emotion challenge\u00a0(pp. 89\u201396). Amsterdam: ACM.","DOI":"10.1145\/2988257.2988269"},{"issue":"6","key":"9681_CR488","doi-asserted-by":"crossref","first-page":"1315","DOI":"10.1109\/TASLP.2017.2690558","volume":"25","author":"W Yang","year":"2017","unstructured":"Yang, W., Krishnan, S., Yang, W., & Krishnan, S. (2017b). Combining temporal features by local binary pattern for acoustic scene classification. IEEE\/ACM Transactions on Audio, Speech and Language Processing, 25(6), 1315\u20131321.","journal-title":"IEEE\/ACM Transactions on Audio, Speech and Language Processing"},{"issue":"10","key":"9681_CR489","doi-asserted-by":"crossref","first-page":"3579","DOI":"10.1007\/s11042-013-1670-y","volume":"74","author":"SD You","year":"2015","unstructured":"You, S. D., & Chen, W. H. (2015). Comparative study of methods for reducing dimensionality of MPEG-7 audio signature descriptors. Multimedia Tools and Applications, 74(10), 3579\u20133598.","journal-title":"Multimedia Tools and Applications"},{"key":"9681_CR490","doi-asserted-by":"crossref","first-page":"132","DOI":"10.1016\/j.bspc.2016.11.005","volume":"33","author":"M You","year":"2017","unstructured":"You, M., Liu, Z., Chen, C., Liu, J., Xu, X. H., & Qiu, Z. M. (2017). Cough detection by ensembling multiple frequency subband features. Biomedical Signal Processing and Control, 33, 132\u2013140.","journal-title":"Biomedical Signal Processing and Control"},{"key":"9681_CR491","doi-asserted-by":"crossref","first-page":"159","DOI":"10.1016\/j.ins.2014.11.040","volume":"298","author":"XC Yuan","year":"2015","unstructured":"Yuan, X. C., Pun, C. M., & Chen, C. P. (2015). Robust Mel-Frequency Cepstral coefficients feature detection and dual-tree complex wavelet transform for digital audio watermarking. Information Sciences, 298, 159\u2013179.","journal-title":"Information Sciences"},{"key":"9681_CR492","doi-asserted-by":"crossref","unstructured":"Zahid, S., Hussain, F., Rashid, M., Yousaf, M. H., & Habib, H. A. (2015). Optimized audio classification and segmentation algorithm by using ensemble methods.\u00a0Mathematical Problems in Engineering,\u00a02015.","DOI":"10.1155\/2015\/209814"},{"issue":"5","key":"9681_CR493","doi-asserted-by":"crossref","first-page":"899","DOI":"10.1109\/TASLP.2014.2312541","volume":"22","author":"L Zao","year":"2014","unstructured":"Zao, L., Coelho, R., & Flandrin, P. (2014). Speech enhancement with emd and hurst-based mode selection. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 22(5), 899\u2013911.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"9681_CR494","doi-asserted-by":"crossref","unstructured":"Zeiler, S., Nicheli, R., Ma, N., Brown, G. J., & Kolossa, D. (2016). Robust audiovisual speech recognition using noise-adaptive linear discriminant analysis. In\u00a02016 IEEE international conference on acoustics, speech and signal processing (ICASSP)\u00a0(pp. 2797\u20132801). Shanghai: IEEE.","DOI":"10.1109\/ICASSP.2016.7472187"},{"issue":"1","key":"9681_CR495","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1166\/jmihi.2016.1591","volume":"6","author":"N Zemmal","year":"2016","unstructured":"Zemmal, N., Azizi, N., Dey, N., & Sellami, M. (2016). Adaptive semi supervised support vector machine semi supervised learning with features cooperation for breast cancer classification. Journal of Medical Imaging and Health Informatics, 6(1), 53\u201362.","journal-title":"Journal of Medical Imaging and Health Informatics"},{"issue":"3","key":"9681_CR496","doi-asserted-by":"crossref","first-page":"3705","DOI":"10.1007\/s11042-017-5539-3","volume":"78","author":"Y Zeng","year":"2019","unstructured":"Zeng, Y., Mao, H., Peng, D., & Yi, Z. (2019). Spectrogram based multi-task audio classification. Multimedia Tools and Applications, 78(3), 3705\u20133722.","journal-title":"Multimedia Tools and Applications"},{"key":"9681_CR497","first-page":"225","volume":"462","author":"Y Zhang","year":"2014","unstructured":"Zhang, Y., Lv, D. J., & Wang, H. S. (2014). The application of multiple classifier system for environmental audio classification. Applied Mechanics and Materials, 462, 225\u2013229.","journal-title":"Applied Mechanics and Materials"},{"key":"9681_CR498","first-page":"3649","volume":"2019","author":"S Zhang","year":"2019","unstructured":"Zhang, S., Qin, Y., Sun, K., & Lin, Y. (2019). Few-shot audio classification with attentional graph neural networks. Proceedings of INTERSPEECH, 2019, 3649\u20133653.","journal-title":"Proceedings of INTERSPEECH"},{"key":"9681_CR499","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1016\/j.apacoust.2016.03.027","volume":"110","author":"L Zhang","year":"2016","unstructured":"Zhang, L., Towsey, M., Xie, J., Zhang, J., & Roe, P. (2016). Using multi-label classification for acoustic pattern detection and assisting bird species surveys. Applied Acoustics, 110, 91\u201398.","journal-title":"Applied Acoustics"},{"issue":"2","key":"9681_CR500","first-page":"311","volume":"6","author":"QY Zhang","year":"2015","unstructured":"Zhang, Q. Y., Xing, P. F., Huang, Y. B., Dong, R. H., & Yang, Z. P. (2015a). An efficient speech perceptual hashing authentication algorithm based on wavelet packet decomposition. Journal of Information Hiding and Multimedia Signal Processing, 6(2), 311\u2013322.","journal-title":"Journal of Information Hiding and Multimedia Signal Processing"},{"issue":"1","key":"9681_CR501","doi-asserted-by":"crossref","first-page":"6","DOI":"10.1186\/s13636-015-0050-0","volume":"2015","author":"X Zhang","year":"2015","unstructured":"Zhang, X., Zhu, B., Li, L., Li, W., Li, X., Wang, W., et al. (2015b). SIFT-based local spectrogram image descriptor: A novel feature for robust music identification. EURASIP Journal on Audio, Speech, and Music Processing, 2015(1), 6.","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"key":"9681_CR502","doi-asserted-by":"crossref","unstructured":"Zhao, S., Zhang, Y., Xu, H., & Han, T. (2019). Ensemble classification based on feature selection for environmental sound recognition.\u00a0Mathematical Problems in Engineering,\u00a02019.","DOI":"10.1155\/2019\/4318463"},{"key":"9681_CR503","doi-asserted-by":"crossref","unstructured":"Zieli\u0144ski, S. K. (2018). Feature extraction of surround sound recordings for acoustic scene classification. In\u00a0International conference on artificial intelligence and soft computing\u00a0(pp. 475\u2013486). Cham: Springer.","DOI":"10.1007\/978-3-319-91262-2_43"},{"key":"9681_CR504","unstructured":"Zirmite, M. P. P., Patil, M. M. K., & Salgar, M. S. P. (2016). Separating voiced segments from music file using MFCC, ZCR and GMM."},{"key":"9681_CR505","first-page":"25","volume":"9","author":"YX Zong","year":"2016","unstructured":"Zong, Y. X., Zhang, L., Li, T. J., & Ding, Y. H. (2016a). System design for fault diagnosis based on EMD-ICA audio feature extraction. Machinery Design & Manufacture, 9, 25.","journal-title":"Machinery Design & Manufacture"},{"issue":"2","key":"9681_CR506","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1007\/s12193-015-0210-7","volume":"10","author":"Y Zong","year":"2016","unstructured":"Zong, Y., Zheng, W., Huang, X., Yan, K., Yan, J., & Zhang, T. (2016b). Emotion recognition in the wild via sparse transductive transfer linear discriminant analysis. Journal on Multimodal User Interfaces, 10(2), 163\u2013172.","journal-title":"Journal on Multimodal User Interfaces"},{"key":"9681_CR507","doi-asserted-by":"crossref","first-page":"69","DOI":"10.1016\/j.buildenv.2018.05.001","volume":"139","author":"S Zuhaib","year":"2018","unstructured":"Zuhaib, S., Manton, R., Griffin, C., Hajdukiewicz, M., Keane, M. M., & Goggins, J. (2018). An indoor environmental quality (IEQ) assessment of a partially-retrofitted university building. Building and Environment, 139, 69\u201385.","journal-title":"Building and Environment"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-020-09681-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-020-09681-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-020-09681-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,26]],"date-time":"2023-09-26T05:40:59Z","timestamp":1695706859000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-020-09681-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,2,3]]},"references-count":504,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2021,12]]}},"alternative-id":["9681"],"URL":"https:\/\/doi.org\/10.1007\/s10772-020-09681-3","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,2,3]]},"assertion":[{"value":"15 November 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 January 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 February 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}