{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T12:54:22Z","timestamp":1773406462576,"version":"3.50.1"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2015,12,1]],"date-time":"2015-12-01T00:00:00Z","timestamp":1448928000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"name":"the Spanish Government and the European Union (FEDER)","award":["TIN2011-28169-C05-02"],"award-info":[{"award-number":["TIN2011-28169-C05-02"]}]},{"name":"the European Regional Development Fund and the Spanish Government","award":["TEC2012-38939-C03"],"award-info":[{"award-number":["TEC2012-38939-C03"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J AUDIO SPEECH MUSIC PROC."],"published-print":{"date-parts":[[2015,12]]},"DOI":"10.1186\/s13636-015-0076-3","type":"journal-article","created":{"date-parts":[[2015,11,30]],"date-time":"2015-11-30T23:04:46Z","timestamp":1448924686000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Albayz\u00edn-2014 evaluation: audio segmentation and classification in broadcast news domains"],"prefix":"10.1186","volume":"2015","author":[{"given":"Diego","family":"Cast\u00e1n","sequence":"first","affiliation":[]},{"given":"David","family":"Tavarez","sequence":"additional","affiliation":[]},{"given":"Paula","family":"Lopez-Otero","sequence":"additional","affiliation":[]},{"given":"Javier","family":"Franco-Pedroso","sequence":"additional","affiliation":[]},{"given":"H\u00e9ctor","family":"Delgado","sequence":"additional","affiliation":[]},{"given":"Eva","family":"Navas","sequence":"additional","affiliation":[]},{"given":"Laura","family":"Docio-Fern\u00e1ndez","sequence":"additional","affiliation":[]},{"given":"Daniel","family":"Ramos","sequence":"additional","affiliation":[]},{"given":"Javier","family":"Serrano","sequence":"additional","affiliation":[]},{"given":"Alfonso","family":"Ortega","sequence":"additional","affiliation":[]},{"given":"Eduardo","family":"Lleida","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2015,12,1]]},"reference":[{"key":"76_CR1","doi-asserted-by":"publisher","first-page":"993","DOI":"10.1109\/ICASSP.1996.543290","volume-title":"1996 IEEE International Conference on Acoustics, Speech, and Signal Processing Conference Proceedings","author":"J Saunders","year":"1996","unstructured":"J Saunders, in 1996 IEEE International Conference on Acoustics, Speech, and Signal Processing Conference Proceedings, 2. Real-time discrimination of broadcast speech\/music (IEEEAtlanta, 1996), pp. 993\u2013996."},{"key":"76_CR2","doi-asserted-by":"publisher","first-page":"1331","DOI":"10.1109\/ICASSP.1997.596192","volume":"2","author":"E Scheirer","year":"1997","unstructured":"E Scheirer, M Slaney, Construction and evaluation of a robust multifeature speech\/music discriminator. 1997 IEEE International Conference on Acoustics, Speech, and Signal Processing. 2, 1331\u20131334 (1997).","journal-title":"1997 IEEE International Conference on Acoustics, Speech, and Signal Processing"},{"key":"76_CR3","doi-asserted-by":"crossref","first-page":"393","DOI":"10.1145\/319463.319658","volume-title":"Proceedings of the Seventh ACM International Conference on Multimedia","author":"S Srinivasan","year":"1999","unstructured":"S Srinivasan, D Petkovic, D Ponceleon, in Proceedings of the Seventh ACM International Conference on Multimedia. Towards robust features for classifying audio in the CueVideo system (ACMNew York City, NY, 1999), pp. 393\u2013400."},{"issue":"3","key":"76_CR4","doi-asserted-by":"publisher","first-page":"1062","DOI":"10.1109\/TSA.2005.857573","volume":"14","author":"S Kiranyaz","year":"2006","unstructured":"S Kiranyaz, AF Qureshi, M Gabbouj, A generic audio classification and segmentation approach for multimedia indexing and retrieval. IEEE Trans. Audio Speech Lang. Process. 14(3), 1062\u20131081 (2006).","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"76_CR5","first-page":"2282","volume-title":"Proc. Interspeech","author":"Z Huang","year":"2013","unstructured":"Z Huang, Y-c Cheng, K Li, V Hautamaki, C-h Lee, in Proc. Interspeech. A blind segmentation approach to acoustic event detection based on i-vector (ISCALyon, 2013), pp. 2282\u20132286."},{"key":"76_CR6","unstructured":"NIST, TREC NIST Evaluations. http:\/\/www.itl.nist.gov\/iad\/mig\/\/tests\/sdr\/ Accessed 23 Nov 2015."},{"key":"76_CR7","first-page":"3","volume-title":"Interspeech","author":"S Galliano","year":"2005","unstructured":"S Galliano, E Geoffrois, D Mostefa, in Interspeech. The ESTER phase II evaluation campaign for the rich transcription of French broadcast news (ISCALisbon, 2005), pp. 3\u20136."},{"key":"76_CR8","volume-title":"Interspeech","author":"J Zibert","year":"2005","unstructured":"J Zibert, F Mihelic, J Martens, H Meinedo, J Neto, L Docio, C Garcia-Mateo, P David, E Al, in Interspeech. The COST278 broadcast news segmentation and speaker clustering evaluation-overview, methodology, systems, results (ISCALisbon, 2005)."},{"key":"76_CR9","first-page":"305","volume-title":"II Iberian SLTech","author":"T Butko","year":"2010","unstructured":"T Butko, CN Camprub\u00ed, H Schulz, in II Iberian SLTech. Albayzin-2010 audio segmentation evaluation: evaluation setup and results (FALAVigo, 2010), pp. 305\u2013308."},{"key":"76_CR10","unstructured":"A Ortega, D Castan, A Miguel, E Lleida, The Albayzin 2012 Audio Segmentation Evaluation (2012). http:\/\/dcastan.vivolab.es\/wp-content\/papercite-data\/pdf\/ortega2012.pdf Accessed 23 Nov 2015."},{"key":"76_CR11","unstructured":"Tecnoparla, Tecnoparla Project. http:\/\/www.talp.upc.edu\/tecnoparla ."},{"key":"76_CR12","volume-title":"Proceedings of the 21st ACM International Conference on Multimedia","author":"F Font","year":"2013","unstructured":"F Font, G Roma, X Serra, in Proceedings of the 21st ACM International Conference on Multimedia. Freesound technical demo (ACMBarcelona, Spain, 2013)."},{"key":"76_CR13","unstructured":"G Hu, 100 non-speech environmental sounds. http:\/\/www.cse.ohio-state.edu\/dwang\/pnl\/corpus\/HuCorpus.html Accessed 23 Nov 2015."},{"key":"76_CR14","unstructured":"NIST, The 2009 (RT-09) Rich transcription meeting recognition evaluation plan. http:\/\/www.itl.nist.gov\/iad\/mig\/tests\/rt\/2009\/docs\/rt09-meeting-eval-plan-v2.pdf Accessed 23 Nov 2015."},{"key":"76_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1155\/2009\/239892","volume":"2009","author":"Y Lavner","year":"2009","unstructured":"Y Lavner, D Ruinskiy, A decision-tree-based algorithm for speech\/music classification and segmentation. EURASIP J. Audio Speech Music Process. 2009, 1\u201315 (2009).","journal-title":"EURASIP J. Audio Speech Music Process."},{"key":"76_CR16","first-page":"1","volume":"1","author":"T Theodorou","year":"2014","unstructured":"T Theodorou, I Mporas, N Fakotakis, An overview of automatic audio segmentation. I.J. Inf. Technol. Comput. Sci. 1, 1\u20139 (2014).","journal-title":"I.J. Inf. Technol. Comput. Sci."},{"key":"76_CR17","first-page":"93","volume-title":"IEEE International Conference on Acoustics, Speech, and Signal Processing Conference Proceedings","author":"S Imai","year":"1983","unstructured":"S Imai, in IEEE International Conference on Acoustics, Speech, and Signal Processing Conference Proceedings. Cepstral analysis synthesis on the mel frequency scale (IEEEBoston, 1983), pp. 93\u201396."},{"key":"76_CR18","doi-asserted-by":"crossref","first-page":"323","DOI":"10.1109\/ICASSP.1996.541097","volume-title":"IEEE International Conference on Acoustics, Speech, and Signal Processing Conference Proceedings","author":"R Vergin","year":"1996","unstructured":"R Vergin, D O\u2019Shaughnessy, V Gupta, in IEEE International Conference on Acoustics, Speech, and Signal Processing Conference Proceedings. Compensated mel frequency cepstrum coefficients, vol. 1 (IEEEAtlanta, 1996), pp. 323\u2013326."},{"issue":"5","key":"76_CR19","doi-asserted-by":"publisher","first-page":"525","DOI":"10.1109\/89.784104","volume":"7","author":"R Vergin","year":"1999","unstructured":"R Vergin, Generalized mel frequency cepstral coefficients for large-vocabulary speaker-independent continuous-speech recognition. IEEE Trans. Speech and Audio Process. 7(5), 525\u2013532 (1999).","journal-title":"IEEE Trans. Speech and Audio Process."},{"key":"76_CR20","first-page":"95","volume-title":"International Symposium on Intelligent Multimedia, Video and Speech Processing","author":"E Wong","year":"2001","unstructured":"E Wong, S Sridharan, in International Symposium on Intelligent Multimedia, Video and Speech Processing. Comparison of linear prediction cepstrum coefficients and mel-frequency cepstrum coefficients for language identification (Kowloon Shangri-LaHong Kong, 2001), pp. 95\u201398."},{"key":"76_CR21","unstructured":"M Hasan, M Jamil, M Rahman, in International Conference on Computer and Electrical Engineering. Speaker identification using Mel frequency cepstral coefficients (Dhaka, 2004), pp. 28\u201330."},{"issue":"1","key":"76_CR22","doi-asserted-by":"publisher","first-page":"716","DOI":"10.1016\/j.asoc.2009.12.033","volume":"11","author":"P Dhanalakshmi","year":"2011","unstructured":"P Dhanalakshmi, S Palanivel, V Ramalingam, Classification of audio signals using AANN and GMM. Appl. Soft Comput. 11(1), 716\u2013723 (2011).","journal-title":"Appl. Soft Comput."},{"issue":"2","key":"76_CR23","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1007\/s00530-010-0205-x","volume":"17","author":"L Xie","year":"2011","unstructured":"L Xie, W Fu, Z-H Feng, Y Luo, Pitch-density-based features and an SVM binary tree approach for multi-class audio classification in broadcast news. Multimedia Syst. 17(2), 101\u2013112 (2011).","journal-title":"Multimedia Syst."},{"key":"76_CR24","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1016\/S0167-8655(00)00119-7","volume":"22","author":"D Li","year":"2001","unstructured":"D Li, I Sethi, N Dimitrova, T McGee, Classification of general audio data for content-based retrieval. Elsevier, Pattern Recogn. Lett. 22, 533\u2013544 (2001).","journal-title":"Elsevier, Pattern Recogn. Lett."},{"issue":"7","key":"76_CR25","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1109\/TSA.2002.804546","volume":"10","author":"L Lu","year":"2002","unstructured":"L Lu, H Zhang, H Jiang, Content analysis for audio classification and segmentation. IEEE Trans. Speech Audio Process. 10(7), 504\u2013516 (2002).","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"76_CR26","first-page":"1065","volume-title":"IEEE International Conference on Acoustics, Speech and Signal Processing","author":"TL Nwe","year":"2005","unstructured":"TL Nwe, H Li, in IEEE International Conference on Acoustics, Speech and Signal Processing. Broadcast news segmentation by audio type analysis, vol. 2 (IEEEPhiladelphia, 2005), p. 1065."},{"key":"76_CR27","volume-title":"Proc. TRECVID","author":"A Hauptmann","year":"2003","unstructured":"A Hauptmann, R Baron, M Chen, in Proc. TRECVID. Informedia at TRECVID 2003: analyzing and searching broadcast news video (NISTGaithersburg, 2003)."},{"key":"76_CR28","unstructured":"S Dharanipragada, M Franz, Story segmentation and topic detection in the broadcast news domain. DARPA Broadcast News Workshop, 1\u20134 (1999). http:\/\/www.itl.nist.gov\/iad\/mig\/publications\/proceedings\/darpa99\/html\/abstract.htm Accessed 23 Nov 2015."},{"issue":"7","key":"76_CR29","doi-asserted-by":"publisher","first-page":"659","DOI":"10.1109\/LSP.2010.2049877","volume":"17","author":"A Gallardo-Antol\u00edn","year":"2010","unstructured":"A Gallardo-Antol\u00edn, J Montero, Histogram equalization-based features for speech, music, and song discrimination. Signal Process. Lett. 17(7), 659\u2013662 (2010).","journal-title":"Signal Process. Lett."},{"issue":"1","key":"76_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1687-4722-2011-1","volume":"2011","author":"T Butko","year":"2011","unstructured":"T Butko, C Nadeu, Audio segmentation of broadcast news in the Albayzin-2010 evaluation: overview, results, and discussion. EURASIP J. Audio Speech Music Process. 2011(1), 1 (2011).","journal-title":"EURASIP J. Audio Speech Music Process."},{"issue":"5","key":"76_CR31","doi-asserted-by":"publisher","first-page":"726","DOI":"10.1016\/j.specom.2010.08.007","volume":"53","author":"M Markaki","year":"2011","unstructured":"M Markaki, Y Stylianou, Discrimination of speech from nonspeech in broadcast news based on modulation frequency features. Speech Commun. 53(5), 726\u2013735 (2011).","journal-title":"Speech Commun."},{"issue":"3","key":"76_CR32","doi-asserted-by":"publisher","first-page":"907","DOI":"10.1109\/TSA.2005.858057","volume":"14","author":"R Huang","year":"2006","unstructured":"R Huang, J Hansen, Advances in unsupervised audio classification and segmentation for the broadcast news and NGSW corpora. IEEE Trans. Audio Speech Lang. Process. 14(3), 907\u2013919 (2006).","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"76_CR33","unstructured":"SS Chen, PS Gopalakrishnan, in Proc. DARPA Broadcast News Workshop. Speaker, environment and channel change detection and clustering via the Bayesian information criterion (Lansdowne, 1998)."},{"issue":"1","key":"76_CR34","doi-asserted-by":"publisher","first-page":"266","DOI":"10.1109\/TSA.2005.852992","volume":"14","author":"Y Wu","year":"2006","unstructured":"Y Wu, C-h Chiu, Automatic segmentation and identification of mixed-language speech using delta-BIC and LSA-based GMMs. IEEE Trans. Audio Speech Lang. Process. 14(1), 266\u2013276 (2006).","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"5","key":"76_CR35","doi-asserted-by":"publisher","first-page":"920","DOI":"10.1109\/TASL.2008.925152","volume":"16","author":"M Kotti","year":"2008","unstructured":"M Kotti, E Benetos, C Kotropoulos, Computationally efficient and robust BIC-based speaker segmentation. IEEE Trans. Audio Speech Lang. Process. 16(5), 920\u2013933 (2008).","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"76_CR36","volume-title":"Proc. Interspeech","author":"A Misra","year":"2012","unstructured":"A Misra, in Proc. Interspeech. Speech\/Nonspeech Segmentation in Web Videos (ISCAPortland, 2012)."},{"key":"76_CR37","first-page":"1","volume":"34","author":"D Cast\u00e1n","year":"2014","unstructured":"D Cast\u00e1n, A Ortega, A Miguel, E Lleida, Audio segmentation-by-classification approach based on factor analysis in broadcast news domain. EURASIP J. Audio Speech Music Process. 34, 1\u201313 (2014).","journal-title":"EURASIP J. Audio Speech Music Process"},{"key":"76_CR38","unstructured":"A Mesa, JLN Ortega, A Teixeira, EH P\u00e9rez, PQ Morales, AR Garcia, IG Moreno, IberSPEECH 2014: VIII Jornadas en Tecnolog\u00edas del Habla and IV Iberian SLTech Workshop (2014). http:\/\/iberspeech2014.ulpgc.es\/index.php\/online Accessed 23 Nov 2015."},{"key":"76_CR39","first-page":"2118","volume-title":"INTERSPEECH 2010, 11th Annual Conference of the International Speech Communication Association, Makuhari, Chiba, Japan, September 26-30, 2010","author":"X Anguera","year":"2010","unstructured":"X Anguera, J Bonastre, in INTERSPEECH 2010, 11th Annual Conference of the International Speech Communication Association, Makuhari, Chiba, Japan, September 26-30, 2010. A novel speaker binary key derived from anchor models (ISCAMakuhari, Chiba, Japan, 2010), pp. 2118\u20132121."},{"key":"76_CR40","first-page":"572","volume-title":"INTERSPEECH 2014, 15th Annual Conference of the International Speech Communication Association, Singapore, September 14\u201318, 2014","author":"H Delgado","year":"2014","unstructured":"H Delgado, C Fredouille, J Serrano, in INTERSPEECH 2014, 15th Annual Conference of the International Speech Communication Association, Singapore, September 14\u201318, 2014. Towards a complete binary key system for the speaker diarization task (ISCASingapore, 2014), pp. 572\u2013576."},{"issue":"1","key":"76_CR41","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1007\/BF01238023","volume":"1","author":"J Kittler","year":"1998","unstructured":"J Kittler, Combining classifiers: A theoretical framework. Pattern Anal. Appl. 1(1), 18\u201327 (1998).","journal-title":"Pattern Anal. Appl."}],"container-title":["EURASIP Journal on Audio, Speech, and Music Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13636-015-0076-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s13636-015-0076-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13636-015-0076-3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13636-015-0076-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,1]],"date-time":"2019-09-01T23:11:24Z","timestamp":1567379484000},"score":1,"resource":{"primary":{"URL":"https:\/\/asmp-eurasipjournals.springeropen.com\/articles\/10.1186\/s13636-015-0076-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,12]]},"references-count":41,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2015,12]]}},"alternative-id":["76"],"URL":"https:\/\/doi.org\/10.1186\/s13636-015-0076-3","relation":{},"ISSN":["1687-4722"],"issn-type":[{"value":"1687-4722","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,12]]},"article-number":"33"}}