{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T22:14:27Z","timestamp":1778796867847,"version":"3.51.4"},"reference-count":80,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2016,1,13]],"date-time":"2016-01-13T00:00:00Z","timestamp":1452643200000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J AUDIO SPEECH MUSIC PROC."],"published-print":{"date-parts":[[2016,12]]},"DOI":"10.1186\/s13636-016-0080-2","type":"journal-article","created":{"date-parts":[[2016,1,19]],"date-time":"2016-01-19T11:14:37Z","timestamp":1453202077000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Comparison of ALBAYZIN query-by-example spoken term detection 2012 and 2014 evaluations"],"prefix":"10.1186","volume":"2016","author":[{"given":"Javier","family":"Tejedor","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Doroteo T.","family":"Toledano","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Paula","family":"Lopez-Otero","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Laura","family":"Docio-Fernandez","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Carmen","family":"Garcia-Mateo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,1,13]]},"reference":[{"key":"80_CR1","first-page":"3001","volume-title":"Hierarchical classification of audio data for archiving and retrieving","author":"T Zhang","year":"1999","unstructured":"T Zhang, C-CJ Kuo, in Hierarchical classification of audio data for archiving and retrieving. Proc. of ICASSP (IEEEWashington DC, USA, 1999), pp. 3001\u20133004."},{"key":"80_CR2","first-page":"225","volume-title":"Query by example of audio signals using Euclidean distance between Gaussian Mixture Models","author":"M Hel\u00e9n","year":"2007","unstructured":"M Hel\u00e9n, T Virtanen, in Query by example of audio signals using Euclidean distance between Gaussian Mixture Models. Proc. of ICASSP (IEEEWashington DC, USA, 2007), pp. 225\u2013228."},{"key":"80_CR3","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1186\/1687-4722-2010-179303","volume":"2010","author":"M Hel\u00e9n","year":"2010","unstructured":"M Hel\u00e9n, T Virtanen, Audio query by example using similarity measures between probability density functions of features. EURASIP, Journal on Audio, Speech, and Music Processing. 2010:, 2\u20131212 (2010).","journal-title":"EURASIP, Journal on Audio, Speech, and Music Processing"},{"key":"80_CR4","first-page":"31","volume-title":"Pitch histograms in audio and symbolic music information retrieval","author":"G Tzanetakis","year":"2002","unstructured":"G Tzanetakis, A Ermolinskyi, P Cook, in Pitch histograms in audio and symbolic music information retrieval. Proc. of ISMIR (ISMIRParis, France, 2002), pp. 31\u201338."},{"key":"80_CR5","first-page":"1863","volume-title":"A query-by-example framework to retrieve music documents by singer","author":"W-H Tsai","year":"2004","unstructured":"W-H Tsai, H-M Wang, in A query-by-example framework to retrieve music documents by singer. Proc. of ICME (IEEEWashington DC, USA, 2004), pp. 1863\u20131866."},{"key":"80_CR6","first-page":"363","volume-title":"A lattice-based approach to query-by-example spoken document retrieval","author":"TK Chia","year":"2008","unstructured":"TK Chia, KC Sim, H Li, HT Ng, in A lattice-based approach to query-by-example spoken document retrieval. Proc. of ACM SIGIR (ACMNew York, USA, 2008), pp. 363\u2013370."},{"key":"80_CR7","first-page":"921","volume-title":"Zero-resource audio-only spoken term detection based on a combination of template matching techniques","author":"A Muscariello","year":"2011","unstructured":"A Muscariello, G Gravier, F Bimbot, in Zero-resource audio-only spoken term detection based on a combination of template matching techniques. Proc. of Interspeech (ISCABaixas, France, 2011), pp. 921\u2013924."},{"issue":"3","key":"80_CR8","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1145\/2328967.2328971","volume":"30","author":"J Tejedor","year":"2012","unstructured":"J Tejedor, M Fap\u0161o, I Sz\u00f6ke, \u010cernocky, H\u0301, F Gr\u00e9zl, Comparison of methods for language-dependent and language-independent query-by-example spoken term detection. ACM Trans. Inf. Syst. 30(3), 18\u201311834 (2012).","journal-title":"ACM Trans. Inf. Syst"},{"key":"80_CR9","first-page":"8515","volume-title":"Speed improvements to information retrieval-based dynamic time warping using hierarchical k-means clustering","author":"G Mantena","year":"2013","unstructured":"G Mantena, X Anguera, in Speed improvements to information retrieval-based dynamic time warping using hierarchical k-means clustering. Proc. of ICASSP (IEEEWashington DC, USA, 2013), pp. 8515\u20138519."},{"issue":"5","key":"80_CR10","doi-asserted-by":"publisher","first-page":"946","DOI":"10.1109\/TASLP.2014.2311322","volume":"22","author":"G Mantena","year":"2014","unstructured":"G Mantena, S Achanta, K Prahallad, Query-by-example spoken term detection using frequency domain linear prediction and non-segmental dynamic time warping. IEEE\/ACM Trans. Audio Speech Lang. Process. 22(5), 946\u2013955 (2014).","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process"},{"key":"80_CR11","first-page":"73","volume-title":"TUKE at MediaEval 2013 spoken web search task","author":"J Vavrek","year":"2013","unstructured":"J Vavrek, M Pleva, M Lojka, P Viszlay, Kiktova, E\u0301, D Hl\u00e1dek, J Juh\u00e1r, in TUKE at MediaEval 2013 spoken web search task. Proc. of MediaEval (CEURAachen, Germany, 2013), pp. 73\u20131732."},{"key":"80_CR12","first-page":"79","volume-title":"UNIZA system for the spoken web search task at MediaEval 2013","author":"R Jarina","year":"2013","unstructured":"R Jarina, M Kuba, R Gubka, M Chmulik, M Paralic, in UNIZA system for the spoken web search task at MediaEval 2013. Proc. of MediaEval (CEURAachen, Germany, 2013), pp. 79\u20131792."},{"key":"80_CR13","first-page":"86","volume-title":"Spoken web search using and ergodic hidden Markov model of speech","author":"A Ali","year":"2013","unstructured":"A Ali, MA Clements, in Spoken web search using and ergodic hidden Markov model of speech. Proc. of MediaEval (CEURAachen, Germany, 2013), pp. 86\u20131862."},{"key":"80_CR14","first-page":"72","volume-title":"SpeeD@MediaEval 2014: Spoken term detection with robust multilingual phone recognition","author":"A Buzo","year":"2014","unstructured":"A Buzo, H Cucu, C Burileanu, in SpeeD@MediaEval 2014: Spoken term detection with robust multilingual phone recognition. Proc. of MediaEval (CEURAachen, Germany, 2014), pp. 72\u20131722."},{"key":"80_CR15","volume-title":"IIIT-H system for MediaEval 2014 QUESST","author":"S Kesiraju","year":"2014","unstructured":"S Kesiraju, G Mantena, K Prahallad, in IIIT-H system for MediaEval 2014 QUESST. Proc. of MediaEval (CEURAachen, Germany, 2014). pp. 76\u20131762."},{"key":"80_CR16","first-page":"384","volume-title":"An IWAPU STD system for OOV query terms and spoken queries","author":"J Takahashi","year":"2014","unstructured":"J Takahashi, T Hashimoto, R Konno, S Sugawara, K Ouchi, S Oshima, T Akyu, Y Itoh, in An IWAPU STD system for OOV query terms and spoken queries. Proc. of NTCIR-11 (National Institute of InformaticsTokyo, Japan, 2014), pp. 384\u2013389."},{"key":"80_CR17","first-page":"413","volume-title":"Combining subword and state-level dissimilarity measures for improved spoken term detection in NTCIR-11 SpokenQuery &Doc task","author":"M Makino","year":"2014","unstructured":"M Makino, A Kai, in Combining subword and state-level dissimilarity measures for improved spoken term detection in NTCIR-11 SpokenQuery &Doc task. Proc. of NTCIR-11 (National Institute of InformaticsTokyo, Japan, 2014), pp. 413\u2013418."},{"key":"80_CR18","first-page":"2639","volume-title":"Calibration of distance measures for unsupervised query-by-example","author":"M Gubian","year":"2013","unstructured":"M Gubian, L Boves, M Versteegh, in Calibration of distance measures for unsupervised query-by-example. Proc. of Interspeech (ISCABaixas, France, 2013), pp. 2639\u20132643."},{"key":"80_CR19","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2013.6607546","volume-title":"Memory efficient subsequence DTW for query-by-example spoken term detection","author":"X Anguera","year":"2013","unstructured":"X Anguera, M Ferrarons, in Memory efficient subsequence DTW for query-by-example spoken term detection. Proc. of ICME (IEEEWashington DC, USA, 2013)."},{"key":"80_CR20","first-page":"68","volume-title":"The CUHK spoken web search system for MediaEval 2013","author":"H Wang","year":"2013","unstructured":"H Wang, T Lee, in The CUHK spoken web search system for MediaEval 2013. Proc. of MediaEval (CEURAachen, Germany, 2013), pp. 68\u20131682."},{"key":"80_CR21","first-page":"77","volume-title":"LIA@MediaEval 2013 spoken web search task: An I-Vector based approach","author":"M Bouallegue","year":"2013","unstructured":"M Bouallegue, G Senay, M Morchid, D Matrouf, G Linares, R Dufour, in LIA@MediaEval 2013 spoken web search task: An I-Vector based approach. Proc. of MediaEval (CEURAachen, Germany, 2013), pp. 77\u20131772."},{"key":"80_CR22","first-page":"83","volume-title":"GTTS systems for the SWS task at MediaEval 2013","author":"LJ Rodriguez-Fuentes","year":"2013","unstructured":"LJ Rodriguez-Fuentes, A Varona, M Penagarikano, G Bordel, M Diez, in GTTS systems for the SWS task at MediaEval 2013. Proc. of MediaEval (CEURAachen, Germany, 2013), pp. 83\u20131832."},{"key":"80_CR23","first-page":"8545","volume-title":"Using parallel tokenizers with DTW matrix combination for low-resource spoken term detection","author":"H Wang","year":"2013","unstructured":"H Wang, T Lee, C-C Leung, B Ma, H Li, in Using parallel tokenizers with DTW matrix combination for low-resource spoken term detection. Proc. of ICASSP (IEEEWashington DC, USA, 2013), pp. 8545\u20138549."},{"key":"80_CR24","first-page":"73","volume-title":"CUHK system for QUESST task of MediaEval 2014","author":"H Wang","year":"2014","unstructured":"H Wang, T Lee, in CUHK system for QUESST task of MediaEval 2014. Proc. of MediaEval (CEURAachen, Germany, 2014), pp. 73\u20131732."},{"key":"80_CR25","first-page":"74","volume-title":"The SPL-IT query by example search on speech system for MediaEval 2014","author":"J Proenca","year":"2014","unstructured":"J Proenca, A Veiga, F Perdig\u00e3o, in The SPL-IT query by example search on speech system for MediaEval 2014. Proc. of MediaEval (CEURAachen, Germany, 2014), pp. 74\u20131742."},{"key":"80_CR26","first-page":"1722","volume-title":"Intrinsic spectral analysis based on temporal context features for query-by-example spoken term detection","author":"P Yang","year":"2014","unstructured":"P Yang, C-C Leung, L Xie, B Ma, H Li, in Intrinsic spectral analysis based on temporal context features for query-by-example spoken term detection. Proc. of Interspeech (ISCABaixas, France, 2014), pp. 1722\u20131726."},{"key":"80_CR27","first-page":"1742","volume-title":"Unsupervised query-by-example spoken term detection using bag of acoustic words and non-segmental dynamic time warping","author":"B George","year":"2014","unstructured":"B George, A Saxena, G Mantena, K Prahallad, B Yegnanarayana, in Unsupervised query-by-example spoken term detection using bag of acoustic words and non-segmental dynamic time warping. Proc. of Interspeech (ISCABaixas, France, 2014), pp. 1742\u20131746."},{"key":"80_CR28","first-page":"421","volume-title":"Query-by-example spoken term detection using phonetic posteriorgram templates","author":"TJ Hazen","year":"2009","unstructured":"TJ Hazen, W Shen, CM White, in Query-by-example spoken term detection using phonetic posteriorgram templates. Proc. of ASRU (IEEEWashington DC, USA, 2009), pp. 421\u2013426."},{"key":"80_CR29","first-page":"85","volume-title":"The L2F spoken web search system for MediaEval 2013","author":"A Abad","year":"2013","unstructured":"A Abad, RF Astudillo, I Trancoso, in The L2F spoken web search system for MediaEval 2013. Proc. of MediaEval (CEURAachen, Germany, 2013), pp. 85\u20131852."},{"key":"80_CR30","first-page":"20","volume-title":"On the calibration and fusion of heterogeneous spoken term detection systems","author":"A Abad","year":"2013","unstructured":"A Abad, LJ Rodr\u00edguez-Fuentes, M Penagarikano, A Varona, G Bordel, in On the calibration and fusion of heterogeneous spoken term detection systems. Proc. of Interspeech (ISCABaixas, France, 2013), pp. 20\u201324."},{"key":"80_CR31","first-page":"62","volume-title":"BUT QUESST 2014 system description","author":"I Sz\u00f6ke","year":"2014","unstructured":"I Sz\u00f6ke, M Sk\u00e1cel, L Burget, in BUT QUESST 2014 system description. Proc. of MediaEval (CEURAachen, Germany, 2014), pp. 62\u20131622."},{"key":"80_CR32","first-page":"69","volume-title":"The NNI query-by-example system for MediaEval 2014","author":"P Yang","year":"2014","unstructured":"P Yang, H Xu, X Xiao, L Xie, C-C Leung, H Chen, J Yu, H Lv, L Wang, SJ Leow, B Ma, ES Chng, H Li, in The NNI query-by-example system for MediaEval 2014. Proc. of MediaEval (CEURAachen, Germany, 2014), pp. 69\u20131692."},{"key":"80_CR33","first-page":"7849","volume-title":"Calibration and fusion of query-by-example systems - BUT SWS 2013","author":"I Sz\u00f6ke","year":"2014","unstructured":"I Sz\u00f6ke, L Burget, F Gr\u00e9zl, JH \u010cernock\u00fd, L Ondel, in Calibration and fusion of query-by-example systems - BUT SWS 2013. Proc. of ICASSP (IEEEWashington DC, USA, 2014), pp. 7849\u20137853."},{"issue":"2","key":"80_CR34","doi-asserted-by":"publisher","first-page":"264","DOI":"10.1109\/TASLP.2014.2387382","volume":"23","author":"H Wang","year":"2015","unstructured":"H Wang, T Lee, C-C Leung, B Ma, H Li, Acoustic segment modeling with spectral clustering methods. IEEE\/ACM Trans. Audio Speech Lang. Process. 23(2), 264\u2013277 (2015).","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process"},{"key":"80_CR35","first-page":"5231","volume-title":"Enhancing automatically discovered multi-level acoustic patterns considering context consistency with applications in spoken term detection","author":"C-T Chung","year":"2015","unstructured":"C-T Chung, W-N Hsu, C-Y Lee, L-S Lee, in Enhancing automatically discovered multi-level acoustic patterns considering context consistency with applications in spoken term detection. Proc. of ICASSP (IEEEWashington DC, USA, 2015), pp. 5231\u20135235."},{"key":"80_CR36","unstructured":"NIST, The Ninth Text REtrieval Conference (TREC 9) (2000). \n                    http:\/\/trec.nist.gov\n                    \n                  . Accessed 8 January 2016."},{"key":"80_CR37","first-page":"1","volume-title":"Overview of the NTCIR-11, SpokenQuery&Doc Task","author":"H Joho","year":"2014","unstructured":"H Joho, K Kishida, in Overview of the NTCIR-11, SpokenQuery&Doc Task. Proc. of NTCIR-11 (National Institute of InformaticsTokyo, Japan, 2014), pp. 1\u20137."},{"key":"80_CR38","first-page":"1","volume-title":"The spoken web search task","author":"X Anguera","year":"2013","unstructured":"X Anguera, F Metze, A Buzo, I Sz\u00f6ke, LJ Rodriguez-Fuentes, in The spoken web search task. Proc. of MediaEval (CEURAachen, Germany, 2013), pp. 1\u20132."},{"key":"80_CR39","first-page":"1","volume-title":"Query by example search on speech at Mediaeval 2014","author":"X Anguera","year":"2014","unstructured":"X Anguera, LJ Rodriguez-Fuentes, I Sz\u00f6ke, A Buzo, F Metze, in Query by example search on speech at Mediaeval 2014. Proc. of MediaEval (CEURAachen, Germany, 2014), pp. 1\u20132."},{"key":"80_CR40","volume-title":"Draft KWS14 Keyword Search Evaluation Plan","author":"NIST","year":"2013","unstructured":"NIST, Draft KWS14 Keyword Search Evaluation Plan (National Institute of Standards and Technology (NIST), Gaithersburg, MD, USA, 2013). National Institute of Standards and Technology (NIST). \n                    http:\/\/www.nist.gov\/itl\/iad\/mig\/upload\/KWS14-evalplan-v11.pdf\n                    \n                  . Accessed 8 January 2016."},{"key":"80_CR41","first-page":"1","volume":"2011","author":"B Taras","year":"2011","unstructured":"B Taras, C Nadeu, Audio segmentation of broadcast news in the Albayzin-2010 evaluation: overview, results, and discussion. EURASIP Journal on Audio, Speech, and Music Processing. 2011:, 1\u20131110 (2011).","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"key":"80_CR42","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1186\/1687-4722-2012-19","volume":"2012","author":"M Zelen\u00e1k","year":"2012","unstructured":"M Zelen\u00e1k, H Schulz, J Hernando, Speaker diarization of broadcast news in Albayzin 2010 evaluation campaign. EURASIP Journal on Audio, Speech, and Music Processing.2012:, 19\u20131199 (2012).","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing."},{"key":"80_CR43","first-page":"1529","volume-title":"The Albayzin 2010 Language Recognition Evaluation","author":"LJ Rodr\u00edguez-Fuentes","year":"2011","unstructured":"LJ Rodr\u00edguez-Fuentes, M Penagarikano, A Varona, M D\u00edez, G Bordel, in The Albayzin 2010 Language Recognition Evaluation. Proc. of Interspeech (ISCABaixas, France, 2011), pp. 1529\u20131532."},{"key":"80_CR44","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1186\/1687-4722-2013-23","volume":"2013","author":"J Tejedor","year":"2013","unstructured":"J Tejedor, DT Toledano, X Anguera, A Varona, LF Hurtado, A Miguel, J Col\u00e1s, Query-by-example spoken term detection ALBAYZIN 2012 evaluation: overview, systems, results, and discussion. EURASIP, Journal on Audio, Speech, and Music Processing. 2013:, 23\u201312317 (2013).","journal-title":"EURASIP, Journal on Audio, Speech, and Music Processing"},{"key":"80_CR45","first-page":"317","volume-title":"The Albayzin 2010 text-to-speech evaluation","author":"F M\u00e9ndez","year":"2010","unstructured":"F M\u00e9ndez, L Doc\u00edo, M Arza, F Campillo, in The Albayzin 2010 text-to-speech evaluation. Proc. of FALA (Spanish Thematic Network on Speech TechnologyMadrid, Spain, 2010), pp. 317\u2013340."},{"key":"80_CR46","volume-title":"Multilingual speech recognition: the 1996 Byblos callhome system","author":"J Billa","year":"1997","unstructured":"J Billa, KW Ma, JW McDonough, G Zavaliagkos, DR Miller, KN Ross, A El-Jaroudi, in Multilingual speech recognition: the 1996 Byblos callhome system. Proc. of Eurospeech (ISCABaixas, France, 1997)."},{"key":"80_CR47","first-page":"156","volume-title":"Out-of-vocabulary word modeling and rejection for spanish keyword spotting systems","author":"H Cuayahuitl","year":"2002","unstructured":"H Cuayahuitl, B Serridge, in Out-of-vocabulary word modeling and rejection for spanish keyword spotting systems. Proc. of MICAI (SpringerLondon, United Kingdom, 2002), pp. 156\u2013165."},{"key":"80_CR48","first-page":"3141","volume-title":"Grapheme based speech recognition","author":"M Killer","year":"2003","unstructured":"M Killer, S Stuker, T Schultz, in Grapheme based speech recognition. Proc. of Eurospeech (ISCABaixas, France, 2003), pp. 3141\u20133144."},{"key":"80_CR49","volume-title":"Contributions to Keyword Spotting and Spoken Term Detection For Information Retrieval in Audio Mining. PhD thesis, Universidad Aut\u00f3noma de Madrid, Madrid, Spain","author":"J Tejedor","year":"2009","unstructured":"J Tejedor, Contributions to Keyword Spotting and Spoken Term Detection For Information Retrieval in Audio Mining. PhD thesis, Universidad Aut\u00f3noma de Madrid, Madrid, Spain (Universidad Aut\u00c33noma de Madrid, Madrid, Spain, 2009)."},{"key":"80_CR50","first-page":"4334","volume-title":"Multilingual acoustic modeling for speech recognition based on subspace gaussian mixture models","author":"L Burget","year":"2010","unstructured":"L Burget, P Schwarz, M Agarwal, P Akyazi, K Feng, A Ghoshal, O Glembek, N Goel, M Karafiat, D Povey, A Rastrow, RC Rose, S Thomas, in Multilingual acoustic modeling for speech recognition based on subspace gaussian mixture models. Proc. of ICASSP (IEEEWashington DC, USA, 2010), pp. 4334\u20134337."},{"issue":"5","key":"80_CR51","doi-asserted-by":"publisher","first-page":"1083","DOI":"10.1016\/j.csl.2013.09.008","volume":"28","author":"J Tejedor","year":"2014","unstructured":"J Tejedor, DT Toledano, D Wang, S King, J Col\u00e1s, Feature analysis for discriminative confidence estimation in spoken term detection. Comput. Speech Lang. 28(5), 1083\u20131114 (2014).","journal-title":"Comput. Speech Lang"},{"key":"80_CR52","first-page":"1747","volume-title":"An empirical study of multilingual and low-resource spoken term detection using deep neural networks","author":"J Li","year":"2014","unstructured":"J Li, X Wang, B Xu, in An empirical study of multilingual and low-resource spoken term detection using deep neural networks. Proc. of Interspeech (ISCABaixas, France, 2014), pp. 1747\u20131751."},{"key":"80_CR53","volume-title":"The Spoken Term Detection (STD) 2006 Evaluation Plan","author":"NIST","year":"2006","unstructured":"NIST, The Spoken Term Detection (STD) 2006 Evaluation Plan, 10th edn. (National Institute of Standards and Technology (NIST), Gaithersburg, MD, USA, 2006). National Institute of Standards and Technology (NIST). \n                    http:\/\/www.nist.gov\/speech\/tests\/std\n                    \n                  . Accessed 8 January 2016."},{"key":"80_CR54","first-page":"45","volume-title":"Results of the 2006 spoken term detection evaluation","author":"JG Fiscus","year":"2007","unstructured":"JG Fiscus, J Ajot, JS Garofolo, G Doddingtion, in Results of the 2006 spoken term detection evaluation. Proc. of SSCS (ACMNew York, USA, 2007), pp. 45\u201350."},{"key":"80_CR55","first-page":"1895","volume-title":"The DET curve in assessment of detection task performance","author":"A Martin","year":"1997","unstructured":"A Martin, G Doddington, T Kamm, M Ordowski, M Przybocki, in The DET curve in assessment of detection task performance. Proc. of Eurospeech (ISCABaixas, France, 1997), pp. 1895\u20131898."},{"key":"80_CR56","volume-title":"NIST Speech Tools and APIs: 2006","author":"NIST","year":"1996","unstructured":"NIST, NIST Speech Tools and APIs: 2006 (National Institute of Standards and Technology (NIST), Gaithersburg, MD, USA, 1996). National Institute of Standards and Technology (NIST). \n                    http:\/\/www.nist.gov\/speech\/tools\/index.htm\n                    \n                  . Accessed 8 January 2016."},{"key":"80_CR57","first-page":"1","volume-title":"Spoken web search","author":"N Rajput","year":"2011","unstructured":"N Rajput, F Metze, in Spoken web search. Proc. of MediaEval (CEURAachen, Germany, 2011), pp. 1\u20132."},{"key":"80_CR58","first-page":"1","volume-title":"The spoken web search task","author":"F Metze","year":"2012","unstructured":"F Metze, E Barnard, M Davel, Heerden C van, X Anguera, G Gravier, N Rajput, in The spoken web search task. Proc. of MediaEval (CEURAachen, Germany, 2012), pp. 1\u20132."},{"key":"80_CR59","unstructured":"NTCIR-11 Spoken Query and Spoken Document Retrieval Task Organizers, Definition of SQ-STD Task at NTCIR-11 SpokenQuery &Doc (2014). \n                    http:\/\/www.nlp.cs.tut.ac.jp\/~sdpwg\/ntcir11\/SQ-STD.pdf\n                    \n                  . Accessed 8 January 2016."},{"key":"80_CR60","volume-title":"The Kaldi speech recognition toolkit","author":"D Povey","year":"2011","unstructured":"D Povey, A Ghoshal, G Boulianne, L Burget, O Glembek, N Goel, M Hannemann, P Motlicek, Y Qian, P Schwarz, J Silovsky, G Stemmer, K Vesely, in The Kaldi speech recognition toolkit. Proc. of ASRU (IEEEWashington DC, USA, 2011)."},{"key":"80_CR61","volume-title":"TC-STAR 2006 automatic speech recognition evaluation: The uvigo system","author":"L Doc\u00edo-Fern\u00e1ndez","year":"2006","unstructured":"L Doc\u00edo-Fern\u00e1ndez, A Cardenal-L\u00f3pez, C Garc\u00eda-Mateo, in TC-STAR 2006 automatic speech recognition evaluation: The uvigo system. Proc. of TC-STAR Workshop on Speech-to-Speech Translation (META-NETBerlin, Germany, 2006)."},{"key":"80_CR62","first-page":"901","volume-title":"SRILM - an extensible language modeling toolkit","author":"A Stolcke","year":"2002","unstructured":"A Stolcke, in SRILM - an extensible language modeling toolkit. Proc. of ICSLP (ISCABaixas, France, 2002), pp. 901\u2013904."},{"key":"80_CR63","first-page":"4213","volume-title":"Proc. of ICASSP","author":"D Povey","year":"2012","unstructured":"D Povey, M Hannemann, G Boulianne, L Burget, A Ghoshal, M Janda, M Karafiat, S Kombrink, P Motlicek, Y Qian, K Riedhammer, K Vesely, NT Vu, in Proc. of ICASSP. Generating exact lattices in the WFST framework (IEEEWashington DC, USA, 2012), pp. 4213\u20134216."},{"key":"80_CR64","first-page":"8560","volume-title":"Quantifying the value of pronunciation lexicons for keyword search in low resource languages","author":"G Chen","year":"2013","unstructured":"G Chen, S Khudanpur, D Povey, J Trmal, D Yarowsky, O Yilmaz, in Quantifying the value of pronunciation lexicons for keyword search in low resource languages. Proc. of ICASSP (IEEEWashington DC, USA, 2013), pp. 8560\u20138564."},{"key":"80_CR65","first-page":"430","volume-title":"System and keyword dependent fusion for spoken term detection","author":"VT Pham","year":"2014","unstructured":"VT Pham, NF Chen, S Sivadas, H Xu, I-F Chen, C Ni, ES Chng, H Li, in System and keyword dependent fusion for spoken term detection. Proc. of SLT (IEEEWashington DC, USA, 2014), pp. 430\u2013435."},{"issue":"8","key":"80_CR66","doi-asserted-by":"publisher","first-page":"2338","DOI":"10.1109\/TASL.2011.2134087","volume":"19","author":"D Can","year":"2011","unstructured":"D Can, M Saraclar, Lattice indexing for spoken term detection. IEEE Trans. Audio Speech Lang. Process. 19(8), 2338\u20132347 (2011).","journal-title":"IEEE Trans. Audio Speech Lang. Process"},{"key":"80_CR67","first-page":"568","volume-title":"Introducing a framework for the evaluation of music detection tools","author":"P Lopez-Otero","year":"2014","unstructured":"P Lopez-Otero, L Docio-Fernandez, C Garcia-Mateo, in Introducing a framework for the evaluation of music detection tools. Proc. of LREC (European Language Resources AssociationParis, France, 2014), pp. 568\u2013572."},{"key":"80_CR68","unstructured":"C Neves, A Veiga, Sa, L\u0301, F Perdig\u00e3o, in Audio fingerprinting system for broadcast streams, 1. Proc. of ConfTele (Santa Maria da Feira, Instituto de Telecomunica\u00e7\u00f5es, Campus Universit\u00e3rio de Santiago, Aveiro, Portugal, 2009), pp. 481\u2013484."},{"key":"80_CR69","volume-title":"Proc. of the 10th Conference on Digital Audio Effects","author":"K Seyerlehner","year":"2007","unstructured":"K Seyerlehner, G Widmer, T Pohle, M Sched, in Proc. of the 10th Conference on Digital Audio Effects. Automatic music detection in television productions (LaBRI, Universit\u00c3\u24b8 BordeauxBordeaux, France, 2007)."},{"key":"80_CR70","first-page":"1261","volume-title":"Music fingerprint extraction for classical music cover song identification","author":"S Kim","year":"2008","unstructured":"S Kim, E Unal, S Narayanan, in Music fingerprint extraction for classical music cover song identification. Proc. of ICME (IEEEWashington DC, USA, 2008), pp. 1261\u20131264."},{"key":"80_CR71","first-page":"107","volume-title":"A highly robust audio fingerprinting system","author":"J Haitsma","year":"2002","unstructured":"J Haitsma, T Kalker, in A highly robust audio fingerprinting system. Proc. of ISMIR (ISMIRParis, France, 2002), pp. 107\u2013115."},{"key":"80_CR72","first-page":"421","volume-title":"Query-by-example spoken term detection using phonetic posteriorgram templates","author":"TJ Hazen","year":"2009","unstructured":"TJ Hazen, W Shen, CM White, in Query-by-example spoken term detection using phonetic posteriorgram templates. Proc. of ASRU (IEEEWashington DC, USA, 2009), pp. 421\u2013426."},{"key":"80_CR73","volume-title":"Phoneme recognition based on long temporal context. PhD thesis, Brno University of Technology","author":"P Schwarz","year":"2009","unstructured":"P Schwarz, Phoneme recognition based on long temporal context. PhD thesis, Brno University of Technology (Brno University of Technology, Brno, Czech Republic, 2009)."},{"key":"80_CR74","first-page":"9","volume-title":"The L2F spoken web search system for mediaeval 2012","author":"A Abad","year":"2012","unstructured":"A Abad, RF Astudillo, in The L2F spoken web search system for mediaeval 2012. Proc. of MediaEval (CEURAachen, Germany, 2012), pp. 9\u201310."},{"key":"80_CR75","unstructured":"N Br\u00fcmmer, E de Villiers, The BOSARIS toolkit user guide: Theory, algorithms and code for binary classifier score processing. Technical report (2011). \n                    https:\/\/sites.google.com\/site\/nikobrummer\n                    \n                  . Accessed 8 January 2016."},{"key":"80_CR76","first-page":"2345","volume-title":"Sequence-discriminative training of deep neural networks","author":"A Ghoshal","year":"2013","unstructured":"Vesely, K\u0301, A Ghoshal, L Burget, D Povey, in Sequence-discriminative training of deep neural networks. Proc. of Interspeech (ISCABaixas, France, 2013), pp. 2345\u20132349."},{"key":"80_CR77","unstructured":"IberSPEECH 2012, \u201cVII Jornadas en Tecnolog\u00eda del Habla\u201d and \u201cIII Iberian SLTech Workshop\u201d (2012). \n                    http:\/\/iberspeech2012.ii.uam.es\n                    \n                  . Accessed 8 January 2016."},{"issue":"4","key":"80_CR78","doi-asserted-by":"publisher","first-page":"688","DOI":"10.1109\/TASL.2010.2058800","volume":"19","author":"D Wang","year":"2011","unstructured":"D Wang, S King, J Frankel, Stochastic pronunciation modelling for out-of-vocabulary spoken term detection. IEEE Trans. Audio Speech Lang. Process. 19(4), 688\u2013698 (2011).","journal-title":"IEEE Trans. Audio Speech Lang. Process"},{"key":"80_CR79","first-page":"540","volume-title":"Automatic speech segmentation based on acoustical clustering","author":"JA G\u00f3mez","year":"2010","unstructured":"JA G\u00f3mez, E Sanchis, MJ Castro-Bleda, in Automatic speech segmentation based on acoustical clustering. Proc. of the Joint IAPR International Conference on Structural, Syntactic, and Statistical Pattern Recognition (SpringerLondon, United Kingdom, 2010), pp. 540\u2013548."},{"key":"80_CR80","first-page":"53","volume-title":"Towards unsupervised pattern discovery in speech","author":"A Park","year":"2005","unstructured":"A Park, JR Glass, in Towards unsupervised pattern discovery in speech. Proc. of ASRU (IEEEWashington DC, USA, 2005), pp. 53\u201358."}],"container-title":["EURASIP Journal on Audio, Speech, and Music Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13636-016-0080-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s13636-016-0080-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13636-016-0080-2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13636-016-0080-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,3,26]],"date-time":"2019-03-26T16:07:54Z","timestamp":1553616474000},"score":1,"resource":{"primary":{"URL":"https:\/\/asmp-eurasipjournals.springeropen.com\/articles\/10.1186\/s13636-016-0080-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,1,13]]},"references-count":80,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2016,12]]}},"alternative-id":["80"],"URL":"https:\/\/doi.org\/10.1186\/s13636-016-0080-2","relation":{},"ISSN":["1687-4722"],"issn-type":[{"value":"1687-4722","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,1,13]]},"article-number":"1"}}