{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T06:12:06Z","timestamp":1775283126727,"version":"3.50.1"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2001,12,1]],"date-time":"2001-12-01T00:00:00Z","timestamp":1007164800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2001,12,1]],"date-time":"2001-12-01T00:00:00Z","timestamp":1007164800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Multimedia Tools and Applications"],"published-print":{"date-parts":[[2001,12]]},"DOI":"10.1023\/a:1012491016871","type":"journal-article","created":{"date-parts":[[2002,12,23]],"date-time":"2002-12-23T13:38:06Z","timestamp":1040650686000},"page":"269-290","source":"Crossref","is-referenced-by-count":39,"title":["Indexing and Retrieval of Audio: A Survey"],"prefix":"10.1007","volume":"15","author":[{"given":"Goujun","family":"Lu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"381123_CR1","doi-asserted-by":"crossref","first-page":"179","DOI":"10.1007\/BF00393937","volume":"3","author":"P. Aigrain","year":"1996","unstructured":"P. Aigrain, H. Zhang, and D. Petkovic, \u201cContent-based representation and retrieval of visual media: A stateof the-art review,\u201d Journal of Multimedia Tools and Applications, Vol. 3, pp. 179\u2013202, 1996.","journal-title":"Journal of Multimedia Tools and Applications"},{"key":"381123_CR2","doi-asserted-by":"crossref","unstructured":"J.R. Bach, \u201cThe virage image search engine: An open framework for image management,\u201d in Proceedings of Conference on Storage and Retrieval for Image and Video Databases IV (SPIE Proceedings Vol. 2670), 1\u20132 Feb., San Jose, California, 1996, pp. 76\u201387.","DOI":"10.1117\/12.234785"},{"key":"381123_CR3","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/1486.001.0001","volume-title":"Auditory Scene Analysis\u2014The Perception Organization of Sound","author":"A.S. Bregman","year":"1990","unstructured":"A.S. Bregman, Auditory Scene Analysis\u2014The Perception Organization of Sound, The MIT Press: Cambridge, MA, 1990."},{"issue":"12","key":"381123_CR4","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1109\/6.642967","volume":"34","author":"R. Comerford","year":"1997","unstructured":"R. Comerford, J. Makhoul, and R. Schwartz, \u201cThe voice of the computer is heard in the land (and it listens too!),\u201d IEEE Spectrum, Vol. 34, No. 12, pp. 39\u201347, 1997.","journal-title":"IEEE Spectrum"},{"key":"381123_CR5","doi-asserted-by":"crossref","first-page":"765","DOI":"10.1109\/ICASSP.1999.759781","volume":"II","author":"V. Digalakis","year":"1999","unstructured":"V. Digalakis, S. Berkowitz, E. Bocchieri, C. Boulis, W. Byrne, H. Collier, A. Corduneanu, A. Kannan, S. Khudanpur, and A. Sankar, \u201cRapid speech recognizer adaptation to new speakers,\u201d in 1999 IEEE International Conference on Acoustics, Speech, and Signal Processing, March 15\u201319, Phoenix, Arizona, Vol. II, 1999, pp. 765\u2013768.","journal-title":"1999 IEEE International Conference on Acoustics, Speech, and Signal Processing, March 15\u201319, Phoenix, Arizona"},{"key":"381123_CR6","unstructured":"J.T. Foote, \u201cA similarity measure for automatic audio classification,\u201d in Pro. AAAI 1997 Spring Symposium on Intelligent Integration and Use of Text, Image, Video and Audio Corpora, Stanford, Palo Alto, CA, Mar. 1997."},{"key":"381123_CR7","volume-title":"Information Retrieval: Data structures and Algorithms","year":"1992","unstructured":"W.B. Frakes and R. Baeza-Yates (Eds.), Information Retrieval: Data structures and Algorithms, Prentice Hall: Englewood Cliffs, NJ, 1992."},{"key":"381123_CR8","doi-asserted-by":"crossref","unstructured":"A. Ghias et al., \u201cQuery by humming\u2014Musical information retrieval in an audio database,\u201d in Proceedings of ACM Multimedia 95, November 5\u20139, San Francisco, California, 1995.","DOI":"10.1145\/217279.215273"},{"key":"381123_CR9","volume-title":"Multimedia Programming\u2014Objects, Environments and Frameworks","author":"S.J. Gibbs","year":"1995","unstructured":"S.J. Gibbs and D.C. Tsichritzis, Multimedia Programming\u2014Objects, Environments and Frameworks, Addison-Wesley Publishing Company: Reading, MA, 1995."},{"key":"381123_CR10","doi-asserted-by":"crossref","unstructured":"A.G. Hauptmann, M.J. Witbrock, A.I. Rudnicky, and S. Reed, \u201cSpeech for multimedia information retrieval,\u201d in UIST-95 Proceedings of the User Interface Software Technology Conference, Pittsburgh, Nov. 1995.","DOI":"10.1145\/215585.215667"},{"key":"381123_CR11","volume-title":"Voice Recognition","author":"R.L. Klevans","year":"1997","unstructured":"R.L. Klevans and R.D. Rodman, Voice Recognition, Artech House: Boston, MA, 1997."},{"key":"381123_CR12","unstructured":"G. Lu and T. Hankinson, \u201cA technique towards automatic audio classification and retrieval,\u201d in Proceedings of International Conference on Signal Processing, Oct. 12\u201316, Beijing, China, 1998."},{"key":"381123_CR13","volume-title":"Introductory Digital Signal Processing with Computer Applications","author":"P.A. Lynn","year":"1989","unstructured":"P.A. Lynn and W. Fuerst, Introductory Digital Signal Processing with Computer Applications, John Wiley & Sons: New York, 1989."},{"key":"381123_CR14","unstructured":"K.D. Martin, \u201cAutomatic transcription of simple polyphonic music: Robust front end processing,\u201d M.I.T. Media Laboratory Perceptual Computing Section Technical Report No. 399, 1996, available at http:\/\/sound.media.mit.edu\/papers.html."},{"key":"381123_CR15","doi-asserted-by":"crossref","unstructured":"R.J. McNab et al., \u201cThe New Zealand digital library MELody inDex,\u201d D-Lib Magazine, May 1997, available at http:\/\/mirrored.ukoln.ac.uk\/lis-journals\/dlib\/dlib\/dlib\/may97\/meldex\/05written.html.","DOI":"10.1045\/may97-witten"},{"key":"381123_CR16","doi-asserted-by":"crossref","unstructured":"K. Minami et al., \u201cEnhanced video handling based on audio analysis,\u201d in Proceedings of IEEE International Conference on Multimedia Computing and Systems, June 3\u20136, Ottawa, Canada, 1997, pp. 219\u2013226.","DOI":"10.1109\/MMCS.1997.609596"},{"key":"381123_CR17","doi-asserted-by":"crossref","DOI":"10.1163\/9789004658820","volume-title":"An Introduction to Psychology of Hearing","author":"B.C.J. Moore","year":"1997","unstructured":"B.C.J. Moore, An Introduction to Psychology of Hearing, Academic Press: New York, 1997."},{"key":"381123_CR18","doi-asserted-by":"crossref","DOI":"10.1007\/978-1-4615-3950-6","volume-title":"Neural Networks and Speech Processing","author":"D.P. Morgan","year":"1991","unstructured":"D.P. Morgan and C.L. Scofield, Neural Networks and Speech Processing, Kluwer: Dordrecht, 1991."},{"key":"381123_CR19","doi-asserted-by":"crossref","unstructured":"W. Niblack, X. Zhu, J.L. Hafner, T. Breuel, D.B. Panceleon, D. Petkovic, M.D. Flickner, E. Upfal, S.I. Nin, S. Sull, B.E. Dom, B.-L. Yeo, S. Srinivansan, D. Zivkovic and M. Penner, \u201cUpdates to the QBIC system,\u201d in Proceedings of Conference on Storage and Retrieval for Image and Video Databases VI (SPIE Proceedings Vol. 3312), 28\u201330 Jan., San Jose, California, 1998, pp. 150\u2013161.","DOI":"10.1117\/12.298439"},{"key":"381123_CR20","doi-asserted-by":"crossref","first-page":"373","DOI":"10.1117\/12.234776","volume":"2670","author":"N.V. Patel","year":"1996","unstructured":"N.V. Patel and I.K. Sethi, \u201cAudio characterization for video indexing,\u201d SPIE Proceedings, Vol. 2670, pp. 373\u2013384, 1996.","journal-title":"SPIE Proceedings"},{"key":"381123_CR21","unstructured":"A.W. Peevers, \u201cA real time 3D signal analysis\/synthesis tool based on the short time fourier transform,\u201d http:\/\/cnmat.CNMAT.Berkeley.EDU\/~alan\/MS-html\/MSthesis.v2ToC.html."},{"key":"381123_CR22","unstructured":"S. Pfeiffer, S. Fischer, and W. Effelsberg, \u201cAutomatic audio content analysis,\u201d http:\/\/www.informatik.unimannheim.de\/informatic\/pi4\/projects\/MoCA\/."},{"key":"381123_CR23","unstructured":"R. Polikar, \u201cThe wavelet tutorial,\u201d http:\/\/www.public.iastate.edu\/\u00a1\u00abrpolikar\/WAVELETS\/WTtutorial.htm."},{"key":"381123_CR24","doi-asserted-by":"crossref","unstructured":"L.R. Rabiner, \u201cA tutorial on hidden Markov models and selected applications in speech recognition,\u201d in Proceedings of The IEEE, Vol. 77, No. 2, 1989.","DOI":"10.1109\/5.18626"},{"key":"381123_CR25","volume-title":"Fundamentals of Speech Recognition","author":"L.R. Rabiner","year":"1993","unstructured":"L.R. Rabiner and B.-H. Juang, Fundamentals of Speech Recognition, Prentice Hall: Englewood Cliffs, NJ, 1993."},{"key":"381123_CR26","volume-title":"Introduction to Modern Information Retrieval","author":"G. Salton","year":"1983","unstructured":"G. Salton and M.J. McGill, Introduction to Modern Information Retrieval, McGraw-Hill: New York, 1983."},{"key":"381123_CR27","first-page":"993","volume":"2","author":"J. Saunders","year":"1996","unstructured":"J. Saunders, \u201cReal-time discrimination of broadcast speech\/music,\u201d in Proceedings ACASSP'96, Vol. 2, 1996, pp. 993\u2013996.","journal-title":"Proceedings ACASSP'96"},{"key":"381123_CR28","unstructured":"E.D. Scheirer, \u201cTempo and beat analysis of acoustic music signals,\u201d http:\/\/sound.media.mit.edu\/~eds\/papers\/ beat-track.html."},{"key":"381123_CR29","unstructured":"E.D. Scheirer, \u201cThe MPEG-4 structured audio standard,\u201d in Proc. IEEE ICASSP 1998, also available at http:\/\/sound.media.mit.edu\/papers.html."},{"key":"381123_CR30","unstructured":"E.D. Scheirer, \u201cUsing musical knowledge to extract expressive performance information from audio recordings,\u201d available at http:\/\/sound.media.mit.edu\/papers.html."},{"key":"381123_CR31","unstructured":"E. Scheirer and M. Slaney, \u201cConstruction and evaluation of a robust multifeature speech\/music discriminator,\u201d in Proceedings of the 1997 International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 21\u201324, Munich, Germany, 1997. Also available at http:\/\/web.interval.com\/papers\/1996-085\/index.html."},{"key":"381123_CR32","doi-asserted-by":"crossref","unstructured":"J.R. Smith and S.-F. Chang, \u201cVisually searching the web for content,\u201d IEEE Multimedia Magazine, July\u2013Sept., pp. 12\u201319, 1997.","DOI":"10.1109\/93.621578"},{"key":"381123_CR33","doi-asserted-by":"crossref","unstructured":"S. Subramanya et al., \u201cTransform-based indexing of audio data for multimedia databases,\u201d in Proceedings of IEEE International Conference on Multimedia Computing and Systems, June 3\u20136, Ottawa, Canada, 1997, pp. 211\u2013218.","DOI":"10.1109\/MMCS.1997.609595"},{"key":"381123_CR34","unstructured":"The CMU Speech Project, http:\/\/www.speech.cs.cmu.edu\/speech."},{"key":"381123_CR35","unstructured":"M.J. Witbrock and A.G. Hauptmann, \u201cSpeech recognition and information retrieval,\u201d in Proceedings of the 1997 DARPA Speech Recognition Workshop, February 2\u20135, 1997."},{"issue":"3","key":"381123_CR36","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1109\/93.556537","volume":"3","author":"E. Wold","year":"1996","unstructured":"E. Wold et al., \u201cContent-based classification, search, and retrieval of audio,\u201d IEEE Multimedia, Vol. 3, No. 3, pp. 27\u201336, 1996.","journal-title":"IEEE Multimedia"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1012491016871.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1023\/A:1012491016871\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1023\/A:1012491016871.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T06:34:47Z","timestamp":1748241287000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1023\/A:1012491016871"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2001,12]]},"references-count":36,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2001,12]]}},"alternative-id":["381123"],"URL":"https:\/\/doi.org\/10.1023\/a:1012491016871","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2001,12]]}}}