{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T11:46:40Z","timestamp":1774439200729,"version":"3.50.1"},"reference-count":38,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2018,12,1]],"date-time":"2018-12-01T00:00:00Z","timestamp":1543622400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100008530","name":"FEDER","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100008530","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Spanish Government","award":["TIN2014-59641-C2-1-P"],"award-info":[{"award-number":["TIN2014-59641-C2-1-P"]}]},{"name":"Spanish Government","award":["TIN2014-54728-REDC"],"award-info":[{"award-number":["TIN2014-54728-REDC"]}]},{"name":"Spanish Government","award":["BIA2016-76957-C3-1-R"],"award-info":[{"award-number":["BIA2016-76957-C3-1-R"]}]},{"name":"Spanish Government","award":["FPU14\/06329"],"award-info":[{"award-number":["FPU14\/06329"]}]},{"name":"Spanish Government","award":["ISIC\/2012\/004"],"award-info":[{"award-number":["ISIC\/2012\/004"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1109\/taslp.2018.2865615","type":"journal-article","created":{"date-parts":[[2018,8,15]],"date-time":"2018-08-15T18:44:36Z","timestamp":1534358676000},"page":"2381-2392","source":"Crossref","is-referenced-by-count":7,"title":["Adaptive Mid-Term Representations for Robust Audio Event Classification"],"prefix":"10.1109","volume":"26","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2115-0193","authenticated-orcid":false,"given":"Irene","family":"Martin-Morato","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7318-3192","authenticated-orcid":false,"given":"Maximo","family":"Cobos","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1543-3568","authenticated-orcid":false,"given":"Francesc J.","family":"Ferri","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref38","article-title":"A survey of sound source localization methods in wireless acoustic sensor networks","volume":"2017","author":"cobos","year":"2017","journal-title":"Wireless Commun Mobile Comput"},{"key":"ref33","article-title":"Detection of rare sound events.","author":"heittola","year":"0"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2587218"},{"key":"ref31","article-title":"Sound event detection in synthetic audio","author":"heittola","year":"2016"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP.2016.7738834"},{"key":"ref37","article-title":"Soundnet: Learning sound representations from unlabeled video.","author":"yusuf aytar","year":"2016"},{"key":"ref36","first-page":"1","article-title":"Analysis of data fusion techniques for multi-microphone audio event detection in adverse environments","author":"mart\u00edn-morat\u00f3","year":"2017","journal-title":"Proc IEEE 19th Int Workshop Multimedia Signal Process"},{"key":"ref35","article-title":"Simscene: Simulation of acoustic scenes","author":"lagrange","year":"0"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2017.8169985"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2690575"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2592698"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1121\/1.2750160"},{"key":"ref13","first-page":"892","article-title":"Soundnet: Learning sound representations from unlabeled video","author":"aytar","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2008.2008216"},{"key":"ref15","first-page":"399","author":"petridis","year":"2010","journal-title":"A Multi-class Method for Detecting Audio Events in News Broadcasts"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-08-099388-1.00004-2"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ISSPA.1999.815754"},{"key":"ref18","first-page":"1731","article-title":"Environmental sound classification using hybrid SVM\/KNN classifier and MPEG-7 audio low-level descriptor","author":"wang","year":"2006","journal-title":"Proc IEEE Int Joint Conf Neural Netw"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1121\/1.415968"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1186\/s13636-016-0086-9"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ESPA.2012.6152455"},{"key":"ref27","first-page":"420","author":"aggarwal","year":"0","journal-title":"On the Surprising Behavior of Distance Metrics in High Dimensional Space"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-13326-3_10"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2013.6701847"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/SMAP.2010.5706867"},{"key":"ref5","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-540-68585-2","volume":"4625","author":"stiefelhagen","year":"2008","journal-title":"Multimodal Technologies for Perception of Humans"},{"key":"ref8","first-page":"1267","article-title":"Acoustic event detection in real life recordings","author":"mesaros","year":"2010","journal-title":"Proc 18th Eur Signal Process Conf"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472079"},{"key":"ref2","first-page":"180","article-title":"Personalized acoustic interfaces for human-computer interaction","author":"rennies","year":"2010","journal-title":"Proc Human-Centered Des E-Health Technol Concepts Methods Appl"},{"key":"ref9","article-title":"An MFCC-GMM approach for event detection and classification","author":"lode","year":"2013"},{"key":"ref1","first-page":"159","article-title":"Acoustic-based technologies for ambient assisted living","author":"cobos","year":"2016","journal-title":"Introduction to Smart eHealth and eCare Technologies"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2006.1661377"},{"key":"ref22","first-page":"171","author":"sekhar","year":"2002","journal-title":"Recognition of Consonant-Vowel (CV) Units of Speech in a Broadcast News Corpus Using Support Vector Machines"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP.2004.1422993"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2013.02.042"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2064307"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1995.479597"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1981.1171161"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/8440863\/08437138.pdf?arnumber=8437138","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,26]],"date-time":"2022-01-26T16:32:01Z","timestamp":1643214721000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8437138\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12]]},"references-count":38,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/taslp.2018.2865615","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,12]]}}}