{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,27]],"date-time":"2025-07-27T07:37:14Z","timestamp":1753601834394,"version":"3.41.2"},"reference-count":50,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2018,9,15]],"date-time":"2018-09-15T00:00:00Z","timestamp":1536969600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J AUDIO SPEECH MUSIC PROC."],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1186\/s13636-018-0137-5","type":"journal-article","created":{"date-parts":[[2018,9,16]],"date-time":"2018-09-16T03:47:07Z","timestamp":1537069627000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["AudioPairBank: towards a large-scale tag-pair-based audio content analysis"],"prefix":"10.1186","volume":"2018","author":[{"given":"Sebastian","family":"S\u00e4ger","sequence":"first","affiliation":[]},{"given":"Benjamin","family":"Elizalde","sequence":"additional","affiliation":[]},{"given":"Damian","family":"Borth","sequence":"additional","affiliation":[]},{"given":"Christian","family":"Schulze","sequence":"additional","affiliation":[]},{"given":"Bhiksha","family":"Raj","sequence":"additional","affiliation":[]},{"given":"Ian","family":"Lane","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,9,15]]},"reference":[{"key":"137_CR1","unstructured":"P. Sch\u00e4uble, Multimedia information retrieval: content-based information retrieval from large text and audio databases, vol. 397 (Springer Science & Business Media, 2012)."},{"key":"137_CR2","unstructured":"P. Natarajan, P. Natarajan, S. Wu, X. Zhuang, A. Vazquez Reina, S. N. Vitaladevuni, K. Tsourides, C. Andersen, R. Prasad, G. Ye, D. Liu, S. -F. Chang, I. Saleemi, M. Shah, Y. Ng, B. White, L. Davis, A. Gupta, I. Haritaoglu, in Proceedings of TRECVID 2012. BBN VISER TRECVID 2012 multimedia event detection and multimedia event recounting systems (NISTUSA, 2012)."},{"key":"137_CR3","unstructured":"Z. Lan, L. Jiang, S. -I. Yu, C. Gao, S. Rawat, Y. Cai, S. Xu, H. Shen, X. Li, Y. Wang, W. Sze, Y. Yan, Z. Ma, N. Ballas, D. Meng, W. Tong, Y. Yang, S. Burger, F. Metze, R. Singh, B. Raj, R. Stern, T. Mitamura, E. Nyberg, A. Hauptmann, in Proceedings of TRECVID 2013. Informedia @ TRECVID 2013 (NISTUSA, 2013)."},{"key":"137_CR4","unstructured":"H. Cheng, J. Liu, S. Ali, O. Javed, Q. Yu, A. Tamrakar, A. Divakaran, H. S. Sawhney, R. Manmatha, J. Allan, et al., in Proceedings of TRECVID. Sri-sarnoff aurora system at trecvid 2012: Multimedia event detection and recounting, (2012)."},{"key":"137_CR5","doi-asserted-by":"crossref","unstructured":"J. Maxime, X. Alameda-Pineda, L. Girin, R. Horaud, in 2014 IEEE International Conference on Robotics and Automation (ICRA). Sound representation and classification benchmark for domestic robots (IEEE, 2014), pp. 6285\u20136292.","DOI":"10.1109\/ICRA.2014.6907786"},{"key":"137_CR6","doi-asserted-by":"crossref","unstructured":"M. Janvier, X. Alameda-Pineda, L. Girinz, R. Horaud, in 2012 12th IEEE-RAS International Conference on Humanoid Robots (Humanoids 2012). Sound-event recognition with a companion humanoid (IEEE, 2012), pp. 104\u2013111.","DOI":"10.1109\/HUMANOIDS.2012.6651506"},{"key":"137_CR7","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1145\/192426.192443","volume-title":"Proceedings of the 7th Annual ACM Symposium on User Interface Software and Technology, UIST \u201994","author":"W. K. Edwards","year":"1994","unstructured":"W. K. Edwards, E. D. Mynatt, in Proceedings of the 7th Annual ACM Symposium on User Interface Software and Technology, UIST \u201994. An architecture for transforming graphical interfaces (ACMNew York, 1994), pp. 39\u201347. https:\/\/doi.org\/10.1145\/192426.192443 ."},{"key":"137_CR8","doi-asserted-by":"publisher","first-page":"1128","DOI":"10.1109\/EUSIPCO.2016.7760424","volume-title":"Signal Processing Conference (EUSIPCO), 2016 24th European","author":"A. Mesaros","year":"2016","unstructured":"A. Mesaros, T. Heittola, T. Virtanen, in Signal Processing Conference (EUSIPCO), 2016 24th European. TUT database for acoustic scene classification and sound event detection (IEEEBudapest, 2016), pp. 1128\u20131132."},{"key":"137_CR9","doi-asserted-by":"crossref","unstructured":"J. Salamon, C. Jacoby, J. P. Bello, in Proceedings of the 22nd ACM international conference on Multimedia. A dataset and taxonomy for urban sound research (Orlando, 2014), pp. 1041\u20131044.","DOI":"10.1145\/2647868.2655045"},{"issue":"1","key":"137_CR10","doi-asserted-by":"publisher","first-page":"840","DOI":"10.1121\/1.4807800","volume":"134","author":"M. Yang","year":"2013","unstructured":"M. Yang, J. Kang, Psychoacoustical evaluation of natural and urban sounds in soundscapes. J. Acoust. Soc. Am. 134(1), 840\u2013851 (2013).","journal-title":"J. Acoust. Soc. Am"},{"key":"137_CR11","unstructured":"K. Hiramatsu, K. Minoura, in Proc. Internoise. Response to urban sounds in relation to the residents\u2019 connection with the sound sources (Societ\u00e9 Fran\u00e7aise d\u2019Acoustique. CD-Rom. Niza (Francia), 2000)."},{"key":"137_CR12","unstructured":"D. Giannoulis, E. Benetos, D. Stowell, M. Rossignol, M. Lagrange, M. D. Plumbley, Detection and classification of acoustic scenes and events: an IEEE AASP challenge."},{"key":"137_CR13","unstructured":"K. J. Piczak, in Proceedings of the 23rd Annual ACM Conference on Multimedia Conference, MM \u201915, Brisbane, Australia, October 26 - 30. ESC: dataset for environmental sound classification, (2015)."},{"key":"137_CR14","doi-asserted-by":"crossref","unstructured":"J. Salamon, C. Jacoby, J. P. Bello, in Proceedings of the 22nd ACM international conference on Multimedia. A dataset and taxonomy for urban sound research (Orlando, 2014), pp. 1041\u20131044.","DOI":"10.1145\/2647868.2655045"},{"key":"137_CR15","doi-asserted-by":"publisher","first-page":"776","DOI":"10.1109\/ICASSP.2017.7952261","volume-title":"Acoustics, Speech and Signal Processing (ICASSP), 2017 IEEE International Conference on","author":"J. F. Gemmeke","year":"2017","unstructured":"J. F. Gemmeke, D. P. W. Ellis, D. Freedman, A. Jansen, W. Lawrence, R. C. Moore, M. Plakal, M. Ritter, in Acoustics, Speech and Signal Processing (ICASSP), 2017 IEEE International Conference on. Audio set: an ontology and human-labeled dataset for audio events (IEEENew Orleans, 2017), pp. 776\u2013780."},{"key":"137_CR16","unstructured":"A. Mesaros, T. Heittola, A. Diment, B. Elizalde, A. Shah, E. Vincent, B. Raj, T. Virtanen, in Proceedings of the Detection and Classification of Acoustic Scenes and Events 2017 Workshop (DCASE2017). DCASE 2017 challenge setup: tasks, datasets and baseline system, (2017)."},{"issue":"3","key":"137_CR17","doi-asserted-by":"publisher","first-page":"1694","DOI":"10.1121\/1.4977749","volume":"141","author":"S. Ntalampiras","year":"2017","unstructured":"S. Ntalampiras, A transfer learning framework for predicting the emotional content of generalized sound events. J. Acoust. Soc. Am.141(3), 1694\u20131701 (2017).","journal-title":"J. Acoust. Soc. Am."},{"issue":"1","key":"137_CR18","doi-asserted-by":"publisher","first-page":"315","DOI":"10.3758\/BRM.40.1.315","volume":"40","author":"R. A. Stevenson","year":"2008","unstructured":"R. A. Stevenson, T. W. James, Affective auditory stimuli: characterization of the International Affective Digitized Sounds (IADS) by discrete emotional categories. Behav. Res. Methods. 40(1), 315\u2013321 (2008).","journal-title":"Behav. Res. Methods"},{"issue":"10","key":"137_CR19","doi-asserted-by":"publisher","first-page":"1854","DOI":"10.1109\/TASLP.2016.2587218","volume":"24","author":"G. Lafay","year":"2016","unstructured":"G. Lafay, M. Lagrange, M. Rossignol, E. Benetos, A. Roebel, A morphological model for simulating acoustic scenes and its application to sound event detection. IEEE\/ACM Trans. Audio Speech Lang. Process. 24(10), 1854\u20131864 (2016).","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process"},{"key":"137_CR20","doi-asserted-by":"publisher","first-page":"96","DOI":"10.1016\/j.neubiorev.2016.05.002","volume":"68","author":"S. Fr\u00fchholz","year":"2016","unstructured":"S. Fr\u00fchholz, W. Trost, S. A. Kotz, The sound of emotions\u2014towards a unifying neural network perspective of affective sound processing. Neurosci. Biobehav. Rev. 68:, 96\u2013110 (2016).","journal-title":"Neurosci. Biobehav. Rev"},{"key":"137_CR21","first-page":"38","volume-title":"Human\u2013Computer Interaction","author":"A. Darvishi","year":"1995","unstructured":"A. Darvishi, E. Munteanu, V. Guggiana, H. Schauer, M. Motavalli, M. Rauterberg, in Human\u2013Computer Interaction. Designing environmental sounds based on the results of interaction between objects in the real world (SpringerBoston, 1995), pp. 38\u201342."},{"key":"137_CR22","unstructured":"R. M. Schafer, The soundscape: our sonic environment and the tuning of the world (Inner Traditions\/Bear, 1993)."},{"issue":"3","key":"137_CR23","doi-asserted-by":"publisher","first-page":"1252","DOI":"10.1121\/1.1635840","volume":"115","author":"B. Gygi","year":"2004","unstructured":"B. Gygi, G. R. Kidd, C. S. Watson, Spectral-temporal factors in the identification of environmental sounds. J. Acoust. Soc. Am. 115(3), 1252\u20131265 (2004).","journal-title":"J. Acoust. Soc. Am"},{"issue":"1","key":"137_CR24","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1177\/0013916587191005","volume":"19","author":"J. A. Ballas","year":"1987","unstructured":"J. A. Ballas, J. H. Howard Jr, Interpreting the language of environmental sounds. Environ. Behav. 19(1), 91\u2013114 (1987).","journal-title":"Environ. Behav"},{"issue":"6","key":"137_CR25","first-page":"865","volume":"92","author":"D. Dubois","year":"2006","unstructured":"D. Dubois, C. Guastavino, M. Raimbault, A cognitive approach to urban soundscapes: using verbal data to access everyday life auditory categories. Acta Acustica U. Acustica. 92(6), 865\u2013874 (2006).","journal-title":"Acta Acustica U. Acustica"},{"key":"137_CR26","doi-asserted-by":"crossref","unstructured":"A. Owens, P. Isola, J. McDermott, A. Torralba, E. H. Adelson, W. T. Freeman, in Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. Visually indicated sounds, (2016).","DOI":"10.1109\/CVPR.2016.264"},{"key":"137_CR27","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1145\/2502081.2502282","volume-title":"Proceedings of the 21st ACM International Conference on Multimedia","author":"D. Borth","year":"2013","unstructured":"D. Borth, R. Ji, T. Chen, T. Breuel, S. -F. Chang, in Proceedings of the 21st ACM International Conference on Multimedia. Large-scale visual sentiment ontology and detectors using adjective noun pairs, MM \u201913 (ACMNew York, 2013), pp. 223\u2013232."},{"key":"137_CR28","unstructured":"T. Chen, D. Borth, T. Darrell, S. -F. Chang, Deepsentibank: visual sentiment concept classification with deep convolutional neural networks. arXiv preprint arXiv:1410.8586 (2014)."},{"issue":"6","key":"137_CR29","doi-asserted-by":"publisher","first-page":"633","DOI":"10.1016\/j.cviu.2013.01.013","volume":"117","author":"J. M. Chaquet","year":"2013","unstructured":"J. M. Chaquet, E. J. Carmona, A. Fern\u00e1ndez-Caballero, A survey of video datasets for human action and activity recognition. Comp. Vision Image Underst. 117(6), 633\u2013659 (2013).","journal-title":"Comp. Vision Image Underst"},{"issue":"6","key":"137_CR30","doi-asserted-by":"publisher","first-page":"976","DOI":"10.1016\/j.imavis.2009.11.014","volume":"28","author":"R. Poppe","year":"2010","unstructured":"R. Poppe, A survey on vision-based human action recognition. Image Vis. Comput. 28(6), 976\u2013990 (2010).","journal-title":"Image Vis. Comput"},{"key":"137_CR31","unstructured":"S. Baccianella, A. Esuli, F. Sebastiani, in Lrec, 10. Sentiwordnet 3.0: An enhanced lexical resource for sentiment analysis and opinion mining, (2010), pp. 2200\u20132204."},{"issue":"1","key":"137_CR32","first-page":"35","volume":"9","author":"J. Zhong","year":"2012","unstructured":"J. Zhong, Y. Cheng, S. Yang, L. Wen, Music sentiment classification integrating audio with lyrics. J. Inf. Comput. Sci.9(1), 35\u201344 (2012).","journal-title":"J. Inf. Comput. Sci."},{"issue":"4","key":"137_CR33","doi-asserted-by":"publisher","first-page":"621","DOI":"10.1145\/234782.234805","volume":"27","author":"R. W. Picard","year":"1995","unstructured":"R. W. Picard, Computer learning of subjectivity. ACM Comput. Surv. (CSUR). 27(4), 621\u2013623 (1995).","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"137_CR34","doi-asserted-by":"publisher","first-page":"1349","DOI":"10.1145\/2733373.2806415","volume-title":"Proceedings of the 23rd ACM International Conference on Multimedia","author":"M. Soleymani","year":"2015","unstructured":"M. Soleymani, Y. -H. Yang, Y. -G. Jiang, S. -F. Chang, in Proceedings of the 23rd ACM International Conference on Multimedia. Asm\u201915: The 1st international workshop on affect and sentiment in multimedia (ACMNew York, 2015), pp. 1349\u20131349."},{"key":"137_CR35","unstructured":"S. Sager, Audiopairbank - large-scale vocabulary for audio concepts and detectors, Master\u2019s thesis (Technische Universitat KaisersLautern, 2016)."},{"issue":"2","key":"137_CR36","doi-asserted-by":"publisher","first-page":"224","DOI":"10.1016\/j.apacoust.2012.05.010","volume":"74","author":"W. Davies","year":"2013","unstructured":"W. Davies, M. Adams, N. Bruce, R. Cain, A. Carlyle, P. Cusack, D. Hall, K. Hume, A. Irwin, P. Jennings, M. Marselle, C. Plack, J. Poxon, Perception of soundscapes: An interdisciplinary approach. Appl. Acoust. 74(2), 224\u2013231 (2013).","journal-title":"Appl. Acoust"},{"issue":"5","key":"137_CR37","doi-asserted-by":"publisher","first-page":"2836","DOI":"10.1121\/1.3493436","volume":"128","author":"\u00d6. Axelsson","year":"2010","unstructured":"\u00d6. Axelsson, M. E. Nilsson, B. Berglund, A principal components model of soundscape perception. J. Acoust. Soc. Am. 128(5), 2836\u20132846 (2010).","journal-title":"J. Acoust. Soc. Am"},{"key":"137_CR38","unstructured":"D. Stowell, M. Plumbley, in Audio Engineering Society Conference: 53rd International Conference: Semantic Audio. An open dataset for research on audio field recording archives: freefield1010, (2014)."},{"issue":"2","key":"137_CR39","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1145\/2812802","volume":"59","author":"B. Thomee","year":"2016","unstructured":"B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni, D. Poland, D. Borth, L. -J. Li, Yfcc100m: The new data in multimedia research. Commun. ACM. 59(2), 64\u201373 (2016).","journal-title":"Commun. ACM"},{"key":"137_CR40","doi-asserted-by":"crossref","unstructured":"K. J. Piczak, in IEEE 25th International Workshop on Machine Learning for Signal Processing (MLSP). Environmental sound classification with convolutional neural networks (IEEE, 2015).","DOI":"10.1109\/MLSP.2015.7324337"},{"key":"137_CR41","doi-asserted-by":"crossref","unstructured":"H. Lei, J. Choi, A. Janin, G. Friedland, in 2011 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). User verification: matching the uploaders of videos across accounts (IEEE, 2011), pp. 2404\u20132407.","DOI":"10.1109\/ICASSP.2011.5946968"},{"issue":"6","key":"137_CR42","doi-asserted-by":"publisher","first-page":"1142","DOI":"10.1109\/TASL.2009.2017438","volume":"17","author":"S. Chu","year":"2009","unstructured":"S. Chu, S. Narayanan, C. C. J. Kuo, Environmental sound recognition with time-frequency audio features. IEEE Trans. Audio Speech Lang. Process. 17(6), 1142\u20131158 (2009).","journal-title":"IEEE Trans. Audio Speech Lang. Process"},{"key":"137_CR43","unstructured":"B. Mathieu, S. Essid, T. Fillon, J. Prado, G. Richard, in Proceedings of the 11th International Society for Music Information Retrieval Conference. Yaafe, an easy to use and efficient audio feature extraction software (Utrecht, 2010)."},{"key":"137_CR44","doi-asserted-by":"crossref","unstructured":"F. Metze, S. Rawat, Y. Wang, in Multimedia and Expo (ICME), 2014 IEEE International Conference on. Improved audio features for large-scale multimedia event detection (IEEE, 2014), pp. 1\u20136.","DOI":"10.1109\/ICME.2014.6890234"},{"key":"137_CR45","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1145\/1178677.1178722","volume-title":"MIR \u201906: Proceedings of the 8th ACM International Workshop on Multimedia Information Retrieval","author":"A. F. Smeaton","year":"2006","unstructured":"A. F. Smeaton, P. Over, W. Kraaij, in MIR \u201906: Proceedings of the 8th ACM International Workshop on Multimedia Information Retrieval. Evaluation campaigns and TRECVid (Association for Computing MachineryNew York, 2006), pp. 321\u2013330."},{"key":"137_CR46","first-page":"2825","volume":"12","author":"F. Pedregosa","year":"2011","unstructured":"F. Pedregosa, G. Varoquaux, A. Gramfort, V. Michel, B. Thirion, O. Grisel, M. Blondel, et al., Scikit-learn: machine learning in Python. J. Mach. Learn. Res. 12:, 2825\u20132830 (2011).","journal-title":"J. Mach. Learn. Res"},{"key":"137_CR47","doi-asserted-by":"crossref","unstructured":"S. Hershey, S. Chaudhuri, D. P. Ellis, J. F. Gemmeke, A. Jansen, R. C. Moore, M. Plakal, D. Platt, R. A. Saurous, B. Seybold, et al., in Acoustics, Speech and Signal Processing (ICASSP), 2017 IEEE International Conference On. CNN architectures for large-scale audio classification (IEEE, 2017), pp. 131\u2013135.","DOI":"10.1109\/ICASSP.2017.7952132"},{"key":"137_CR48","unstructured":"A. Neviarouskaya, H. Prendinger, M. Ishizuka, in 2009 3rd International Conference on Affective Computing and Intelligent Interaction and Workshops. Sentiful: generating a reliable lexicon for sentiment analysis, (2009), pp. 1\u20136."},{"key":"137_CR49","doi-asserted-by":"crossref","unstructured":"A. Datta, M. Shah, N. D. V. Lobo, in Pattern Recognition, 2002. Proceedings. 16th International Conference On, vol 1. Person-on-person violence detection in video data (IEEE, 2002), pp. 433\u2013438.","DOI":"10.1109\/ICPR.2002.1044748"},{"issue":"6","key":"137_CR50","first-page":"945","volume":"92","author":"C. Guastavino","year":"2006","unstructured":"C. Guastavino, The ideal urban soundscape: investigating the sound quality of french cities. Acta Acustica U. Acustica. 92(6), 945\u2013951 (2006).","journal-title":"Acta Acustica U. Acustica"}],"container-title":["EURASIP Journal on Audio, Speech, and Music Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13636-018-0137-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1186\/s13636-018-0137-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1186\/s13636-018-0137-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,7]],"date-time":"2025-07-07T11:20:21Z","timestamp":1751887221000},"score":1,"resource":{"primary":{"URL":"https:\/\/asmp-eurasipjournals.springeropen.com\/articles\/10.1186\/s13636-018-0137-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,9,15]]},"references-count":50,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2018,12]]}},"alternative-id":["137"],"URL":"https:\/\/doi.org\/10.1186\/s13636-018-0137-5","relation":{},"ISSN":["1687-4722"],"issn-type":[{"type":"electronic","value":"1687-4722"}],"subject":[],"published":{"date-parts":[[2018,9,15]]},"assertion":[{"value":"24 January 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 August 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 September 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare that they have no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Publisher\u2019s Note"}}],"article-number":"12"}}