{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:25:44Z","timestamp":1740122744817,"version":"3.37.3"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2023,9,1]],"date-time":"2023-09-01T00:00:00Z","timestamp":1693526400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,9,1]],"date-time":"2023-09-01T00:00:00Z","timestamp":1693526400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2023,9]]},"DOI":"10.1007\/s10772-023-10049-6","type":"journal-article","created":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T09:02:12Z","timestamp":1698310932000},"page":"801-816","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Unsupervised spoken term discovery using pseudo lexical induction"],"prefix":"10.1007","volume":"26","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6066-6889","authenticated-orcid":false,"given":"P.","family":"Sudhakar","sequence":"first","affiliation":[]},{"given":"K.","family":"Sreenivasa Rao","sequence":"additional","affiliation":[]},{"given":"Pabitra","family":"Mitra","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,10,26]]},"reference":[{"key":"10049_CR1","doi-asserted-by":"crossref","unstructured":"Bhati, S., Nayak, S., & Murty, K. S. R. (2018). Unsupervised segmentation of speech signals using kernel-gram matrices. In Computer vision, pattern recognition, image processing, and graphics (pp. 139\u2013149).","DOI":"10.1007\/978-981-13-0020-2_13"},{"issue":"10","key":"10049_CR2","doi-asserted-by":"publisher","first-page":"10008","DOI":"10.1088\/1742-5468\/2008\/10\/P10008","volume":"2008","author":"VD Blondel","year":"2008","unstructured":"Blondel, V. D., Guillaume, J.-L., Lambiotte, R., & Lefebvre, E. (2008). Fast unfolding of communities in large networks. Journal of Statistical Mechanics: Theory and Experiment, 2008(10), 10008.","journal-title":"Journal of statistical mechanics: theory and experiment"},{"issue":"6","key":"10049_CR3","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.70.066111","volume":"70","author":"A Clauset","year":"2004","unstructured":"Clauset, A., Newman, M. E., & Moore, C. (2004). Finding community structure in very large networks. Physical Review E, 70(6), 066111.","journal-title":"Physical review E"},{"key":"10049_CR4","doi-asserted-by":"crossref","unstructured":"Cui, J., Kingsbury, B., Ramabhadran, B., Sethy, A., Audhkhasi, K., Cui, X., Kislal, E., Mangu, L., Nussbaum-Thom, M., & Picheny, M., Tuske, Z., Golik, P., Schluter, R., Ney, H., Gales, M. J. F., Knill, K., Ragni, A., Wang, H., & Woodland, P. C. (2015). Multilingual representations for low resource speech recognition and keyword search. In 2015 IEEE workshop on automatic speech recognition and understanding (ASRU) (pp. 259\u2013266). IEEE.","DOI":"10.1109\/ASRU.2015.7404803"},{"key":"10049_CR5","doi-asserted-by":"crossref","unstructured":"Dumpala, S. H., Raju Alluri, K. N. R. K., Gangashetty, S. V., & Vuppala, A. K. (2015). Analysis of constraints on segmental dtw for the task of query-by-example spoken term detection. In 2015 Annual IEEE India conference (INDICON) (pp. 1\u20136).","DOI":"10.1109\/INDICON.2015.7443702"},{"key":"10049_CR6","unstructured":"Garofolo, J. S., Lamel, L. F., Fisher, W. M., Fiscus, J. G., & Pallett, D. S. (2020). Darpa timit acoustic-phonetic continous speech corpus. CD-ROM. NIST Speech Disc 1-1.1. NASA STI\/Recon Technical Report No. 93, 27403x."},{"key":"10049_CR7","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., & Schmidhuber, J. (2006). Connectionist temporal classification: Labelling unsegmented sequence data with recurrent neural networks. In Proceedings of the 23rd international conference on machine learning (pp. 369\u2013376).","DOI":"10.1145\/1143844.1143891"},{"key":"10049_CR8","doi-asserted-by":"crossref","unstructured":"Gupta, V., Ajmera, J., Kumar, A., & Verma, A. (2011). A language independent approach to audio search. In 12th Annual conference of the international speech communication association (INTERSPEECH).","DOI":"10.21437\/Interspeech.2011-380"},{"key":"10049_CR9","doi-asserted-by":"crossref","unstructured":"Jansen, A., & Van Durme, B. (2011). Efficient spoken term discovery using randomized algorithms. In 2011 IEEE workshop on automatic speech recognition understanding (ASRU) (pp. 401\u2013406).","DOI":"10.1109\/ASRU.2011.6163965"},{"key":"10049_CR10","doi-asserted-by":"crossref","unstructured":"Kamper, H., Livescu, K., & Goldwater, S. (2017). An embedded segmental k-means model for unsupervised segmentation and clustering of speech. In 2017 IEEE automatic speech recognition and understanding workshop (ASRU) (pp. 719\u2013726).","DOI":"10.1109\/ASRU.2017.8269008"},{"key":"10049_CR11","doi-asserted-by":"crossref","unstructured":"Karthik, P. D., Saranya, M., & Murthy, H. A. (2016). A fast query-by-example spoken term detection for zero resource languages. In 2016 International conference on signal processing and communications (SPCOM) (pp. 1\u20135). IEEE.","DOI":"10.1109\/SPCOM.2016.7746600"},{"key":"10049_CR12","doi-asserted-by":"crossref","unstructured":"Knill, K., Gales, M., Ragni, A., & Rath, S. P. (2014). Language independent and unsupervised acoustic models for speech recognition and keyword spotting. In Proceedings of the annual conference of the international speech communication association, INTERSPEECH (pp. 16\u201320).","DOI":"10.21437\/Interspeech.2014-4"},{"key":"10049_CR13","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1007\/BF00337288","volume":"43","author":"T Kohonen","year":"1982","unstructured":"Kohonen, T. (1982). Self-organized formation of topologically correct feature maps. Biological Cybernetics, 43, 59\u201369.","journal-title":"Biological Cybernetics"},{"key":"10049_CR14","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1007\/BF00317973","volume":"44","author":"T Kohonen","year":"1982","unstructured":"Kohonen, T. (1982). Analysis of a simple self-organizing process. Biological Cybernetics, 44, 135\u2013140.","journal-title":"Biological Cybernetics"},{"key":"10049_CR15","unstructured":"Ludusan, B., Versteegh, M., Jansen, A., Gravier, G., Cao, X.-N., Johnson, M., & Dupoux, E. (2014). Bridging the gap between speech technology and natural language processing: An evaluation toolbox for term discovery systems. In Proceedings of the 9th international conference on language resources and evaluation (LREC\u201914) (pp. 560\u2013567)."},{"key":"10049_CR16","doi-asserted-by":"crossref","unstructured":"Lyzinski, V., Sell, G., & Jansen, A. (2015). An evaluation of graph clustering methods for unsupervised term discovery. In INTERSPEECH 2015 (pp. 3209\u20133213).","DOI":"10.21437\/Interspeech.2015-646"},{"key":"10049_CR17","doi-asserted-by":"crossref","unstructured":"Mantena, G., & Prahallad, K. (2014). Use of articulatory bottle-neck features for query-by-example spoken term detection in low resource scenarios. In 2014 IEEE international conference on acoustics, speech and signal processing (ICASSP) (pp. 7128\u20137132).","DOI":"10.1109\/ICASSP.2014.6854983"},{"issue":"7","key":"10049_CR18","doi-asserted-by":"publisher","first-page":"2031","DOI":"10.1109\/TASL.2012.2194283","volume":"20","author":"A Muscariello","year":"2012","unstructured":"Muscariello, A., Gravier, G., & Bimbot, F. (2012). Unsupervised motif acquisition in speech via seeded discovery and template matching combination. IEEE Transactions on Audio, Speech, and Language Processing, 20(7), 2031\u20132044.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"6","key":"10049_CR19","doi-asserted-by":"publisher","first-page":"623","DOI":"10.1109\/TASSP.1980.1163491","volume":"28","author":"C Myers","year":"1980","unstructured":"Myers, C., Rabiner, L., & Rosenberg, A. (1980). Performance tradeoffs in dynamic time warping algorithms for isolated word recognition. IEEE Transactions on Acoustics, Speech, and Signal Processing, 28(6), 623\u2013635.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"issue":"5","key":"10049_CR20","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevE.70.056131","volume":"70","author":"ME Newman","year":"2004","unstructured":"Newman, M. E. (2004). Analysis of weighted networks. Physical review E, 70(5), 056131.","journal-title":"Physical review E"},{"key":"10049_CR21","doi-asserted-by":"crossref","unstructured":"Oosterveld, B., Veale, R., & Scheutz, M. (2017). A parallelized dynamic programming approach to zero resource spoken term discovery. In 2017 IEEE International conference on acoustics, speech and signal processing (ICASSP) (pp. 5800\u20135804).","DOI":"10.1109\/ICASSP.2017.7953268"},{"key":"10049_CR22","doi-asserted-by":"crossref","unstructured":"Park, A., & Glass, J. R. (2005). Towards unsupervised pattern discovery in speech. In IEEE workshop on automatic speech recognition and understanding (pp. 53\u201358).","DOI":"10.1109\/ASRU.2005.1566529"},{"key":"10049_CR23","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.909282","author":"AS Park","year":"2007","unstructured":"Park, A. S., & Glass, J. R. (2007). Unsupervised pattern discovery in speech. IEEE Transactions on Audio, Speech, and Language Processing. https:\/\/doi.org\/10.1109\/TASL.2007.909282","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"1","key":"10049_CR24","doi-asserted-by":"publisher","first-page":"186","DOI":"10.1109\/TASL.2007.909282","volume":"16","author":"AS Park","year":"2008","unstructured":"Park, A. S., & Glass, J. R. (2008). Unsupervised pattern discovery in speech. IEEE Transactions on Audio, Speech, and Language Processing, 16(1), 186\u2013197.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"key":"10049_CR25","doi-asserted-by":"crossref","unstructured":"Rakthanmanon, T., Campana, B., Mueen, A., Batista, G., Westover, B., Zhu, Q., Zakaria, J., & Keogh, E. (2012). Searching and mining trillions of time series subsequences under dynamic time warping. In Proceedings of the 18th ACM SIGKDD international conference on knowledge discovery and data mining (pp. 262\u2013270).","DOI":"10.1145\/2339530.2339576"},{"key":"10049_CR26","doi-asserted-by":"crossref","unstructured":"Ram, D., Miculicich, L., & Bourlard, H. (2019). Multilingual bottleneck features for query by example spoken term detection. In 2019 IEEE automatic speech recognition and understanding workshop (ASRU) (pp. 621\u2013628). IEEE.","DOI":"10.1109\/ASRU46091.2019.9003752"},{"key":"10049_CR27","doi-asserted-by":"crossref","unstructured":"R\u00e4s\u00e4nen, O., & Bland\u00f3n, M. A. C. (2020). Unsupervised discovery of recurring speech patterns using probabilistic adaptive metrics. Arxiv Preprint. arXiv: 2008.00731","DOI":"10.21437\/Interspeech.2020-1738"},{"key":"10049_CR28","doi-asserted-by":"crossref","unstructured":"R\u00e4s\u00e4nen, O., Doyle, G., & Frank, M. C. (2015). Unsupervised word discovery from speech using automatic segmentation into syllable-like units. In INTERSPEECH 2015 (pp. 3204\u20133208).","DOI":"10.21437\/Interspeech.2015-645"},{"key":"10049_CR29","doi-asserted-by":"crossref","unstructured":"Schatz, T., Bach, F., & Dupoux, E. (2018). Evaluating automatic speech recognition systems as quantitative models of cross-lingual phonetic category perception. Journal of the Acoustical Society of America, 143(5), 373\u2013377.","DOI":"10.1121\/1.5037615"},{"issue":"1","key":"10049_CR30","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1145\/230514.571645","volume":"27","author":"M Sipser","year":"1996","unstructured":"Sipser, M. (1996). Introduction to the theory of computation. ACM Sigact News, 27(1), 27\u201329.","journal-title":"ACM Sigact News"},{"key":"10049_CR31","unstructured":"Sung, M., & Lee, T. (2020). Unsupervised spoken term discovery based on re-clustering of hypothesized speech segments with siamese and triplet networks. CoRR. arXiv:abs\/2011.14062"},{"key":"10049_CR32","doi-asserted-by":"crossref","unstructured":"Thual, A., Dancette, C., Karadayi, J., Benjumea, J., & Dupoux, E. (2018). A k-nearest neighbours approach to unsupervised spoken term discovery. In 2018 IEEE spoken language technology workshop (SLT) (pp. 491\u2013497). IEEE.","DOI":"10.1109\/SLT.2018.8639515"},{"key":"10049_CR33","doi-asserted-by":"crossref","unstructured":"Thual, A., Dancette, C., Karadayi, J., Benjumea, J., & Dupoux, E. (2018). A k-nearest neighbours approach to unsupervised spoken term discovery. In 2018 IEEE spoken language technology workshop (SLT) (pp. 491\u2013497).","DOI":"10.1109\/SLT.2018.8639515"},{"key":"10049_CR34","doi-asserted-by":"crossref","unstructured":"Versteegh, M., Thiolli\u2018ere, R., Schatz, T., Cao, X.N., Anguera, X., Jansen, A., & Dupoux, E. (2015). The zero resource speech challenge. In INTERSPEECH-2015 (pp. 3169\u20133173).","DOI":"10.21437\/Interspeech.2015-638"},{"key":"10049_CR35","doi-asserted-by":"crossref","unstructured":"White, J., Oard, D., Jansen, A., Paik, J., & Sankepally, R. (2015). Using zero-resource spoken term discovery for ranked retrieval. In Proceedings of the 2015 conference of the North American chapter of the association for computational linguistics: Human language technologies (pp. 588\u2013597).","DOI":"10.3115\/v1\/N15-1061"},{"key":"10049_CR36","first-page":"1988","volume":"28","author":"Y Yuan","year":"2020","unstructured":"Yuan, Y., Xie, L., Leung, C.-C., Chen, H., & Ma, B. (2020). Fast query-by-example speech search using attention-based deep binary embeddings. IEEE\/ACM Transactions on Audio, Speech, and Language Processing, 28, 1988\u20132000.","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"10049_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, Y., & Glass, J. R. (2010). Towards multi-speaker unsupervised speech pattern discovery. In 2010 IEEE international conference on acoustics, speech and signal processing (pp. 4366\u20134369).","DOI":"10.1109\/ICASSP.2010.5495637"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-023-10049-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-023-10049-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-023-10049-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T14:12:03Z","timestamp":1699625523000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-023-10049-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9]]},"references-count":37,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2023,9]]}},"alternative-id":["10049"],"URL":"https:\/\/doi.org\/10.1007\/s10772-023-10049-6","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"type":"print","value":"1381-2416"},{"type":"electronic","value":"1572-8110"}],"subject":[],"published":{"date-parts":[[2023,9]]},"assertion":[{"value":"9 January 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 September 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 October 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}