{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T05:36:21Z","timestamp":1775108181219,"version":"3.50.1"},"reference-count":76,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T00:00:00Z","timestamp":1775001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001501","name":"University Grants Commission","doi-asserted-by":"publisher","award":["220520074950"],"award-info":[{"award-number":["220520074950"]}],"id":[{"id":"10.13039\/501100001501","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Speech Communication"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1016\/j.specom.2026.103368","type":"journal-article","created":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T16:23:00Z","timestamp":1771604580000},"page":"103368","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Object detection for cross-linguistic vowel analysis: A novel language-agnostic method for forensic speech processing"],"prefix":"10.1016","volume":"179","author":[{"given":"Soham","family":"Gangopadhyay","sequence":"first","affiliation":[]},{"given":"Inderpreet","family":"Singh","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7257-3754","authenticated-orcid":false,"given":"Prateek","family":"Pandya","sequence":"additional","affiliation":[]},{"given":"Ashish","family":"Mani","sequence":"additional","affiliation":[]},{"given":"Sumit","family":"Goswami","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"4","key":"10.1016\/j.specom.2026.103368_b1","doi-asserted-by":"crossref","first-page":"1205","DOI":"10.3390\/s21041205","article-title":"Deep learning-based detection of articulatory features in arabic and english speech","volume":"21","author":"Algabri","year":"2021","journal-title":"Sensors"},{"key":"10.1016\/j.specom.2026.103368_b2","doi-asserted-by":"crossref","first-page":"54663","DOI":"10.1109\/ACCESS.2020.2980452","article-title":"Towards deep object detection techniques for phoneme recognition","volume":"8","author":"Algabri","year":"2020","journal-title":"IEEE Access"},{"key":"10.1016\/j.specom.2026.103368_b3","series-title":"On the Theory and Practice of Voice Identification","year":"1979"},{"key":"10.1016\/j.specom.2026.103368_b4","series-title":"Introduction to Speech Processing: 2nd Edition","author":"B\u00e4ckstr\u00f6m","year":"2022"},{"issue":"4","key":"10.1016\/j.specom.2026.103368_b5","doi-asserted-by":"crossref","first-page":"591","DOI":"10.1006\/jpho.2002.0177","article-title":"Language-specific patterns of vowel-to-vowel coarticulation: Acoustic structures and their perceptual correlates","volume":"30","author":"Beddor","year":"2002","journal-title":"J. Phon."},{"key":"10.1016\/j.specom.2026.103368_b6","series-title":"Praat: Doing phonetics by computer","author":"Boersma","year":"2025"},{"issue":"2","key":"10.1016\/j.specom.2026.103368_b7","doi-asserted-by":"crossref","first-page":"597","DOI":"10.1121\/1.1911935","article-title":"Speaker identification by speech spectrograms: A scientists\u2019 view of its reliability for legal purposes","volume":"47","author":"Bolt","year":"1970","journal-title":"J. Acoust. Soc. Am."},{"issue":"2","key":"10.1016\/j.specom.2026.103368_b8","doi-asserted-by":"crossref","first-page":"1973","DOI":"10.1007\/s13369-022-07086-9","article-title":"Accent recognition using a spectrogram image feature-based convolutional neural network","volume":"48","author":"Cetin","year":"2023","journal-title":"Arab. J. Sci. Eng."},{"key":"10.1016\/j.specom.2026.103368_b9","first-page":"62","article-title":"Understanding kiswahili vowels","volume":"2","author":"Choge","year":"2009","journal-title":"J. Pan-African Stud."},{"issue":"2","key":"10.1016\/j.specom.2026.103368_b10","doi-asserted-by":"crossref","first-page":"167","DOI":"10.1016\/j.wocn.2009.08.004","article-title":"Unmasking the acoustic effects of vowel-to-vowel coarticulation: A statistical modeling approach","volume":"38","author":"Cole","year":"2010","journal-title":"J. Phon."},{"key":"10.1016\/j.specom.2026.103368_b11","series-title":"Acoustic structure of consonants","author":"Coleman","year":"2015"},{"key":"10.1016\/j.specom.2026.103368_b12","series-title":"Phonologies of Asia and Africa","first-page":"841","article-title":"Swahili phonology","author":"Contini-Morava","year":"1997"},{"key":"10.1016\/j.specom.2026.103368_b13","unstructured":"Csap\u00f3, T.G., Gr\u00f3sz, T., Mark\u00f3, A., Feld, B., Lulich, S.M., 2009. Patterns of Hungarian Vowel Production and Perception with Regard to Subglottal Resonances. In: Proceedings of Interspeech. pp. 2999\u20133002."},{"issue":"1\u20134","key":"10.1016\/j.specom.2026.103368_b14","first-page":"183","article-title":"A comparison of syllable length conditioning among languages","volume":"4","author":"Delattre","year":"1966","journal-title":"Int. Rev. Appl. Linguist. Lang. Teach."},{"key":"10.1016\/j.specom.2026.103368_b15","series-title":"Working Procedure Manual: Speaker Identification & Audio-Video Analysis","author":"Directorate of Forensic Science Services","year":"2022"},{"key":"10.1016\/j.specom.2026.103368_b16","series-title":"Methodological Guidelines for Best Practice in Forensic Semiautomatic and Automatic Speaker Recognition","author":"Drygajlo","year":"2015"},{"key":"10.1016\/j.specom.2026.103368_b17","series-title":"Best Practice Manual for the Methodology of Forensic Speaker Comparison","author":"ENFSI Forensic Speech and Audio Analysis Working Group","year":"2021"},{"key":"10.1016\/j.specom.2026.103368_b18","series-title":"Acoustic Theory of Speech Production","author":"Fant","year":"1960"},{"issue":"6","key":"10.1016\/j.specom.2026.103368_b19","doi-asserted-by":"crossref","first-page":"3623","DOI":"10.1121\/1.409931","article-title":"Auditory and categorical effects on cross-language vowel perception","volume":"95","author":"Flege","year":"1994","journal-title":"J. Acoust. Soc. Am."},{"key":"10.1016\/j.specom.2026.103368_b20","article-title":"Domain generalization for language-independent automatic speech recognition","volume":"Volume 5 - 2022","author":"Gao","year":"2022","journal-title":"Front. Artif. Intell."},{"issue":"10","key":"10.1016\/j.specom.2026.103368_b21","doi-asserted-by":"crossref","first-page":"2342","DOI":"10.3390\/electronics12102342","article-title":"Two-tier feature extraction with metaheuristics-based automated forensic speaker verification model","volume":"12","author":"Gaurav","year":"2023","journal-title":"Electronics"},{"issue":"2","key":"10.1016\/j.specom.2026.103368_b22","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1558\/ijsll.v18i2.293","article-title":"International practices in forensic speaker comparison","volume":"18","author":"Gold","year":"2011","journal-title":"Int. J. Speech Lang. Law"},{"key":"10.1016\/j.specom.2026.103368_b23","series-title":"Approaches To Hungarian","first-page":"29","article-title":"Hungarian vowel acoustics","volume":"Vol. 14","author":"G\u00f3sy","year":"2015"},{"key":"10.1016\/j.specom.2026.103368_b24","article-title":"Forensic speaker and gender identification from voice samples recorded through mobile phones and social media applications: A statistical and machine learning approach","volume":"223","author":"Gouri","year":"2024","journal-title":"Appl. Acoust."},{"issue":"4","key":"10.1016\/j.specom.2026.103368_b25","doi-asserted-by":"crossref","first-page":"822","DOI":"10.1353\/lan.0.0169","article-title":"Natural and unnatural constraints in Hungarian vowel harmony","volume":"85","author":"Hayes","year":"2009","journal-title":"Language"},{"issue":"5","key":"10.1016\/j.specom.2026.103368_b26","doi-asserted-by":"crossref","first-page":"3099","DOI":"10.1121\/1.411872","article-title":"Acoustic characteristics of American English vowels","volume":"97","author":"Hillenbrand","year":"1995","journal-title":"J. Acoust. Soc. Am."},{"key":"10.1016\/j.specom.2026.103368_b27","series-title":"The Bantu Languages","first-page":"42","article-title":"Bantu segmental phonology","author":"Hyman","year":"1999"},{"issue":"1","key":"10.1016\/j.specom.2026.103368_b28","doi-asserted-by":"crossref","DOI":"10.1080\/08839514.2025.2459476","article-title":"Spectrogram features-based automatic speaker identification for smart services","volume":"39","author":"Jahangir","year":"2025","journal-title":"Appl. Artif. Intell."},{"key":"10.1016\/j.specom.2026.103368_b29","series-title":"Artificial Intelligence and Speech Technology","first-page":"311","article-title":"Explaining spectrograms in machine learning: A study on neural networks for speech classification","volume":"Vol. 2267","author":"James","year":"2025"},{"issue":"4","key":"10.1016\/j.specom.2026.103368_b30","doi-asserted-by":"crossref","first-page":"1749","DOI":"10.1007\/s40747-020-00172-1","article-title":"Speaker recognition based on characteristic spectrograms and an improved self-organizing feature map neural network","volume":"7","author":"Jia","year":"2021","journal-title":"Complex Intell. Syst."},{"key":"10.1016\/j.specom.2026.103368_b31","series-title":"Ultralytics YOLOv5","author":"Jocher","year":"2020"},{"key":"10.1016\/j.specom.2026.103368_b32","series-title":"2021 IEEE International Conference on Smart Information Systems and Technologies","first-page":"1","article-title":"Speaker recognition from spectrogram images","author":"Kadyrov","year":"2021"},{"key":"10.1016\/j.specom.2026.103368_b33","article-title":"The voiceprint technique: Its structure and reliability","volume":"6","author":"Kamine","year":"1969","journal-title":"San Diego L. Rev."},{"issue":"1\u20132","key":"10.1016\/j.specom.2026.103368_b34","doi-asserted-by":"crossref","first-page":"73","DOI":"10.1177\/002383099203500207","article-title":"An acoustical basis for universal phonotactic constraints","volume":"35","author":"Kawasaki-Fukumori","year":"1992","journal-title":"Lang. Speech"},{"key":"10.1016\/j.specom.2026.103368_b35","series-title":"Proceedings of the 16th Annual Conference of the International Speech Communication Association","first-page":"3586","article-title":"Audio augmentation for speech recognition","author":"Ko","year":"2015"},{"key":"10.1016\/j.specom.2026.103368_b36","unstructured":"Kominek, J., Black, A.W., 2004. The CMU Arctic speech databases. In: 5th ISCA Workshop on Speech Synthesis. SSW 5, pp. 223\u2013224."},{"key":"10.1016\/j.specom.2026.103368_b37","series-title":"Vowels and Consonants","author":"Ladefoged","year":"2012"},{"key":"10.1016\/j.specom.2026.103368_b38","series-title":"The Voiceprint Mystique","first-page":"126","author":"Ladefoged","year":"1967"},{"issue":"3","key":"10.1016\/j.specom.2026.103368_b39","first-page":"350","article-title":"Mora-timed, stress-timed, and syllable-timed rhythm classes: Clues in English speech production by bilingual speakers","volume":"68","author":"Liu","year":"2021","journal-title":"Acta Linguist. Acad."},{"issue":"4","key":"10.1016\/j.specom.2026.103368_b40","doi-asserted-by":"crossref","first-page":"312","DOI":"10.1016\/j.specom.2007.10.005","article-title":"An investigation of dependencies between frequency components and speaker characteristics for text-independent speaker identification","volume":"50","author":"Lu","year":"2008","journal-title":"Speech Commun."},{"key":"10.1016\/j.specom.2026.103368_b41","series-title":"Interspeech 2017","article-title":"Montreal forced aligner: Trainable text-speech alignment using kaldi","author":"McAuliffe","year":"2017"},{"issue":"3","key":"10.1016\/j.specom.2026.103368_b42","first-page":"381","article-title":"Effect of syllable structure on vowel-to-vowel coarticulation","volume":"54","author":"Mok","year":"2012","journal-title":"Speech Commun."},{"key":"10.1016\/j.specom.2026.103368_b43","series-title":"An Introduction to the Psychology of Hearing","author":"Moore","year":"2012"},{"issue":"4","key":"10.1016\/j.specom.2026.103368_b44","doi-asserted-by":"crossref","first-page":"298","DOI":"10.1016\/j.scijus.2009.09.002","article-title":"Forensic voice comparison and the paradigm shift","volume":"49","author":"Morrison","year":"2009","journal-title":"Sci. Justice"},{"issue":"3","key":"10.1016\/j.specom.2026.103368_b45","doi-asserted-by":"crossref","first-page":"245","DOI":"10.1016\/j.scijus.2013.07.004","article-title":"Distinguishing between forensic science and forensic pseudoscience: Testing of validity and reliability, and approaches to forensic voice comparison","volume":"54","author":"Morrison","year":"2014","journal-title":"Sci. Justice"},{"key":"10.1016\/j.specom.2026.103368_b46","series-title":"The Routledge Handbook of Phonetics","first-page":"599","article-title":"Introduction to forensic voice comparison","author":"Morrison","year":"2019"},{"issue":"3","key":"10.1016\/j.specom.2026.103368_b47","doi-asserted-by":"crossref","first-page":"299","DOI":"10.1016\/j.scijus.2021.02.002","article-title":"Consensus on validation of forensic voice comparison","volume":"61","author":"Morrison","year":"2021","journal-title":"Sci. Justice"},{"issue":"1","key":"10.1016\/j.specom.2026.103368_b48","doi-asserted-by":"crossref","first-page":"576","DOI":"10.1121\/1.2931949","article-title":"Acoustic and perceptual similarity of Japanese and American English vowels","volume":"124","author":"Nishi","year":"2008","journal-title":"J. Acoust. Soc. Am."},{"key":"10.1016\/j.specom.2026.103368_b49","series-title":"The Phonetic Bases of Speaker Recognition","first-page":"1","article-title":"The phonetic bases of speaker recognition","author":"Nolan","year":"2009"},{"issue":"2","key":"10.1016\/j.specom.2026.103368_b50","doi-asserted-by":"crossref","first-page":"143","DOI":"10.1558\/sll.2005.12.2.143","article-title":"A case for formant analysis in forensic speaker identification","volume":"12","author":"Nolan","year":"2005","journal-title":"Int. J. Speech Lang. Law"},{"key":"10.1016\/j.specom.2026.103368_b51","series-title":"Swahili and Sabaki: A Linguistic History","author":"Nurse","year":"1993"},{"key":"10.1016\/j.specom.2026.103368_b52","doi-asserted-by":"crossref","unstructured":"Panayotov, V., Chen, G., Povey, D., Khudanpur, S., 2015. Librispeech: An ASR corpus based on public domain audio books. In: 2015 IEEE International Conference on Acoustics, Speech and Signal Processing. ICASSP, pp. 5206\u20135210.","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"10.1016\/j.specom.2026.103368_b53","series-title":"Proceedings of Interspeech 2019","first-page":"2613","article-title":"SpecAugment: A simple data augmentation method for automatic speech recognition","author":"Park","year":"2019"},{"key":"10.1016\/j.specom.2026.103368_b54","series-title":"CSS10: A collection of single speaker speech datasets for 10 languages","author":"Park","year":"2019"},{"key":"10.1016\/j.specom.2026.103368_b55","series-title":"Advances in Neural Information Processing Systems 32","first-page":"8024","article-title":"PyTorch: An imperative style, high-performance deep learning library","author":"Paszke","year":"2019"},{"issue":"11","key":"10.1016\/j.specom.2026.103368_b56","doi-asserted-by":"crossref","first-page":"16654","DOI":"10.1007\/s11227-024-06098-6","article-title":"Automatic phoneme recognition by deep neural networks","volume":"80","author":"Pereira","year":"2024","journal-title":"J. Supercomput."},{"issue":"5","key":"10.1016\/j.specom.2026.103368_b57","doi-asserted-by":"crossref","first-page":"1574","DOI":"10.1121\/1.394510","article-title":"Evidence for mora timing in Japanese","volume":"81","author":"Port","year":"1987","journal-title":"J. Acoust. Soc. Am."},{"issue":"3","key":"10.1016\/j.specom.2026.103368_b58","doi-asserted-by":"crossref","first-page":"265","DOI":"10.1016\/S0010-0277(99)00058-X","article-title":"Correlates of linguistic rhythm in the speech signal","volume":"73","author":"Ramus","year":"1999","journal-title":"Cognition"},{"issue":"6","key":"10.1016\/j.specom.2026.103368_b59","doi-asserted-by":"crossref","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","article-title":"Faster R-CNN: Towards real-time object detection with region proposal networks","volume":"39","author":"Ren","year":"2017","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.specom.2026.103368_b60","series-title":"A kiswahili dataset for development of text-to-speech system","author":"Rono","year":"2021"},{"key":"10.1016\/j.specom.2026.103368_b61","series-title":"Forensic Speaker Identification","author":"Rose","year":"2002"},{"key":"10.1016\/j.specom.2026.103368_b62","series-title":"Identifying sounds in spectrograms","author":"Russell","year":"2005"},{"issue":"5736","key":"10.1016\/j.specom.2026.103368_b63","doi-asserted-by":"crossref","first-page":"892","DOI":"10.1126\/science.1111565","article-title":"The coming paradigm shift in forensic identification science","volume":"309","author":"Saks","year":"2005","journal-title":"Science"},{"issue":"3","key":"10.1016\/j.specom.2026.103368_b64","doi-asserted-by":"crossref","first-page":"1839","DOI":"10.1007\/s00034-023-02542-9","article-title":"Deep learning-based end-to-end speaker identification using time\u2013frequency representation of speech signal","volume":"43","author":"Saritha","year":"2024","journal-title":"Circuits Systems Signal Process."},{"issue":"3","key":"10.1016\/j.specom.2026.103368_b65","doi-asserted-by":"crossref","first-page":"233","DOI":"10.1006\/jpho.1997.0044","article-title":"Major trends in vowel system inventories","volume":"25","author":"Schwartz","year":"1997","journal-title":"J. Phon."},{"key":"10.1016\/j.specom.2026.103368_b66","series-title":"2024 2nd International Conference on Disruptive Technologies","first-page":"159","article-title":"Phoneme based hindi speech recognition using deep learning","author":"Singh","year":"2024"},{"key":"10.1016\/j.specom.2026.103368_b67","series-title":"Vowel detection dataset","author":"Singh","year":"2024"},{"key":"10.1016\/j.specom.2026.103368_b68","series-title":"The Phonology of Hungarian","author":"Sipt\u00e1r","year":"2000"},{"issue":"3","key":"10.1016\/j.specom.2026.103368_b69","doi-asserted-by":"crossref","first-page":"2248","DOI":"10.1121\/10.0036222","article-title":"Formant-based vowel categorization for cross-lingual phone recognition","volume":"157","author":"Stepanovi\u0107","year":"2025","journal-title":"J. Acoust. Soc. Am."},{"issue":"4","key":"10.1016\/j.specom.2026.103368_b70","doi-asserted-by":"crossref","first-page":"1872","DOI":"10.1121\/1.1458026","article-title":"Toward a model for lexical access based on acoustic landmarks and distinctive features","volume":"111","author":"Stevens","year":"2002","journal-title":"J. Acoust. Soc. Am."},{"issue":"3","key":"10.1016\/j.specom.2026.103368_b71","doi-asserted-by":"crossref","first-page":"695","DOI":"10.1121\/1.389855","article-title":"Dynamic specification of coarticulated vowels","volume":"74","author":"Strange","year":"1983","journal-title":"J. Acoust. Soc. Am."},{"key":"10.1016\/j.specom.2026.103368_b72","series-title":"Ritesh sinha v. State of uttar pradesh and anr.","author":"Supreme Court of India","year":"2019"},{"key":"10.1016\/j.specom.2026.103368_b73","series-title":"Primary spectral moments of the first four vowel formants as a source of speaker discriminant information","author":"Suthar","year":"2023"},{"key":"10.1016\/j.specom.2026.103368_b74","doi-asserted-by":"crossref","first-page":"56","DOI":"10.1121\/AT.2020.16.2.56","article-title":"Speech acoustics of the world\u2019s languages","volume":"16","author":"Tucker","year":"2020","journal-title":"Acoust. Today"},{"issue":"1\u20132","key":"10.1016\/j.specom.2026.103368_b75","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1159\/000028486","article-title":"Japanese mora-timing: A review","volume":"58","author":"Warner","year":"2001","journal-title":"Phonetica"},{"key":"10.1016\/j.specom.2026.103368_b76","series-title":"CSTR VCTK corpus: English multi-speaker corpus for CSTR voice cloning toolkit (version 0.92)","author":"Yamagishi","year":"2019"}],"container-title":["Speech Communication"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167639326000166?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167639326000166?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T04:09:11Z","timestamp":1775102951000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167639326000166"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4]]},"references-count":76,"alternative-id":["S0167639326000166"],"URL":"https:\/\/doi.org\/10.1016\/j.specom.2026.103368","relation":{"is-supplemented-by":[{"id-type":"uri","id":"https:\/\/universe.roboflow.com\/inderpreet-singh-soham-gangopadhyay\/vowel-detection","asserted-by":"subject"}]},"ISSN":["0167-6393"],"issn-type":[{"value":"0167-6393","type":"print"}],"subject":[],"published":{"date-parts":[[2026,4]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Object detection for cross-linguistic vowel analysis: A novel language-agnostic method for forensic speech processing","name":"articletitle","label":"Article Title"},{"value":"Speech Communication","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.specom.2026.103368","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"103368"}}