{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:24:12Z","timestamp":1750220652523,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":72,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,12,7]],"date-time":"2020-12-07T00:00:00Z","timestamp":1607299200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,12,7]]},"DOI":"10.1145\/3427228.3427274","type":"proceedings-article","created":{"date-parts":[[2020,12,9]],"date-time":"2020-12-09T22:20:18Z","timestamp":1607552418000},"page":"939-950","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["SEEF-ALDR: A Speaker Embedding Enhancement Framework via Adversarial Learning based Disentangled Representation"],"prefix":"10.1145","author":[{"given":"Jianwei","family":"Tai","sequence":"first","affiliation":[{"name":"Key Laboratory of Network Assessment Technology, and Institute of Information Engineering, Chinese Academy of Sciences; Beijing Key Laboratory of Network Security and Protection Technology, and Institute of Information Engineering, Chinese Academy of Sciences; School of Cyber Security, University of Chinese Academy of Sciences, China"}]},{"given":"Xiaoqi","family":"Jia","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences"}]},{"given":"Qingjia","family":"Huang","sequence":"additional","affiliation":[{"name":"Key Laboratory of Network Assessment Technology, and Institute of Information Engineering, Chinese Academy of Sciences; Beijing Key Laboratory of Network Security and Protection Technology, and Institute of Information Engineering, Chinese Academy of Sciences; School of Cyber Security, University of Chinese Academy of Sciences, China"}]},{"given":"Weijuan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Key Laboratory of Network Assessment Technology, and Institute of Information Engineering, Chinese Academy of Sciences; Beijing Key Laboratory of Network Security and Protection Technology, and Institute of Information Engineering, Chinese Academy of Sciences; School of Cyber Security, University of Chinese Academy of Sciences, China"}]},{"given":"Haichao","family":"Du","sequence":"additional","affiliation":[{"name":"Key Laboratory of Network Assessment Technology, and Institute of Information Engineering, Chinese Academy of Sciences; Beijing Key Laboratory of Network Security and Protection Technology, and Institute of Information Engineering, Chinese Academy of Sciences; School of Cyber Security, University of Chinese Academy of Sciences, China"}]},{"given":"Shengzhi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Metropolitan College Boston University, USA"}]}],"member":"320","published-online":{"date-parts":[[2020,12,8]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"Gautam Bhattacharya Md\u00a0Jahangir Alam and Patrick Kenny. 2017. Deep Speaker Embeddings for Short-Duration Speaker Verification.. In Interspeech. 1517\u20131521.  Gautam Bhattacharya Md\u00a0Jahangir Alam and Patrick Kenny. 2017. Deep Speaker Embeddings for Short-Duration Speaker Verification.. In Interspeech. 1517\u20131521.","DOI":"10.21437\/Interspeech.2017-1575"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682064"},{"key":"e_1_3_2_1_3_1","unstructured":"Weicheng Cai Jinkun Chen and Ming Li. 2018. Analysis of length normalization in end-to-end speaker verification system. arXiv preprint arXiv:1806.03209(2018).  Weicheng Cai Jinkun Chen and Ming Li. 2018. Analysis of length normalization in end-to-end speaker verification system. arXiv preprint arXiv:1806.03209(2018)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.21437\/Odyssey.2018-11"},{"key":"e_1_3_2_1_5_1","volume-title":"You Can Hear But You Cannot Steal: Defending Against Voice Impersonation Attacks on Smartphones. In 2017 IEEE 37th International Conference on Distributed Computing Systems (ICDCS). 183\u2013195","author":"Chen Si","year":"2017","unstructured":"Si Chen , Kui Ren , Sixu Piao , Cong Wang , Qian Wang , Jian Weng , Lu Su , and Aziz Mohaisen . 2017 . You Can Hear But You Cannot Steal: Defending Against Voice Impersonation Attacks on Smartphones. In 2017 IEEE 37th International Conference on Distributed Computing Systems (ICDCS). 183\u2013195 . Si Chen, Kui Ren, Sixu Piao, Cong Wang, Qian Wang, Jian Weng, Lu Su, and Aziz Mohaisen. 2017. You Can Hear But You Cannot Steal: Defending Against Voice Impersonation Attacks on Smartphones. In 2017 IEEE 37th International Conference on Distributed Computing Systems (ICDCS). 183\u2013195."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1929"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2003.818350"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1558\/ijsll.v1i2.169"},{"key":"e_1_3_2_1_9_1","unstructured":"Yixiao Ge Zhuowan Li Haiyu Zhao Guojun Yin Shuai Yi Xiaogang Wang 2018. FD-GAN: Pose-guided feature distilling GAN for robust person re-identification. In Advances in Neural Information Processing Systems. 1222\u20131233.  Yixiao Ge Zhuowan Li Haiyu Zhao Guojun Yin Shuai Yi Xiaogang Wang 2018. FD-GAN: Pose-guided feature distilling GAN for robust person re-identification. In Advances in Neural Information Processing Systems. 1222\u20131233."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1558\/ijsll.v18i2.293"},{"key":"e_1_3_2_1_11_1","unstructured":"Ian Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2014. Generative Adversarial Nets. In Advances in Neural Information Processing Systems 27. 2672\u20132680.  Ian Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2014. Generative Adversarial Nets. In Advances in Neural Information Processing Systems 27. 2672\u20132680."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Craig\u00a0S. Greenberg Vincent\u00a0M. Stanford Alvin\u00a0F. Martin Meghana Yadagiri George\u00a0R. Doddington John\u00a0J. Godfrey and Jaime Hernandez-Cordero. 2013. The 2012 NIST speaker recognition evaluation.. In INTERSPEECH. 1971\u20131975.  Craig\u00a0S. Greenberg Vincent\u00a0M. Stanford Alvin\u00a0F. Martin Meghana Yadagiri George\u00a0R. Doddington John\u00a0J. Godfrey and Jaime Hernandez-Cordero. 2013. The 2012 NIST speaker recognition evaluation.. In INTERSPEECH. 1971\u20131975.","DOI":"10.21437\/Interspeech.2013-469"},{"key":"e_1_3_2_1_14_1","unstructured":"Mahdi Hajibabaei and Dengxin Dai. 2018. Unified Hypersphere Embedding for Speaker Recognition.arXiv preprint arXiv:1807.08312(2018).  Mahdi Hajibabaei and Dengxin Dai. 2018. Unified Hypersphere Embedding for Speaker Recognition.arXiv preprint arXiv:1807.08312(2018)."},{"key":"e_1_3_2_1_15_1","volume-title":"Deep Speech: Scaling up end-to-end speech recognition. arXiv preprint arXiv:1412.5567(2014).","author":"Hannun Y.","year":"2014","unstructured":"Awni\u00a0 Y. Hannun , Carl Case , Jared Casper , Bryan Catanzaro , Greg Diamos , Erich Elsen , Ryan Prenger , Sanjeev Satheesh , Shubho Sengupta , Adam Coates , and Andrew\u00a0 Y. Ng . 2014 . Deep Speech: Scaling up end-to-end speech recognition. arXiv preprint arXiv:1412.5567(2014). Awni\u00a0Y. Hannun, Carl Case, Jared Casper, Bryan Catanzaro, Greg Diamos, Erich Elsen, Ryan Prenger, Sanjeev Satheesh, Shubho Sengupta, Adam Coates, and Andrew\u00a0Y. Ng. 2014. Deep Speech: Scaling up end-to-end speech recognition. arXiv preprint arXiv:1412.5567(2014)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7471631"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"e_1_3_2_1_19_1","volume-title":"Reducing the dimensionality of data with neural networks. science 313, 5786","author":"Hinton E","year":"2006","unstructured":"Geoffrey\u00a0 E Hinton and Ruslan\u00a0 R Salakhutdinov . 2006. Reducing the dimensionality of data with neural networks. science 313, 5786 ( 2006 ), 504\u2013507. Geoffrey\u00a0E Hinton and Ruslan\u00a0R Salakhutdinov. 2006. Reducing the dimensionality of data with neural networks. science 313, 5786 (2006), 504\u2013507."},{"key":"e_1_3_2_1_20_1","unstructured":"Geoffrey\u00a0E Hinton and Richard\u00a0S Zemel. 1994. Autoencoders minimum description length and Helmholtz free energy. In Advances in neural information processing systems. 3\u201310.  Geoffrey\u00a0E Hinton and Richard\u00a0S Zemel. 1994. Autoencoders minimum description length and Helmholtz free energy. In Advances in neural information processing systems. 3\u201310."},{"key":"e_1_3_2_1_21_1","volume-title":"Niesr: Nuisance invariant end-to-end speech recognition. arXiv preprint arXiv:1907.03233(2019).","author":"Hsu I","year":"2019","unstructured":"I Hsu , Ayush Jaiswal , Premkumar Natarajan , 2019 . Niesr: Nuisance invariant end-to-end speech recognition. arXiv preprint arXiv:1907.03233(2019). I Hsu, Ayush Jaiswal, Premkumar Natarajan, 2019. Niesr: Nuisance invariant end-to-end speech recognition. arXiv preprint arXiv:1907.03233(2019)."},{"key":"e_1_3_2_1_22_1","unstructured":"Wei-Ning Hsu Yu Zhang and James Glass. 2017. Unsupervised learning of disentangled and interpretable representations from sequential data. In Advances in neural information processing systems. 1878\u20131889.  Wei-Ning Hsu Yu Zhang and James Glass. 2017. Unsupervised learning of disentangled and interpretable representations from sequential data. In Advances in neural information processing systems. 1878\u20131889."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.267"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1006\/csla.1993.1007"},{"key":"e_1_3_2_1_25_1","volume-title":"Multi-task Discriminative Training of Hybrid DNN-TVM Model for Speaker Verification with Noisy and Far-Field Speech. In proceedings of Proceedings of Interspeech","author":"Jati Arindam","year":"2019","unstructured":"Arindam Jati , Raghuveer Peri , Monisankha Pal , Tae\u00a0Jin Park , Naveen Kumar , Ruchir Travadi , Panayiotis Georgiou , and Shrikanth Narayanan . 2019 . Multi-task Discriminative Training of Hybrid DNN-TVM Model for Speaker Verification with Noisy and Far-Field Speech. In proceedings of Proceedings of Interspeech (2019). Arindam Jati, Raghuveer Peri, Monisankha Pal, Tae\u00a0Jin Park, Naveen Kumar, Ruchir Travadi, Panayiotis Georgiou, and Shrikanth Narayanan. 2019. Multi-task Discriminative Training of Hybrid DNN-TVM Model for Speaker Verification with Noisy and Far-Field Speech. In proceedings of Proceedings of Interspeech (2019)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2323"},{"volume-title":"ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","author":"Kim Insoo","key":"e_1_3_2_1_27_1","unstructured":"Insoo Kim , Kyuhong Kim , Jiwhan Kim , and Changkyu Choi . 2019. Deep Speaker Representation Using Orthogonal Decomposition and Recombination for Speaker Verification . In ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) . IEEE , 6126\u20136130. Insoo Kim, Kyuhong Kim, Jiwhan Kim, and Changkyu Choi. 2019. Deep Speaker Representation Using Orthogonal Decomposition and Recombination for Speaker Verification. In ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 6126\u20136130."},{"key":"e_1_3_2_1_28_1","unstructured":"Diederik\u00a0P Kingma and Max Welling. 2013. Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114(2013).  Diederik\u00a0P Kingma and Max Welling. 2013. Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114(2013)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2964310"},{"key":"e_1_3_2_1_30_1","unstructured":"Guillaume Lample Neil Zeghidour Nicolas Usunier Antoine Bordes Ludovic Denoyer and Marc\u2019Aurelio Ranzato. 2017. Fader networks: Manipulating images by sliding attributes. In Advances in Neural Information Processing Systems. 5967\u20135976.  Guillaume Lample Neil Zeghidour Nicolas Usunier Antoine Bordes Ludovic Denoyer and Marc\u2019Aurelio Ranzato. 2017. Fader networks: Manipulating images by sliding attributes. In Advances in Neural Information Processing Systems. 5967\u20135976."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953183"},{"key":"e_1_3_2_1_32_1","unstructured":"Chao Li Xiaokong Ma Bing Jiang Xiangang Li Xuewei Zhang Xiao Liu Ying Cao Ajay Kannan and Zhenyao Zhu. 2017. Deep Speaker: an End-to-End Neural Speaker Embedding System.arXiv preprint arXiv:1705.02304(2017).  Chao Li Xiaokong Ma Bing Jiang Xiangang Li Xuewei Zhang Xiao Liu Ying Cao Ajay Kannan and Zhenyao Zhu. 2017. Deep Speaker: an End-to-End Neural Speaker Embedding System.arXiv preprint arXiv:1705.02304(2017)."},{"key":"e_1_3_2_1_33_1","unstructured":"Mu Li Wangmeng Zuo and David Zhang. 2016. Deep identity-aware transfer of facial attributes. arXiv preprint arXiv:1610.05586(2016).  Mu Li Wangmeng Zuo and David Zhang. 2016. Deep identity-aware transfer of facial attributes. arXiv preprint arXiv:1610.05586(2016)."},{"volume-title":"Multisource I-Vectors Domain Adaptation Using Maximum Mean Discrepancy Based Autoencoders","year":"2018","key":"e_1_3_2_1_34_1","unstructured":"LinWei-wei, MakMan-Wai, and ChienJen-Tzung. 2018. Multisource I-Vectors Domain Adaptation Using Maximum Mean Discrepancy Based Autoencoders . IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP) ( 2018 ). LinWei-wei, MakMan-Wai, and ChienJen-Tzung. 2018. Multisource I-Vectors Domain Adaptation Using Maximum Mean Discrepancy Based Autoencoders. IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP) (2018)."},{"key":"e_1_3_2_1_35_1","volume-title":"SphereFace: Deep Hypersphere Embedding for Face Recognition. In 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 6738\u20136746","author":"Liu Weiyang","year":"2017","unstructured":"Weiyang Liu , Yandong Wen , Zhiding Yu , Ming Li , Bhiksha Raj , and Le Song . 2017 . SphereFace: Deep Hypersphere Embedding for Face Recognition. In 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 6738\u20136746 . Weiyang Liu, Yandong Wen, Zhiding Yu, Ming Li, Bhiksha Raj, and Le Song. 2017. SphereFace: Deep Hypersphere Embedding for Face Recognition. In 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). 6738\u20136746."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Yi Liu Liang He and Jia Liu. 2019. Large margin softmax loss for speaker verification. arXiv preprint arXiv:1904.03479(2019).  Yi Liu Liang He and Jia Liu. 2019. Large margin softmax loss for speaker verification. arXiv preprint arXiv:1904.03479(2019).","DOI":"10.21437\/Interspeech.2019-2357"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00222"},{"key":"e_1_3_2_1_38_1","unstructured":"Christos Louizos Kevin Swersky Yujia Li Max Welling and Richard Zemel. 2015. The variational fair autoencoder. arXiv preprint arXiv:1511.00830(2015).  Christos Louizos Kevin Swersky Yujia Li Max Welling and Richard Zemel. 2015. The variational fair autoencoder. arXiv preprint arXiv:1511.00830(2015)."},{"key":"e_1_3_2_1_39_1","first-page":"2579","article-title":"Visualizing data using t-SNE","author":"van\u00a0der Maaten Laurens","year":"2008","unstructured":"Laurens van\u00a0der Maaten and Geoffrey Hinton . 2008 . Visualizing data using t-SNE . Journal of machine learning research 9 , Nov (2008), 2579 \u2013 2605 . Laurens van\u00a0der Maaten and Geoffrey Hinton. 2008. Visualizing data using t-SNE. Journal of machine learning research 9, Nov (2008), 2579\u20132605.","journal-title":"Journal of machine learning research 9"},{"key":"e_1_3_2_1_40_1","unstructured":"Davide Maltoni Dario Maio Anil\u00a0K. Jain and Salil Prabhakar. 2005. Handbook of Fingerprint Recognition.  Davide Maltoni Dario Maio Anil\u00a0K. Jain and Salil Prabhakar. 2005. Handbook of Fingerprint Recognition."},{"key":"e_1_3_2_1_41_1","unstructured":"Michael\u00a0F Mathieu Junbo\u00a0Jake Zhao Junbo Zhao Aditya Ramesh Pablo Sprechmann and Yann LeCun. 2016. Disentangling factors of variation in deep representation using adversarial training. In Advances in neural information processing systems. 5040\u20135048.  Michael\u00a0F Mathieu Junbo\u00a0Jake Zhao Junbo Zhao Aditya Ramesh Pablo Sprechmann and Yann LeCun. 2016. Disentangling factors of variation in deep representation using adversarial training. In Advances in neural information processing systems. 5040\u20135048."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682488"},{"key":"e_1_3_2_1_43_1","unstructured":"M.G. [n.d.]. Transparency Market Research. https:\/\/www.researchmoz.us\/publisher..  M.G. [n.d.]. Transparency Market Research. https:\/\/www.researchmoz.us\/publisher.."},{"key":"e_1_3_2_1_44_1","unstructured":"Lindasalwa Muda Mumtaj Begam and I. Elamvazuthi. 2010. Voice Recognition Algorithms using Mel Frequency Cepstral Coefficient (MFCC) and Dynamic Time Warping (DTW) Techniques. arXiv preprint arXiv:1003.4083(2010).  Lindasalwa Muda Mumtaj Begam and I. Elamvazuthi. 2010. Voice Recognition Algorithms using Mel Frequency Cepstral Coefficient (MFCC) and Dynamic Time Warping (DTW) Techniques. arXiv preprint arXiv:1003.4083(2010)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2019.101027"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-950"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"Koji Okabe Takafumi Koshinaka and Koichi Shinoda. 2018. Attentive statistics pooling for deep speaker embedding. arXiv preprint arXiv:1803.10963(2018).  Koji Okabe Takafumi Koshinaka and Koichi Shinoda. 2018. Attentive statistics pooling for deep speaker embedding. arXiv preprint arXiv:1803.10963(2018).","DOI":"10.21437\/Interspeech.2018-993"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2019.8852143"},{"key":"e_1_3_2_1_49_1","unstructured":"Raghuveer Peri Monisankha Pal Arindam Jati Krishna Somandepalli and Shrikanth Narayanan. 2019. Robust speaker recognition using unsupervised adversarial invariance. arXiv preprint arXiv:1911.00940(2019).  Raghuveer Peri Monisankha Pal Arindam Jati Krishna Somandepalli and Shrikanth Narayanan. 2019. Robust speaker recognition using unsupervised adversarial invariance. arXiv preprint arXiv:1911.00940(2019)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.879790"},{"key":"e_1_3_2_1_51_1","volume-title":"The Kaldi Speech Recognition Toolkit. In IEEE 2011 Workshop on Automatic Speech Recognition and Understanding.","author":"Povey Daniel","year":"2011","unstructured":"Daniel Povey , Arnab Ghoshal , Gilles Boulianne , Lukas Burget , Ondrej Glembek , Nagendra Goel , Mirko Hannemann , Petr Motlicek , Yanmin Qian , Petr Schwarz , Jan Silovsky , Georg Stemmer , and Karel Vesely . 2011 . The Kaldi Speech Recognition Toolkit. In IEEE 2011 Workshop on Automatic Speech Recognition and Understanding. Daniel Povey, Arnab Ghoshal, Gilles Boulianne, Lukas Burget, Ondrej Glembek, Nagendra Goel, Mirko Hannemann, Petr Motlicek, Yanmin Qian, Petr Schwarz, Jan Silovsky, Georg Stemmer, and Karel Vesely. 2011. The Kaldi Speech Recognition Toolkit. In IEEE 2011 Workshop on Automatic Speech Recognition and Understanding."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2007.4409052"},{"key":"e_1_3_2_1_53_1","volume-title":"Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks. In ICLR 2016 : International Conference on Learning Representations","author":"Radford Alec","year":"2016","unstructured":"Alec Radford , Luke Metz , and Soumith Chintala . 2016 . Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks. In ICLR 2016 : International Conference on Learning Representations 2016. Alec Radford, Luke Metz, and Soumith Chintala. 2016. Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks. In ICLR 2016 : International Conference on Learning Representations 2016."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461587"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"crossref","unstructured":"Desh Raj David Snyder Daniel Povey and Sanjeev Khudanpur. 2019. Probing the Information Encoded in x-vectors. arXiv preprint arXiv:1909.06351(2019).  Desh Raj David Snyder Daniel Povey and Sanjeev Khudanpur. 2019. Probing the Information Encoded in x-vectors. arXiv preprint arXiv:1909.06351(2019).","DOI":"10.1109\/ASRU46091.2019.9003979"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"Salah Rifai Pascal Vincent Xavier Muller Xavier Glorot and Yoshua Bengio. 2011. Contractive auto-encoders: Explicit invariance during feature extraction. (2011).  Salah Rifai Pascal Vincent Xavier Muller Xavier Glorot and Yoshua Bengio. 2011. Contractive auto-encoders: Explicit invariance during feature extraction. (2011).","DOI":"10.1007\/978-3-642-23783-6_41"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639622"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"crossref","unstructured":"David Snyder Daniel Garcia-Romero Daniel Povey and Sanjeev Khudanpur. 2017. Deep Neural Network Embeddings for Text-Independent Speaker Verification.. In Interspeech. 999\u20131003.  David Snyder Daniel Garcia-Romero Daniel Povey and Sanjeev Khudanpur. 2017. Deep Neural Network Embeddings for Text-Independent Speaker Verification.. In Interspeech. 999\u20131003.","DOI":"10.21437\/Interspeech.2017-620"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461375"},{"key":"e_1_3_2_1_60_1","unstructured":"T.B.[n.d.]. Voiceprint. https:\/\/www.tdbank.com\/bank\/tdvoiceprint.html..  T.B.[n.d.]. Voiceprint. https:\/\/www.tdbank.com\/bank\/tdvoiceprint.html.."},{"key":"e_1_3_2_1_61_1","volume-title":"Equal Error Rate Minimization for Biometrics Fusion. ICEIC : International Conference on Electronics, Informations and Communications","author":"Toh Kar-Ann","year":"2008","unstructured":"Kar-Ann Toh , Jaihie Kim , and Sangyoun Lee . 2008. Equal Error Rate Minimization for Biometrics Fusion. ICEIC : International Conference on Electronics, Informations and Communications ( 2008 ), 513\u2013516. Kar-Ann Toh, Jaihie Kim, and Sangyoun Lee. 2008. Equal Error Rate Minimization for Biometrics Fusion. ICEIC : International Conference on Electronics, Informations and Communications (2008), 513\u2013516."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.141"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2168"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390294"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462665"},{"key":"e_1_3_2_1_66_1","unstructured":"Matt Warman. 2013. Say goodbye to the pin: voice recognition takes over at Barclays Wealth. https:\/\/www.telegraph.co.uk\/technology\/news\/10044493\/Say-goodbye-to-the-pin-voice-recognition-takes-over-at-Barclays-Wealth.html.  Matt Warman. 2013. Say goodbye to the pin: voice recognition takes over at Barclays Wealth. https:\/\/www.telegraph.co.uk\/technology\/news\/10044493\/Say-goodbye-to-the-pin-voice-recognition-takes-over-at-Barclays-Wealth.html."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1769"},{"key":"e_1_3_2_1_68_1","volume-title":"Utterance-level Aggregation for Speaker Recognition in the Wild. In ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 5791\u20135795","author":"Xie Weidi","year":"2019","unstructured":"Weidi Xie , Arsha Nagrani , Joon\u00a0Son Chung , and Andrew Zisserman . 2019 . Utterance-level Aggregation for Speaker Recognition in the Wild. In ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 5791\u20135795 . Weidi Xie, Arsha Nagrani, Joon\u00a0Son Chung, and Andrew Zisserman. 2019. Utterance-level Aggregation for Speaker Recognition in the Wild. In ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). 5791\u20135795."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1015"},{"key":"e_1_3_2_1_70_1","volume-title":"International Conference on Machine Learning. 5670\u20135679","author":"Yingzhen Li","year":"2018","unstructured":"Li Yingzhen and Stephan Mandt . 2018 . Disentangled sequential autoencoder . In International Conference on Machine Learning. 5670\u20135679 . Li Yingzhen and Stephan Mandt. 2018. Disentangled sequential autoencoder. In International Conference on Machine Learning. 5670\u20135679."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"crossref","unstructured":"Chunlei Zhang and Kazuhito Koishida. 2017. End-to-End Text-Independent Speaker Verification with Triplet Loss on Short Utterances.. In Interspeech. 1487\u20131491.  Chunlei Zhang and Kazuhito Koishida. 2017. End-to-End Text-Independent Speaker Verification with Triplet Loss on Short Utterances.. In Interspeech. 1487\u20131491.","DOI":"10.21437\/Interspeech.2017-1608"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683828"}],"event":{"name":"ACSAC '20: Annual Computer Security Applications Conference","acronym":"ACSAC '20","location":"Austin USA"},"container-title":["Annual Computer Security Applications Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3427228.3427274","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3427228.3427274","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:02:24Z","timestamp":1750197744000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3427228.3427274"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,12,7]]},"references-count":72,"alternative-id":["10.1145\/3427228.3427274","10.1145\/3427228"],"URL":"https:\/\/doi.org\/10.1145\/3427228.3427274","relation":{},"subject":[],"published":{"date-parts":[[2020,12,7]]},"assertion":[{"value":"2020-12-08","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}