{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T19:12:09Z","timestamp":1770059529817,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","funder":[{"name":"Key Research and Development Program of Hainan Province","award":["ZDYF2025\uff08LALH\uff09002"],"award-info":[{"award-number":["ZDYF2025\uff08LALH\uff09002"]}]},{"name":"Beijing Natural Science Foundation","award":["L232039"],"award-info":[{"award-number":["L232039"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,12]]},"DOI":"10.1145\/3784833.3784897","type":"proceedings-article","created":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T05:22:31Z","timestamp":1770009751000},"page":"156-160","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Permutation-Free Training via Loudness Order for Interpretable Speech Separation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5559-1691","authenticated-orcid":false,"given":"Jiaying","family":"Wang","sequence":"first","affiliation":[{"name":"School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9723-3294","authenticated-orcid":false,"given":"Li","family":"Guo","sequence":"additional","affiliation":[{"name":"School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China and Engineering Research Center of Blockchain and Network Convergence Technology, Ministry of Education, Beijing University of Posts and Telecommunications, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2026,2]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-00599-2_94"},{"key":"e_1_3_3_1_3_2","unstructured":"Francis\u00a0R Bach and Michael\u00a0I Jordan. 2006. Learning spectral clustering with application to speech separation. The Journal of Machine Learning Research 7 (2006) 1963\u20132001."},{"key":"e_1_3_3_1_4_2","unstructured":"Seungjin Choi Andrzej Cichocki Hyung-Min Park and Soo-Young Lee. 2005. Blind source separation and independent component analysis: A review. Neural Information Processing-Letters and Reviews 6 1 (2005) 1\u201357."},{"key":"e_1_3_3_1_5_2","unstructured":"Joris Cosentino Manuel Pariente Samuele Cornell Antoine Deleforge and Emmanuel Vincent. 2020. Librimix: An open-source dataset for generalizable speech separation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2005.11262 (2020)."},{"key":"e_1_3_3_1_6_2","unstructured":"R EBU-Recommendation. 2011. Loudness normalisation and permitted maximum level of audio signals. Eur. Broadcast. Union (2011)."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7471631"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Guoning Hu and DeLiang Wang. 2010. A tandem algorithm for pitch estimation and voiced speech segregation. IEEE Transactions on Audio Speech and Language Processing 18 8 (2010) 2067\u20132079.","DOI":"10.1109\/TASL.2010.2041110"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Ke Hu and DeLiang Wang. 2012. An unsupervised approach to cochannel speech separation. IEEE Transactions on audio speech and language processing 21 1 (2012) 122\u2013131.","DOI":"10.1109\/TASL.2012.2215591"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6853860"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Morten Kolb\u00e6k Dong Yu Zheng-Hua Tan and Jesper Jensen. 2017. Multitalker speech separation with utterance-level permutation invariant training of deep recurrent neural networks. IEEE\/ACM Transactions on Audio Speech and Language Processing 25 10 (2017) 1901\u20131913.","DOI":"10.1109\/TASLP.2017.2726762"},{"key":"e_1_3_3_1_12_2","unstructured":"Morten Kolb\u00e6k Dong Yu Zheng-Hua Tan and Jesper Jensen. 2017. Multi-talker Speech Separation with Utterance-level Permutation Invariant Training of Deep Recurrent Neural Networks. arxiv:https:\/\/arXiv.org\/abs\/1703.06284\u00a0[cs.SD] https:\/\/arxiv.org\/abs\/1703.06284"},{"key":"e_1_3_3_1_13_2","unstructured":"Daniel Lee and H\u00a0Sebastian Seung. 2000. Algorithms for non-negative matrix factorization. Advances in neural information processing systems 13 (2000)."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"crossref","unstructured":"Liang Lu Naoyuki Kanda Jinyu Li and Yifan Gong. 2021. Streaming end-to-end multi-talker speech recognition. IEEE Signal Processing Letters 28 (2021) 803\u2013807.","DOI":"10.1109\/LSP.2021.3070817"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Yi Luo and Nima Mesgarani. 2019. Conv-tasnet: Surpassing ideal time\u2013frequency magnitude masking for speech separation. IEEE\/ACM transactions on audio speech and language processing 27 8 (2019) 1256\u20131266.","DOI":"10.1109\/TASLP.2019.2915167"},{"key":"e_1_3_3_1_16_2","unstructured":"Soumi Maiti Yushi Ueda Shinji Watanabe Chunlei Zhang Meng Yu Shi-Xiong Zhang and Yong Xu. 2022. EEND-SS: Joint End-to-End Neural Speaker Diarization and Speech Separation for Flexible Number of Speakers. arxiv:https:\/\/arXiv.org\/abs\/2203.17068\u00a0[eess.AS] https:\/\/arxiv.org\/abs\/2203.17068"},{"key":"e_1_3_3_1_17_2","unstructured":"BS Series. 2011. Algorithms to measure audio programme loudness and true-peak audio level. International Telecommunication Union Radiocommunication Assembly 3 (2011)."},{"key":"e_1_3_3_1_18_2","unstructured":"Jing Shi Xuankai Chang Pengcheng Guo Shinji Watanabe Yusuke Fujita Jiaming Xu Bo Xu and Lei Xie. 2020. Sequence to Multi-Sequence Learning via Conditional Chain Mapping for Mixture Signals. arxiv:https:\/\/arXiv.org\/abs\/2006.14150\u00a0[eess.AS] https:\/\/arxiv.org\/abs\/2006.14150"},{"key":"e_1_3_3_1_19_2","unstructured":"Naoya Takahashi Sudarsanam Parthasaarathy Nabarun Goswami and Yuki Mitsufuji. 2019. Recursive speech separation for unknown number of speakers. arxiv:https:\/\/arXiv.org\/abs\/1904.03065\u00a0[cs.SD] https:\/\/arxiv.org\/abs\/1904.03065"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Tuomas Virtanen. 2007. Monaural sound source separation by nonnegative matrix factorization with temporal continuity and sparseness criteria. IEEE transactions on audio speech and language processing 15 3 (2007) 1066\u20131074.","DOI":"10.1109\/TASL.2006.885253"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","unstructured":"DeLiang Wang and Jitong Chen. 2018. Supervised Speech Separation Based on Deep Learning: An Overview. IEEE\/ACM Transactions on Audio Speech and Language Processing 26 10 (2018) 1702\u20131726. 10.1109\/TASLP.2018.2842159","DOI":"10.1109\/TASLP.2018.2842159"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462507"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2018-2284"},{"key":"e_1_3_3_1_24_2","unstructured":"Dong Yu Morten Kolb\u00e6k Zheng-Hua Tan and Jesper Jensen. 2017. Permutation Invariant Training of Deep Models for Speaker-Independent Multi-talker Speech Separation. arxiv:https:\/\/arXiv.org\/abs\/1607.00325\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/1607.00325"}],"event":{"name":"ICCIP 2025: 2025 the 11th International Conference on Communication and Information Processing","location":"Lingshui Hainan China","acronym":"ICCIP 2025"},"container-title":["Proceedings of the 2025 11th International Conference on Communication and Information Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3784833.3784897","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,2]],"date-time":"2026-02-02T07:45:26Z","timestamp":1770018326000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3784833.3784897"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,12]]},"references-count":23,"alternative-id":["10.1145\/3784833.3784897","10.1145\/3784833"],"URL":"https:\/\/doi.org\/10.1145\/3784833.3784897","relation":{},"subject":[],"published":{"date-parts":[[2025,11,12]]},"assertion":[{"value":"2026-02-01","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}