{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T14:48:56Z","timestamp":1774968536083,"version":"3.50.1"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,9,23]],"date-time":"2022-09-23T00:00:00Z","timestamp":1663891200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,9,23]],"date-time":"2022-09-23T00:00:00Z","timestamp":1663891200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2023,2]]},"DOI":"10.1007\/s00034-022-02178-1","type":"journal-article","created":{"date-parts":[[2022,9,23]],"date-time":"2022-09-23T12:03:40Z","timestamp":1663934620000},"page":"1163-1180","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Cycle GAN-Based Audio Source Separation Using Time\u2013Frequency Masking"],"prefix":"10.1007","volume":"42","author":[{"given":"Sujo","family":"Joseph","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5488-9026","authenticated-orcid":false,"given":"Rajeev","family":"Rajan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,9,23]]},"reference":[{"key":"2178_CR1","unstructured":"D. Barry, G. Kearney, Localization quality assessment in source separation-based upmixing algorithms, in AES 35th International Conference (2009), pp. 2391\u20132395"},{"issue":"3","key":"2178_CR2","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1016\/0165-1684(94)90029-9","volume":"36","author":"P Comon","year":"1994","unstructured":"P. Comon, Independent component analysis, a new concept? Signal Process. 36(3), 287\u2013314 (1994). https:\/\/doi.org\/10.1016\/0165-1684(94)90029-9","journal-title":"Signal Process."},{"key":"2178_CR3","doi-asserted-by":"crossref","unstructured":"C. Donahue, B. Li, R. Prabhavalkar, Exploring Speech Enhancement with Generative Adversarial Networks for robust Speech Recognition, in International Conference on Acoustics, Speech and Signal Processing (2018), pp. 5024\u20135028","DOI":"10.1109\/ICASSP.2018.8462581"},{"key":"2178_CR4","unstructured":"C. Donahue, J. McAuley, M. Puckette, Adversarial audio synthesis, in Proceedings of ICLR (2019), pp. 1\u201316"},{"key":"2178_CR5","unstructured":"C. Donahue, J.J. McAuley, M.S. Puckette, Synthesizing audio with generative adversarial networks, CoRR, vol. abs\/1802.04208 (2018). [Online]. Available: http:\/\/arxiv.org\/abs\/1802.04208"},{"issue":"8","key":"2178_CR6","doi-asserted-by":"publisher","first-page":"2121","DOI":"10.1109\/TASL.2010.2042119","volume":"18","author":"Z Duan","year":"2010","unstructured":"Z. Duan, B. Pardo, C. Zhang, Multiple fundamental frequency estimation by modeling spectral peaks and non-peak regions. IEEE Trans. Audio Speech Lang. Process. 18(8), 2121\u20132133 (2010)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"1","key":"2178_CR7","doi-asserted-by":"publisher","first-page":"138","DOI":"10.1109\/TASLP.2013.2285484","volume":"22","author":"Z Duan","year":"2014","unstructured":"Z. Duan, J. Han, B. Pardo, Multi-pitch streaming of harmonic sound mixtures. IEEE\/ACM Trans. Audio Speech Lang. Process. 22(1), 138\u2013150 (2014)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"issue":"1","key":"2178_CR8","doi-asserted-by":"publisher","first-page":"138","DOI":"10.1109\/TASLP.2013.2285484","volume":"22","author":"Z Duan","year":"2014","unstructured":"Z. Duan, J. Han, B. Pardo, Multi-pitch streaming of harmonic sound mixtures. IEEE\/ACM Trans. Audio Speech Lang. Process. 22(1), 138\u2013150 (2014)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2178_CR9","doi-asserted-by":"publisher","unstructured":"Z.-C. Fan, Y.-L. Lai, J.-S.R. Jang, SVSGAN: Singing Voice Separation Via Generative Adversarial Network, in 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2018), pp. 726\u2013730. https:\/\/doi.org\/10.1109\/ICASSP.2018.8462091","DOI":"10.1109\/ICASSP.2018.8462091"},{"issue":"9","key":"2178_CR10","doi-asserted-by":"publisher","first-page":"2421","DOI":"10.1162\/NECO_a_00168","volume":"23","author":"C F\u00e9votte","year":"2011","unstructured":"C. F\u00e9votte, J. Idier, Algorithms for nonnegative matrix factorization with the $$\\beta $$-divergence. Neural Comput. 23(9), 2421\u20132456 (2011)","journal-title":"Neural Comput."},{"key":"2178_CR11","first-page":"2672","volume":"27","author":"IJ Goodfellow","year":"2014","unstructured":"I.J. Goodfellow et al., Generative adversarial nets. Adv. Neural Inform. Process. Syst. 27, 2672\u20132680 (2014)","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"2178_CR12","unstructured":"M. Gover, Score-Informed Source Separation of Choral Music (McGill University, Thesis submitted to Department of Music Research Schulich School of Music, 2019)"},{"key":"2178_CR13","doi-asserted-by":"crossref","unstructured":"E.M. Grais, M.U. Sen, H. Erdogan, Deep neural networks for single channel source separation, in IEEE International Conference on Acoustics, Speech and Signal Processing (2014), pp. 3734\u20133738","DOI":"10.1109\/ICASSP.2014.6854299"},{"key":"2178_CR14","unstructured":"GTZAN Dataset-Music Genre Classification, https:\/\/www.kaggle.com\/andradaolteanu\/gtzan-dataset-music-genre-classification, Accessed online on 04 Jan 2022"},{"key":"2178_CR15","doi-asserted-by":"crossref","unstructured":"J.R. Hershey, Z. Chen, J. Le Roux, S. Watanabe, Deep clustering: discriminative embeddings for segmentation and separation, in IEEE International Conference on Acoustics, Speech and Signal Processing (2016), pp. 31\u201335","DOI":"10.1109\/ICASSP.2016.7471631"},{"issue":"11","key":"2178_CR16","doi-asserted-by":"publisher","first-page":"2084","DOI":"10.1109\/TASLP.2016.2577879","volume":"24","author":"Y Ikemiya","year":"2016","unstructured":"Y. Ikemiya, K. Itoyama, K. Yoshii, Singing voice separation and vocal f0 estimation based on mutual combination of robust principal component analysis and subharmonic summation. IEEE\/ACM Trans. Audio Speech Lang. Process. 24(11), 2084\u20132095 (2016)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2178_CR17","doi-asserted-by":"crossref","unstructured":"S. Inoue, H. Kameoka, L. Li, S. Makino, Sepnet: a deep separation matrix prediction network for multichannel audio source separation, in ICASSP 2021\u20142021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2021), pp. 191\u2013195","DOI":"10.1109\/ICASSP39728.2021.9414884"},{"key":"2178_CR18","doi-asserted-by":"crossref","unstructured":"P. Isola, J.-Y. Zhu, T. Zhou, A.A. Efros, Image-to-image translation with conditional adversarial networks, in Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2017), pp. 5967\u20135976","DOI":"10.1109\/CVPR.2017.632"},{"key":"2178_CR19","doi-asserted-by":"publisher","unstructured":"L. Le Magoarou, A. Ozerov, N.Q.K. Duong, Text-informed audio source separation using nonnegative matrix partial co-factorization, in 2013 IEEE International Workshop on Machine Learning for Signal Processing (MLSP) (2013), pp. 1\u20136. https:\/\/doi.org\/10.1109\/MLSP.2013.6661995","DOI":"10.1109\/MLSP.2013.6661995"},{"key":"2178_CR20","unstructured":"J. Le Roux, F.J. Weninger, J.R. Hershey, Sparse NMF half-baked or well done? Tech. Rep. TR2015-023 (MERL, Cambridge, 2015)"},{"key":"2178_CR21","doi-asserted-by":"crossref","unstructured":"H. Li, S. Fu, Y. Tsao, J. Yamagish, iMetricGAN: intelligibility enhancement for speech-in-noise using generative adversarial network-based metric learning, in Proceedings of Interspeech 2020, Shanghai, China, October 25\u201329 (2020), pp. 1336-1340","DOI":"10.21437\/Interspeech.2020-1016"},{"key":"2178_CR22","doi-asserted-by":"publisher","unstructured":"L. Li, H. Kameoka, S. Makino, Determined audio source separation with multichannel star generative adversarial network, in 2020 IEEE 30th International Workshop on Machine Learning for Signal Processing (MLSP) (2020), pp. 1\u20136. https:\/\/doi.org\/10.1109\/MLSP49062.2020.9231555","DOI":"10.1109\/MLSP49062.2020.9231555"},{"key":"2178_CR23","doi-asserted-by":"crossref","unstructured":"Y. Luo, N. Mesgarani, Real-time single-channel dereverberation and separation with time-domain audio separation network, in Proceedings of Interspeech (2018), pp. 342\u2013346","DOI":"10.21437\/Interspeech.2018-2290"},{"key":"2178_CR24","doi-asserted-by":"crossref","unstructured":"Y. Luo, N. Mesgarani, Tasnet: time-domain audio separation network for real- time, single-channel speech separation, in IEEE International Conference on Acoustics, Speech and Signal Processing (2018), pp. 696\u2013700","DOI":"10.1109\/ICASSP.2018.8462116"},{"issue":"8","key":"2178_CR25","doi-asserted-by":"publisher","first-page":"1256","DOI":"10.1109\/TASLP.2019.2915167","volume":"27","author":"Y Luo","year":"2019","unstructured":"Y. Luo, N. Mesgarani, Conv-TasNet: surpassing ideal time\u2013frequency magnitude masking for speech separation. IEEE\/ACM Trans. Audio Speech Lang. Process. 27(8), 1256\u20131266 (2019)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"issue":"22","key":"2178_CR26","doi-asserted-by":"publisher","first-page":"1024","DOI":"10.1016\/j.cub.2009.09.005","volume":"19","author":"JH McDermott","year":"2009","unstructured":"J.H. McDermott, The cocktail party problem. Curr. Biol. 19(22), 1024\u20131027 (2009)","journal-title":"Curr. Biol."},{"key":"2178_CR27","unstructured":"M. Mirza, S. Osindero, Conditional generative adversarial nets (2014). arXiv:1411.1784"},{"key":"2178_CR28","doi-asserted-by":"crossref","unstructured":"B. Nasersharif, S. Abdali, Speech\/music separation using non-negative matrix factorization with combination of cost functions, in The International Symposium on Artificial Intelligence and Signal Processing (2015), pp. 107\u2013111","DOI":"10.1109\/AISP.2015.7123491"},{"key":"2178_CR29","doi-asserted-by":"crossref","unstructured":"V. Panayotov, G. Chen, D. Povey, S. Khudanpur, Librispeech: an ASR corpus based on public domain audio books, in IEEE International Conference on Acoustics, Speech and Signal Processing (2015), pp. 5206\u20135210","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"2178_CR30","doi-asserted-by":"publisher","first-page":"3642","DOI":"10.21437\/Interspeech.2017-1428","volume":"2017","author":"S Pascual","year":"2017","unstructured":"S. Pascual, A. Bonafonte, J. Serr\u00e0, SEGAN: speech enhancement generative adversarial network. Proc Interspeech 2017, 3642\u20133646 (2017)","journal-title":"Proc Interspeech"},{"key":"2178_CR31","doi-asserted-by":"publisher","first-page":"1700","DOI":"10.1109\/LSP.2020.3025020","volume":"27","author":"H Phan","year":"2020","unstructured":"H. Phan et al., Improving GANs for speech enhancement. IEEE Signal Process. Lett. 27, 1700\u20131704 (2020). https:\/\/doi.org\/10.1109\/LSP.2020.3025020","journal-title":"IEEE Signal Process. Lett."},{"issue":"8","key":"2178_CR32","doi-asserted-by":"publisher","first-page":"1307","DOI":"10.1109\/TASLP.2018.2825440","volume":"26","author":"Z Rafii","year":"2018","unstructured":"Z. Rafii, A. Liutkus, F.-R. St\u00f6ter, S.I. Mimilakis, D. FitzGerald, B. Pardo, An overview of lead and accompaniment separation in music. IEEE\/ACM Trans. Audio Speech Lang. Process. 26(8), 1307\u20131335 (2018). https:\/\/doi.org\/10.1109\/TASLP.2018.2825440","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"2178_CR33","first-page":"234","volume":"9351","author":"O Ronneberger","year":"2015","unstructured":"O. Ronneberger, P. Fischer, T. Brox, U-net: convolutional networks for biomedical image segmentation. Med. Image Comput. Comput. Assist. Intervent. 9351, 234\u2013241 (2015)","journal-title":"Med. Image Comput. Comput. Assist. Intervent."},{"key":"2178_CR34","doi-asserted-by":"crossref","unstructured":"D. Stoller, S. Ewert, S. Dixon, Adversarial semi-supervised audio source separation applied to singing voice extraction, in IEEE International Conference on Acoustics, Speech and Signal Processing (2018), pp. 2391\u20132395","DOI":"10.1109\/ICASSP.2018.8461722"},{"key":"2178_CR35","doi-asserted-by":"crossref","unstructured":"F.-R. St\u00f6ter, A. Liutkus, N. Ito, The 2018 signal separation evaluation campaign. in Y. Deville, S. Gannot, R. Mason, M.D. Plumbley, D. Ward (Eds.), 14th International Conference on Latent Variable Analysis and Signal Separation (LVA\/ICA 2018) (2018), pp. 293\u2013305","DOI":"10.1007\/978-3-319-93764-9_28"},{"key":"2178_CR36","doi-asserted-by":"crossref","unstructured":"Y.C. Subakan, P. Smaragdis, Generative adversarial source separation, in IEEE International Conference on Acoustics, Speech and Signal Processing (2018), pp. 26\u201330","DOI":"10.1109\/ICASSP.2018.8461671"},{"key":"2178_CR37","unstructured":"The LJ Speech Dataset, https:\/\/keithito.com\/LJ-Speech-Dataset\/"},{"key":"2178_CR38","doi-asserted-by":"crossref","unstructured":"E. Tzinis, Z. Wang, P. Smaragdis, Sudo RM-RF: efficient networks for universal audio source separation, in IEEE 30th International Workshop on Machine Learning for Signal Processing (MLSP) (2020), pp. 1\u20136","DOI":"10.1109\/MLSP49062.2020.9231900"},{"key":"2178_CR39","doi-asserted-by":"crossref","unstructured":"S. Uhlich, F. Giron, Y. Mitsufuji, Deep neural network based instrument extraction from music, in IEEE International Conference on Acoustics, Speech and Signal Processing (2015), pp. 2135\u20132139","DOI":"10.1109\/ICASSP.2015.7178348"},{"key":"2178_CR40","doi-asserted-by":"crossref","unstructured":"Z.-Q. Wang, J.L. Roux, J.R. Hershey, Alternative objective functions for deep clustering, in IEEE International Conference on Acoustics, Speech and Signal Processing (2018), pp. 686\u2013690","DOI":"10.1109\/ICASSP.2018.8462507"},{"issue":"7","key":"2178_CR41","doi-asserted-by":"publisher","first-page":"1830","DOI":"10.1109\/TSP.2004.828896","volume":"52","author":"O Yilmaz","year":"2004","unstructured":"O. Yilmaz, S. Rickard, Blind separation of speech mixtures via time\u2013frequency masking. IEEE Trans. Signal Process. 52(7), 1830\u20131847 (2004)","journal-title":"IEEE Trans. Signal Process."},{"key":"2178_CR42","doi-asserted-by":"crossref","unstructured":"J.-Y. Zhu, T. Park, P. Isola, A. Efros, Unpaired image-to-image translation using cycle-consistent adversarial networks, in Proceedings of IEEE International Conference on Computer Vision (2017), pp. 2242\u20132251","DOI":"10.1109\/ICCV.2017.244"}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-022-02178-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-022-02178-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-022-02178-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,3]],"date-time":"2023-02-03T03:09:28Z","timestamp":1675393768000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-022-02178-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9,23]]},"references-count":42,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,2]]}},"alternative-id":["2178"],"URL":"https:\/\/doi.org\/10.1007\/s00034-022-02178-1","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"value":"0278-081X","type":"print"},{"value":"1531-5878","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,9,23]]},"assertion":[{"value":"16 January 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 September 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 September 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 September 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that there is no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}