{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T16:24:37Z","timestamp":1775665477953,"version":"3.50.1"},"reference-count":115,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,12,15]],"date-time":"2021-12-15T00:00:00Z","timestamp":1639526400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,12,15]],"date-time":"2021-12-15T00:00:00Z","timestamp":1639526400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,12,15]]},"DOI":"10.1109\/bigdata52589.2021.9671640","type":"proceedings-article","created":{"date-parts":[[2022,1,13]],"date-time":"2022-01-13T15:39:16Z","timestamp":1642088356000},"page":"812-823","source":"Crossref","is-referenced-by-count":2,"title":["A Simple Approach to Balance Task Loss in Multi-Task Learning"],"prefix":"10.1109","author":[{"given":"Sicong","family":"Liang","sequence":"first","affiliation":[{"name":"Southern University of Science and Technology,Department of Computer Science and Engineering,Shenzhen,China"}]},{"given":"Chang","family":"Deng","sequence":"additional","affiliation":[{"name":"Southern University of Science and Technology,Department of Computer Science and Engineering,Shenzhen,China"}]},{"given":"Yu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Southern University of Science and Technology Peng Cheng Laboratory,Department of Computer Science and Engineering,Shenzhen,China"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1206"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1001"},{"key":"ref33","first-page":"1593","article-title":"Learning multiple tasks with multilinear relationship networks","author":"long","year":"2017","journal-title":"NIPS"},{"key":"ref32","first-page":"640","article-title":"Is learning the n-th thing any easier than learning the first?","author":"thrun","year":"1995","journal-title":"Advances in Neural Information Processing Systems 8"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007327622663"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-008-5088-0"},{"key":"ref37","article-title":"An overview of multi-task learning in deep neural networks","author":"ruder","year":"2017","journal-title":"CoRR"},{"key":"ref36","article-title":"Sluice networks: Learning what to share between loosely related tasks","author":"ruder","year":"2017","journal-title":"CoRR"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.433"},{"key":"ref34","article-title":"Deep multi-task representation learning: A tensor factorisation approach","author":"yang","year":"2017","journal-title":"ICLRE"},{"key":"ref28","first-page":"747","article-title":"Maximum relative margin and data-dependent regularization","volume":"11","author":"shivaswamy","year":"2010","journal-title":"Journal of Machine Learning Research"},{"key":"ref27","first-page":"489","article-title":"Discovering structure in multiple learning tasks: The TC algorithm","author":"thrun","year":"1996","journal-title":"Proceedings of the Thirteenth International Conference on Machine Learning"},{"key":"ref29","first-page":"1708","article-title":"A theoretical analysis of metric hypothesis transfer learning","author":"perrot","year":"2015","journal-title":"Proceedings of The 32nd International Conference on Machine Learning"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.crma.2012.03.014"},{"key":"ref22","first-page":"2159","article-title":"Minimax multi-task learning and a generalized loss-compositional paradigm for MTL","author":"mehta","year":"2012","journal-title":"NIPS"},{"key":"ref21","first-page":"12 037","article-title":"Pareto multi-task learning","author":"lin","year":"2019","journal-title":"NIPS"},{"key":"ref24","first-page":"230","article-title":"Asymmetric multi-task learning based on task relatedness and loss","author":"lee","year":"2016","journal-title":"Proceedings of the 33rd International Conference on Machine Learning"},{"key":"ref23","first-page":"793","article-title":"Gradnorm: Gradient normalization for adaptive loss balancing in deep multitask networks","author":"chen","year":"2018","journal-title":"ICML"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511804441"},{"key":"ref101","first-page":"1638","article-title":"Contextual string embeddings for sequence labeling","author":"akbik","year":"2018","journal-title":"COLING 2018 27th International Conference on Computational Linguistics"},{"key":"ref25","first-page":"2962","article-title":"Deep asymmetric multi-task feature learning","author":"lee","year":"2018","journal-title":"Proceedings of the 35th International Conference on Machine Learning"},{"key":"ref100","first-page":"1","article-title":"Local rademacher complexity-based learning guarantees for multi-task learning","volume":"19","author":"yousefi","year":"2018","journal-title":"JMLR"},{"key":"ref50","first-page":"745","article-title":"Clustered multi-task learning: A convex formulation","author":"jacob","year":"2008","journal-title":"NIPS"},{"key":"ref51","first-page":"153","article-title":"Multi-task Gaussian process prediction","author":"bonilla","year":"2007","journal-title":"NIPS"},{"key":"ref59","first-page":"1229","article-title":"The bigraphical lasso","author":"kalaitzis","year":"2013","journal-title":"Proceedings of the 30th International Conference on Machine Learning"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1023\/B:MACH.0000008082.80494.e0"},{"key":"ref57","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v29i1.9558","article-title":"Multi-task learning and algorithmic stability","author":"zhang","year":"2015","journal-title":"Proceedings of AAAI"},{"key":"ref56","first-page":"3185","article-title":"Simultaneously leveraging output and task structures for multiple-output regression","author":"rai","year":"2012","journal-title":"Advances in Neural Information Processing Systems 25"},{"key":"ref55","article-title":"Learning task grouping and overlap in multitask learning","author":"kumar","year":"2012","journal-title":"Proceedings of the 29 Th International Conference on Machine Learning"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1145\/1835804.1835954"},{"key":"ref53","first-page":"1867","article-title":"Large margin multi-task metric learning","author":"parameswaran","year":"2010","journal-title":"Advances in Neural Information Processing Systems 23"},{"key":"ref52","first-page":"733","article-title":"A convex formulation for learning task relationships in multi-task learning","author":"zhang","year":"2010","journal-title":"UAI"},{"key":"ref40","article-title":"Trace norm regularised deep multi-task learning","author":"yang","year":"2017","journal-title":"ICLR Workshop Track"},{"key":"ref4","article-title":"Multi-level Lasso for sparse multi-task regression","author":"lozano","year":"2012","journal-title":"ICML"},{"key":"ref3","first-page":"521","article-title":"Learning with whom to share in multi-task feature learning","author":"kang","year":"2011","journal-title":"ICML"},{"key":"ref6","first-page":"2559","article-title":"Probabilistic multi-task feature selection","author":"zhang","year":"2010","journal-title":"NIPS"},{"key":"ref5","first-page":"1638","article-title":"Multi-stage multi-task learning with reduced rank","author":"han","year":"2016","journal-title":"AAAI"},{"key":"ref8","first-page":"1073","article-title":"A probabilistic model for dirty multi-task feature selection","author":"hern\u00e1ndez-lobato","year":"2015","journal-title":"ICML"},{"key":"ref49","first-page":"83","article-title":"Task clustering and gating for bayesian multitask learning","volume":"4","author":"bakker","year":"2003","journal-title":"JMLR"},{"key":"ref7","first-page":"746","article-title":"Learning feature selection dependencies in multi-task learning","author":"hern\u00e1ndez-lobato","year":"2013","journal-title":"NIPS"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1023\/A:1007379606734","article-title":"Multitask learning","volume":"28","author":"caruana","year":"1997","journal-title":"Machine Learning"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/1014052.1014067"},{"key":"ref45","first-page":"1817","article-title":"A framework for learning predictive structures from multiple tasks and unlabeled data","volume":"6","author":"ando","year":"2005","journal-title":"JMLR"},{"key":"ref48","first-page":"737","article-title":"Multi-task learning via conic programming","author":"kato","year":"2008","journal-title":"Advances in Neural Information Processing Systems 20"},{"key":"ref47","first-page":"615","article-title":"Learning multiple tasks with kernel methods","volume":"6","author":"evgeniou","year":"2005","journal-title":"Journal of Machine Learning Research"},{"key":"ref42","article-title":"Towards more reliable transfer learning","author":"wang","year":"2018","journal-title":"CoRR"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-009-5152-4"},{"key":"ref44","first-page":"41","article-title":"Multi-task feature learning","author":"argyriou","year":"2006","journal-title":"NIPS"},{"key":"ref43","first-page":"2550","article-title":"Learning multiple tasks with a sparse matrix-normal penalty","author":"zhang","year":"2010","journal-title":"Advances in Neural Information Processing Systems 23"},{"key":"ref73","article-title":"Multi-task feature selection","author":"obozinski","year":"2006"},{"key":"ref72","first-page":"737","article-title":"Multi-task learning via conic programming","author":"kato","year":"2007","journal-title":"Advances in Neural Information Processing Systems 20"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1002\/rsa.20105"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4419-9467-7"},{"key":"ref76","article-title":"A convex feature learning formulation for latent task structure discovery","author":"jawanpuria","year":"2012","journal-title":"Proceedings of the 29th International Conference on Machine Learning"},{"key":"ref77","article-title":"Heterogeneous-neighborhood-based multi-task local learning algorithms","author":"zhang","year":"2013","journal-title":"Advances in Neural Information Processing Systems 26"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553392"},{"key":"ref75","first-page":"35","article-title":"Multi-task learning for classification with Dirichlet process priors","volume":"8","author":"xue","year":"2007","journal-title":"JMLR"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1145\/1835804.1835952"},{"key":"ref79","first-page":"964","article-title":"A dirty model for multi-task learning","author":"jalali","year":"2010","journal-title":"Advances in Neural Information Processing Systems 23"},{"key":"ref60","first-page":"25","article-title":"A spectral regularization framework for multi-task structure learning","author":"argyriou","year":"2007","journal-title":"Advances in Neural Information Processing Systems 20"},{"key":"ref62","author":"gupta","year":"2000","journal-title":"Matrix Variate Distributions"},{"key":"ref61","first-page":"1099","article-title":"Learning the kernel function via regularization","volume":"6","author":"micchelli","year":"2005","journal-title":"Journal of Machine Learning Research"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1137\/090763184"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1145\/2538028"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143856"},{"key":"ref66","first-page":"2507","article-title":"When is there a representer theorem? vector versus matrix regularizers","volume":"10","author":"argyriou","year":"2009","journal-title":"Journal of Machine Learning Research"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1137\/080716542"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1561\/2200000016"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10599-4_7"},{"key":"ref69","first-page":"463","article-title":"Rademacher and Gaussian complexities: Risk bounds and structural results","volume":"3","author":"bartlett","year":"2002","journal-title":"Journal of Machine Learning Research"},{"key":"ref1","first-page":"1041","article-title":"Domain adaptation with multiple sources","author":"mansour","year":"2008","journal-title":"Advances in Neural Information Processing Systems 21"},{"key":"ref95","article-title":"Deep multi-task representation learning: A tensor factorisation approach","author":"yang","year":"2016"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01270-0_17"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.572"},{"key":"ref108","article-title":"Adashare: Learning what to share for efficient deep multi-task learning","volume":"33","author":"sun","year":"2020","journal-title":"NIPS"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15561-1_16"},{"key":"ref107","first-page":"3994","article-title":"Cross-stitch networks for multi-task learning","author":"misra","year":"2016","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref92","first-page":"265","article-title":"Tensorflow: A system for large-scale machine learning","author":"abadi","year":"2016","journal-title":"Proc USENIX Symp on Operating System Design and Implementation"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2644615"},{"key":"ref91","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014"},{"key":"ref105","first-page":"2650","article-title":"Predicting depth, surface normals and semantic labels with a common multi-scale convolutional architecture","author":"eigen","year":"2015","journal-title":"Proceedings of the IEEE International Conference on Computer Vision"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref104","article-title":"Indoor semantic segmentation using depth information","author":"couprie","year":"2013"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.350"},{"key":"ref111","article-title":"Gradient surgery for multi-task learning","volume":"33","author":"yu","year":"2020","journal-title":"NIPS"},{"key":"ref112","first-page":"6597","article-title":"Multi-task learning with user preferences: Gradient descent with controlled ascent in pareto optimization","author":"mahapatra","year":"2020","journal-title":"ICML"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"ref98","first-page":"527","article-title":"Multi-task learning as multi-objective optimization","author":"sener","year":"2018","journal-title":"NIPS"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1214\/009053605000000282"},{"key":"ref96","article-title":"Trace norm regularised deep multi-task learning","author":"yang","year":"2016"},{"key":"ref97","first-page":"1594","article-title":"Learning multiple tasks with multilinear relationship networks","author":"long","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref10","article-title":"A survey on multi-task learning","author":"zhang","year":"2017","journal-title":"CoRR"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781107359949.008"},{"key":"ref12","first-page":"7482","article-title":"Multi-task learning using uncertainty to weigh losses for scene geometry and semantics","author":"kendall","year":"2018","journal-title":"CVPR"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00197"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299188"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref16","first-page":"2175","article-title":"Self-paced multi-task learning","author":"li","year":"2017","journal-title":"AAAI"},{"key":"ref82","first-page":"1865","article-title":"Regularization techniques for learning with matrices","volume":"13","author":"kakade","year":"2012","journal-title":"Journal of Machine Learning Research"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/351"},{"key":"ref81","first-page":"458","article-title":"Better approximation and faster algorithm using the proximal average","author":"yu","year":"2013","journal-title":"Advances in Neural Information Processing Systems 26"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1007\/11776420_8"},{"key":"ref18","first-page":"1189","article-title":"Self-paced learning for latent variable models","author":"kumar","year":"2010","journal-title":"NIPS"},{"key":"ref19","first-page":"525","article-title":"Multi-task learning as multi-objective optimization","author":"sener","year":"2018","journal-title":"NIPS"},{"key":"ref83","first-page":"55","article-title":"Excess risk bounds for multitask learning with trace norm regularization","author":"maurer","year":"2013","journal-title":"Proceedings of the 26th Annual Conference on Learning Theory"},{"key":"ref114","first-page":"1563","article-title":"Bilevel programming for hyperparameter optimization and meta-learning","author":"franceschi","year":"2018","journal-title":"ICML"},{"key":"ref113","article-title":"Gradient surgery for multi-task learning","author":"yu","year":"2020","journal-title":"NIPS"},{"key":"ref80","first-page":"37","article-title":"Hierarchical regularization cascade for joint learning","author":"zweig","year":"2013","journal-title":"Proceedings of the 30th International Conference on Machine Learning"},{"key":"ref115","first-page":"8024","article-title":"Pytorch: An imperative style, high-performance deep learning library","author":"paszke","year":"2019","journal-title":"Advances in Neural Information Processing Systems 32 Annual Conference on Neural Information Processing Systems 2019 NeurIPS 2019"},{"key":"ref89","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2014"},{"key":"ref85","first-page":"1854","article-title":"Encoding tree sparsity in multi-task learning: A probabilistic framework","author":"han","year":"2014","journal-title":"AAAI"},{"key":"ref86","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v29i1.9581","article-title":"Learning multi-level task groups in multi-task learning","author":"han","year":"2015","journal-title":"Proceedings of the 29th AAAI Conference on Artificial Intelligence"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1145\/2783258.2783393"},{"key":"ref88","article-title":"Learning sparse task relations in multi-task learning","author":"zhang","year":"2017","journal-title":"Proceedings of the 31th AAAI Conference on Artificial Intelligence"}],"event":{"name":"2021 IEEE International Conference on Big Data (Big Data)","location":"Orlando, FL, USA","start":{"date-parts":[[2021,12,15]]},"end":{"date-parts":[[2021,12,18]]}},"container-title":["2021 IEEE International Conference on Big Data (Big Data)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9671263\/9671273\/09671640.pdf?arnumber=9671640","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,22]],"date-time":"2023-01-22T17:07:41Z","timestamp":1674407261000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9671640\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,15]]},"references-count":115,"URL":"https:\/\/doi.org\/10.1109\/bigdata52589.2021.9671640","relation":{},"subject":[],"published":{"date-parts":[[2021,12,15]]}}}