{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,20]],"date-time":"2026-02-20T15:50:49Z","timestamp":1771602649380,"version":"3.50.1"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"8-9","license":[{"start":{"date-parts":[[2019,6,5]],"date-time":"2019-06-05T00:00:00Z","timestamp":1559692800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,6,5]],"date-time":"2019-06-05T00:00:00Z","timestamp":1559692800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Ministry of Electronics & IT"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2019,9]]},"DOI":"10.1007\/s10994-019-05811-4","type":"journal-article","created":{"date-parts":[[2019,6,5]],"date-time":"2019-06-05T17:02:18Z","timestamp":1559754138000},"page":"1369-1393","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["A flexible probabilistic framework for large-margin mixture of experts"],"prefix":"10.1007","volume":"108","author":[{"given":"Archit","family":"Sharma","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9704-7301","authenticated-orcid":false,"given":"Siddhartha","family":"Saxena","sequence":"additional","affiliation":[]},{"given":"Piyush","family":"Rai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,6,5]]},"reference":[{"issue":"1","key":"5811_CR1","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1214\/16-AOS1435","volume":"45","author":"S Balakrishnan","year":"2017","unstructured":"Balakrishnan, S., Wainwright, M. J., Yu, B., et al. (2017). Statistical guarantees for the EM algorithm: From population to sample-based analysis. The Annals of Statistics, 45(1), 77\u2013120.","journal-title":"The Annals of Statistics"},{"key":"5811_CR2","unstructured":"Bishop, C. M., & Svenskn, M. (2002). Bayesian hierarchical mixtures of experts. In: UAI."},{"key":"5811_CR3","unstructured":"Cotter, A., Shalev-Shwartz, S., & Srebro, N. (2013). Learning optimally sparse support vector machines. In: Proceedings of the 30th International Conference on Machine Learning (ICML-13) (pp. 266\u2013274)."},{"key":"5811_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","volume":"39","author":"AP Dempster","year":"1977","unstructured":"Dempster, A. P., Laird, N. M., & Rubin, D. B. (1977). Maximum likelihood from incomplete data via the EM algorithm. Journal of the royal statistical society Series B (Methodological), 39, 1\u201338.","journal-title":"Journal of the royal statistical society Series B (Methodological)"},{"key":"5811_CR5","unstructured":"Henao, R., Yuan, X., & Carin, L. (2014). Bayesian nonlinear support vector machines and discriminative factor modeling. In: Advances in neural information processing systems (pp. 1754\u20131762)."},{"issue":"1","key":"5811_CR6","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1162\/neco.1991.3.1.79","volume":"3","author":"RA Jacobs","year":"1991","unstructured":"Jacobs, R. A., Jordan, M. I., Nowlan, S. J., & Hinton, G. E. (1991). Adaptive mixtures of local experts. Neural Computation, 3(1), 79\u201387.","journal-title":"Neural Computation"},{"issue":"2","key":"5811_CR7","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1162\/neco.1994.6.2.181","volume":"6","author":"MI Jordan","year":"1994","unstructured":"Jordan, M. I., & Jacobs, R. A. (1994). Hierarchical mixtures of experts and the EM algorithm. Neural Computation, 6(2), 181\u2013214.","journal-title":"Neural Computation"},{"issue":"2","key":"5811_CR8","doi-asserted-by":"publisher","first-page":"275","DOI":"10.1007\/s10462-012-9338-y","volume":"42","author":"S Masoudnia","year":"2014","unstructured":"Masoudnia, S., & Ebrahimpour, R. (2014). Mixture of experts: A literature survey. Artificial Intelligence Review, 42(2), 275\u2013293.","journal-title":"Artificial Intelligence Review"},{"key":"5811_CR9","unstructured":"Meeds, E., & Osindero, S. (2006). An alternative infinite mixture of Gaussian process experts. In: Advances in neural information processing systems (pp. 883\u2013890)."},{"key":"5811_CR10","first-page":"2035","volume":"9","author":"H Nickisch","year":"2008","unstructured":"Nickisch, H., & Rasmussen, C. E. (2008). Approximations for binary Gaussian process classification. Journal of Machine Learning Research, 9, 2035\u20132078.","journal-title":"Journal of Machine Learning Research"},{"issue":"1","key":"5811_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1214\/11-BA601","volume":"6","author":"NG Polson","year":"2011","unstructured":"Polson, N. G., Scott, S. L., et al. (2011). Data augmentation for support vector machines. Bayesian Analysis, 6(1), 1\u201323.","journal-title":"Bayesian Analysis"},{"issue":"504","key":"5811_CR12","doi-asserted-by":"publisher","first-page":"1339","DOI":"10.1080\/01621459.2013.829001","volume":"108","author":"NG Polson","year":"2013","unstructured":"Polson, N. G., Scott, J. G., & Windle, J. (2013). Bayesian inference for logistic models using p\u00f3lya-gamma latent variables. Journal of the American statistical Association, 108(504), 1339\u20131349.","journal-title":"Journal of the American statistical Association"},{"key":"5811_CR13","unstructured":"Rahimi, A., & Recht, B. (2008) Random features for large-scale kernel machines. In: Advances in neural information processing systems (pp. 1177\u20131184)."},{"key":"5811_CR14","first-page":"203","volume":"12","author":"L Ren","year":"2011","unstructured":"Ren, L., Du, L., Carin, L., & Dunson, D. (2011). Logistic stick-breaking process. Journal of Machine Learning Research, 12, 203\u2013239.","journal-title":"Journal of Machine Learning Research"},{"key":"5811_CR15","unstructured":"Rigon, T., & Durante, D. (2017). Tractable Bayesian density regression via logit stick-breaking priors. ArXiv e-prints \n                    arXiv:1701.02969\n                    \n                  ."},{"key":"5811_CR16","unstructured":"Scott, J. G., & Sun, L. (2013). Expectation\u2013maximization for logistic regression. ArXiv\u00a0preprint \n                    arXiv:1306.0040\n                    \n                  ."},{"key":"5811_CR17","first-page":"1829","volume":"10","author":"B Shahbaba","year":"2009","unstructured":"Shahbaba, B., & Neal, R. (2009). Nonlinear models using Dirichlet process mixtures. Journal of Machine Learning Research, 10, 1829\u20131850.","journal-title":"Journal of Machine Learning Research"},{"key":"5811_CR18","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511809682","volume-title":"Kernel methods for pattern analysis","author":"J Shawe-Taylor","year":"2004","unstructured":"Shawe-Taylor, J., & Cristianini, N. (2004). Kernel methods for pattern analysis. Cambridge: Cambridge University Press."},{"key":"5811_CR19","unstructured":"Shazeer, N., Mirhoseini, A., Maziarz, K., Davis, A., Le, Q., Hinton, G., & Dean, J. (2017). Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. ArXiv\u00a0preprint \n                    arXiv:1701.06538\n                    \n                  ."},{"key":"5811_CR20","first-page":"211","volume":"1","author":"ME Tipping","year":"2001","unstructured":"Tipping, M. E. (2001). Sparse Bayesian learning and the relevance vector machine. Journal of Machine Learning Research, 1, 211\u2013244.","journal-title":"Journal of Machine Learning Research"},{"key":"5811_CR21","unstructured":"Wang, Z., Djuric, N., Crammer, K., & Vucetic, S. (2011). Trading representability for scalability: Adaptive multi-hyperplane machine for nonlinear classification. In: Proceedings of the 17th ACM SIGKDD international conference on knowledge discovery and data mining (pp. 24\u201332). ACM."},{"key":"5811_CR22","volume-title":"Small-variance asymptotics for dirichlet process mixtures of SVMs","author":"Y Wang","year":"2014","unstructured":"Wang, Y., & Zhu, J. (2014). Small-variance asymptotics for dirichlet process mixtures of SVMs. Palo Alto: AAAI."},{"key":"5811_CR23","unstructured":"Williams, C. K., & Seeger, M. (2001). Using the Nystr\u00f6m method to speed up kernel machines. In: Advances in neural information processing systems (pp. 682\u2013688)."},{"key":"5811_CR24","unstructured":"Xu, L., Jordan, M. I., & Hinton, G. E. (1995). An alternative model for mixtures of experts. In: Advances in neural information processing systems (pp. 633\u2013640)."},{"key":"5811_CR25","unstructured":"Yuan, C., & Neubauer, C. (2009). Variational mixture of Gaussian process experts. In: Advances in neural information processing systems (pp. 1897\u20131904)."},{"issue":"8","key":"5811_CR26","doi-asserted-by":"publisher","first-page":"1177","DOI":"10.1109\/TNNLS.2012.2200299","volume":"23","author":"SE Yuksel","year":"2012","unstructured":"Yuksel, S. E., Wilson, J. N., & Gader, P. D. (2012). Twenty years of mixture of experts. IEEE Transactions on Neural Networks and Learning Systems, 23(8), 1177\u20131193.","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"5811_CR27","unstructured":"Zhou, M. (2016). Softplus regressions and convex polytopes. ArXiv\u00a0e-prints \n                    arXiv:1608.06383\n                    \n                  ."},{"key":"5811_CR28","unstructured":"Zhu, J., Chen, N., & Xing, E.P. (2011). Infinite SVM: A dirichlet process mixture of large-margin kernel machines. In: ICML."}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-019-05811-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10994-019-05811-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-019-05811-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,6,5]],"date-time":"2020-06-05T00:05:41Z","timestamp":1591315541000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10994-019-05811-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,6,5]]},"references-count":28,"journal-issue":{"issue":"8-9","published-print":{"date-parts":[[2019,9]]}},"alternative-id":["5811"],"URL":"https:\/\/doi.org\/10.1007\/s10994-019-05811-4","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,6,5]]},"assertion":[{"value":"26 November 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 April 2019","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 May 2019","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 June 2019","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}