{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,6]],"date-time":"2025-07-06T10:10:07Z","timestamp":1751796607448,"version":"3.41.0"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319962917"},{"type":"electronic","value":"9783319962924"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-96292-4_9","type":"book-chapter","created":{"date-parts":[[2018,8,13]],"date-time":"2018-08-13T14:29:34Z","timestamp":1534170574000},"page":"105-117","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Comparative Study of Feature Engineering Techniques for Disease Prediction"],"prefix":"10.1007","author":[{"given":"Khandaker Tasnim","family":"Huq","sequence":"first","affiliation":[]},{"given":"Abdus Selim","family":"Mollah","sequence":"additional","affiliation":[]},{"given":"Md. Shakhawat Hossain","family":"Sajal","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,8,14]]},"reference":[{"key":"9_CR1","unstructured":"Beckhardt, B., Keselman, L., Perez, A.: CS229 Project: Doctor Bayes (2015)"},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Subotin, M., Davis, A.R.: A system for predicting ICD-10-PCS codes from electronic health records. In: Proceedings of BioNLP, pp. 59\u201367, June 2014","DOI":"10.3115\/v1\/W14-3409"},{"key":"9_CR3","doi-asserted-by":"crossref","unstructured":"Quwaider, M., Alfaqeeh, M.: Social networks benchmark dataset for diseases classification. In: IEEE International Conference on Future Internet of Things and Cloud Workshops (FiCloudW), pp. 234\u2013239. IEEE, August 2016","DOI":"10.1109\/W-FiCloud.2016.56"},{"issue":"1","key":"9_CR4","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1016\/S0933-3657(01)00077-X","volume":"23","author":"I Kononenko","year":"2001","unstructured":"Kononenko, I.: Machine learning for medical diagnosis: history, state of the art and perspective. Artif. Intell. Med. 23(1), 89\u2013109 (2001)","journal-title":"Artif. Intell. Med."},{"key":"9_CR5","doi-asserted-by":"crossref","unstructured":"McCowan, I., Moore, D., Fry, M.J.: Classification of cancer stage from free-text histology reports. In: 28th Annual International Conference of the IEEE Engineering in Medicine and Biology Society 2006. EMBS 2006, pp. 5153\u20135156. IEEE, August 2006","DOI":"10.1109\/IEMBS.2006.259563"},{"key":"9_CR6","doi-asserted-by":"crossref","unstructured":"Yao, L., Zhang, Y., Wei, B., Li, Z., Huang, X.: Traditional Chinese medicine clinical records classification using knowledge-powered document embedding. In: 2016 IEEE International Conference on Bioinformatics and Biomedicine (BIBM), pp. 1926\u20131928. IEEE, December 2016","DOI":"10.1109\/BIBM.2016.7822817"},{"key":"9_CR7","doi-asserted-by":"crossref","unstructured":"Parlak, B., Uysal, A.K.: The impact of feature selection on medical document classification. In: 2016 11th Iberian Conference on Information Systems and Technologies (CISTI), pp. 1\u20135. IEEE, June 2016","DOI":"10.1109\/CISTI.2016.7521524"},{"key":"9_CR8","doi-asserted-by":"crossref","unstructured":"Parlak, B., Uysal, A.K.: Classification of medical documents according to diseases. In: 2015 23th Signal Processing and Communications Applications Conference (SIU), pp. 1635\u20131638. IEEE, May 2015","DOI":"10.1109\/SIU.2015.7130164"},{"key":"9_CR9","unstructured":"Frobenius Norm (n.d.). http:\/\/mathworld.wolfram.com\/FrobeniusNorm.html . Accessed 27 Aug 2017"},{"key":"9_CR10","unstructured":"Regularization: Simple Definition, L1 & L2 Penalties (n.d.). http:\/\/www.statisticshowto.com\/regularization\/ . Accessed 27 Aug 2017"},{"key":"9_CR11","unstructured":"Stochastic Gradient Descent (n.d.). http:\/\/scikit-learn.org\/stable\/modules\/sgd.html . Accessed 29 Aug 2017"},{"key":"9_CR12","doi-asserted-by":"crossref","unstructured":"Levner, I.: Feature selection and nearest centroid classification for protein mass spectrometry, 23 March 2005. https:\/\/bmcbioinformatics.biomedcentral.com\/articles\/10.1186\/1471-2105-6-68 . Accessed 29 Aug 2017","DOI":"10.1186\/1471-2105-6-68"},{"key":"9_CR13","unstructured":"Support Vector Machines (n.d.). http:\/\/scikit-learn.org\/stable\/modules\/svm.html#svm-mathematical-formulation . Accessed 29 Aug 2017"},{"key":"9_CR14","unstructured":"Scaling the regularization parameter for SVCs (n.d.). http:\/\/scikit-learn.org\/stable\/auto_examples\/svm\/plot_svm_scale_c.htm . Accessed 29 Aug 2017"},{"key":"9_CR15","first-page":"3","volume":"1","author":"RA Fisher","year":"1921","unstructured":"Fisher, R.A.: On the probable error of a coefficient of correlation deduced from a small sample. Metron 1, 3\u201332 (1921)","journal-title":"Metron"},{"key":"9_CR16","unstructured":"Kuhn, M.: The caret Package 23 July 2017. https:\/\/topepo.github.io\/caret\/recursive-feature-elimination.html . Accessed 27 Aug 2017"},{"issue":"2","key":"9_CR17","doi-asserted-by":"publisher","first-page":"233","DOI":"10.3109\/10826089809115863","volume":"33","author":"M Buscema","year":"1998","unstructured":"Buscema, M.: Back propagation neural networks. Subst. Use Misuse 33(2), 233\u2013270 (1998)","journal-title":"Subst. Use Misuse"},{"issue":"1","key":"9_CR18","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1016\/j.compag.2010.06.009","volume":"74","author":"T Rumpf","year":"2010","unstructured":"Rumpf, T., Mahlein, A.K., Steiner, U., Oerke, E.C., Dehne, H.W., Plumer, L.: Early detection and classification of plant diseases with support vector machines based on hyperspectral reflectance. Comput. Electron. Agric. 74(1), 91\u201399 (2010)","journal-title":"Comput. Electron. Agric."},{"key":"9_CR19","unstructured":"Mayo Clinic: Symptoms (n.d). http:\/\/www.mayoclinic.org\/symptoms"},{"key":"9_CR20","unstructured":"Reitz, K.: HTML Scraping (2016). http:\/\/python-guide-pt-br.readthedocs.io\/en\/latest\/scenarios\/scrape\/"},{"key":"9_CR21","unstructured":"WebMd: Symptoms A-Z (n.d). http:\/\/symptomchecker.webmd.com\/symptoms-a-z"},{"key":"9_CR22","unstructured":"NHS choices: Health A-Z Conditions and treatments (n.d). http:\/\/www.nhs.uk\/Conditions\/Pages\/hub.aspx"},{"key":"9_CR23","unstructured":"Healthline: Health Topics (n.d). http:\/\/www.healthline.com\/directory\/topics"},{"key":"9_CR24","unstructured":"Zhu, X.: Basic Text Process (2010). http:\/\/pages.cs.wisc.edu\/jerryzhu\/cs769\/text_preprocessing.pdf . Accessed 14 June 2017"},{"key":"9_CR25","unstructured":"NLTK Project: Source code for nltk.stem.snowball [Scholarly project]. In: NLTK 3.2.4 documentation, 21 May 2017. http:\/\/www.nltk.org\/_modules\/nltk\/stem\/snowball.html . Accessed 14 June 2017"},{"issue":"4","key":"9_CR26","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1147\/rd.14.0309","volume":"1","author":"HP Luhn","year":"1957","unstructured":"Luhn, H.P.: A statistical approach to mechanized encoding and searching of literary information. IBM J. Res. Dev. 1(4), 309\u2013317 (1957)","journal-title":"IBM J. Res. Dev."},{"key":"9_CR27","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., Blondel, M., Prettenhofer, P., Weiss, R., Dubourg, V., Vanderplas, J., Passos, A., Cournapeau, D., Brucher, M., Perrot, M., Duchesnay, E.: Scikit-learn: machine learning in Python. J. Mach. Lear. Res. 12, 2825\u20132830 (2011). http:\/\/scikit-learn.org\/stable\/about.html#citing-scikit-learn","journal-title":"J. Mach. Lear. Res."},{"key":"9_CR28","doi-asserted-by":"crossref","unstructured":"Rajaraman, A., Ullman, J.D.: Data Mining. Mining of Massive Datasets (PDF), pp. 1\u201317 (2011). http:\/\/doi.org\/10.1017\/CBO9781139058452.002 . ISBN 978-1-139-05845-2","DOI":"10.1017\/CBO9781139058452.002"},{"key":"9_CR29","unstructured":"AP: Unique words count (n.d.). https:\/\/planetcalc.com\/3205\/ . Accessed 30 Aug 2017"},{"key":"9_CR30","unstructured":"Tf-idf: A Single-Page Tutorial - Information Retrieval and Text Mining (n.d.). http:\/\/www.tfidf.com\/ . Accessed 29 Aug 2017"},{"issue":"Jan","key":"9_CR31","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei, D.M., Ng, A.Y., Jordan, M.I.: Latent dirichlet allocation. J. Mach. Lear. Res. 3(Jan), 993\u20131022 (2003)","journal-title":"J. Mach. Lear. Res."},{"key":"9_CR32","unstructured":"Dhillon, I.S., Sra, S.: Generalized Nonnegative Matrix Approximations with Bregman Divergences (2005). http:\/\/papers.nips.cc\/book\/advances-in-neural-information-processing-systems-18-2005 . Accessed 12 July 2017"},{"key":"9_CR33","doi-asserted-by":"publisher","first-page":"788","DOI":"10.1038\/44565","volume":"401","author":"DD Lee","year":"1999","unstructured":"Lee, D.D., Seung, H.S.: Learning the parts of objects by non-negative matrix factorization. Nature 401, 788\u2013791 (1999). https:\/\/doi.org\/10.1038\/44565","journal-title":"Nature"},{"issue":"10","key":"9_CR34","doi-asserted-by":"publisher","first-page":"2756","DOI":"10.1162\/neco.2007.19.10.2756","volume":"19","author":"CJ Lin","year":"2007","unstructured":"Lin, C.J.: Projected gradient methods for nonnegative matrix factorization. Neural Comput. 19(10), 2756\u20132779 (2007)","journal-title":"Neural Comput."},{"key":"9_CR35","unstructured":"Chi2 Feature selection, 04 July 2009. https:\/\/nlp.stanford.edu\/IR-book\/html\/htmledition\/feature-selectionchi2-feature-selection-1.html . Accessed 29 Aug 2017"},{"key":"9_CR36","unstructured":"Generalized Linear Models (n.d.). http:\/\/scikit-learn.org\/stable\/modules\/linear_model.html#linear-model . Accessed 29 Aug 2017"},{"key":"9_CR37","unstructured":"Decision Trees (n.d.). http:\/\/scikit-learn.org\/stable\/modules\/tree.html . Accessed 30 Aug 2017"},{"key":"9_CR38","doi-asserted-by":"crossref","unstructured":"Al-Mubaid, H., Shenify, M.: Improved Bayesian based method for classifying disease documents. In: 2016 World Symposium on Computer Applications and Research (WSCAR), pp. 47\u201352. IEEE, March 2016","DOI":"10.1109\/WSCAR.2016.26"},{"key":"9_CR39","doi-asserted-by":"crossref","unstructured":"Li, Y., Wei, B., Chen, H., Jiang, L., Li, Z.: Cross-domain learning based traditional chinese medicine medical record classification. In: 2015 10th International Conference on Intelligent Systems and Knowledge Engineering (ISKE), pp. 335\u2013340. IEEE, November 2015","DOI":"10.1109\/ISKE.2015.99"},{"key":"9_CR40","doi-asserted-by":"crossref","unstructured":"Zhu, W., Zhang, W., Li, G.Z., He, C., Zhang, L.: A study of damp-heat syndrome classification using Word2vec and TF-IDF. In: 2016 IEEE International Conference on Bioinformatics and Biomedicine (BIBM), pp. 1415\u20131420. IEEE, December 2016","DOI":"10.1109\/BIBM.2016.7822730"}],"container-title":["Communications in Computer and Information Science","Big Data, Cloud and Applications"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-96292-4_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,6]],"date-time":"2025-07-06T09:41:26Z","timestamp":1751794886000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-96292-4_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319962917","9783319962924"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-96292-4_9","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2018]]}}}