{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,12]],"date-time":"2026-06-12T20:10:03Z","timestamp":1781295003638,"version":"3.54.1"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2016,9,12]],"date-time":"2016-09-12T00:00:00Z","timestamp":1473638400000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Comput Virol Hack Tech"],"published-print":{"date-parts":[[2018,2]]},"DOI":"10.1007\/s11416-016-0283-1","type":"journal-article","created":{"date-parts":[[2016,9,12]],"date-time":"2016-09-12T09:27:27Z","timestamp":1473672447000},"page":"1-20","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":107,"title":["An investigation of byte n-gram features for malware classification"],"prefix":"10.1007","volume":"14","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9900-1972","authenticated-orcid":false,"given":"Edward","family":"Raff","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Richard","family":"Zak","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Russell","family":"Cox","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jared","family":"Sylvester","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Paul","family":"Yacci","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Rebecca","family":"Ward","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Anna","family":"Tracy","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mark","family":"McLean","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Charles","family":"Nicholas","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2016,9,12]]},"reference":[{"key":"283_CR1","doi-asserted-by":"crossref","unstructured":"Abou-Assaleh, T., Cercone, N., Keselj, V., Sweidan, R.: N-gram-based detection of new malicious code. In: Proceedings of 28th annual int\u2019l computer software & applications conference, vol. 2, pp. 41\u201342. IEEE (2004)","DOI":"10.1109\/CMPSAC.2004.1342667"},{"key":"283_CR2","doi-asserted-by":"crossref","unstructured":"Aggarwal, C.C., Hinneburg, A., Keim, D.A.: On the Surprising Behavior of Distance Metrics in High Dimensional Spaces. In: van den Bussche, J., Vianu, V. (eds.) Proceedings of 8th international conference on database theory, pp. 420\u2013434. Springer-Verlag (2001)","DOI":"10.1007\/3-540-44503-X_27"},{"key":"283_CR3","doi-asserted-by":"crossref","unstructured":"Banko, M., Brill, E.: Scaling to Very Very Large Corpora for Natural Language Disambiguation. In: Proceedings of the 39th annual meeting on association for computational linguistics, pp. 26\u201333 (2001)","DOI":"10.3115\/1073012.1073017"},{"key":"283_CR4","volume-title":"Dynamic Programming","author":"R Bellman","year":"1957","unstructured":"Bellman, R.: Dynamic Programming. Princeton University Press, Princeton (1957)"},{"key":"283_CR5","unstructured":"Corelan Team. Exploit writing tutorial, part 11: heap spraying demystified (2011). https:\/\/www.corelan.be\/index.php\/2011\/12\/31\/exploit-writing-tutorialpart-11-heap-spraying-demystified\/ (visited on 05\/25\/2016)"},{"issue":"10","key":"283_CR6","doi-asserted-by":"crossref","first-page":"78","DOI":"10.1145\/2347736.2347755","volume":"55","author":"P Domingos","year":"2012","unstructured":"Domingos, P.: A few useful things to know about machine learning. Commun. ACM 55(10), 78\u201387 (2012). (issn: 0001-0782)","journal-title":"Commun. ACM"},{"key":"283_CR7","doi-asserted-by":"crossref","unstructured":"Elovici, Y., Shabtai, A., Moskovitch, R., Tahan, G., Glezer, C.: Applying Machine Learning Techniques for Detection of Malicious Code in Network Traffic. In: Proceedings of the 30th annual German conference on advances in artificial intelligence. In: KI \u201907, pp. 44\u201350. Springer-Verlag, Berlin, Heidelberg. isbn: 978- 3-540-74564-8 (2007)","DOI":"10.1007\/978-3-540-74565-5_5"},{"key":"283_CR8","unstructured":"Freund, Y., Schapire, R.: Experiments with a new boosting algorithm. In: Saitta, L. (ed.) Proceedings of the thirteenth international conference on machine learning (ICML 1996), pp. 148\u2013156. Morgan Kaufmann (1996)"},{"issue":"1","key":"283_CR9","doi-asserted-by":"crossref","first-page":"1","DOI":"10.18637\/jss.v033.i01","volume":"33","author":"J Friedman","year":"2010","unstructured":"Friedman, J., Hastie, T., Tibshirani, R.: Regularization paths for generalized linear models via coordinate descent. J. Stat Softw. 33(1), 1\u201322 (2010)","journal-title":"J. Stat Softw."},{"key":"283_CR10","unstructured":"Gong, P., Ye, J.: A modified orthant-wise limited memory quasi-Newton method with convergence analysis. In: Proceedings of 32nd international conference on machine learning, vol. 37, pp. 276\u2013284 (2015)"},{"key":"283_CR11","doi-asserted-by":"crossref","unstructured":"Griffin, K., Schneider, S., Hu, X., Chiueh, T.-C.: Automatic generation of string signatures for malware detection. In: Lippmann, R., Clark, A. (eds.) Recent Advances in Intrusion Detection, RAID \u201909 Proceedings of the 12th International Symposium on Recent Advances in Intrusion Detection, pp. 101\u2013120 (2009)","DOI":"10.1007\/978-3-642-04342-0_6"},{"issue":"2","key":"283_CR12","doi-asserted-by":"crossref","first-page":"8","DOI":"10.1109\/MIS.2009.36","volume":"24","author":"A Halevy","year":"2009","unstructured":"Halevy, A., Norvig, P., Pereira, F.: The unreasonable effectiveness of data. Intell. Syst. IEEE 24(2), 8\u201312 (2009)","journal-title":"Intell. Syst. IEEE"},{"key":"283_CR13","doi-asserted-by":"crossref","unstructured":"Henchiri, O., Japkowicz, N.: A Feature Selection and Evaluation Scheme for Computer Virus Detection. In: Proceedings of the 6th international conference on data mining. IEEE Computer Society, pp. 891\u2013895. isbn: 0-7695-2701-9 (2006)","DOI":"10.1109\/ICDM.2006.4"},{"key":"283_CR14","doi-asserted-by":"crossref","unstructured":"Ibrahim, A.H., Abdelhalim, M.B., Hussein, H., Fahmy, A.: Analysis of x86 instruction set usage for Windows 7 applications. In: 2nd international conference on computer technology & development, pp. 511\u2013516 (2010)","DOI":"10.1109\/ICCTD.2010.5645851"},{"key":"283_CR15","doi-asserted-by":"crossref","unstructured":"Jain, S., Meena, Y.K.: Byte level n-gram analysis for malware detection. In: Venugopal, K.R., Patnaik, L.M. (eds.) Computer Networks and Intelligent Computing, pp. 51\u201359. Springer, Berlin Heidelberg (2011)","DOI":"10.1007\/978-3-642-22786-8_6"},{"key":"283_CR16","unstructured":"Kephart, J.O., Sorkin, G.B., Arnold, W.C., Chess, D.M., Tesauro, G.J., White, S.R.: Biologically Inspired Defenses Against Computer Viruses. In: Proceedings of the 14th international joint conference on artificial intelligence, vol. 1, pp. 985\u2013996. Morgan Kaufmann (1995). (isbn: 1-55860-363-8)"},{"key":"283_CR17","first-page":"2721","volume":"7","author":"JZ Kolter","year":"2006","unstructured":"Kolter, J.Z., Maloof, M.A.: Learning to detect and classify malicious executables in the wild. J. Mach. Learn. Res. 7, 2721\u20132744 (2006)","journal-title":"J. Mach. Learn. Res."},{"key":"283_CR18","doi-asserted-by":"crossref","unstructured":"Kolter, J.Z., Maloof, M.A.: Learning to detect malicious executablesin the wild. In: Proceedings of the 2004 ACM SIGKDD international conference on knowledge discovery and data mining, pp. 470\u2013478. ACM Press (2004)","DOI":"10.1145\/1014052.1014105"},{"issue":"6","key":"283_CR19","doi-asserted-by":"crossref","first-page":"541","DOI":"10.1016\/0167-4048(95)00012-W","volume":"14","author":"RW Lo","year":"1995","unstructured":"Lo, R.W., Levitt, K.N., Olsson, R.A.: Refereed paper: MCF: a malicious code filter. Comput. Secur. 14(6), 541\u2013566 (1995). issn: 0167-4048","journal-title":"Comput. Secur."},{"issue":"2","key":"283_CR20","doi-asserted-by":"crossref","first-page":"40","DOI":"10.1109\/MSP.2007.48","volume":"5","author":"R Lyda","year":"2007","unstructured":"Lyda, R., Hamrock, J.: Using entropy analysis to find encrypted and packed malware. IEEE Secur. Priv. Mag. 5(2), 40\u201345 (2007)","journal-title":"IEEE Secur. Priv. Mag."},{"issue":"1","key":"283_CR21","doi-asserted-by":"crossref","first-page":"33","DOI":"10.1007\/s10796-007-9054-3","volume":"10","author":"MM Masud","year":"2008","unstructured":"Masud, M.M., Khan, L., Thuraisingham, B.: A scalable multi-level feature extraction technique to detect malicious executables. Inf. Syst. Front. 10(1), 33\u201345 (2008)","journal-title":"Inf. Syst. Front."},{"issue":"3","key":"283_CR22","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/2019618.2019622","volume":"2","author":"MM Masud","year":"2011","unstructured":"Masud, M.M., Al-Khateeb, T.M., Hamlen, K.W., Gao, J., Khan, L., Han, J., Thuraisingham, B.: Cloudbased malware detection for evolving data streams. ACM Trans. Manag. Inf. Syst. 2(3), 1\u201327 (2011)","journal-title":"ACM Trans. Manag. Inf. Syst."},{"issue":"4","key":"283_CR23","doi-asserted-by":"crossref","first-page":"1483","DOI":"10.1016\/j.csda.2008.10.015","volume":"53","author":"E Menahem","year":"2009","unstructured":"Menahem, E., Shabtai, A., Rokach, L., Elovici, Y.: Improving malware detection by applying multi-inducer ensemble. Comput. Stat. Fata Anal. 53(4), 1483\u20131494 (2009). issn: 0167-9473","journal-title":"Comput. Stat. Fata Anal."},{"key":"283_CR24","unstructured":"Microsoft Portable Executable and Common Object File Format Specification Version 8.3. Tech. rep. Microsoft, p. 98 (2013)"},{"issue":"4","key":"283_CR25","doi-asserted-by":"crossref","first-page":"295","DOI":"10.1007\/s11416-009-0122-8","volume":"5","author":"R Moskovitch","year":"2009","unstructured":"Moskovitch, R., Stopel, D., Feher, C., Nissim, N., Japkowicz, N., Elovici, Y.: Unknown malcode detection and the imbalance problem. J. Comput. Virol. 5(4), 295\u2013308 (2009)","journal-title":"J. Comput. Virol."},{"key":"283_CR26","doi-asserted-by":"crossref","unstructured":"Ng, A.Y.: Feature selection, $$L_{1}$$ L 1 vs. $$L_{2}$$ L 2 regularization, and rotational invariance. In: Proceedings of 21st international conference on machine learning, pp. 78\u201386 (2004)","DOI":"10.1145\/1015330.1015435"},{"key":"283_CR27","doi-asserted-by":"crossref","unstructured":"Perdisci, R., Lanzi, A., Lee, W.: McBoost: boosting scalability in malware collection and analysis using statistical classification of executables. In: Annual computer security applications conference (ACSAC), pp. 301\u2013310. IEEE (2008)","DOI":"10.1109\/ACSAC.2008.22"},{"key":"283_CR28","unstructured":"Quinlan, J.R.: C4.5: programs for machine learning. Vol. 1(3) of Morgan Kaufmann series in Machine Learning. Morgan Kaufmann (1993). isbn: 1558602380"},{"key":"283_CR29","unstructured":"Quist, D.: Open malware. http:\/\/openmalware.org\/ (visited on 05\/25\/2016)"},{"issue":"3","key":"283_CR30","doi-asserted-by":"crossref","first-page":"231","DOI":"10.1007\/s11416-006-0027-8","volume":"2","author":"DKS Reddy","year":"2006","unstructured":"Reddy, D.K.S., Pujari, A.K.: N-gram analysis for computer virus detection. J. Comput. Virol. 2(3), 231\u2013239 (2006)","journal-title":"J. Comput. Virol."},{"key":"283_CR31","unstructured":"Roberts, J.-M.: Virus share. https:\/\/virusshare.com\/ (visited on 05\/25\/2016)"},{"key":"283_CR32","doi-asserted-by":"crossref","unstructured":"Santos, I., Penya, Y.K., Devesa, J., Bringas, P.G.: N-grams-based file signatures for malware detection. In: Proceedings of 11th international conference on enterprise information systems, pp. 317\u2013320 (2009)","DOI":"10.5220\/0001863603170320"},{"key":"283_CR33","doi-asserted-by":"crossref","unstructured":"Schultz, M., Eskin, E., Zadok, F., Stolfo, S.: Data mining methods for detection of new malicious executables. In: Proceedings of IEEE symposium on security and privacy, pp. 38\u201349 (2001)","DOI":"10.1109\/SECPRI.2001.924286"},{"issue":"1","key":"283_CR34","doi-asserted-by":"crossref","first-page":"16","DOI":"10.1016\/j.istr.2009.03.003","volume":"14","author":"A Shabtai","year":"2009","unstructured":"Shabtai, A., Moskovitch, R., Elovici, Y., Glezer, C.: Detection of malicious code by applying machine learning classifiers on static features: a state-of-the-art survey. Inf. Secur. Tech. Rep. 14(1), 16\u201329 (2009). issn: 1363-4127","journal-title":"Inf. Secur. Tech. Rep."},{"key":"283_CR35","doi-asserted-by":"crossref","unstructured":"Shafiq, M.Z., Tabish, S.M., Mirza, F., Farooq, M.: PE-Miner: mining structural information to detect malicious executables in realtime. In: Lippmann, R., Clark, A. (eds.) Recent Advances in Intrusion Detection, Springer, Berlin Heidelberg, pp. 121\u2013141 (2009)","DOI":"10.1007\/978-3-642-04342-0_7"},{"key":"283_CR36","doi-asserted-by":"crossref","unstructured":"Stolfo, S.J., Wang, K., Li, W.-J.: Towards stealthy malware detection. In: Christodorescu, M., Jha, S., Maughan, D., Song, D., Wang, C. (eds.) Malware Detection, pp. 231\u2013249. Springer, Berlin Heidelberg (2007). isbn: 978-0-387-44599-1","DOI":"10.1007\/978-0-387-44599-1_11"},{"key":"283_CR37","first-page":"949","volume":"13","author":"G Tahan","year":"2012","unstructured":"Tahan, G., Rokach, L., Shahar, Y.: Mal-ID: automatic malware detection using common segment analysis and meta-features. J. Mach. Learn. Res. 13, 949\u2013979 (2012). issn: 1532-4435","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"283_CR38","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1111\/j.2517-6161.1996.tb02080.x","volume":"58","author":"R Tibshirani","year":"1994","unstructured":"Tibshirani, R.: Regression shrinkage and selection via the lasso. J. R. Stat. Soc. B 58(1), 267\u2013288 (1994)","journal-title":"J. R. Stat. Soc. B"},{"key":"283_CR39","doi-asserted-by":"crossref","unstructured":"Verleysen, M., Fran\u00e7ois, D.: The Curse of Dimensionality in Data Mining and Time Series Prediction. In: Cabestany, J., Prieto, A., Sandoval, F. (eds.) Proceedings of 8th international conference on artificial neural networks: computational intelligence and bioinspired systems, pp. 758\u2013770 (2005)","DOI":"10.1007\/11494669_93"},{"key":"283_CR40","first-page":"3183","volume":"11","author":"G-X Yuan","year":"2010","unstructured":"Yuan, G.-X., Chang, K.-W., Hsieh, C.-J., Lin, C.-J.: A comparison of optimization methods and software for large-scale $$L_{1}$$ L 1 -regularized linear classification. J. Mach. Learn. Res. 11, 3183\u20133234 (2010)","journal-title":"J. Mach. Learn. Res."},{"key":"283_CR41","first-page":"1999","volume":"13","author":"G-X Yuan","year":"2012","unstructured":"Yuan, G.-X., Ho, C.-H., Lin, C.-J.: An improved GLMNET for $$L_{1}$$ L 1 -regularized logistic regression. J. Mach. Learn. Res. 13, 1999\u20132030 (2012)","journal-title":"J. Mach. Learn. Res."},{"key":"283_CR42","doi-asserted-by":"crossref","unstructured":"Zhang, B., Yin, J., Hao, J., Zhang, D., Wang, S.: Malicious codes detection based on ensemble learning. In: Proceedings of the 4th international conference on autonomic and trusted computing, pp. 468\u2013477. Springer-Verlag (2007). isbn: 3-540-73546-1","DOI":"10.1007\/978-3-540-73547-2_48"},{"issue":"2","key":"283_CR43","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1111\/j.1467-9868.2005.00503.x","volume":"67","author":"H Zou","year":"2005","unstructured":"Zou, H., Hastie, T.: Regularization and variable selection via the elastic net. J. R. Stat. Soc. B 67(2), 301\u2013320 (2005)","journal-title":"J. R. Stat. Soc. B"}],"container-title":["Journal of Computer Virology and Hacking Techniques"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11416-016-0283-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11416-016-0283-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11416-016-0283-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T18:11:18Z","timestamp":1749579078000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11416-016-0283-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,9,12]]},"references-count":43,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2018,2]]}},"alternative-id":["283"],"URL":"https:\/\/doi.org\/10.1007\/s11416-016-0283-1","relation":{},"ISSN":["2263-8733"],"issn-type":[{"value":"2263-8733","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,9,12]]}}}