{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T01:48:33Z","timestamp":1778896113304,"version":"3.51.4"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031434112","type":"print"},{"value":"9783031434129","type":"electronic"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43412-9_16","type":"book-chapter","created":{"date-parts":[[2023,9,16]],"date-time":"2023-09-16T20:28:38Z","timestamp":1694896118000},"page":"270-285","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Marvolo: Programmatic Data Augmentation for\u00a0Deep Malware Detection"],"prefix":"10.1007","author":[{"given":"Mike","family":"Wong","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Edward","family":"Raff","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"James","family":"Holt","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ravi","family":"Netravali","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,9,17]]},"reference":[{"key":"16_CR1","unstructured":"Code obfuscation (2021). https:\/\/en.wikibooks.org\/wiki\/X86_Disassembly\/Code_Obfuscation (Accessed 12 June 2023)"},{"key":"16_CR2","unstructured":"Global Ransomware Damage Costs Predicted To Reach \\$20 Billion (USD) By (2021). https:\/\/bit.ly\/3j3bTEB (2021), (Accessed 6 Oct 2021)"},{"key":"16_CR3","unstructured":"Labs Report at RSA: Evasive Malware\u2019s Gone Mainstream. https:\/\/bit.ly\/3p2lH5G (2021), (Accessed 6 Oct 2021)"},{"key":"16_CR4","unstructured":"Yara: The pattern matching swiss knife for malware researchers (and everyone else) (2021). https:\/\/virustotal.github.io\/yara\/, (Accessed 7 Aug 2021)"},{"key":"16_CR5","doi-asserted-by":"crossref","unstructured":"Abedelaziz Mohaisen, O.A.: Unveiling zeus: automated classification of malware samples. In: WWW Companion (2013)","DOI":"10.1145\/2487788.2488056"},{"key":"16_CR6","unstructured":"Anderson, H.S., Roth, P.: EMBER: An Open Dataset for Training Static PE Malware Machine Learning Models. arXiv: 1804.04637 (2018)"},{"key":"16_CR7","unstructured":"Arp, D., et al.: Dos and don\u2019ts of machine learning in computer security. In: USENIX Security Symposium (2022)"},{"key":"16_CR8","doi-asserted-by":"crossref","unstructured":"Bhagat, N., Arora, B.: Intrusion detection using honeypots. In: PDGC (2018)","DOI":"10.1109\/PDGC.2018.8745761"},{"key":"16_CR9","doi-asserted-by":"crossref","unstructured":"Ceschin, F., Pinag\u00e9, F., Castilho, M., Menotti, D., Oliveira, L.S., Gr\u00e9gio, A.: The need for speed: an analysis of brazilian malware classifiers. In: IEEE Security & Privacy (2018)","DOI":"10.1109\/MSEC.2018.2875369"},{"key":"16_CR10","doi-asserted-by":"publisher","unstructured":"Chen, T., Guestrin, C.: Xgboost: a scalable tree boosting system. In: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD 2016, pp. 785\u2013794. Association for Computing Machinery, New York (2016). https:\/\/doi.org\/10.1145\/2939672.2939785","DOI":"10.1145\/2939672.2939785"},{"key":"16_CR11","unstructured":"Flores-Montoya, A., Schulte, E.: Datalog disassembly. In: 29th USENIX Security Symposium (2020)"},{"key":"16_CR12","unstructured":"Harang, R., Rudd, E.M.: Sorel-20m: A large scale benchmark dataset for malicious pe detection (2020)"},{"key":"16_CR13","doi-asserted-by":"crossref","unstructured":"Jamalpur, S., Sai Navya, Y., Raja, P., Tagore, G., Rama Koteswara Rao, G.: Dynamic malware analysis using cuckoo sandbox. In: IEEE (ICICCT) (2018)","DOI":"10.1109\/ICICCT.2018.8473346"},{"key":"16_CR14","doi-asserted-by":"crossref","unstructured":"Jason Wei, K.Z.: Eda: Easy data augmentation techniques for boosting performance on text classification tasks. In: IJCNLP (2020)","DOI":"10.18653\/v1\/D19-1670"},{"key":"16_CR15","unstructured":"Joyce, R.J., Amlani, D., Nicholas, C., Raff, E.: MOTIF: a large malware reference dataset with ground truth family labels. In: The AAAI-22 Workshop on Artificial Intelligence for Cyber Security (AICS) (2022). 10.48550, https:\/\/github.com\/boozallen\/MOTIF, arXiv: 2111.15031"},{"key":"16_CR16","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient Estimation of Word Representations in Vector Space. arXiv:1301.3781v3"},{"key":"16_CR17","doi-asserted-by":"crossref","unstructured":"Nguyen, A.T., Raff, E., Sant-Miller, A.: Would a file by any other name seem as malicious? In: IEEE Big Data (2019)","DOI":"10.1109\/BigData47090.2019.9006132"},{"key":"16_CR18","doi-asserted-by":"crossref","unstructured":"Niall McLaughlin, J.M.d.R.: Data augmentation for opcode sequence based malware detection. arXiv: 2106.11821 (2021)","DOI":"10.1109\/Cyber-RCI55324.2022.10032676"},{"key":"16_CR19","doi-asserted-by":"crossref","unstructured":"Ozgur, F., Catak, Ahmed, J., Sahinbas, K., Hussain Khand, Z.: Data augmentation based malware detection using convolutional neural networks. In: PeerJ Computer Science (2021)","DOI":"10.7717\/peerj-cs.346"},{"key":"16_CR20","unstructured":"Raff, E., Barker, J., Sylvester, J., Brandon, R., Catanzaro, B., Nicholas, C.: Malware Detection by Eating a Whole EXE. arXiv:1710.09435 (Oct 2017)"},{"key":"16_CR21","doi-asserted-by":"publisher","unstructured":"Raff, E., Filar, B., Holt, J.: Getting passive aggressive about false positives: patching deployed malware detectors. In: 2020 International Conference on Data Mining Workshops (ICDMW), pp. 506\u2013515. IEEE (Nov 2020). https:\/\/doi.org\/10.1109\/ICDMW51313.2020.00074, https:\/\/ieeexplore.ieee.org\/document\/9346444\/","DOI":"10.1109\/ICDMW51313.2020.00074"},{"key":"16_CR22","doi-asserted-by":"crossref","unstructured":"Raff, E., Fleshman, W., Zak, R., Anderson, H.S., Filar, B., McLean, M.: Classifying sequences of extreme length with constant memory applied to malware detection. In: AAAI (2021)","DOI":"10.1609\/aaai.v35i11.17131"},{"key":"16_CR23","doi-asserted-by":"crossref","unstructured":"Raff, E., Nicholas, C.: An Alternative to NCD for Large Sequences, Lempel-Ziv Jaccard Distance. In: ACM SIGKDD, pp. 1007\u20131015 (2017)","DOI":"10.1145\/3097983.3098111"},{"key":"16_CR24","doi-asserted-by":"crossref","unstructured":"Raff, E., Nicholas, C., McLean, M.: A new burrows wheeler transform markov distance. In: The Thirty-Fourth AAAI Conference on Artificial Intelligence (2020). https:\/\/arxiv.org\/abs\/1912.13046","DOI":"10.1609\/aaai.v34i04.5994"},{"key":"16_CR25","doi-asserted-by":"crossref","unstructured":"Raff, E., et al.: Automatic yara rule generation using biclustering. In: ACM CCS AISec (2020)","DOI":"10.1145\/3411508.3421372"},{"key":"16_CR26","doi-asserted-by":"crossref","unstructured":"Smith, M.R., et al.: Mind the gap: on bridging the semantic gap between machine learning and malware analysis. In: ACM CCS AISec (2020)","DOI":"10.1145\/3411508.3421373"},{"key":"16_CR27","doi-asserted-by":"crossref","unstructured":"Votipka, D., Rabin, S.M., Micinski, K., Foster, J.S., Mazurek, M.M.: An observational investigation of reverse engineers \u2019 processes. In: USENIX Security Symposium (2019)","DOI":"10.1145\/3290607.3313040"},{"key":"16_CR28","doi-asserted-by":"crossref","unstructured":"You, I., Yim, K.: Malware obfuscation techniques: a brief survey. In: BWCCA (2010)","DOI":"10.1109\/BWCCA.2010.85"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases: Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43412-9_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,16]],"date-time":"2023-09-16T20:31:24Z","timestamp":1694896284000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43412-9_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031434112","9783031434129"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43412-9_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"17 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2023.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"829","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"196","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.63","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Applied Data Science Track: 239 submissions, 58 accepted papers; Demo Track: 31 submissions, 16 accepted papers.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}