From 1851deb929ee0695fb8d5b48f2ecad92989e490a Mon Sep 17 00:00:00 2001 From: Napalys Klicius Date: Tue, 15 Jul 2025 08:27:56 +0200 Subject: [PATCH 1/4] Removed `libxmljs` from being marked as `sink` for `xml-bomb`. --- .../semmle/javascript/frameworks/XmlParsers.qll | 14 +++++++------- .../query-tests/Security/CWE-776/XmlBomb.expected | 8 -------- .../ql/test/query-tests/Security/CWE-776/libxml.js | 2 +- .../query-tests/Security/CWE-776/libxml.noent.js | 2 +- .../query-tests/Security/CWE-776/libxml.sax.js | 2 +- .../query-tests/Security/CWE-776/libxml.saxpush.js | 2 +- 6 files changed, 11 insertions(+), 19 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/frameworks/XmlParsers.qll b/javascript/ql/lib/semmle/javascript/frameworks/XmlParsers.qll index a451182aa21a..c0a783c17644 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/XmlParsers.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/XmlParsers.qll @@ -49,9 +49,7 @@ module XML { override JS::Expr getSourceArgument() { result = this.getArgument(0) } override predicate resolvesEntities(EntityKind kind) { - // internal entities are always resolved - kind = InternalEntity() - or + not kind = InternalEntity() and // other entities are only resolved if the configuration option `noent` is set to `true` exists(JS::Expr noent | this.hasOptionArgument(1, "noent", noent) and @@ -126,8 +124,9 @@ module XML { override JS::Expr getSourceArgument() { result = this.getArgument(0) } override predicate resolvesEntities(EntityKind kind) { - // entities are resolved by default - any() + // SAX parsers in libxmljs also inherit libxml2's protection against XML bombs + kind = ExternalEntity(_) or + kind = ParameterEntity(true) } override DataFlow::Node getAResult() { @@ -149,8 +148,9 @@ module XML { override JS::Expr getSourceArgument() { result = this.getArgument(0) } override predicate resolvesEntities(EntityKind kind) { - // entities are resolved by default - any() + // SAX push parsers in libxmljs also inherit libxml2's protection against XML bombs + kind = ExternalEntity(_) or + kind = ParameterEntity(true) } override DataFlow::Node getAResult() { diff --git a/javascript/ql/test/query-tests/Security/CWE-776/XmlBomb.expected b/javascript/ql/test/query-tests/Security/CWE-776/XmlBomb.expected index 2b4d41804915..6a5c2adfb7a1 100644 --- a/javascript/ql/test/query-tests/Security/CWE-776/XmlBomb.expected +++ b/javascript/ql/test/query-tests/Security/CWE-776/XmlBomb.expected @@ -5,10 +5,6 @@ | domparser.js:11:57:11:59 | src | domparser.js:2:13:2:36 | documen ... .search | domparser.js:11:57:11:59 | src | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | domparser.js:2:13:2:36 | documen ... .search | user-provided value | | expat.js:6:16:6:36 | req.par ... e-xml") | expat.js:6:16:6:36 | req.par ... e-xml") | expat.js:6:16:6:36 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | expat.js:6:16:6:36 | req.par ... e-xml") | user-provided value | | jquery.js:4:14:4:16 | src | jquery.js:2:13:2:36 | documen ... .search | jquery.js:4:14:4:16 | src | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | jquery.js:2:13:2:36 | documen ... .search | user-provided value | -| libxml.js:5:21:5:41 | req.par ... e-xml") | libxml.js:5:21:5:41 | req.par ... e-xml") | libxml.js:5:21:5:41 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.js:5:21:5:41 | req.par ... e-xml") | user-provided value | -| libxml.noent.js:5:21:5:41 | req.par ... e-xml") | libxml.noent.js:5:21:5:41 | req.par ... e-xml") | libxml.noent.js:5:21:5:41 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.noent.js:5:21:5:41 | req.par ... e-xml") | user-provided value | -| libxml.sax.js:6:22:6:42 | req.par ... e-xml") | libxml.sax.js:6:22:6:42 | req.par ... e-xml") | libxml.sax.js:6:22:6:42 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.sax.js:6:22:6:42 | req.par ... e-xml") | user-provided value | -| libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | user-provided value | edges | closure.js:2:7:2:36 | src | closure.js:3:24:3:26 | src | provenance | | | closure.js:2:13:2:36 | documen ... .search | closure.js:2:7:2:36 | src | provenance | | @@ -31,8 +27,4 @@ nodes | jquery.js:2:7:2:36 | src | semmle.label | src | | jquery.js:2:13:2:36 | documen ... .search | semmle.label | documen ... .search | | jquery.js:4:14:4:16 | src | semmle.label | src | -| libxml.js:5:21:5:41 | req.par ... e-xml") | semmle.label | req.par ... e-xml") | -| libxml.noent.js:5:21:5:41 | req.par ... e-xml") | semmle.label | req.par ... e-xml") | -| libxml.sax.js:6:22:6:42 | req.par ... e-xml") | semmle.label | req.par ... e-xml") | -| libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | semmle.label | req.par ... e-xml") | subpaths diff --git a/javascript/ql/test/query-tests/Security/CWE-776/libxml.js b/javascript/ql/test/query-tests/Security/CWE-776/libxml.js index 6af2da17ef5d..3f16e457dc38 100644 --- a/javascript/ql/test/query-tests/Security/CWE-776/libxml.js +++ b/javascript/ql/test/query-tests/Security/CWE-776/libxml.js @@ -2,5 +2,5 @@ const express = require('express'); const libxmljs = require('libxmljs'); express().get('/some/path', function(req) { - libxmljs.parseXml(req.param("some-xml")); // $ Alert - libxml expands internal general entities by default + libxmljs.parseXml(req.param("some-xml")); }); diff --git a/javascript/ql/test/query-tests/Security/CWE-776/libxml.noent.js b/javascript/ql/test/query-tests/Security/CWE-776/libxml.noent.js index da133cc782e0..de633c04688a 100644 --- a/javascript/ql/test/query-tests/Security/CWE-776/libxml.noent.js +++ b/javascript/ql/test/query-tests/Security/CWE-776/libxml.noent.js @@ -2,5 +2,5 @@ const express = require('express'); const libxmljs = require('libxmljs'); express().get('/some/path', function(req) { - libxmljs.parseXml(req.param("some-xml"), { noent: true }); // $ Alert - unguarded entity expansion + libxmljs.parseXml(req.param("some-xml"), { noent: true }); }); diff --git a/javascript/ql/test/query-tests/Security/CWE-776/libxml.sax.js b/javascript/ql/test/query-tests/Security/CWE-776/libxml.sax.js index 6049a8297a98..dc7ec2ddec00 100644 --- a/javascript/ql/test/query-tests/Security/CWE-776/libxml.sax.js +++ b/javascript/ql/test/query-tests/Security/CWE-776/libxml.sax.js @@ -3,5 +3,5 @@ const libxmljs = require('libxmljs'); express().get('/some/path', function(req) { const parser = new libxmljs.SaxParser(); - parser.parseString(req.param("some-xml")); // $ Alert - the SAX parser expands external entities by default + parser.parseString(req.param("some-xml")); }); diff --git a/javascript/ql/test/query-tests/Security/CWE-776/libxml.saxpush.js b/javascript/ql/test/query-tests/Security/CWE-776/libxml.saxpush.js index 2fc4afc8ce47..15e63bf5d532 100644 --- a/javascript/ql/test/query-tests/Security/CWE-776/libxml.saxpush.js +++ b/javascript/ql/test/query-tests/Security/CWE-776/libxml.saxpush.js @@ -3,5 +3,5 @@ const libxmljs = require('libxmljs'); express().get('/some/path', function(req) { const parser = new libxmljs.SaxPushParser(); - parser.push(req.param("some-xml")); // $ Alert - the SAX parser expands external entities by default + parser.push(req.param("some-xml")); }); From 887d80f49f22392bdc62b79b7d173d733a51d488 Mon Sep 17 00:00:00 2001 From: Napalys Klicius Date: Tue, 15 Jul 2025 09:37:34 +0200 Subject: [PATCH 2/4] Added change note --- javascript/ql/lib/change-notes/2025-07-15-xml-bomb-sinks.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 javascript/ql/lib/change-notes/2025-07-15-xml-bomb-sinks.md diff --git a/javascript/ql/lib/change-notes/2025-07-15-xml-bomb-sinks.md b/javascript/ql/lib/change-notes/2025-07-15-xml-bomb-sinks.md new file mode 100644 index 000000000000..b10509c0e068 --- /dev/null +++ b/javascript/ql/lib/change-notes/2025-07-15-xml-bomb-sinks.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Removed `libxmljs` as an XML bomb sink. The underlying libxml2 library now includes [entity reference loop detection](https://github.com/GNOME/libxml2/blob/0c948334a8f5c66d50e9f8992e62998017dc4fc6/NEWS#L905-L908) that prevents XML bomb attacks. From 638f6498f0058900ff046ef2af8a087a0af027c3 Mon Sep 17 00:00:00 2001 From: Napalys Klicius Date: Tue, 15 Jul 2025 09:54:55 +0200 Subject: [PATCH 3/4] Removed `lxml.etree.XMLParser` from xml bomb sinks --- python/ql/lib/semmle/python/frameworks/Lxml.qll | 10 ++-------- .../ql/test/library-tests/frameworks/lxml/parsing.py | 4 ++-- .../Security/CWE-776-XmlBomb/XmlBomb.expected | 10 ---------- 3 files changed, 4 insertions(+), 20 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index c503d7d5cfbc..e043093ed49e 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -129,11 +129,6 @@ module Lxml { any(True t) ) or - kind.isXmlBomb() and - this.getKeywordParameter("huge_tree").getAValueReachingSink().asExpr() = any(True t) and - not this.getKeywordParameter("resolve_entities").getAValueReachingSink().asExpr() = - any(False t) - or kind.isDtdRetrieval() and this.getKeywordParameter("load_dtd").getAValueReachingSink().asExpr() = any(True t) and this.getKeywordParameter("no_network").getAValueReachingSink().asExpr() = any(False t) @@ -305,9 +300,8 @@ module Lxml { // note that there is no `resolve_entities` argument, so it's not possible to turn off XXE :O kind.isXxe() or - kind.isXmlBomb() and - this.getKeywordParameter("huge_tree").getAValueReachingSink().asExpr() = any(True t) - or + // libxml2 has built-in protection against XML bombs via entity reference loop detection, + // so lxml is not vulnerable to XML bomb attacks. kind.isDtdRetrieval() and this.getKeywordParameter("load_dtd").getAValueReachingSink().asExpr() = any(True t) and this.getKeywordParameter("no_network").getAValueReachingSink().asExpr() = any(False t) diff --git a/python/ql/test/library-tests/frameworks/lxml/parsing.py b/python/ql/test/library-tests/frameworks/lxml/parsing.py index 63cdc79b4c1d..bff508f24ab0 100644 --- a/python/ql/test/library-tests/frameworks/lxml/parsing.py +++ b/python/ql/test/library-tests/frameworks/lxml/parsing.py @@ -50,7 +50,7 @@ # Billion laughs vuln (also XXE) parser = lxml.etree.XMLParser(huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) # Safe for both Billion laughs and XXE parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) @@ -63,5 +63,5 @@ # iterparse configurations ... this doesn't use a parser argument but takes MOST (!) of # the normal XMLParser arguments. Specifically, it doesn't allow disabling XXE :O -lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file +lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file diff --git a/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected index 8a7d7bc75e3d..e217064d1dfc 100644 --- a/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected +++ b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected @@ -1,14 +1,4 @@ edges -| test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:1:26:1:32 | ControlFlowNode for request | provenance | | -| test.py:1:26:1:32 | ControlFlowNode for request | test.py:19:19:19:25 | ControlFlowNode for request | provenance | | -| test.py:19:5:19:15 | ControlFlowNode for xml_content | test.py:30:34:30:44 | ControlFlowNode for xml_content | provenance | | -| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:5:19:15 | ControlFlowNode for xml_content | provenance | AdditionalTaintStep | nodes -| test.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember | -| test.py:1:26:1:32 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| test.py:19:5:19:15 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | subpaths #select -| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:30:34:30:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value | From ea93b392f790f125f648738c46b3f6fb771b1d45 Mon Sep 17 00:00:00 2001 From: Napalys Klicius Date: Tue, 15 Jul 2025 09:57:02 +0200 Subject: [PATCH 4/4] Added change note for python --- .../ql/lib/change-notes/2025-07-15-xml-bomb-sinks-python.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 python/ql/lib/change-notes/2025-07-15-xml-bomb-sinks-python.md diff --git a/python/ql/lib/change-notes/2025-07-15-xml-bomb-sinks-python.md b/python/ql/lib/change-notes/2025-07-15-xml-bomb-sinks-python.md new file mode 100644 index 000000000000..11ff0181a017 --- /dev/null +++ b/python/ql/lib/change-notes/2025-07-15-xml-bomb-sinks-python.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Removed `lxml` as an XML bomb sink. The underlying libxml2 library now includes [entity reference loop detection](https://github.com/lxml/lxml/blob/f33ac2c2f5f9c4c4c1fc47f363be96db308f2fa6/doc/FAQ.txt#L1077) that prevents XML bomb attacks. pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy