Skip to content

JS: Exclude patched libraries from xml-bomb sink #20048

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions javascript/ql/lib/change-notes/2025-07-15-xml-bomb-sinks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Removed `libxmljs` as an XML bomb sink. The underlying libxml2 library now includes [entity reference loop detection](https://github.com/GNOME/libxml2/blob/0c948334a8f5c66d50e9f8992e62998017dc4fc6/NEWS#L905-L908) that prevents XML bomb attacks.
14 changes: 7 additions & 7 deletions javascript/ql/lib/semmle/javascript/frameworks/XmlParsers.qll
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,7 @@ module XML {
override JS::Expr getSourceArgument() { result = this.getArgument(0) }

override predicate resolvesEntities(EntityKind kind) {
// internal entities are always resolved
kind = InternalEntity()
or
not kind = InternalEntity() and
// other entities are only resolved if the configuration option `noent` is set to `true`
exists(JS::Expr noent |
this.hasOptionArgument(1, "noent", noent) and
Expand Down Expand Up @@ -126,8 +124,9 @@ module XML {
override JS::Expr getSourceArgument() { result = this.getArgument(0) }

override predicate resolvesEntities(EntityKind kind) {
// entities are resolved by default
any()
// SAX parsers in libxmljs also inherit libxml2's protection against XML bombs
kind = ExternalEntity(_) or
kind = ParameterEntity(true)
}

override DataFlow::Node getAResult() {
Expand All @@ -149,8 +148,9 @@ module XML {
override JS::Expr getSourceArgument() { result = this.getArgument(0) }

override predicate resolvesEntities(EntityKind kind) {
// entities are resolved by default
any()
// SAX push parsers in libxmljs also inherit libxml2's protection against XML bombs
kind = ExternalEntity(_) or
kind = ParameterEntity(true)
}

override DataFlow::Node getAResult() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@
| domparser.js:11:57:11:59 | src | domparser.js:2:13:2:36 | documen ... .search | domparser.js:11:57:11:59 | src | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | domparser.js:2:13:2:36 | documen ... .search | user-provided value |
| expat.js:6:16:6:36 | req.par ... e-xml") | expat.js:6:16:6:36 | req.par ... e-xml") | expat.js:6:16:6:36 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | expat.js:6:16:6:36 | req.par ... e-xml") | user-provided value |
| jquery.js:4:14:4:16 | src | jquery.js:2:13:2:36 | documen ... .search | jquery.js:4:14:4:16 | src | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | jquery.js:2:13:2:36 | documen ... .search | user-provided value |
| libxml.js:5:21:5:41 | req.par ... e-xml") | libxml.js:5:21:5:41 | req.par ... e-xml") | libxml.js:5:21:5:41 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.js:5:21:5:41 | req.par ... e-xml") | user-provided value |
| libxml.noent.js:5:21:5:41 | req.par ... e-xml") | libxml.noent.js:5:21:5:41 | req.par ... e-xml") | libxml.noent.js:5:21:5:41 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.noent.js:5:21:5:41 | req.par ... e-xml") | user-provided value |
| libxml.sax.js:6:22:6:42 | req.par ... e-xml") | libxml.sax.js:6:22:6:42 | req.par ... e-xml") | libxml.sax.js:6:22:6:42 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.sax.js:6:22:6:42 | req.par ... e-xml") | user-provided value |
| libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | user-provided value |
edges
| closure.js:2:7:2:36 | src | closure.js:3:24:3:26 | src | provenance | |
| closure.js:2:13:2:36 | documen ... .search | closure.js:2:7:2:36 | src | provenance | |
Expand All @@ -31,8 +27,4 @@ nodes
| jquery.js:2:7:2:36 | src | semmle.label | src |
| jquery.js:2:13:2:36 | documen ... .search | semmle.label | documen ... .search |
| jquery.js:4:14:4:16 | src | semmle.label | src |
| libxml.js:5:21:5:41 | req.par ... e-xml") | semmle.label | req.par ... e-xml") |
| libxml.noent.js:5:21:5:41 | req.par ... e-xml") | semmle.label | req.par ... e-xml") |
| libxml.sax.js:6:22:6:42 | req.par ... e-xml") | semmle.label | req.par ... e-xml") |
| libxml.saxpush.js:6:15:6:35 | req.par ... e-xml") | semmle.label | req.par ... e-xml") |
subpaths
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ const express = require('express');
const libxmljs = require('libxmljs');

express().get('/some/path', function(req) {
libxmljs.parseXml(req.param("some-xml")); // $ Alert - libxml expands internal general entities by default
libxmljs.parseXml(req.param("some-xml"));
});
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ const express = require('express');
const libxmljs = require('libxmljs');

express().get('/some/path', function(req) {
libxmljs.parseXml(req.param("some-xml"), { noent: true }); // $ Alert - unguarded entity expansion
libxmljs.parseXml(req.param("some-xml"), { noent: true });
});
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ const libxmljs = require('libxmljs');

express().get('/some/path', function(req) {
const parser = new libxmljs.SaxParser();
parser.parseString(req.param("some-xml")); // $ Alert - the SAX parser expands external entities by default
parser.parseString(req.param("some-xml"));
});
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ const libxmljs = require('libxmljs');

express().get('/some/path', function(req) {
const parser = new libxmljs.SaxPushParser();
parser.push(req.param("some-xml")); // $ Alert - the SAX parser expands external entities by default
parser.push(req.param("some-xml"));
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Removed `lxml` as an XML bomb sink. The underlying libxml2 library now includes [entity reference loop detection](https://github.com/lxml/lxml/blob/f33ac2c2f5f9c4c4c1fc47f363be96db308f2fa6/doc/FAQ.txt#L1077) that prevents XML bomb attacks.
10 changes: 2 additions & 8 deletions python/ql/lib/semmle/python/frameworks/Lxml.qll
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,6 @@ module Lxml {
any(True t)
)
or
kind.isXmlBomb() and
this.getKeywordParameter("huge_tree").getAValueReachingSink().asExpr() = any(True t) and
not this.getKeywordParameter("resolve_entities").getAValueReachingSink().asExpr() =
any(False t)
or
kind.isDtdRetrieval() and
this.getKeywordParameter("load_dtd").getAValueReachingSink().asExpr() = any(True t) and
this.getKeywordParameter("no_network").getAValueReachingSink().asExpr() = any(False t)
Expand Down Expand Up @@ -305,9 +300,8 @@ module Lxml {
// note that there is no `resolve_entities` argument, so it's not possible to turn off XXE :O
kind.isXxe()
or
kind.isXmlBomb() and
this.getKeywordParameter("huge_tree").getAValueReachingSink().asExpr() = any(True t)
or
// XML bombs are deliberately not modeled here: libxml2 has built-in protection via entity
// reference loop detection, so lxml is not vulnerable to XML bomb attacks.
kind.isDtdRetrieval() and
this.getKeywordParameter("load_dtd").getAValueReachingSink().asExpr() = any(True t) and
this.getKeywordParameter("no_network").getAValueReachingSink().asExpr() = any(False t)
Expand Down
4 changes: 2 additions & 2 deletions python/ql/test/library-tests/frameworks/lxml/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@

# XXE vuln (huge_tree=True is no longer reported as a Billion laughs risk; libxml2 guards against it)
parser = lxml.etree.XMLParser(huge_tree=True)
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)

# Safe for both Billion laughs and XXE
parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True)
Expand All @@ -63,5 +63,5 @@
# iterparse configurations ... this doesn't use a parser argument but takes MOST (!) of
# the normal XMLParser arguments. Specifically, it doesn't allow disabling XXE :O

lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
edges
| test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:1:26:1:32 | ControlFlowNode for request | provenance | |
| test.py:1:26:1:32 | ControlFlowNode for request | test.py:19:19:19:25 | ControlFlowNode for request | provenance | |
| test.py:19:5:19:15 | ControlFlowNode for xml_content | test.py:30:34:30:44 | ControlFlowNode for xml_content | provenance | |
| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:5:19:15 | ControlFlowNode for xml_content | provenance | AdditionalTaintStep |
nodes
| test.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
| test.py:1:26:1:32 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:19:5:19:15 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
subpaths
#select
| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:30:34:30:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy