Skip to content

Commit

Permalink
Merge pull request #58 from hydralabs/defusedxml
Browse files Browse the repository at this point in the history
Switch to use defusedxml as the default xml loader.
  • Loading branch information
njoyce committed Dec 17, 2015
2 parents 001ea4f + f081dbf commit 71fbe94
Show file tree
Hide file tree
Showing 15 changed files with 175 additions and 45 deletions.
6 changes: 6 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ python:
- "2.7"
sudo: false

cache:
directories:
- $HOME/.cache/pip

env:
global:
- PYTHONPATH=~/gaesdk/google_appengine
Expand All @@ -25,6 +29,8 @@ env:
- SQLALCHEMY_VERSION=">=0.9,<1.0"
- TWISTED_VERSION=">=14,<15"
- TWISTED_VERSION=">=15,<16"
- LXML_VERSION=">=3.4,<3.5"
- LXML_VERSION=">=3.5,<3.6"
# special, see install_optional_dependencies.sh
- GAESDK_VERSION=1.9.30

Expand Down
4 changes: 4 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ versions of PyAMF.
0.8 (Unreleased)
----------------
- Add support for Django>=1.8
- *Backward incompatible* Wrapped all xml parsing in ``defusedxml`` to protect
against any XML entity attacks. See
https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Processing for more
details. Thanks to Nicolas Grégoire (@Agarri_FR) for the report.

0.7.2 (2015-02-18)
------------------
Expand Down
31 changes: 31 additions & 0 deletions cpyamf/amf0.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from cpyamf cimport codec, util, amf3


cdef class Context(codec.Context):
cdef amf3.Context amf3_context


cdef class Decoder(codec.Decoder):
cdef public bint use_amf3
cdef readonly Context context
cdef amf3.Decoder amf3_decoder

cdef object readAMF3(self)
cdef object readLongString(self, bint bytes=?)
cdef object readMixedArray(self)
cdef object readReference(self)
cdef object readTypedObject(self)
cdef void readObjectAttributes(self, object obj_attrs)
cdef object readBytes(self)
cdef object readBoolean(self)


cdef class Encoder(codec.Encoder):
cdef public bint use_amf3
cdef readonly Context context
cdef amf3.Encoder amf3_encoder

cdef inline int _writeEndObject(self) except -1
cdef int writeAMF3(self, o) except -1
cdef int _writeDict(self, dict attrs) except -1
cdef inline int writeReference(self, o) except -2
20 changes: 7 additions & 13 deletions cpyamf/amf0.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ cdef object UnknownClassAlias = pyamf.UnknownClassAlias


cdef class Context(codec.Context):
cdef amf3.Context amf3_context

cpdef int clear(self) except -1:
codec.Context.clear(self)

Expand All @@ -60,10 +58,6 @@ cdef class Decoder(codec.Decoder):
"""
"""

cdef public bint use_amf3
cdef readonly Context context
cdef amf3.Decoder amf3_decoder

def __cinit__(self):
self.use_amf3 = 0

Expand All @@ -72,7 +66,7 @@ cdef class Decoder(codec.Decoder):
self.context = kwargs.pop('context', None)

if self.context is None:
self.context = Context()
self.context = Context(**kwargs)

codec.Codec.__init__(self, *args, **kwargs)

Expand Down Expand Up @@ -244,7 +238,11 @@ cdef class Decoder(codec.Decoder):

cdef object readXML(self):
cdef object data = self.readLongString()
cdef object root = xml.fromstring(data)
cdef object root = xml.fromstring(
data,
forbid_dtd=self.context.forbid_dtd,
forbid_entities=self.context.forbid_entities
)

self.context.addObject(root)

Expand Down Expand Up @@ -301,10 +299,6 @@ cdef class Encoder(codec.Encoder):
The AMF0 Encoder.
"""

cdef public bint use_amf3
cdef readonly Context context
cdef amf3.Encoder amf3_encoder

def __cinit__(self):
self.use_amf3 = 0

Expand All @@ -314,7 +308,7 @@ cdef class Encoder(codec.Encoder):
self.context = kwargs.pop('context', None)

if self.context is None:
self.context = Context()
self.context = Context(**kwargs)

codec.Codec.__init__(self, *args, **kwargs)

Expand Down
6 changes: 5 additions & 1 deletion cpyamf/amf3.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,11 @@ cdef class Decoder(codec.Decoder):
self.stream.read(&buf, ref)
s = PyString_FromStringAndSize(buf, ref)

x = xml.fromstring(s)
x = xml.fromstring(
s,
forbid_dtd=self.context.forbid_dtd,
forbid_entities=self.context.forbid_entities
)
self.context.addObject(x)

return x
Expand Down
2 changes: 2 additions & 0 deletions cpyamf/codec.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ cdef class Context(object):
cdef dict unicodes
cdef dict _strings
cdef public dict extra
cdef public bint forbid_dtd
cdef public bint forbid_entities

cpdef int clear(self) except -1
cpdef object getClassAlias(self, object klass)
Expand Down
13 changes: 11 additions & 2 deletions cpyamf/codec.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -229,12 +229,17 @@ cdef class Context(object):

def __cinit__(self):
self.objects = IndexedCollection()
self.forbid_entities = True
self.forbid_dtd = True

self.clear()

def __init__(self):
def __init__(self, forbid_dtd=True, forbid_entities=True, **kwargs):
self.clear()

self.forbid_entities = forbid_entities
self.forbid_dtd = forbid_dtd

cpdef int clear(self) except -1:
self.objects.clear()

Expand Down Expand Up @@ -341,7 +346,8 @@ cdef class Codec(object):
self.strict = 0
self.timezone_offset = None

def __init__(self, stream=None, strict=False, timezone_offset=None):
def __init__(self, stream=None, strict=False, timezone_offset=None,
forbid_entities=True, forbid_dtd=True):
if not isinstance(stream, BufferedByteStream):
stream = BufferedByteStream(stream)

Expand All @@ -350,6 +356,9 @@ cdef class Codec(object):

self.timezone_offset = timezone_offset

self.context.forbid_entities = <bint>forbid_entities
self.context.forbid_dtd = <bint>forbid_dtd


cdef class Decoder(Codec):
"""
Expand Down
9 changes: 9 additions & 0 deletions install_optional_dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ function install_twisted {
pip install "Twisted${TWISTED_VERSION}"
}

function install_lxml {
if [ -z "${LXML_VERSION}" ]; then
return 0
fi

pip install "lxml${LXML_VERSION}"
}

function install_gae_sdk {
if [ -z "${GAESDK_VERSION}" ]; then
return 0
Expand All @@ -40,4 +48,5 @@ function install_gae_sdk {
install_django
install_sqlalchemy
install_twisted
install_lxml
install_gae_sdk
14 changes: 9 additions & 5 deletions pyamf/amf0.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ class Decoder(codec.Decoder):
Decodes an AMF0 stream.
"""

def buildContext(self):
return Context()
def buildContext(self, **kwargs):
return Context(**kwargs)

def getTypeFunc(self, data):
# great for coverage, sucks for readability
Expand Down Expand Up @@ -380,7 +380,11 @@ def readXML(self):
Read XML.
"""
data = self.readLongString()
root = xml.fromstring(data)
root = xml.fromstring(
data,
forbid_dtd=self.context.forbid_dtd,
forbid_entities=self.context.forbid_entities,
)

self.context.addObject(root)

Expand All @@ -401,8 +405,8 @@ def __init__(self, *args, **kwargs):

self.use_amf3 = kwargs.pop('use_amf3', False)

def buildContext(self):
return Context()
def buildContext(self, **kwargs):
return Context(**kwargs)

def getTypeFunc(self, data):
if self.use_amf3:
Expand Down
18 changes: 11 additions & 7 deletions pyamf/amf3.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,14 +607,14 @@ class Context(codec.Context):
@type classes: C{list}
"""

def __init__(self):
def __init__(self, **kwargs):
self.strings = codec.ByteStringReferenceCollection()
self.classes = {}
self.class_ref = {}

self.class_idx = 0

codec.Context.__init__(self)
codec.Context.__init__(self, **kwargs)

def clear(self):
"""
Expand Down Expand Up @@ -765,8 +765,8 @@ def __init__(self, *args, **kwargs):

codec.Decoder.__init__(self, *args, **kwargs)

def buildContext(self):
return Context()
def buildContext(self, **kwargs):
return Context(**kwargs)

def getTypeFunc(self, data):
if data == TYPE_UNDEFINED:
Expand Down Expand Up @@ -1079,7 +1079,11 @@ def readXML(self):

xmlstring = self.stream.read(ref >> 1)

x = xml.fromstring(xmlstring)
x = xml.fromstring(
xmlstring,
forbid_dtd=self.context.forbid_dtd,
forbid_entities=self.context.forbid_entities,
)
self.context.addObject(x)

return x
Expand Down Expand Up @@ -1136,8 +1140,8 @@ def __init__(self, *args, **kwargs):

codec.Encoder.__init__(self, *args, **kwargs)

def buildContext(self):
return Context()
def buildContext(self, **kwargs):
return Context(**kwargs)

def getTypeFunc(self, data):
"""
Expand Down
24 changes: 18 additions & 6 deletions pyamf/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ class Context(object):
"""
The base context for all AMF [de|en]coding.
@ivar extra: The only public attribute. This is a placeholder for any extra
contextual data that required for different adapters.
@ivar extra: This is a placeholder for any extra contextual data that
required for different adapters.
@type extra: C{dict}
@ivar _objects: A collection of stored references to objects that have
already been visited by this context.
Expand All @@ -158,11 +158,20 @@ class Context(object):
determined by L{pyamf.get_class_alias}
@ivar _unicodes: Lookup of utf-8 encoded byte strings -> string objects
(aka strings/unicodes).
@ivar forbid_dtd: Don't allow DTD in XML documents (decode only). By
default PyAMF will not support potentially malicious XML documents
- e.g. XXE.
@ivar forbid_entities: Don't allow entities in XML documents (decode only).
By default PyAMF will not support potentially malicious XML documents
- e.g. XXE.
"""

def __init__(self):
def __init__(self, forbid_dtd=True, forbid_entities=True):
self._objects = IndexedCollection()

self.forbid_entities = forbid_entities
self.forbid_dtd = forbid_dtd

self.clear()

def clear(self):
Expand Down Expand Up @@ -280,18 +289,21 @@ class _Codec(object):
"""

def __init__(self, stream=None, context=None, strict=False,
timezone_offset=None):
timezone_offset=None, forbid_dtd=True, forbid_entities=True):
if isinstance(stream, basestring) or stream is None:
stream = util.BufferedByteStream(stream)

self.stream = stream
self.context = context or self.buildContext()
self.context = context or self.buildContext(
forbid_dtd=forbid_dtd,
forbid_entities=forbid_entities
)
self.strict = strict
self.timezone_offset = timezone_offset

self._func_cache = {}

def buildContext(self):
def buildContext(self, **kwargs):
"""
A context factory.
"""
Expand Down
9 changes: 6 additions & 3 deletions pyamf/remoting/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,8 @@ def get_fault(data):
return get_fault_class(level, **e)(**e)


def decode(stream, strict=False, logger=None, timezone_offset=None):
def decode(stream, strict=False, logger=None, timezone_offset=None,
**kwargs):
"""
Decodes the incoming stream as a remoting message.
Expand Down Expand Up @@ -641,7 +642,8 @@ def decode(stream, strict=False, logger=None, timezone_offset=None):
pyamf.AMF0,
stream,
strict=strict,
timezone_offset=timezone_offset
timezone_offset=timezone_offset,
**kwargs
)
context = decoder.context

Expand Down Expand Up @@ -669,7 +671,7 @@ def decode(stream, strict=False, logger=None, timezone_offset=None):
return msg


def encode(msg, strict=False, logger=None, timezone_offset=None):
def encode(msg, strict=False, logger=None, timezone_offset=None, **kwargs):
"""
Encodes and returns the L{msg<Envelope>} as an AMF stream.
Expand All @@ -695,6 +697,7 @@ def encode(msg, strict=False, logger=None, timezone_offset=None):
stream,
strict=strict,
timezone_offset=timezone_offset,
**kwargs
)

if msg.amfVersion == pyamf.AMF3:
Expand Down
Loading

0 comments on commit 71fbe94

Please sign in to comment.