diff --git a/intermine/bar_chart.py b/intermine/bar_chart.py index 50f62ebb..edc08ca6 100644 --- a/intermine/bar_chart.py +++ b/intermine/bar_chart.py @@ -102,12 +102,14 @@ def plot_go_vs_p(list_name): def autolabel(rects, ax): i = 0 for rect in rects: - x = rect.get_x() + rect.get_width()/2. + x = rect.get_x() + rect.get_width() / 2. y = rect.get_height() - ax.annotate(gene_count[i], (x, y), xytext=(0, 5), + ax.annotate(gene_count[i], (x, y), + xytext=(0, 5), textcoords="offset points", - ha='center', va='bottom') - i = i+1 + ha='center', + va='bottom') + i = i + 1 autolabel(ax.patches, ax) @@ -169,12 +171,14 @@ def plot_go_vs_count(list_name): def autolabel(rects, ax): i = 0 for rect in rects: - x = rect.get_x() + rect.get_width()/2. + x = rect.get_x() + rect.get_width() / 2. y = rect.get_height() - ax.annotate(annotation_count[i], (x, y), xytext=(0, 5), + ax.annotate(annotation_count[i], (x, y), + xytext=(0, 5), textcoords="offset points", - ha='center', va='bottom') - i = i+1 + ha='center', + va='bottom') + i = i + 1 autolabel(ax.patches, ax) ax.margins(y=0.1) @@ -194,7 +198,7 @@ def get_query(xml): req.pathname2url(xml) r = requests.get(link) list = (r.text).split('\n') - for i in range(0, len(list)-1): + for i in range(0, len(list) - 1): list[i] = list[i].split('\t') return (list) @@ -223,7 +227,7 @@ def query_to_barchart_log(xml, resp): store = store.split(' ') x_val = [] y_val = [] - for i in range(0, len(list)-1): + for i in range(0, len(list) - 1): x_val.append(list[i][1]) y_val.append(float(list[i][2])) @@ -248,12 +252,14 @@ def query_to_barchart_log(xml, resp): def autolabel(rects, ax): i = 0 for rect in rects: - x = rect.get_x() + rect.get_width()/2. + x = rect.get_x() + rect.get_width() / 2. 
y = rect.get_height() - ax.annotate(y_val[i], (x, y), xytext=(0, 5), + ax.annotate(y_val[i], (x, y), + xytext=(0, 5), textcoords="offset points", - ha='center', va='bottom') - i = i+1 + ha='center', + va='bottom') + i = i + 1 autolabel(ax.patches, ax) diff --git a/intermine/constraints.py b/intermine/constraints.py index 8736ccee..6266f8ae 100644 --- a/intermine/constraints.py +++ b/intermine/constraints.py @@ -3,6 +3,7 @@ from intermine.pathfeatures import PathFeature, PATH_PATTERN from intermine.util import ReadableException + class Constraint(PathFeature): """ A class representing constraints on a query @@ -14,6 +15,7 @@ class Constraint(PathFeature): """ child_type = "constraint" + class LogicNode(object): """ A class representing nodes in a logic graph @@ -24,7 +26,6 @@ class LogicNode(object): inherit from this class, which defines methods for overloading built-in operations. """ - def __add__(self, other): """ Overloads + @@ -77,6 +78,7 @@ def __or__(self, other): else: return LogicGroup(self, 'OR', other) + class LogicGroup(LogicNode): """ A logic node that represents two sub-nodes joined in some way @@ -98,7 +100,7 @@ def __init__(self, left, op, right, parent=None): Groups may have a reference to their parent. """ - if not op in self.LEGAL_OPS: + if op not in self.LEGAL_OPS: raise TypeError(op + " is not a legal logical operation") self.parent = parent self.left = left @@ -138,18 +140,22 @@ def get_codes(self): codes.append(node.code) return codes + class LogicParseError(ReadableException): """ An error representing problems in parsing constraint logic. 
""" pass + class EmptyLogicError(ValueError): """ - An error representing the fact that an the logic string to be parsed was empty + An error representing the fact that an the logic string to be parsed was + empty """ pass + class LogicParser(object): """ Parses logic strings into logic groups @@ -160,7 +166,6 @@ class LogicParser(object): robust parsing of logic strings, with the ability to identify syntax errors in such strings. """ - def __init__(self, query): """ Constructor @@ -203,22 +208,17 @@ def get_priority(self, op): @rtype: int """ - return { - "AND": 2, - "OR" : 1, - "(" : 3, - ")" : 3 - }.get(op) + return {"AND": 2, "OR": 1, "(": 3, ")": 3}.get(op) ops = { - "AND" : "AND", - "&" : "AND", - "&&" : "AND", - "OR" : "OR", - "|" : "OR", - "||" : "OR", - "(" : "(", - ")" : ")" + "AND": "AND", + "&": "AND", + "&&": "AND", + "OR": "OR", + "|": "OR", + "||": "OR", + "(": "(", + ")": ")" } def parse(self, logic_str): @@ -249,11 +249,13 @@ def flatten(l): else: ret.append(item) return ret + def canonical(x, d): if x in d: return d[x] else: return re.split("\b", x) + def dedouble(x): if re.search("[()]", x): return list(x) @@ -261,7 +263,7 @@ def dedouble(x): return x logic_str = logic_str.upper() - tokens = [t for t in re.split("\s+", logic_str) if t] + tokens = [t for t in re.split(r'\s+', logic_str) if t] if not tokens: raise EmptyLogicError() tokens = flatten([canonical(x, self.ops) for x in tokens]) @@ -280,8 +282,10 @@ def check_syntax(self, infix_tokens): which should hopefully lead to more informative error messages. This checks for: - - correct operator positions (cannot put two codes next to each other without intervening operators) - - correct grouping (all brackets are matched, and contain valid expressions) + - correct operator positions (cannot put two codes next to each other + without intervening operators) + - correct grouping (all brackets are matched, and contain valid + expressions) @param infix_tokens: The input parsed into a list of tokens. 
@type infix_tokens: iterable @@ -295,22 +299,28 @@ def check_syntax(self, infix_tokens): for token in infix_tokens: if token not in self.ops: if need_an_op: - raise LogicParseError("Expected an operator after: '" + ' '.join(processed) + "'" - + " - but got: '" + token + "'") + raise LogicParseError("Expected an operator after: '" + + ' '.join(processed) + "'" + + " - but got: '" + token + "'") if need_binary_op_or_closing_bracket: - raise LogicParseError("Logic grouping error after: '" + ' '.join(processed) + "'" - + " - expected an operator or a closing bracket") + raise LogicParseError( + "Logic grouping error after: '" + ' '.join(processed) + + "'" + " - expected an operator or a closing bracket") need_an_op = True else: need_an_op = False if token == "(": if processed and processed[-1] not in self.ops: - raise LogicParseError("Logic grouping error after: '" + ' '.join(processed) + "'" - + " - got an unexpeced opening bracket") + raise LogicParseError( + "Logic grouping error after: '" + + ' '.join(processed) + "'" + + " - got an unexpeced opening bracket") if need_binary_op_or_closing_bracket: - raise LogicParseError("Logic grouping error after: '" + ' '.join(processed) + "'" - + " - expected an operator or a closing bracket") + raise LogicParseError( + "Logic grouping error after: '" + + ' '.join(processed) + "'" + + " - expected an operator or a closing bracket") open_brackets += 1 elif token == ")": @@ -354,16 +364,20 @@ def infix_to_postfix(self, infix_tokens): if last_op == "(": if stack: previous_op = stack.pop() - if previous_op != "(": postfix_tokens.append(previous_op) + if previous_op != "(": + postfix_tokens.append(previous_op) break else: postfix_tokens.append(last_op) else: - while stack and self.get_priority(stack[-1]) <= self.get_priority(op): + while stack and self.get_priority( + stack[-1]) <= self.get_priority(op): prev_op = stack.pop() - if prev_op != "(": postfix_tokens.append(prev_op) + if prev_op != "(": + postfix_tokens.append(prev_op) 
stack.append(op) - while stack: postfix_tokens.append(stack.pop()) + while stack: + postfix_tokens.append(stack.pop()) return postfix_tokens def postfix_to_tree(self, postfix_tokens): @@ -396,6 +410,7 @@ def postfix_to_tree(self, postfix_tokens): except IndexError: raise EmptyLogicError() + class CodedConstraint(Constraint, LogicNode): """ A parent class for all constraints that have codes @@ -437,6 +452,7 @@ def __str__(self): Stringify to the code they are refered to by. """ return self.code + def to_string(self): """ Provide a human readable representation of the logic. @@ -454,6 +470,7 @@ def to_dict(self): d.update(op=self.op, code=self.code) return d + class UnaryConstraint(CodedConstraint): """ Constraints which have just a path and an operator @@ -468,6 +485,7 @@ class UnaryConstraint(CodedConstraint): """ OPS = set(['IS NULL', 'IS NOT NULL']) + class BinaryConstraint(CodedConstraint): """ Constraints which have an operator and a value @@ -493,7 +511,9 @@ class BinaryConstraint(CodedConstraint): - NOT LIKE (same as not equal to, but with implied wildcards) """ - OPS = set(['=', '!=', '<', '>', '<=', '>=', 'LIKE', 'NOT LIKE', 'CONTAINS']) + OPS = set( + ['=', '!=', '<', '>', '<=', '>=', 'LIKE', 'NOT LIKE', 'CONTAINS']) + def __init__(self, path, op, value, code="A"): """ Constructor @@ -502,7 +522,8 @@ def __init__(self, path, op, value, code="A"): @param path: The path to constrain @type path: string - @param op: The relationship between the value represented by the path and the value provided (must be a valid operator) + @param op: The relationship between the value represented by the path + and the value provided (must be a valid operator) @type op: string @param value: The value to compare the stored value to @@ -521,6 +542,7 @@ def to_string(self): """ s = super(BinaryConstraint, self).to_string() return " ".join([s, str(self.value)]) + def to_dict(self): """ Return a dict object which can be used to construct a @@ -530,6 +552,7 @@ def to_dict(self): 
d.update(value=str(self.value)) return d + class ListConstraint(CodedConstraint): """ Constraints which refer to an objects membership of lists @@ -549,11 +572,12 @@ class ListConstraint(CodedConstraint): """ OPS = set(['IN', 'NOT IN']) + def __init__(self, path, op, list_name, code="A"): if hasattr(list_name, 'to_query'): q = list_name.to_query() - l = q.service.create_list(q) - self.list_name = l.name + ls = q.service.create_list(q) + self.list_name = ls.name elif hasattr(list_name, "name"): self.list_name = list_name.name else: @@ -567,6 +591,7 @@ def to_string(self): """ s = super(ListConstraint, self).to_string() return " ".join([s, str(self.list_name)]) + def to_dict(self): """ Return a dict object which can be used to construct a @@ -576,6 +601,7 @@ def to_dict(self): d.update(value=str(self.list_name)) return d + class LoopConstraint(CodedConstraint): """ Constraints with refer to object identity @@ -593,7 +619,8 @@ class LoopConstraint(CodedConstraint): """ OPS = set(['IS', 'IS NOT']) - SERIALISED_OPS = {'IS':'=', 'IS NOT':'!='} + SERIALISED_OPS = {'IS': '=', 'IS NOT': '!='} + def __init__(self, path, op, loopPath, code="A"): """ Constructor @@ -602,7 +629,8 @@ def __init__(self, path, op, loopPath, code="A"): @param path: The path to constrain @type path: string - @param op: The relationship between the path and the path provided (must be a valid operator) + @param op: The relationship between the path and the path provided + (must be a valid operator) @type op: string @param loopPath: The path to check for identity against @@ -621,6 +649,7 @@ def to_string(self): """ s = super(LoopConstraint, self).to_string() return " ".join([s, self.loopPath]) + def to_dict(self): """ Return a dict object which can be used to construct a @@ -630,6 +659,7 @@ def to_dict(self): d.update(loopPath=self.loopPath, op=self.SERIALISED_OPS[self.op]) return d + class TernaryConstraint(BinaryConstraint): """ Constraints for broad, general searching over all fields @@ -646,22 
+676,27 @@ class TernaryConstraint(BinaryConstraint): well as the main value. """ OPS = set(['LOOKUP']) + def __init__(self, path, op, value, extra_value=None, code="A"): """ Constructor =========== - @param path: The path to constrain. Here is must be a class, or a reference to a class. + @param path: The path to constrain. Here is must be a class, or a + reference to a class. @type path: string - @param op: The relationship between the path and the path provided (must be a valid operator) + @param op: The relationship between the path and the path provided + (must be a valid operator) @type op: string @param value: The value to check other fields against. @type value: string - @param extra_value: A further value for disambiguation. The meaning of this value varies by class - and configuration. For example, if the class of the object is Gene, then + @param extra_value: A further value for disambiguation. The meaning of + this value varies by class + and configuration. For example, if the class of + the object is Gene, then extra_value will refer to the Organism. @type extra_value: string @@ -681,6 +716,7 @@ def to_string(self): return s else: return " ".join([s, 'IN', self.extra_value]) + def to_dict(self): """ Return a dict object which can be used to construct a @@ -691,6 +727,7 @@ def to_dict(self): d.update(extraValue=self.extra_value) return d + class MultiConstraint(CodedConstraint): """ Constraints for checking membership of a set of values @@ -712,25 +749,30 @@ class MultiConstraint(CodedConstraint): than an object's identity. """ OPS = set(['ONE OF', 'NONE OF']) + def __init__(self, path, op, values, code="A"): """ Constructor =========== - @param path: The path to constrain. Here it must be an attribute of some object. + @param path: The path to constrain. Here it must be an attribute of + some object. 
@type path: string - @param op: The relationship between the path and the path provided (must be a valid operator) + @param op: The relationship between the path and the path provided + (must be a valid operator) @type op: string - @param values: The set of values which the object of the constraint either must or must not belong to. + @param values: The set of values which the object of the constraint + either must or must not belong to. @type values: set or list @param code: The code for this constraint (default = "A") @type code: string """ if not isinstance(values, (set, list)): - raise TypeError("values must be a set or a list, not " + str(type(values))) + raise TypeError("values must be a set or a list, not " + + str(type(values))) self.values = values super(MultiConstraint, self).__init__(path, op, code) @@ -741,6 +783,7 @@ def to_string(self): """ s = super(MultiConstraint, self).to_string() return ' '.join([s, str(self.values)]) + def to_dict(self): """ Return a dict object which can be used to construct a @@ -750,6 +793,7 @@ def to_dict(self): d.update(value=self.values) return d + class RangeConstraint(MultiConstraint): """ Constraints for testing where a value lies relative to a set of ranges @@ -771,14 +815,18 @@ class RangeConstraint(MultiConstraint): 4 WITHIN [1..5, 20..25] => True - The format of the ranges depends on the value being constrained and what range - parsers have been configured on the target server. A common range parser for - biological mines is the one for Locations: + The format of the ranges depends on the value being constrained and what + range parsers have been configured on the target server. 
A common range + parser for biological mines is the one for Locations: Gene.chromosomeLocation OVERLAPS [2X:54321..67890, 3R:12345..456789] """ - OPS = set(['OVERLAPS', 'DOES NOT OVERLAP', 'WITHIN', 'OUTSIDE', 'CONTAINS', 'DOES NOT CONTAIN']) + OPS = set([ + 'OVERLAPS', 'DOES NOT OVERLAP', 'WITHIN', 'OUTSIDE', 'CONTAINS', + 'DOES NOT CONTAIN' + ]) + class IsaConstraint(MultiConstraint): """ @@ -798,6 +846,7 @@ class IsaConstraint(MultiConstraint): """ OPS = set(['ISA']) + class SubClassConstraint(Constraint): """ Constraints on the class of a reference @@ -821,31 +870,35 @@ def __init__(self, path, subclass): Constructor =========== - @param path: The path to constrain. This must refer to a class or a reference to a class. + @param path: The path to constrain. This must refer to a class or a + reference to a class. @type path: str - @param subclass: The class to subclass the path to. This must be a simple class name (not a dotted name) + @param subclass: The class to subclass the path to. This must be a + simple class name (not a dotted name) @type subclass: str """ if not PATH_PATTERN.match(subclass): - raise TypeError + raise TypeError self.subclass = subclass super(SubClassConstraint, self).__init__(path) + def to_string(self): - """ - Provide a human readable representation of the logic. - This method is called by repr. - """ - s = super(SubClassConstraint, self).to_string() - return s + ' ISA ' + self.subclass + """ + Provide a human readable representation of the logic. + This method is called by repr. + """ + s = super(SubClassConstraint, self).to_string() + return s + ' ISA ' + self.subclass + def to_dict(self): - """ - Return a dict object which can be used to construct a - DOM element with the appropriate attributes. - """ - d = super(SubClassConstraint, self).to_dict() - d.update(type=self.subclass) - return d + """ + Return a dict object which can be used to construct a + DOM element with the appropriate attributes. 
+ """ + d = super(SubClassConstraint, self).to_dict() + d.update(type=self.subclass) + return d class TemplateConstraint(object): @@ -853,24 +906,28 @@ class TemplateConstraint(object): A mixin to supply the behaviour and state of constraints on templates ===================================================================== - Constraints on templates can also be designated as "on", "off" or "locked", which refers - to whether they are active or not. Inactive constraints are still configured, but behave - as if absent for the purpose of results. In addition, template constraints can be - editable or not. Only values for editable constraints can be provided when requesting results, - and only constraints that can participate in logic expressions can be editable. + Constraints on templates can also be designated as "on", "off" or "locked", + which refers to whether they are active or not. Inactive constraints are + still configured, but behave as if absent for the purpose of results. + In addition, template constraints can be editable or not. Only values for + editable constraints can be provided when requesting results, and only + constraints that can participate in logic expressions can be editable. """ REQUIRED = "locked" OPTIONAL_ON = "on" OPTIONAL_OFF = "off" + def __init__(self, editable=True, optional="locked"): """ Constructor =========== - @param editable: Whether or not this constraint should accept new values. + @param editable: Whether or not this constraint should accept new + values. @type editable: bool - @param optional: Whether a value for this constraint must be provided when running. + @param optional: Whether a value for this constraint must be provided + when running. 
@type optional: "locked", "on" or "off" """ self.editable = editable @@ -950,6 +1007,7 @@ def to_string(self): else: editable = "non-editable" return '(' + editable + ", " + self.get_switchable_status() + ')' + def separate_arg_sets(self, args): """ A static function to use when building template constraints. @@ -957,9 +1015,9 @@ def separate_arg_sets(self, args): dict -> (dict, dict) - Splits a dictionary of arguments into two separate dictionaries, one with - arguments for the main constraint, and one with arguments for the template - portion of the behaviour + Splits a dictionary of arguments into two separate dictionaries, + one with arguments for the main constraint, and one with arguments + for the template portion of the behaviour """ c_args = {} t_args = {} @@ -972,122 +1030,141 @@ def separate_arg_sets(self, args): c_args[k] = v return (c_args, t_args) + class TemplateUnaryConstraint(UnaryConstraint, TemplateConstraint): def __init__(self, *a, **d): (c_args, t_args) = self.separate_arg_sets(d) UnaryConstraint.__init__(self, *a, **c_args) TemplateConstraint.__init__(self, **t_args) + def to_string(self): """ Provide a template specific human readable representation of the constraint. This method is called by repr. """ - return(UnaryConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) + return (UnaryConstraint.to_string(self) + " " + + TemplateConstraint.to_string(self)) + class TemplateBinaryConstraint(BinaryConstraint, TemplateConstraint): def __init__(self, *a, **d): (c_args, t_args) = self.separate_arg_sets(d) BinaryConstraint.__init__(self, *a, **c_args) TemplateConstraint.__init__(self, **t_args) + def to_string(self): """ Provide a template specific human readable representation of the constraint. This method is called by repr. 
""" - return(BinaryConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) + return (BinaryConstraint.to_string(self) + " " + + TemplateConstraint.to_string(self)) + class TemplateListConstraint(ListConstraint, TemplateConstraint): def __init__(self, *a, **d): (c_args, t_args) = self.separate_arg_sets(d) ListConstraint.__init__(self, *a, **c_args) TemplateConstraint.__init__(self, **t_args) + def to_string(self): """ Provide a template specific human readable representation of the constraint. This method is called by repr. """ - return(ListConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) + return (ListConstraint.to_string(self) + " " + + TemplateConstraint.to_string(self)) + class TemplateLoopConstraint(LoopConstraint, TemplateConstraint): def __init__(self, *a, **d): (c_args, t_args) = self.separate_arg_sets(d) LoopConstraint.__init__(self, *a, **c_args) TemplateConstraint.__init__(self, **t_args) + def to_string(self): """ Provide a template specific human readable representation of the constraint. This method is called by repr. """ - return(LoopConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) + return (LoopConstraint.to_string(self) + " " + + TemplateConstraint.to_string(self)) + class TemplateTernaryConstraint(TernaryConstraint, TemplateConstraint): def __init__(self, *a, **d): (c_args, t_args) = self.separate_arg_sets(d) TernaryConstraint.__init__(self, *a, **c_args) TemplateConstraint.__init__(self, **t_args) + def to_string(self): """ Provide a template specific human readable representation of the constraint. This method is called by repr. 
""" - return(TernaryConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) + return (TernaryConstraint.to_string(self) + " " + + TemplateConstraint.to_string(self)) + class TemplateMultiConstraint(MultiConstraint, TemplateConstraint): def __init__(self, *a, **d): (c_args, t_args) = self.separate_arg_sets(d) MultiConstraint.__init__(self, *a, **c_args) TemplateConstraint.__init__(self, **t_args) + def to_string(self): """ Provide a template specific human readable representation of the constraint. This method is called by repr. """ - return(MultiConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) + return (MultiConstraint.to_string(self) + " " + + TemplateConstraint.to_string(self)) + class TemplateRangeConstraint(RangeConstraint, TemplateConstraint): def __init__(self, *a, **d): (c_args, t_args) = self.separate_arg_sets(d) RangeConstraint.__init__(self, *a, **c_args) TemplateConstraint.__init__(self, **t_args) + def to_string(self): """ Provide a template specific human readable representation of the constraint. This method is called by repr. """ - return(RangeConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) + return (RangeConstraint.to_string(self) + " " + + TemplateConstraint.to_string(self)) + class TemplateIsaConstraint(IsaConstraint, TemplateConstraint): def __init__(self, *a, **d): (c_args, t_args) = self.separate_arg_sets(d) IsaConstraint.__init__(self, *a, **c_args) TemplateConstraint.__init__(self, **t_args) + def to_string(self): """ Provide a template specific human readable representation of the constraint. This method is called by repr. 
""" - return(IsaConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) + return (IsaConstraint.to_string(self) + " " + + TemplateConstraint.to_string(self)) + class TemplateSubClassConstraint(SubClassConstraint, TemplateConstraint): def __init__(self, *a, **d): (c_args, t_args) = self.separate_arg_sets(d) SubClassConstraint.__init__(self, *a, **c_args) TemplateConstraint.__init__(self, **t_args) + def to_string(self): """ Provide a template specific human readable representation of the constraint. This method is called by repr. """ - return(SubClassConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) + return (SubClassConstraint.to_string(self) + " " + + TemplateConstraint.to_string(self)) + class ConstraintFactory(object): """ @@ -1099,9 +1176,10 @@ class ConstraintFactory(object): constraint. """ CONSTRAINT_CLASSES = set([ - UnaryConstraint, BinaryConstraint, TernaryConstraint, - MultiConstraint, SubClassConstraint, LoopConstraint, - ListConstraint, RangeConstraint, IsaConstraint]) + UnaryConstraint, BinaryConstraint, TernaryConstraint, MultiConstraint, + SubClassConstraint, LoopConstraint, ListConstraint, RangeConstraint, + IsaConstraint + ]) def __init__(self): """ @@ -1111,7 +1189,8 @@ def __init__(self): Creates a new ConstraintFactory """ self._codes = iter(string.ascii_uppercase) - self.reference_ops = TernaryConstraint.OPS | RangeConstraint.OPS | ListConstraint.OPS | IsaConstraint.OPS + self.reference_ops = TernaryConstraint.OPS | \ + RangeConstraint.OPS | ListConstraint.OPS | IsaConstraint.OPS def get_next_code(self): """ @@ -1137,10 +1216,11 @@ def make_constraint(self, *args, **kwargs): if hasattr(c, "code") and c.code == "A": c.code = self.get_next_code() return c - except TypeError as e: + except TypeError: pass - raise TypeError("No matching constraint class found for " - + str(args) + ", " + str(kwargs)) + raise TypeError("No matching constraint class found for " + str(args) + + ", " + str(kwargs)) + class 
TemplateConstraintFactory(ConstraintFactory): """ diff --git a/intermine/decorators.py b/intermine/decorators.py index 8b8ef818..d7643dbe 100644 --- a/intermine/decorators.py +++ b/intermine/decorators.py @@ -1,12 +1,12 @@ from functools import wraps from intermine.errors import ServiceError + def requires_version(required): error_fmt = "Service must be at version %s, but is at %s" def decorator(f): - @wraps(f) def wrapper(self, *args, **kwargs): if self.version < required: @@ -16,4 +16,3 @@ def wrapper(self, *args, **kwargs): return wrapper return decorator - diff --git a/intermine/errors.py b/intermine/errors.py index 26af09a9..d608f002 100644 --- a/intermine/errors.py +++ b/intermine/errors.py @@ -1,12 +1,15 @@ from intermine.util import ReadableException + class UnimplementedError(Exception): pass + class ServiceError(ReadableException): """Errors in the creation and use of the Service object""" pass + class WebserviceError(IOError): """Errors from interaction with the webservice""" pass diff --git a/intermine/idresolution.py b/intermine/idresolution.py index c209e321..d1db6dd4 100644 --- a/intermine/idresolution.py +++ b/intermine/idresolution.py @@ -7,6 +7,7 @@ except ImportError: import simplejson as json + def get_json(service, path, key): text = service.opener.read(service.root + path) data = json.loads(text) @@ -16,10 +17,12 @@ def get_json(service, path, key): raise Exception(key + " not returned from " + path) return data[key] + ONE_MINUTE = 60 COMPLETED = set(["SUCCESS", "ERROR"]) + class Job(object): """ A Representation of an Identifier Resolution Job @@ -68,7 +71,8 @@ def fetch_status(self): @rtype: dict """ - return get_json(self.service, "/ids/{0}/status".format(self.uid), "status") + return get_json(self.service, "/ids/{0}/status".format(self.uid), + "status") def delete(self): """ @@ -88,4 +92,5 @@ def fetch_results(self): @rtype String """ - return get_json(self.service, "/ids/{0}/result".format(self.uid), "results") + return 
get_json(self.service, "/ids/{0}/result".format(self.uid), + "results") diff --git a/intermine/lists/list.py b/intermine/lists/list.py index 1cedc8b8..e0ab8a24 100644 --- a/intermine/lists/list.py +++ b/intermine/lists/list.py @@ -14,6 +14,7 @@ from intermine.model import ConstraintNode from intermine.errors import ServiceError + class List(object): """ Class for representing a List on an InterMine Webservice @@ -32,14 +33,17 @@ class List(object): >>> from intermine.webservice import Service >>> >>> flymine = Service("www.flymine.org/query", "SOMETOKEN") - >>> new_list = flymine.create_list(["h", "zen", "eve", "bib"], "Gene", name="My New List") + >>> new_list = flymine.create_list(["h", "zen", "eve", "bib"], "Gene", + name="My New List") >>> >>> another_list = flymine.get_list("Some other list") >>> combined_list = new_list | another_list # Same syntax as for sets >>> combined_list.name = "Union of the other lists" >>> - >>> print "The combination of the two lists has %d elements" % combined_list.size - >>> print "The combination of the two lists has %d elements" % len(combined_list) + >>> print "The combination of the two lists has %d elements" + % combined_list.size + >>> print "The combination of the two lists has %d elements" + % len(combined_list) >>> >>> for row in combined_list: ... print row @@ -47,7 +51,8 @@ class List(object): OVERVIEW -------- - Lists are created from a webservice, and can be manipulated in various ways. + Lists are created from a webservice, and can be manipulated in various + ways. 
The operations are:: * Union: this | that * Intersection: this & that @@ -61,7 +66,8 @@ class List(object): * contained in a string In all these cases the syntax is the same: - >>> new_list = service.create_list(content, type, name="Some name", description="Some description", tags=["some", "tags"]) + >>> new_list = service.create_list(content, type, name="Some name", + description="Some description", tags=["some", "tags"]) Lists can also be created from a query's result with the exact same syntax. In the case of queries, the type is not required, @@ -98,7 +104,8 @@ def __init__(self, **args): self._is_authorized = args.get("authorized") self._status = args.get("status") - if self._is_authorized is None: self._is_authorized = True + if self._is_authorized is None: + self._is_authorized = True if "tags" in args: tags = args["tags"] @@ -137,7 +144,8 @@ def status(self): @property def is_authorized(self): - """Whether or not the current user is authorised to make changes to this list""" + """Whether or not the current user is authorised to make changes to + this list""" return self._is_authorized @property @@ -154,20 +162,16 @@ def set_name(self, new_name): Set the name of the list ======================== - Setting the list's name causes the list's name to be updated on the server. + Setting the list's name causes the list's name to be updated on + the server. """ if self._name == new_name: return uri = self._service.root + self._service.LIST_RENAME_PATH - params = { - "oldname": self._name, - "newname": new_name - } + params = {"oldname": self._name, "newname": new_name} uri += "?" + urlencode(params) resp = self._service.opener.open(uri) - data = resp.read() resp.close() - new_list = self._manager.parse_list_upload_response(data) self._name = new_name def del_name(self): @@ -176,7 +180,8 @@ def del_name(self): @property def size(self): - """Return the number of elements in the list. Also available as len(obj)""" + """Return the number of elements in the list. 
Also available as + len(obj)""" return self._size @property @@ -229,7 +234,8 @@ def to_query(self): def make_list_constraint(self, path, op): """ - Implementation of trait that allows use of these objects in list constraints + Implementation of trait that allows use of these objects in list + constraints """ return ConstraintNode(path, op, self.name) @@ -253,27 +259,29 @@ def display(self): k = 0 while s[k] != '(': k += 1 - s = s[k+1:] + s = s[k + 1:] s = s.split(",") for j in s: print(j.strip()) print() def __iter__(self): - """Return an iterator over the objects in this list, with all attributes selected for output""" + """Return an iterator over the objects in this list, with all + attributes selected for output""" return iter(self.to_query()) def __getitem__(self, index): """Get a member of this list by index""" if not isinstance(index, int): raise IndexError("Expected an integer key - got %s" % (index)) - if index < 0: # handle negative indices. + if index < 0: # handle negative indices. 
i = self.size + index else: i = index if i not in range(self.size): - raise IndexError("%d is not a valid index for a list of size %d" % (index, self.size)) + raise IndexError("%d is not a valid index for a list of size %d" % + (index, self.size)) return self.to_query().first(start=i, row="jsonobjects") @@ -285,10 +293,12 @@ def __and__(self, other): def __iand__(self, other): """ - Intersect this list and another, and replace this list with the result of the - intersection + Intersect this list and another, and replace this list with the + result of the intersection """ - intersection = self._manager.intersect([self, other], description=self.description, tags=self.tags) + intersection = self._manager.intersect([self, other], + description=self.description, + tags=self.tags) self.delete() intersection.name = self.name return intersection @@ -319,10 +329,11 @@ def _do_append(self, content): ids = codecs.open(content, 'r', 'UTF-8').read() except (TypeError, IOError): if hasattr(content, 'strip') and hasattr(content, 'encode'): - ids = content # probably a string. + ids = content # probably a string. 
else: try: - ids = "\n".join(map(lambda x: '"' + x + '"', iter(content))) + ids = "\n".join(map(lambda x: '"' + x + '"', + iter(content))) except TypeError: content = self._manager._get_listable_query(content) uri = content.get_list_append_uri() @@ -348,38 +359,51 @@ def append(self, appendix): """Append the arguments to this list""" try: return self._do_append(self._manager.union(appendix)) - except: + except (TypeError, IOError): return self._do_append(appendix) - def calculate_enrichment(self, widget, background = None, correction = "Holm-Bonferroni", maxp = 0.05, filter = ''): + def calculate_enrichment(self, + widget, + background=None, + correction="Holm-Bonferroni", + maxp=0.05, + filter=''): """ Perform an enrichment calculation on this list ============================================== example:: - >>> for item in service.get_list("some list").calculate_enrichment("thingy_enrichment"): + >>> for item in service.get_list("some list") + .calculate_enrichment("thingy_enrichment"): ... print item.identifier, item.p_value - Gets an iterator over the rows for an enrichment calculation. Each row represents - a record with the following properties: + Gets an iterator over the rows for an enrichment calculation. 
+ Each row represents a record with the following properties: * identifier {str} * p-value {float} * matches {int} * description {str} - The enrichment row object may be treated as an object with property access, or as - a dictionary, supporting key lookup with the [] operator: + The enrichment row object may be treated as an object with property + access, or as a dictionary, supporting key lookup with the [] operator: >>> p_value = row['p-value'] """ if self._service.version < 8: - raise ServiceError("This service does not support enrichment requests") - params = dict(list = self.name, widget = widget, correction = correction, maxp = maxp, filter = filter) + raise ServiceError( + "This service does not support enrichment requests") + params = dict(list=self.name, + widget=widget, + correction=correction, + maxp=maxp, + filter=filter) if background is not None: if self._service.version < 11: - raise ServiceError("This service does not support custom background populations") + raise ServiceError( + "This service does not support custom background " + + "populations") params["population"] = background form = urlencode(params) uri = self._service.root + self._service.LIST_ENRICHMENT_PATH @@ -391,8 +415,11 @@ def __xor__(self, other): return self._manager.xor([self, other]) def __ixor__(self, other): - """Calculate the symmetric difference of this list and another and replace this list with the result""" - diff = self._manager.xor([self, other], description=self.description, tags=self.tags) + """Calculate the symmetric difference of this list and another and + replace this list with the result""" + diff = self._manager.xor([self, other], + description=self.description, + tags=self.tags) self.delete() diff.name = self.name return diff @@ -402,8 +429,12 @@ def __sub__(self, other): return self._manager.subtract([self], [other]) def __isub__(self, other): - """Replace this list with the subtraction of the other from this list""" - subtr = self._manager.subtract([self],
[other], description=self.description, tags=self.tags) + """ + Replace this list with the subtraction of the other from this list + """ + subtr = self._manager.subtract([self], [other], + description=self.description, + tags=self.tags) self.delete() subtr.name = self.name return subtr diff --git a/intermine/lists/listmanager.py b/intermine/lists/listmanager.py index 65c7c566..206db850 100644 --- a/intermine/lists/listmanager.py +++ b/intermine/lists/listmanager.py @@ -28,7 +28,6 @@ from urllib.parse import urlencode -import urllib import codecs from intermine.errors import WebserviceError @@ -47,7 +46,6 @@ def safe_key(maybe_unicode): class ListManager(object): - """ A Class for Managing List Content and Operations ================================================ @@ -92,13 +90,14 @@ def refresh_lists(self): self.LOG.debug('LIST INFO: {0}'.format(list_info)) if not list_info.get('wasSuccessful'): raise ListServiceError(list_info.get('error')) - for l in list_info['lists']: + for item in list_info['lists']: # Workaround for python 2.6 unicode key issues - l = ListManager.safe_dict(l) - self.lists[l['name']] = List(service=self.service, - manager=self, **l) + item = ListManager.safe_dict(item) + self.lists[item['name']] = List(service=self.service, + manager=self, + **item) @staticmethod def safe_dict(d): @@ -116,7 +115,7 @@ def get_list(self, name): self.refresh_lists() return self.lists.get(name) - def l(self, name): + def ls(self, name): """Alias for get_list""" return self.get_list(name) @@ -182,11 +181,11 @@ def _get_listable_query(self, queryable): return q def _create_list_from_queryable( - self, - queryable, - name, - description, - tags, + self, + queryable, + name, + description, + tags, ): q = self._get_listable_query(queryable) @@ -202,14 +201,14 @@ def _create_list_from_queryable( return self.parse_list_upload_response(data) def create_list( - self, - content, - list_type='', - name=None, - description=None, - tags=[], - add=[], - organism=None, + self, +
content, + list_type='', + name=None, + description=None, + tags=[], + add=[], + organism=None, ): """ Create a new list in the webservice @@ -299,8 +298,8 @@ def create_list( ids = item_content.read() # File like thing except AttributeError: try: - with closing(codecs.open(item_content, 'r', 'UTF-8' - )) as c: # File name + with closing(codecs.open(item_content, 'r', + 'UTF-8')) as c: # File name ids = c.read() except (TypeError, IOError): try: @@ -308,16 +307,14 @@ def create_list( except AttributeError: try: # Queryable return self._create_list_from_queryable( - item_content, name, - description, tags) + item_content, name, description, tags) except AttributeError: try: # Array of idents idents = iter(item_content) - ids = '\n'.join( - map('"{0}"'.format, idents)) + ids = '\n'.join(map('"{0}"'.format, idents)) except AttributeError: - raise TypeError('Cannot create list from ' - + repr(item_content)) + raise TypeError('Cannot create list from ' + + repr(item_content)) uri = self.service.root + self.service.LIST_CREATION_PATH query_form = { @@ -342,8 +339,7 @@ def parse_list_upload_response(self, response): try: response_data = json.loads(response.decode('utf8')) except ValueError: - raise ListServiceError('Error parsing response: ' - + response) + raise ListServiceError('Error parsing response: ' + response) if not response_data.get('wasSuccessful'): raise ListServiceError(response_data.get('error')) @@ -366,8 +362,7 @@ def delete_lists(self, lists): else: name = str(l) if name not in all_names: - self.LOG.debug( - '{0} does not exist - skipping'.format(name)) + self.LOG.debug('{0} does not exist - skipping'.format(name)) continue self.LOG.debug('deleting {0}'.format(name)) uri = self.service.root + self.service.LIST_PATH @@ -437,10 +432,10 @@ def __enter__(self): return self def __exit__( - self, - exc_type, - exc_val, - traceback, + self, + exc_type, + exc_val, + traceback, ): self.LOG.debug('Exiting context - deleting {0}'.format( @@ -457,11 +452,11 @@ def 
delete_temporary_lists(self): self._temp_lists = set() def intersect( - self, - lists, - name=None, - description=None, - tags=[], + self, + lists, + name=None, + description=None, + tags=[], ): """ Calculate the intersection of a given set of lists, and return the @@ -478,11 +473,11 @@ def intersect( ) def union( - self, - lists, - name=None, - description=None, - tags=[], + self, + lists, + name=None, + description=None, + tags=[], ): """ Calculate the union of a given set of lists, @@ -499,11 +494,11 @@ def union( ) def xor( - self, - lists, - name=None, - description=None, - tags=[], + self, + lists, + name=None, + description=None, + tags=[], ): """ Calculate the symmetric difference of a given set of lists, @@ -520,12 +515,12 @@ def xor( ) def subtract( - self, - lefts, - rights, - name=None, - description=None, - tags=[], + self, + lefts, + rights, + name=None, + description=None, + tags=[], ): """ Calculate the subtraction of rights from lefts, @@ -553,13 +548,13 @@ def subtract( return self.parse_list_upload_response(data) def _do_operation( - self, - path, - operation, - lists, - name, - description, - tags, + self, + path, + operation, + lists, + name, + description, + tags, ): list_names = self.make_list_names(lists) @@ -586,11 +581,9 @@ def make_list_names(self, lists): list_names = [] for l in lists: try: - t = l.list_type list_names.append(l.name) except AttributeError: try: - m = l.model list_names.append(self.create_list(l).name) except AttributeError: list_names.append(str(l)) @@ -599,7 +592,6 @@ def make_list_names(self, lists): class ListServiceError(WebserviceError): - """Errors thrown when something goes wrong with list requests""" pass diff --git a/intermine/model.py b/intermine/model.py index c49f5efc..c907af34 100644 --- a/intermine/model.py +++ b/intermine/model.py @@ -11,7 +11,6 @@ pass logging.basicConfig() - """ Classes representing the data model =================================== @@ -26,6 +25,7 @@ __license__ = "LGPL" __contact__ = 
"dev@intermine.org" + class Field(object): """ A class representing columns on database tables @@ -47,20 +47,28 @@ class Field(object): - CDSs is a group of CDS objects, which link back to this as gene - GLEANRsymbol is a String - UTRs is a group of UTR objects, which link back to this as gene - - alleles is a group of Allele objects, which link back to this as gene + - alleles is a group of Allele objects, + which link back to this as gene - chromosome is a Chromosome - chromosomeLocation is a Location - - clones is a group of CDNAClone objects, which link back to this as gene - - crossReferences is a group of CrossReference objects, which link back to this as subject + - clones is a group of CDNAClone objects, + which link back to this as gene + - crossReferences is a group of CrossReference objects, + which link back to this as subject - cytoLocation is a String - - dataSets is a group of DataSet objects, which link back to this as bioEntities + - dataSets is a group of DataSet objects, + which link back to this as bioEntities - downstreamIntergenicRegion is a IntergenicRegion - - exons is a group of Exon objects, which link back to this as gene - - flankingRegions is a group of GeneFlankingRegion objects, which link back to this as gene + - exons is a group of Exon objects, + which link back to this as gene + - flankingRegions is a group of GeneFlankingRegion objects, + which link back to this as gene - goAnnotation is a group of GOAnnotation objects - - homologues is a group of Homologue objects, which link back to this as gene + - homologues is a group of Homologue objects, + which link back to this as gene - id is a Integer - - interactions is a group of Interaction objects, which link back to this as gene + - interactions is a group of Interaction objects, + which link back to this as gene - length is a Integer ... 
@@ -99,6 +107,7 @@ def __str__(self): def fieldtype(self): raise Exception("Fields should never be directly instantiated") + class Attribute(Field): """ Attributes represent columns that contain actual data @@ -106,11 +115,11 @@ class Attribute(Field): The Attribute class inherits all the behaviour of L{intermine.model.Field} """ - @property def fieldtype(self): return "attribute" + class Reference(Field): """ References represent columns that refer to records in other tables @@ -138,6 +147,7 @@ def __init__(self, name, type_name, class_origin, reverse_ref=None): self.reverse_reference_name = reverse_ref super(Reference, self).__init__(name, type_name, class_origin) self.reverse_reference = None + def __repr__(self): """ Return a string representation @@ -149,12 +159,14 @@ def __repr__(self): if self.reverse_reference is None: return s else: - return s + ", which links back to this as " + self.reverse_reference.name + return s + ", which links back to this as " + \ + self.reverse_reference.name @property def fieldtype(self): return "reference" + class Collection(Reference): """ Collections are references which refer to groups of objects @@ -164,7 +176,8 @@ class Collection(Reference): """ def __repr__(self): """Return a string representation""" - ret = super(Collection, self).__repr__().replace(" is a ", " is a group of ") + ret = super(Collection, + self).__repr__().replace(" is a ", " is a group of ") if self.reverse_reference is None: return ret + " objects" else: @@ -174,6 +187,7 @@ def __repr__(self): def fieldtype(self): return "collection" + class Class(object): """ An abstraction of database tables in the data model @@ -205,9 +219,7 @@ class Class(object): as part of the model they belong to. 
""" - - - def __init__(self, name, parents, model, interface = True): + def __init__(self, name, parents, model, interface=True): """ Constructor - Creates a new Class descriptor ============================================ @@ -235,8 +247,9 @@ def __init__(self, name, parents, model, interface = True): self.field_dict["id"] = id_field def __repr__(self): - return "<%s.%s %s.%s>" % (self.__module__, self.__class__.__name__, - self.model.package_name if hasattr(self.model, 'package_name') else "__test__", self.name) + return "<%s.%s %s.%s>" % ( + self.__module__, self.__class__.__name__, self.model.package_name + if hasattr(self.model, 'package_name') else "__test__", self.name) @property def fields(self): @@ -249,7 +262,8 @@ def fields(self): @rtype: list(L{Field}) """ - return sorted(list(self.field_dict.values()), key=lambda field: field.name) + return sorted(list(self.field_dict.values()), + key=lambda field: field.name) def __iter__(self): for f in list(self.field_dict.values()): @@ -279,7 +293,9 @@ def references(self): @rtype: list(L{Reference}) """ - def isRef(x): return isinstance(x, Reference) and not isinstance(x, Collection) + def isRef(x): + return isinstance(x, Reference) and not isinstance(x, Collection) + return list(filter(isRef, self.fields)) @property @@ -306,7 +322,8 @@ def get_field(self, name): if name in self.field_dict: return self.field_dict[name] else: - raise ModelError("There is no field called %s in %s" % (name, self.name)) + raise ModelError("There is no field called %s in %s" % + (name, self.name)) def isa(self, other): """ @@ -334,6 +351,7 @@ def isa(self, other): return True return False + class ComposedClass(Class): """ An abstraction of dynamic objects that are in two classes @@ -341,7 +359,6 @@ class ComposedClass(Class): These objects are structural unions of two or more different data-types. 
""" - def __init__(self, parts, model): self.is_interface = True self.parts = parts @@ -370,11 +387,12 @@ def field_dict(self): fields.update(p.field_dict) return fields - @property - def parent_classes(self): - """The flattened list of parent classes, with the parts""" - all_parents = [pc for pc in p.parent_classes for p in self.parts] - return all_parents + self.parts + +# @property +# def parent_classes(self): +# """The flattened list of parent classes, with the parts""" +# all_parents = [pc for pc in p.parent_classes for p in self.parts] +# return all_parents + self.parts class Path(object): @@ -422,7 +440,8 @@ def __init__(self, path, model, subclasses={}): @type path: str @param model: the model to validate the path against @type model: L{Model} - @param subclasses: a dict which maps subclasses (defaults to an empty dict) + @param subclasses: a dict which maps subclasses + (defaults to an empty dict) @type subclasses: dict """ self.model = weakref.proxy(model) @@ -438,7 +457,8 @@ def __str__(self): return self._string def __repr__(self): - return '<' + self.__module__ + "." + self.__class__.__name__ + ": " + self._string + '>' + return '<' + self.__module__ + "." + self.__class__.__name__ + ": " + \ + self._string + '>' def prefix(self): """ @@ -476,7 +496,8 @@ def append(self, *elements): @property def root(self): """ - The descriptor for the first part of the string. This should always a class descriptor. + The descriptor for the first part of the string. This should always a + class descriptor. 
@rtype: L{intermine.model.Class} """ @@ -511,7 +532,8 @@ def get_class(self): def is_reference(self): """ - Return true if the path is a reference, eg: Gene.organism or Gene.proteins + Return true if the path is a reference, eg: Gene.organism or + Gene.proteins Note: Collections are ALSO references @rtype: boolean @@ -539,10 +561,12 @@ def __eq__(self, other): def __hash__(self): i = hash(str(self)) - return reduce(lambda a, b: a ^ b, [hash(k) ^ hash(v) for k, v in list(self.subclasses.items())], i) + return reduce( + lambda a, b: a ^ b, + [hash(k) ^ hash(v) for k, v in list(self.subclasses.items())], i) -class ConstraintTree(object): +class ConstraintTree(object): def __init__(self, op, left, right): self.op = op self.left = left @@ -559,13 +583,14 @@ def __iter__(self): for subn in n: yield subn - def as_logic(self, codes = None, start = 'A'): + def as_logic(self, codes=None, start='A'): if codes is None: codes = (chr(c) for c in range(ord(start), ord('Z'))) - return "(%s %s %s)" % (self.left.as_logic(codes), self.op, self.right.as_logic(codes)) + return "(%s %s %s)" % (self.left.as_logic(codes), self.op, + self.right.as_logic(codes)) -class ConstraintNode(ConstraintTree): +class ConstraintNode(ConstraintTree): def __init__(self, *args, **kwargs): self.vargs = args self.kwargs = kwargs @@ -573,16 +598,17 @@ def __init__(self, *args, **kwargs): def __iter__(self): yield self - def as_logic(self, codes = None, start = 'A'): + def as_logic(self, codes=None, start='A'): if codes is None: codes = (chr(c) for c in range(ord(start), ord('Z'))) return next(codes) -class CodelessNode(ConstraintNode): - def as_logic(self, code = None, start = 'A'): +class CodelessNode(ConstraintNode): + def as_logic(self, code=None, start='A'): return '' + class Column(object): """ A representation of a path in a query that can be constrained @@ -591,13 +617,12 @@ class Column(object): Column objects allow constraints to be constructed in something close to a declarative style """ - - def 
__init__(self, path, model, subclasses={}, query=None, parent = None): + def __init__(self, path, model, subclasses={}, query=None, parent=None): self._model = model self._query = query self._subclasses = subclasses self._parent = parent - self.filter = self.where # alias + self.filter = self.where # alias if isinstance(path, Path): self._path = path else: @@ -606,7 +631,8 @@ def __init__(self, path, model, subclasses={}, query=None, parent = None): def select(self, *cols): """ - Create a new query with this column as the base class, selecting the given fields. + Create a new query with this column as the base class, + selecting the given fields. If no fields are given, then just this column will be selected. """ @@ -619,7 +645,8 @@ def select(self, *cols): def where(self, *args, **kwargs): """ - Create a new query based on this column, filtered with the given constraint. + Create a new query based on this column, filtered with the given + constraint. also available as "filter" """ @@ -636,8 +663,9 @@ def __iter__(self): """ Iterate over the things this column represents. - In the case of an attribute column, that is the values it may have. In the case - of a reference or class column, it is the objects that this path may refer to. + In the case of an attribute column, that is the values it may have. + In the case of a reference or class column, it is the objects that this + path may refer to. """ q = self.select() if self._path.is_attribute(): @@ -653,8 +681,9 @@ def __getattr__(self, name): cld = self._path.get_class() if cld is not None: try: - fld = cld.get_field(name) - branch = Column(str(self) + "." + name, self._model, self._subclasses, self._query, self) + branch = Column( + str(self) + "." 
+ name, self._model, self._subclasses, + self._query, self) self._branches[name] = branch return branch except ModelError as e: @@ -737,6 +766,7 @@ def __gt__(self, other): def __ge__(self, other): return ConstraintNode(str(self), ">=", other) + class Model(object): """ A class for representing the data model of an InterMine datawarehouse @@ -760,7 +790,10 @@ class Model(object): data is available and how it is inter-related """ - NUMERIC_TYPES = frozenset(["int", "Integer", "float", "Float", "double", "Double", "long", "Long", "short", "Short"]) + NUMERIC_TYPES = frozenset([ + "int", "Integer", "float", "Float", "double", "Double", "long", "Long", + "short", "Short" + ]) LOG = logging.getLogger('Model') @@ -786,7 +819,7 @@ def __init__(self, source, service=None): else: self.service = None - self.classes= {} + self.classes = {} self.parse_model(source) self.vivify() @@ -795,8 +828,9 @@ def __init__(self, source, service=None): def parse_model(self, source): """ - Create classes, attributes, references and collections from the model.xml - ========================================================================= + Create classes, attributes, references and collections from the + model.xml + ======================================================================= The xml can be provided as a file, url or string. This method is called during instantiation - it does not need to be called @@ -808,7 +842,8 @@ def parse_model(self, source): try: io = openAnything(source) src = io.read() - if hasattr(src, 'decode'): # Handle binary and text streams equally. + # Handle binary and text streams equally. 
+ if hasattr(src, 'decode'): src = src.decode('utf8') self.LOG.debug("model = [{0}]".format(src)) doc = minidom.parseString(src) @@ -816,14 +851,20 @@ def parse_model(self, source): self.name = node.getAttribute('name') self.package_name = node.getAttribute('package') assert node.nextSibling is None, "More than one model element" - assert self.name and self.package_name, "No model name or package name" + assert self.name and self.package_name, "No model name or" + \ + " package name" for c in doc.getElementsByTagName('class'): class_name = c.getAttribute('name') assert class_name, "Name not defined in" + c.toxml() + def strip_java_prefix(x): return re.sub(r'.*\.', '', x) - parents = [strip_java_prefix(p) for p in c.getAttribute('extends').split(' ') if len(p)] + + parents = [ + strip_java_prefix(p) + for p in c.getAttribute('extends').split(' ') if len(p) + ] interface = c.getAttribute('is-interface') == 'true' cl = Class(class_name, parents, self, interface) self.LOG.debug('Created {0}'.format(cl.name)) @@ -873,7 +914,8 @@ def vivify(self): c.field_dict.update(pc.field_dict) for f in c.fields: f.type_class = self.classes.get(f.type_name) - if hasattr(f, 'reverse_reference_name') and f.reverse_reference_name != '': + if hasattr(f, 'reverse_reference_name' + ) and f.reverse_reference_name != '': rrn = f.reverse_reference_name f.reverse_reference = f.type_class.field_dict[rrn] @@ -890,8 +932,13 @@ def to_ancestry(self, cd): """ parents = cd.parents self.LOG.debug('{0} < {1}'.format(cd.name, cd.parents)) - def defined(x): return x is not None # weeds out the java classes - def to_class(x): return self.classes.get(x) + + def defined(x): + return x is not None # weeds out the java classes + + def to_class(x): + return self.classes.get(x) + ancestry = list(filter(defined, list(map(to_class, parents)))) for ancestor in ancestry: self.LOG.debug('{0} is ancestor of {1}'.format(ancestor, cd.name)) @@ -908,7 +955,8 @@ def to_classes(self, classnames): This simply maps from a 
list of strings to a list of classes in the calling model. - @raise ModelError: if the list of class names includes ones that don't exist + @raise ModelError: if the list of class names includes ones that don't + exist @rtype: list(L{intermine.model.Class}) """ @@ -951,9 +999,9 @@ def get_class(self, name): else: return path.get_class() elif name in self.classes: - return self.classes[name] + return self.classes[name] else: - raise ModelError("'" + name + "' is not a class in this model") + raise ModelError("'" + name + "' is not a class in this model") def make_path(self, path, subclasses={}): """ @@ -998,8 +1046,9 @@ def validate_path(self, path_string, subclasses={}): self.parse_path_string(path_string, subclasses) return True except PathParseError as e: - raise PathParseError("Error parsing '%s' (subclasses: %s)" - % ( path_string, str(subclasses) ), e ) + raise PathParseError( + "Error parsing '%s' (subclasses: %s)" % + (path_string, str(subclasses)), e) def parse_path_string(self, path_string, subclasses={}): """ @@ -1046,14 +1095,16 @@ def parse_path_string(self, path_string, subclasses={}): def _unproxied(self): return self + class ModelError(ReadableException): pass + class PathParseError(ModelError): pass -class ModelParseError(ModelError): +class ModelParseError(ModelError): def __init__(self, message, source, cause=None): self.source = source super(ModelParseError, self).__init__(message, cause) @@ -1064,4 +1115,3 @@ def __str__(self): return base else: return base + repr(self.cause) - diff --git a/intermine/pathfeatures.py b/intermine/pathfeatures.py index c07a168f..1b2e9153 100644 --- a/intermine/pathfeatures.py +++ b/intermine/pathfeatures.py @@ -1,78 +1,89 @@ import re -PATTERN_STR = "^(?:\w+\.)*\w+$" +PATTERN_STR = r'^(?:\w+\.)*\w+$' PATH_PATTERN = re.compile(PATTERN_STR) + class PathFeature(object): def __init__(self, path): if path is None: raise ValueError("path must not be None") - try: - path = path.name - except: - pass + path = path.name if 
not PATH_PATTERN.match(path): - raise TypeError( - "Path '" + path + "' does not match expected pattern" + PATTERN_STR) + raise TypeError("Path '" + path + + "' does not match expected pattern" + PATTERN_STR) self.path = path + def __repr__(self): return "<" + self.__class__.__name__ + ": " + self.to_string() + ">" + def to_string(self): return str(self.path) + def to_dict(self): - return { 'path' : self.path } + return {'path': self.path} + @property def child_type(self): raise AttributeError() + class Join(PathFeature): valid_join_styles = ['OUTER', 'INNER'] INNER = "INNER" OUTER = "OUTER" child_type = 'join' + def __init__(self, path, style='OUTER'): if style.upper() not in Join.valid_join_styles: raise TypeError("Unknown join style: " + style) self.style = style.upper() super(Join, self).__init__(path) + def to_dict(self): d = super(Join, self).to_dict() d.update(style=self.style) return d + def __repr__(self): - return('<' + self.__class__.__name__ - + ' '.join([':', self.path, self.style]) + '>') + return ('<' + self.__class__.__name__ + + ' '.join([':', self.path, self.style]) + '>') + class PathDescription(PathFeature): child_type = 'pathDescription' + def __init__(self, path, description): self.description = description super(PathDescription, self).__init__(path) + def to_dict(self): d = super(PathDescription, self).to_dict() d.update(description=self.description) return d + class SortOrder(PathFeature): ASC = "asc" DESC = "desc" DIRECTIONS = frozenset(["asc", "desc"]) + def __init__(self, path, order): - try: - order = order.lower() - except: - pass - - if not order in self.DIRECTIONS: - raise TypeError("Order must be one of " + str(self.DIRECTIONS) - + " - not " + order) + order = order.lower() + + if order not in self.DIRECTIONS: + raise TypeError("Order must be one of " + str(self.DIRECTIONS) + + " - not " + order) self.order = order super(SortOrder, self).__init__(path) + def __str__(self): return self.path + " " + self.order + def to_string(self): 
return str(self) + class SortOrderList(object): """ A container implementation for holding sort orders @@ -85,6 +96,7 @@ class SortOrderList(object): def __init__(self, *sos): self.sort_orders = [] self.append(*sos) + def append(self, *sos): """ Add sort order elements to the sort order list. @@ -100,23 +112,30 @@ def append(self, *sos): self.sort_orders.append(SortOrder(*so)) else: raise TypeError( - "Sort orders must be either SortOrder instances," - + " or tuples of arguments: I got:" + so + sos) + "Sort orders must be either SortOrder instances," + + " or tuples of arguments: I got:" + so + sos) + def __repr__(self): return '<' + self.__class__.__name__ + ': [' + str(self) + ']>' + def __str__(self): return " ".join(map(str, self.sort_orders)) + def clear(self): self.sort_orders = [] + def is_empty(self): return len(self.sort_orders) == 0 + def __len__(self): return len(self.sort_orders) + def __next__(self): """2.x to 3.x bridge""" return self.next() + def next(self): return next(self.sort_orders) + def __iter__(self): return iter(self.sort_orders) - diff --git a/intermine/query.py b/intermine/query.py index 279a83f7..d7193a19 100644 --- a/intermine/query.py +++ b/intermine/query.py @@ -3,7 +3,8 @@ from xml.dom import minidom, getDOMImplementation from intermine.util import openAnything, ReadableException -from intermine.pathfeatures import PathDescription, Join, SortOrder, SortOrderList +from intermine.pathfeatures import PathDescription, Join, SortOrder, \ + SortOrderList from intermine.model import Column, Class, Model, Reference, ConstraintNode import intermine.constraints as constraints @@ -12,7 +13,6 @@ from functools import reduce except ImportError: pass - """ Classes representing queries against webservices ================================================ @@ -29,6 +29,7 @@ LOGIC_OPS = ["and", "or"] LOGIC_PRODUCT = [(x, y) for x in LOGIC_OPS for y in LOGIC_OPS] + class Query(object): """ A Class representing a structured database query @@ -46,7 +47,8 
@@ class Query(object): >>> service = Service("http://www.flymine.org/query/service") >>> query = service.new_query() >>> - >>> query.add_view("Gene.symbol", "Gene.pathways.name", "Gene.proteins.symbol") + >>> query.add_view("Gene.symbol", "Gene.pathways.name", + "Gene.proteins.symbol") >>> query.add_sort_order("Gene.pathways.name") >>> >>> query.add_constraint("Gene", "LOOKUP", "eve") @@ -70,24 +72,27 @@ class Query(object): OR, for a more SQL-alchemy, ORM style: - >>> for gene in s.query(s.model.Gene).filter(s.model.Gene.symbol == ["zen", "H", "eve"]).add_columns(s.model.Gene.alleles): + >>> for gene in s.query(s.model.Gene) + .filter(s.model.Gene.symbol == ["zen", "H", "eve"]). + add_columns(s.model.Gene.alleles): ... handle(gene) - Query objects represent structured requests for information over the database - housed at the datawarehouse whose webservice you are querying. They utilise - some of the concepts of relational databases, within an object-related - ORM context. If you don't know what that means, don't worry: you - don't need to write SQL, and the queries will be fast. + Query objects represent structured requests for information over the + database housed at the datawarehouse whose webservice you are querying. + They utilise some of the concepts of relational databases, within an + object-related ORM context. If you don't know what that means, + don't worry: you don't need to write SQL, and the queries will be fast. - To make things slightly more familiar to those with knowledge of SQL, some syntactical - sugar is provided to make constructing queries a bit more recognisable. + To make things slightly more familiar to those with knowledge of SQL, + some syntactical sugar is provided to make constructing queries a bit + more recognisable. PRINCIPLES ---------- The data model represents tables in the databases as classes, with records - within tables as instances of that class. 
The columns of the database are the - fields of that object:: + within tables as instances of that class. The columns of the database are + the fields of that object:: The Gene table - showing two records/objects +---------------------------------------------------+ @@ -106,13 +111,15 @@ class Query(object): | 01 | D. melanogaster | 7227 | +----------------------------------+ - Columns that contain a meaningful value are known as 'attributes' (in the tables above, that is - everything except the id columns). The other columns (such as "organism" in the gene table) - are ones that reference records of other tables (ie. other objects), and are called - references. You can refer to any field in any class, that has a connection, - however tenuous, with a table, by using dotted path notation:: + Columns that contain a meaningful value are known as 'attributes' (in the + tables above, that is everything except the id columns). The other + columns (such as "organism" in the gene table) are ones that reference + records of other tables (ie. other objects), and are called references. + You can refer to any field in any class, that has a connection, however + tenuous, with a table, by using dotted path notation:: - Gene.organism.name -> the name column in the organism table, referenced by a record in the gene table + Gene.organism.name -> the name column in the organism table, + referenced by a record in the gene table These paths, and the connections between records and tables they represent, are the basis for the structure of InterMine queries. 
@@ -131,7 +138,8 @@ class Query(object): In addition, the query must be coherent: if you have information about an organism, and you want a list of genes, then the "Gene" table should be the basis for your query, and as such the Gene class, which - represents this table, should be the root of all the paths that appear in it: + represents this table, should be the root of all the paths that appear in + it: So, to take a simple example:: @@ -143,23 +151,26 @@ class Query(object): >>> query.add_view("Gene.length") >>> query.add_view("Gene.proteins.sequence.length") - Note I can freely mix attributes and references, as long as every view ends in - an attribute (a meaningful value). As a short-cut I can also write: + Note I can freely mix attributes and references, as long as every view ends + in an attribute (a meaningful value). As a short-cut I can also write: - >>> query.add_views("Gene.name", "Gene.length", "Gene.proteins.sequence.length") + >>> query.add_views("Gene.name", "Gene.length", + "Gene.proteins.sequence.length") or: - >>> query.add_views("Gene.name Gene.length Gene.proteins.sequence.length") + >>> query.add_views("Gene.name Gene.length + Gene.proteins.sequence.length") - They are all equivalent. You can also use common SQL style shortcuts such as "*" for all - attribute fields: + They are all equivalent. You can also use common SQL style shortcuts such + as "*" for all attribute fields: >>> query.add_views("Gene.*") You can also use "select" as a synonymn for "add_view" - Now I can add my constraints. As, we mentioned, I have information about an organism, so: + Now I can add my constraints. As, we mentioned, I have information about + an organism, so: >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster") @@ -176,33 +187,38 @@ class Query(object): Now I am guaranteed to get only information on genes I am interested in. 
- Note, though, that because I have included the link (or "join") from Gene -> Protein, - this, by default, means that I only want genes that have protein information associated - with them. If in fact I want information on all genes, and just want to know the - protein information if it is available, then I can specify that with: + Note, though, that because I have included the link (or "join") from + Gene -> Protein, this, by default, means that I only want genes that have + protein information associated with them. If in fact I want information on + all genes, and just want to know the protein information if it is + available, then I can specify that with: >>> query.add_join("Gene.proteins", "OUTER") - And if perhaps my query is not as simple as a strict cumulative filter, but I want all - D. mel genes that EITHER have a short protein sequence OR come from one of my favourite genes - (as unlikely as that sounds), I can specify the logic for that too: + And if perhaps my query is not as simple as a strict cumulative filter, + but I want all D. mel genes that EITHER have a short protein sequence OR + come from one of my favourite genes (as unlikely as that sounds), + I can specify the logic for that too: >>> query.set_logic("A and (B or C)") - Each letter refers to one of the constraints - the codes are assigned in the order you add - the constraints. If you want to be absolutely certain about the constraints you mean, you - can use the constraint objects themselves: + Each letter refers to one of the constraints - the codes are assigned in + the order you add the constraints. If you want to be absolutely certain + about the constraints you mean, you can use the constraint objects + themselves: >>> gene_is_eve = query.add_constraint("Gene.symbol", "=", "eve") >>> gene_is_zen = query.add_constraint("Gene.symbol", "=", "zne") >>> >>> query.set_logic(gene_is_eve | gene_is_zen) - By default the logic is a straight cumulative filter (ie: A and B and C and D and ...) 
+ By default the logic is a straight cumulative filter (ie: A and B and C + and D and ...) Putting it all together: - >>> query.add_view("Gene.name", "Gene.length", "Gene.proteins.sequence.length") + >>> query.add_view("Gene.name", "Gene.length", + "Gene.proteins.sequence.length") >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster") >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500) >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"]) @@ -224,8 +240,9 @@ class Query(object): Result Processing: Rows ----------------------- - calling ".rows()" on a query will return an iterator of rows, where each row - is a ResultRow object, which can be treated as both a list and a dictionary. + calling ".rows()" on a query will return an iterator of rows, where each + row is a ResultRow object, which can be treated as both a list and a + dictionary. Which means you can refer to columns by name: @@ -244,7 +261,8 @@ class Query(object): ... for column in row: ... do_something(column) - Here each row will have a gene name, a gene length, and a sequence length, eg: + Here each row will have a gene name, a gene length, and a sequence length, + eg: >>> print row.to_l ["even skipped", "1359", "376"] @@ -253,11 +271,13 @@ class Query(object): >>> for row in query.rows() ... 
print row.to_d - {"Gene.name":"even skipped","Gene.length":"1359","Gene.proteins.sequence.length":"376"} + {"Gene.name":"even skipped","Gene.length":"1359", + "Gene.proteins.sequence.length":"376"} - If you just want the raw results, for printing to a file, or for piping to another program, - you can request the results in one of these formats: json', 'rr', 'tsv', 'jsonobjects', 'jsonrows', 'list', 'dict', 'csv' + If you just want the raw results, for printing to a file, or for piping + to another program, you can request the results in one of these formats: + json', 'rr', 'tsv', 'jsonobjects', 'jsonrows', 'list', 'dict', 'csv' >>> for row in query.result("", size = ) ... print(row) @@ -266,35 +286,40 @@ class Query(object): Result Processing: Results -------------------------- - Results can also be processing on a record by record basis. If you have a query that - has output columns of "Gene.symbol", "Gene.pathways.name" and "Gene.proteins.proteinDomains.primaryIdentifier", - than processing it by records will return one object per gene, and that gene will have a property - named "pathways" which contains objects which have a name property. Likewise there will be a - proteins property which holds a list of proteinDomains which all have a primaryIdentifier property, and so on. - This allows a more object orientated approach to database records, familiar to users of - other ORMs. + Results can also be processing on a record by record basis. If you have a + query that has output columns of "Gene.symbol", "Gene.pathways.name" and + "Gene.proteins.proteinDomains.primaryIdentifier", than processing it by + records will return one object per gene, and that gene will have a property + named "pathways" which contains objects which have a name property. + Likewise there will be a proteins property which holds a list of + proteinDomains which all have a primaryIdentifier property, and so on. 
+ This allows a more object orientated approach to database records, + familiar to users of other ORMs. - This is the format used when you choose to iterate over a query directly, or can be explicitly - chosen by invoking L{intermine.query.Query.results}: + This is the format used when you choose to iterate over a query directly, + or can be explicitly chosen by invoking L{intermine.query.Query.results}: >>> for gene in query: ... print gene.name, map(lambda x: x.name, gene.pathways) - The structure of the object and the information it contains depends entirely - on the output columns selected. The values may be None, of course, but also any valid values of an object - (according to the data model) will also be None if they were not selected for output. Attempts - to access invalid properties (such as gene.favourite_colour) will cause exceptions to be thrown. + The structure of the object and the information it contains depends + entirely on the output columns selected. The values may be None, + of course, but also any valid values of an object (according to the + data model) will also be None if they were not selected for output. + Attempts to access invalid properties (such as gene.favourite_colour) + will cause exceptions to be thrown. Getting us to Generate your Code -------------------------------- Not that you have to actually write any of this! The webapp will happily - generate the code for any query (and template) you can build in it. A good way to get - started is to use the webapp to generate your code, and then run it as scripts - to speed up your queries. You can always tinker with and edit the scripts you download. + generate the code for any query (and template) you can build in it. A good + way to get started is to use the webapp to generate your code, and then run + it as scripts to speed up your queries. You can always tinker with and + edit the scripts you download. 
- To get generated queries, look for the "python" link at the bottom of query-builder and - template form pages, it looks a bit like this:: + To get generated queries, look for the "python" link at the bottom of + query-builder and template form pages, it looks a bit like this:: . +=====================================+============= | | @@ -304,11 +329,12 @@ class Query(object): """ - SO_SPLIT_PATTERN = re.compile("\s*(asc|desc)\s*", re.I) - LOGIC_SPLIT_PATTERN = re.compile("\s*(?:and|or|\(|\))\s*", re.I) - TRAILING_OP_PATTERN = re.compile("\s*(and|or)\s*$", re.I) - LEADING_OP_PATTERN = re.compile("^\s*(and|or)\s*", re.I) - ORPHANED_OP_PATTERN = re.compile("(?:\(\s*(?:and|or)\s*|\s*(?:and|or)\s*\))", re.I) + SO_SPLIT_PATTERN = re.compile(r'\s*(asc|desc)\s*', re.I) + LOGIC_SPLIT_PATTERN = re.compile(r'\s*(?:and|or|\(|\))\s*', re.I) + TRAILING_OP_PATTERN = re.compile(r'\s*(and|or)\s*$', re.I) + LEADING_OP_PATTERN = re.compile(r'^\s*(and|or)\s*', re.I) + ORPHANED_OP_PATTERN = re.compile( + r'(?:\(\s*(?:and|or)\s*|\s*(?:and|or)\s*\))', re.I) def __init__(self, model, service=None, validate=True, root=None): """ @@ -326,8 +352,9 @@ def __init__(self, model, service=None, validate=True, root=None): @param model: an instance of L{intermine.model.Model}. Required @param service: an instance of l{intermine.service.Service}. Optional, but you will not be able to make requests without one. - @param validate: a boolean - defaults to True. If set to false, the query - will not try and validate itself. You should not set this to false. + @param validate: a boolean - defaults to True. If set to false, the + query will not try and validate itself. You should not set this + to false. 
""" self.model = model @@ -339,8 +366,10 @@ def __init__(self, model, service=None, validate=True, root=None): self.name = '' self.description = '' self.service = service - self.prefetch_depth = service.prefetch_depth if service is not None else 1 - self.prefetch_id_only = service.prefetch_id_only if service is not None else False + self.prefetch_depth = service.prefetch_depth \ + if service is not None else 1 + self.prefetch_id_only = service.prefetch_id_only \ + if service is not None else False self.do_verification = validate self.path_descriptions = [] self.joins = [] @@ -373,7 +402,8 @@ def __len__(self): return self.count() def __sub__(self, other): - """Construct a new list from the symmetric difference of these things""" + """Construct a new list from the symmetric difference of these things + """ return self.service._list_manager.subtract([self], [other]) def __xor__(self, other): @@ -425,8 +455,9 @@ def from_xml(cls, xml, *args, **kwargs): queries = doc.getElementsByTagName('query') if len(queries) != 1: - raise QueryParseError("wrong number of queries in xml. " - + "Only one element is allowed. Found %d" % len(queries)) + raise QueryParseError( + "wrong number of queries in xml. " + + "Only one element is allowed. 
Found %d" % len(queries)) q = queries[0] obj.name = q.getAttribute('name') obj.description = q.getAttribute('longDescription') @@ -459,18 +490,19 @@ def from_xml(cls, xml, *args, **kwargs): for val_e in c.getElementsByTagName('value'): texts = [] for node in val_e.childNodes: - if node.nodeType == node.TEXT_NODE: texts.append(node.data) + if node.nodeType == node.TEXT_NODE: + texts.append(node.data) values.append(' '.join(texts)) - if len(values) > 0: args["values"] = values - args = dict((k, v) for k, v in list(args.items()) if v is not None and v != '') + if len(values) > 0: + args["values"] = values + args = dict((k, v) for k, v in list(args.items()) + if v is not None and v != '') if "loopPath" in args: - args["op"] = { - "=" : "IS", - "!=": "IS NOT" - }.get(args["op"]) + args["op"] = {"=": "IS", "!=": "IS NOT"}.get(args["op"]) con = obj.add_constraint(**args) if not con: - raise ConstraintError("error adding constraint with args: " + args) + raise ConstraintError("error adding constraint with args: " + + args) def group(iterator, count): itr = iter(iterator) @@ -483,12 +515,13 @@ def group(iterator, count): if q.getAttribute('sortOrder') is not None: sos = Query.SO_SPLIT_PATTERN.split(q.getAttribute('sortOrder')) if len(sos) == 1: - if sos[0] in obj.views: # Be tolerant of irrelevant sort-orders + if sos[0] in obj.views: # Be tolerant of irrelevant + # sort-orders obj.add_sort_order(sos[0]) else: - sos.pop() # Get rid of empty string at end + sos.pop() # Get rid of empty string at end for path, direction in group(sos, 2): - if path in obj.views: # Be tolerant of irrelevant so. + if path in obj.views: # Be tolerant of irrelevant so. 
obj.add_sort_order(path, direction) if q.getAttribute('constraintLogic') is not None: @@ -510,19 +543,20 @@ def _set_questionable_logic(self, questionable_logic): pattern = re.compile("\\b" + c + "\\b", re.I) logic = pattern.sub("", logic) # Remove empty groups - logic = re.sub("\((:?and|or|\s)*\)", "", logic) + logic = re.sub(r'\((:?and|or|\s)*\)', "", logic) # Remove trailing and leading operators logic = Query.LEADING_OP_PATTERN.sub("", logic) logic = Query.TRAILING_OP_PATTERN.sub("", logic) - for x in range(2): # repeat, as this process can leave doubles + for x in range(2): # repeat, as this process can leave doubles for left, right in LOGIC_PRODUCT: if left == right: repl = left else: repl = "and" - pattern = re.compile(left + "\s*" + right, re.I) + pattern = re.compile(left + r'\s*' + right, re.I) logic = pattern.sub(repl, logic) - logic = Query.ORPHANED_OP_PATTERN.sub(lambda x: "(" if "(" in x.group(0) else ")", logic) + logic = Query.ORPHANED_OP_PATTERN.sub( + lambda x: "(" if "(" in x.group(0) else ")", logic) logic = logic.strip().lstrip() logic = Query.LEADING_OP_PATTERN.sub("", logic) logic = Query.TRAILING_OP_PATTERN.sub("", logic) @@ -530,11 +564,12 @@ def _set_questionable_logic(self, questionable_logic): if len(logic) > 0 and logic not in ["and", "or"]: self.set_logic(logic) except Exception as e: - raise Exception("Error parsing logic string " - + repr(questionable_logic) - + " (which is " + repr(logic) + " after irrelevant codes have been removed)" - + " with available codes: " + repr(list(used_codes)) - + " because: " + e.message) + raise Exception("Error parsing logic string " + + repr(questionable_logic) + " (which is " + + repr(logic) + + " after irrelevant codes have been removed)" + + " with available codes: " + + repr(list(used_codes)) + " because: " + e.message) def __str__(self): """Return the XML serialisation of this query""" @@ -637,31 +672,39 @@ def add_view(self, *paths): elif isinstance(p, Reference): views.append(p.name + ".*") 
else: - views.extend(re.split("(?:,?\s+|,)", str(p))) + views.extend(re.split(r'(?:,?\s+|,)', str(p))) views = list(map(self.prefix_path, views)) views_to_add = [] for view in views: if view.endswith(".*"): - view = re.sub("\.\*$", "", view) + view = re.sub(r'\.\*$', "", view) scd = self.get_subclass_dict() + def expand(p, level, id_only=False): if level > 0: path = self.model.make_path(p, scd) cd = path.end_class - add_f = lambda x: p + "." + x.name - vs = [p + ".id"] if id_only and cd.has_id else [add_f(a) for a in cd.attributes] + + def add_f(x): + return p + "." + x.name + + vs = [p + ".id"] if id_only and cd.has_id else [ + add_f(a) for a in cd.attributes + ] next_level = level - 1 rs_and_cs = list(cd.references) + list(cd.collections) for r in rs_and_cs: rp = add_f(r) if next_level: self.outerjoin(rp) - vs.extend(expand(rp, next_level, self.prefetch_id_only)) + vs.extend( + expand(rp, next_level, self.prefetch_id_only)) return vs else: return [] + depth = self.prefetch_depth views_to_add.extend(expand(view, depth)) else: @@ -676,12 +719,13 @@ def expand(p, level, id_only=False): def prefix_path(self, path): if self.root is None: - if self.do_verification: # eg. not when building from XML + if self.do_verification: # eg. 
not when building from XML if path.endswith(".*"): - trimmed = re.sub("\.\*$", "", path) + trimmed = re.sub(r'\.\*$', "", path) else: trimmed = path - self.root = self.model.make_path(trimmed, self.get_subclass_dict()).root + self.root = self.model.make_path(trimmed, + self.get_subclass_dict()).root return path else: if path.startswith(self.root.name): @@ -712,12 +756,13 @@ def verify_views(self, views=None): @raise intermine.model.ModelError: if the paths are invalid @raise ConstraintError: if the paths are not attributes """ - if views is None: views = self.views + if views is None: + views = self.views for path in views: path = self.model.make_path(path, self.get_subclass_dict()) if not path.is_attribute(): - raise ConstraintError("'" + str(path) - + "' does not represent an attribute") + raise ConstraintError("'" + str(path) + + "' does not represent an attribute") def add_constraint(self, *args, **kwargs): """ @@ -749,7 +794,8 @@ def add_constraint(self, *args, **kwargs): con = self.constraint_factory.make_constraint(*args[0]) else: try: - con = self.constraint_factory.make_constraint(*args[0].vargs, **args[0].kwargs) + con = self.constraint_factory.make_constraint( + *args[0].vargs, **args[0].kwargs) except AttributeError: con = args[0] else: @@ -769,7 +815,8 @@ def add_constraint(self, *args, **kwargs): con = self.constraint_factory.make_constraint(*args, **kwargs) con.path = self.prefix_path(con.path) - if self.do_verification: self.verify_constraint_paths([con]) + if self.do_verification: + self.verify_constraint_paths([con]) if hasattr(con, "code"): self.constraint_dict[con.code] = con else: @@ -797,7 +844,7 @@ def where(self, *cons, **kwargs): for con in conset: c.add_constraint(*con.vargs, **con.kwargs) try: - c.set_logic(lstr + conset.as_logic(start = start_c)) + c.set_logic(lstr + conset.as_logic(start=start_c)) except constraints.EmptyLogicError: pass for path, value in list(kwargs.items()): @@ -815,7 +862,8 @@ def column(self, col): Also available 
as Query.c """ - return self.model.column(self.prefix_path(str(col)), self.get_subclass_dict(), self) + return self.model.column(self.prefix_path(str(col)), + self.get_subclass_dict(), self) def verify_constraint_paths(self, cons=None): """ @@ -824,51 +872,77 @@ def verify_constraint_paths(self, cons=None): This method will check the path attribute of each constraint. In addition it will: - - Check that BinaryConstraints and MultiConstraints have an Attribute as their path + - Check that BinaryConstraints and MultiConstraints have an Attribute + as their path - Check that TernaryConstraints have a Reference as theirs - Check that SubClassConstraints have a correct subclass relationship - - Check that LoopConstraints have a valid loopPath, of a compatible type + - Check that LoopConstraints have a valid loopPath, of a compatible + type - Check that ListConstraints refer to an object - - Don't even try to check RangeConstraints: these have variable semantics + - Don't even try to check RangeConstraints: these have variable + semantics - @param cons: The constraints to check (defaults to all constraints on the query) + @param cons: The constraints to check (defaults to all constraints on + the query) @raise ModelError: if the paths are not valid - @raise ConstraintError: if the constraints do not satisfy the above rules + @raise ConstraintError: if the constraints do not satisfy the above + rules """ - if cons is None: cons = self.constraints + if cons is None: + cons = self.constraints for con in cons: pathA = self.model.make_path(con.path, self.get_subclass_dict()) if isinstance(con, constraints.RangeConstraint): - pass # No verification done on these, beyond checking its path, of course. + # No verification done on these, beyond checking its path, of + # course. 
+ pass elif isinstance(con, constraints.IsaConstraint): if pathA.get_class() is None: - raise ConstraintError("'" + str(pathA) + "' does not represent a class, or a reference to a class") + raise ConstraintError( + "'" + str(pathA) + + "' does not represent a class, or a reference to a \ + class") for c in con.values: if c not in self.model.classes: - raise ConstraintError("Illegal constraint: " + repr(con) + " '" + str(c) + "' is not a class in this model") + raise ConstraintError("Illegal constraint: " + + repr(con) + " '" + str(c) + + "' is not a class in this model") elif isinstance(con, constraints.TernaryConstraint): if pathA.get_class() is None: - raise ConstraintError("'" + str(pathA) + "' does not represent a class, or a reference to a class") - elif isinstance(con, constraints.BinaryConstraint) or isinstance(con, constraints.MultiConstraint): + raise ConstraintError( + "'" + str(pathA) + + "' does not represent a class, or a reference to a \ + class") + elif isinstance(con, constraints.BinaryConstraint) or isinstance( + con, constraints.MultiConstraint): if not pathA.is_attribute(): - raise ConstraintError("'" + str(pathA) + "' does not represent an attribute") + raise ConstraintError("'" + str(pathA) + + "' does not represent an attribute") elif isinstance(con, constraints.SubClassConstraint): - pathB = self.model.make_path(con.subclass, self.get_subclass_dict()) + pathB = self.model.make_path(con.subclass, + self.get_subclass_dict()) if not pathB.get_class().isa(pathA.get_class()): - raise ConstraintError("'" + con.subclass + "' is not a subclass of '" + con.path + "'") + raise ConstraintError("'" + con.subclass + + "' is not a subclass of '" + + con.path + "'") elif isinstance(con, constraints.LoopConstraint): - pathB = self.model.make_path(con.loopPath, self.get_subclass_dict()) + pathB = self.model.make_path(con.loopPath, + self.get_subclass_dict()) for path in [pathA, pathB]: if not path.get_class(): - raise ConstraintError("'" + str(path) + "' does 
not refer to an object") + raise ConstraintError("'" + str(path) + + "' does not refer to an object") (classA, classB) = (pathA.get_class(), pathB.get_class()) if not classA.isa(classB) and not classB.isa(classA): - raise ConstraintError("the classes are of incompatible types: " + str(classA) + "," + str(classB)) + raise ConstraintError( + "the classes are of incompatible types: " + + str(classA) + "," + str(classB)) elif isinstance(con, constraints.ListConstraint): if not pathA.get_class(): - raise ConstraintError("'" + str(pathA) + "' does not refer to an object") + raise ConstraintError("'" + str(pathA) + + "' does not refer to an object") @property def constraints(self): @@ -884,7 +958,8 @@ def constraints(self): @rtype: list(Constraint) """ - ret = sorted(list(self.constraint_dict.values()), key=lambda con: con.code) + ret = sorted(list(self.constraint_dict.values()), + key=lambda con: con.code) ret.extend(self.uncoded_constraints) return ret @@ -902,10 +977,10 @@ def get_constraint(self, code): if code in self.constraint_dict: return self.constraint_dict[code] else: - raise ConstraintError("There is no constraint with the code '" - + code + "' on this query") + raise ConstraintError("There is no constraint with the code '" + + code + "' on this query") - def add_join(self, *args ,**kwargs): + def add_join(self, *args, **kwargs): """ Add a join statement to the query ================================= @@ -949,7 +1024,8 @@ def add_join(self, *args ,**kwargs): """ join = Join(*args, **kwargs) join.path = self.prefix_path(join.path) - if self.do_verification: self.verify_join_paths([join]) + if self.do_verification: + self.verify_join_paths([join]) self.joins.append(join) return self @@ -967,36 +1043,40 @@ def verify_join_paths(self, joins=None): @raise ModelError: if the paths are invalid @raise QueryError: if the paths are not references """ - if joins is None: joins = self.joins + if joins is None: + joins = self.joins for join in joins: path = 
self.model.make_path(join.path, self.get_subclass_dict()) if not path.is_reference(): raise QueryError("'" + join.path + "' is not a reference") - def add_path_description(self, *args ,**kwargs): + def add_path_description(self, *args, **kwargs): """ Add a path description to the query =================================== example:: - query.add_path_description("Gene.proteins.proteinDomains", "Protein Domain") + query.add_path_description("Gene.proteins.proteinDomains", + "Protein Domain") This allows you to alias the components of long paths to - improve the way they display column headers in a variety of circumstances. - In the above example, if the view included the unwieldy path - "Gene.proteins.proteinDomains.primaryIdentifier", it would (depending on the - mine) be displayed as "Protein Domain > DB Identifer". These - setting are taken into account by the webservice when generating - column headers for flat-file results with the columnheaders parameter given, and - always supplied when requesting jsontable results. + improve the way they display column headers in a variety of + circumstances. In the above example, if the view included the unwieldy + path "Gene.proteins.proteinDomains.primaryIdentifier", it would + (depending on the mine) be displayed as + "Protein Domain > DB Identifer". These setting are taken into account + by the webservice when generating column headers for flat-file results + with the columnheaders parameter given, and always supplied when + requesting jsontable results. 
@rtype: L{intermine.pathfeatures.PathDescription} """ path_description = PathDescription(*args, **kwargs) path_description.path = self.prefix_path(path_description.path) - if self.do_verification: self.verify_pd_paths([path_description]) + if self.do_verification: + self.verify_pd_paths([path_description]) self.path_descriptions.append(path_description) return path_description @@ -1009,7 +1089,8 @@ def verify_pd_paths(self, pds=None): @raise ModelError: if the paths are invalid """ - if pds is None: pds = self.path_descriptions + if pds is None: + pds = self.path_descriptions for pd in pds: self.model.validate_path(pd.path, self.get_subclass_dict()) @@ -1019,7 +1100,8 @@ def coded_constraints(self): Returns the list of constraints that have a code ================================================ - Query.coded_constraints S{->} list(intermine.constraints.CodedConstraint) + Query.coded_constraints S{->} + list(intermine.constraints.CodedConstraint) This returns an up to date list of the constraints that can be used in a logic expression. 
The only kind of constraint @@ -1027,7 +1109,8 @@ def coded_constraints(self): @rtype: list(L{intermine.constraints.CodedConstraint}) """ - return sorted(list(self.constraint_dict.values()), key=lambda con: con.code) + return sorted(list(self.constraint_dict.values()), + key=lambda con: con.code) def get_logic(self): """ @@ -1047,7 +1130,7 @@ def get_logic(self): """ if self._logic is None: if len(self.coded_constraints) > 0: - return reduce(lambda x, y: x+y, self.coded_constraints) + return reduce(lambda x, y: x + y, self.coded_constraints) else: return "" else: @@ -1080,7 +1163,8 @@ def set_logic(self, value): raise else: return self - if self.do_verification: self.validate_logic(logic) + if self.do_verification: + self.validate_logic(logic) self._logic = logic return self @@ -1095,12 +1179,14 @@ def validate_logic(self, logic=None): @raise QueryError: if not every coded constraint is represented """ - if logic is None: logic = self._logic + if logic is None: + logic = self._logic logic_codes = set(logic.get_codes()) for con in self.coded_constraints: if con.code not in logic_codes: - raise QueryError("Constraint " + con.code + repr(con) - + " is not mentioned in the logic: " + str(logic)) + raise QueryError("Constraint " + con.code + repr(con) + + " is not mentioned in the logic: " + + str(logic)) def get_default_sort_order(self): """ @@ -1168,7 +1254,8 @@ def add_sort_order(self, path, direction=SortOrder.ASC): """ so = SortOrder(str(path), direction) so.path = self.prefix_path(so.path) - if self.do_verification: self.validate_sort_order(so) + if self.do_verification: + self.validate_sort_order(so) self._sort_order_list.append(so) return self @@ -1191,11 +1278,13 @@ def validate_sort_order(self, *so_elems): for so in so_elems: p = self.model.make_path(so.path, self.get_subclass_dict()) if p.prefix() not in from_paths: - raise QueryError("Sort order element %s is not in the query" % so.path) + raise QueryError("Sort order element %s is not in the query" % + 
so.path) def _from_paths(self): scd = self.get_subclass_dict() - froms = set([self.model.make_path(x, scd).prefix() for x in self.views]) + froms = set( + [self.model.make_path(x, scd).prefix() for x in self.views]) for c in self.constraints: p = self.model.make_path(c.path, scd) if p.is_attribute(): @@ -1244,8 +1333,10 @@ def results(self, row="object", start=0, size=None, summary_path=None): >>> for row in query.results(row="rr"): ... print row["symbol"] # handle strings by dict index ... total += row["length"] # handle numbers by dict index - ... print row["Gene.symbol"] # handle strings by full dict index - ... total += row["Gene.length"] # handle numbers by full dict index + ... print row["Gene.symbol"] # handle strings by full dict + index + ... total += row["Gene.length"] # handle numbers by full dict + index ... print row[0] # handle strings by list index ... total += row[1] # handle numbers by list index >>> for d in query.results(row="dict"): @@ -1255,7 +1346,8 @@ def results(self, row="object", start=0, size=None, summary_path=None): ... print row[0] # handle strings ... total += row[1] # handle numbers >>> import csv - >>> csv_reader = csv.reader(q.results(row="csv"), delimiter=",", quotechar='"') + >>> csv_reader = csv.reader(q.results(row="csv"), delimiter="," + , quotechar='"') >>> for row in csv_reader: ... print row[0] # handle strings ... length_sum += int(row[1]) # handle numbers @@ -1267,17 +1359,17 @@ def results(self, row="object", start=0, size=None, summary_path=None): This is the general method that allows access to any of the available result formats. The example above shows the ways these differ in terms of accessing fields of the rows, as well as dealing with different - data types. Results can either be retrieved as typed values (jsonobjects, - rr ['ResultRows'], dict, list), or as lists of strings (csv, tsv) which then require - further parsing. 
The default format for this method is "objects", where - information is grouped by its relationships. The other main format is - "rr", which stands for 'ResultRows', and can be accessed directly through - the L{rows} method. - - Note that when requesting object based results (the default), if your query - contains any kind of collection, it is highly likely that start and size won't do what - you think, as they operate only on the underlying - rows used to build up the returned objects. If you want rows + data types. Results can either be retrieved as typed values + (jsonobjects, rr ['ResultRows'], dict, list), or as lists of strings + (csv, tsv) which then require further parsing. The default format for + this method is "objects", where information is grouped by its + relationships. The other main format is "rr", which stands for + 'ResultRows', and can be accessed directly through the L{rows} method. + + Note that when requesting object based results (the default), if your + query contains any kind of collection, it is highly likely that start + and size won't do what you think, as they operate only on the + underlying rows used to build up the returned objects. If you want rows back, you are recommeded to use the simpler rows method. If no views have been specified, all attributes of the root class @@ -1290,10 +1382,11 @@ def results(self, row="object", start=0, size=None, summary_path=None): @type start: int @param size: The maximum number of results to return (default = all) @type size: int - @param summary_path: A column name to optionally summarise. Specifying a path - will force "jsonrows" format, and return an iterator over a list - of dictionaries. Use this when you are interested in processing - a summary in order of greatest count to smallest. + @param summary_path: A column name to optionally summarise. Specifying + a path will force "jsonrows" format, and return an + iterator over a list of dictionaries. 
Use this + when you are interested in processing a summary in + order of greatest count to smallest. @type summary_path: str or L{intermine.model.Path} @rtype: L{intermine.webservice.ResultIterator} @@ -1355,32 +1448,36 @@ def summarise(self, summary_path, **kwargs): ================================================ Usage:: - >>> query = service.select("Gene.*", "organism.*").where("Gene", "IN", "my-list") + >>> query = service.select("Gene.*", "organism.*").where("Gene", + "IN", "my-list") >>> print query.summarise("length")["average"] ... 12345.67890 >>> print query.summarise("organism.name")["Drosophila simulans"] ... 98 This method allows you to get statistics summarising the information - from just one column of a query. For numerical columns you get dictionary with - four keys ('average', 'stdev', 'max', 'min'), and for non-numerical - columns you get a dictionary where each item is a key and the values - are the number of occurrences of this value in the column. + from just one column of a query. For numerical columns you get + dictionary with four keys ('average', 'stdev', 'max', 'min'), and for + non-numerical columns you get a dictionary where each item is a key and + the values are the number of occurrences of this value in the column. Any key word arguments will be passed to the underlying results call - - so you can limit the result size to the top 100 items by passing "size = 100" - as part of the call. + so you can limit the result size to the top 100 items by passing + "size = 100" as part of the call. @see: L{intermine.query.Query.results} - @param summary_path: The column to summarise (either in long or short form) + @param summary_path: The column to summarise (either in long or + short form) @type summary_path: str or L{intermine.model.Path} @rtype: dict - This method is sugar for particular combinations of calls to L{results}. + This method is sugar for particular combinations of calls to + L{results}. 
""" - p = self.model.make_path(self.prefix_path(summary_path), self.get_subclass_dict()) - results = self.results(summary_path = summary_path, **kwargs) + p = self.model.make_path(self.prefix_path(summary_path), + self.get_subclass_dict()) + results = self.results(summary_path=summary_path, **kwargs) if p.end.type_name in Model.NUMERIC_TYPES: return dict((k, float(v)) for k, v in list(next(results).items())) else: @@ -1405,7 +1502,8 @@ def one(self, row="jsonobjects"): else: c = self.count() if (c != 1): - raise QueryError("Result size is not one: got %d results" % (c)) + raise QueryError("Result size is not one: got %d results" % + (c)) else: return self.first(row) @@ -1463,12 +1561,13 @@ def count(self): @raise WebserviceError: if the request is unsuccessful. """ count_str = "" - for row in self.results(row = "count"): + for row in self.results(row="count"): count_str += row try: return int(count_str) except ValueError: - raise ResultError("Server returned a non-integer count: " + count_str) + raise ResultError("Server returned a non-integer count: " + + count_str) def get_list_upload_uri(self): """ @@ -1498,7 +1597,6 @@ def get_list_append_uri(self): """ return self.service.root + self.service.QUERY_LIST_APPEND_PATH - def get_results_path(self): """ Returns the path section pointing to the REST resource @@ -1513,7 +1611,6 @@ def get_results_path(self): """ return self.service.QUERY_PATH - def children(self): """ Returns the child objects of the query @@ -1534,16 +1631,18 @@ def children(self): def to_query(self): """ - Implementation of trait that allows use of these objects as queries (casting). + Implementation of trait that allows use of these objects as queries + (casting). 
""" return self def make_list_constraint(self, path, op): """ - Implementation of trait that allows use of these objects in list constraints + Implementation of trait that allows use of these objects in list + constraints """ - l = self.service.create_list(self) - return ConstraintNode(path, op, l.name) + temp = self.service.create_list(self) + return ConstraintNode(path, op, temp.name) def to_query_params(self): """ @@ -1558,7 +1657,7 @@ def to_query_params(self): """ xml = self.to_xml() - params = {'query' : xml } + params = {'query': xml} return params def to_Node(self): @@ -1572,8 +1671,8 @@ def to_Node(self): @rtype: xml.minidom.Node """ - impl = getDOMImplementation() - doc = impl.createDocument(None, "query", None) + impl = getDOMImplementation() + doc = impl.createDocument(None, "query", None) query = doc.documentElement query.setAttribute('name', self.name) @@ -1644,13 +1743,20 @@ def clone(self): @return: same class as caller """ newobj = self.__class__(self.model) - for attr in ["joins", "views", "_sort_order_list", "_logic", "path_descriptions", "constraint_dict", "uncoded_constraints"]: + for attr in [ + "joins", "views", "_sort_order_list", "_logic", + "path_descriptions", "constraint_dict", "uncoded_constraints" + ]: setattr(newobj, attr, deepcopy(getattr(self, attr))) - for attr in ["name", "description", "service", "do_verification", "constraint_factory", "root"]: + for attr in [ + "name", "description", "service", "do_verification", + "constraint_factory", "root" + ]: setattr(newobj, attr, getattr(self, attr)) return newobj + class Template(Query): """ A Class representing a predefined query @@ -1733,10 +1839,9 @@ def from_xml(cls, xml, *args, **kwargs): templates = doc.getElementsByTagName('template') if len(templates) != 1: - raise QueryParseError( - "wrong number of templates in xml. " - + "Only one