-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdocs.py
248 lines (216 loc) · 8.16 KB
/
docs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
import string
import logging
import datetime
import models
import errors
from google.appengine.api import search
from google.appengine.ext import ndb
class BaseDocumentManager(object):
    """Abstract class. Provides helper methods to manage search.Documents.

    Concrete subclasses set ``_INDEX_NAME`` to the name of the search index
    they operate on; every classmethod here resolves the index through
    ``getIndex()``.
    """

    # Name of the search index; None here, overridden by subclasses.
    _INDEX_NAME = None

    # Characters permitted in a doc id: printable ASCII minus all whitespace.
    _VISIBLE_PRINTABLE_ASCII = frozenset(
        set(string.printable) - set(string.whitespace))

    def __init__(self, doc):
        """Wrap a search document so its fields can be read and replaced.

        Args:
          doc: the document object the accessor methods operate on.
        """
        self.doc = doc

    def getFieldVal(self, fname):
        """Return the value of the document field named `fname`.

        Returns None when the lookup raises ValueError. Per the App Engine
        Search API, Document.field() does so unless exactly one field carries
        that name, i.e. for missing as well as duplicated field names.
        """
        try:
            return self.doc.field(fname).value
        except ValueError:
            return None

    def setFirstField(self, new_field):
        """Replace the first document field whose name matches `new_field`.

        Args:
          new_field: the replacement field; its `name` selects the target.

        Returns:
          True if a field with that name was found and replaced, else False.
        """
        for position, existing in enumerate(self.doc.fields):
            if existing.name == new_field.name:
                self.doc.fields[position] = new_field
                return True
        return False

    @classmethod
    def isValidDocId(cls, doc_id):
        """Check whether `doc_id` is usable as a document id.

        A valid id is a non-empty string made only of visible printable ASCII
        characters (no whitespace) that does not start with '!'.

        Returns:
          True if the id is valid; False otherwise, including for an empty
          string or None.
        """
        # An empty (or missing) id would otherwise pass every per-character
        # check vacuously.
        if not doc_id:
            return False
        if doc_id.startswith('!'):
            return False
        return all(char in cls._VISIBLE_PRINTABLE_ASCII for char in doc_id)

    @classmethod
    def getIndex(cls):
        """Return the search.Index named by this class's _INDEX_NAME."""
        return search.Index(name=cls._INDEX_NAME)

    @classmethod
    def deleteAllInIndex(cls):
        """Delete all the docs in this class's index.

        Any search.Error raised along the way is logged, not re-raised.
        """
        docindex = cls.getIndex()
        try:
            while True:
                # Fetch the next batch with ids_only=True so only doc ids are
                # returned, then delete that batch; stop once the index
                # yields nothing.
                document_ids = [
                    document.doc_id
                    for document in docindex.get_range(ids_only=True)]
                if not document_ids:
                    break
                docindex.delete(document_ids)
        except search.Error:
            logging.exception("Error removing documents:")

    @classmethod
    def getDoc(cls, doc_id):
        """Return the document with the given doc id, or None.

        The lookup uses get_range starting at `doc_id` with a limit of one.
        If no document has exactly that id, a different document can come
        back instead, so the id of the result is compared before returning.

        Returns:
          The matching document, or None if `doc_id` is falsy, no document
          has that id, or the id is ill-formed (search.InvalidRequest).
        """
        if not doc_id:
            return None
        try:
            index = cls.getIndex()
            response = index.get_range(
                start_id=doc_id, limit=1, include_start_object=True)
            if response.results and response.results[0].doc_id == doc_id:
                return response.results[0]
            return None
        except search.InvalidRequest:  # catches ill-formed doc ids
            return None

    @classmethod
    def removeDocById(cls, doc_id):
        """Remove the doc with the given doc id.

        A search.Error is logged and swallowed.
        """
        try:
            cls.getIndex().delete(doc_id)
        except search.Error:
            logging.exception("Error removing doc id %s.", doc_id)

    @classmethod
    def add(cls, documents):
        """Wrapper for the search index put method; specifies the index name.

        Returns:
          Whatever Index.put returns, or None if a search.Error was raised
          (the error is logged). Callers must handle the None case.
        """
        try:
            return cls.getIndex().put(documents)
        except search.Error:
            logging.exception("Error adding documents.")
            return None
class ModelDoc(BaseDocumentManager):
    """Manages the search documents that describe models.

    Documents live in the 'model_doc3' index and carry the fields named by
    the constants below.
    """

    _INDEX_NAME = 'model_doc3'

    # Names of the document fields.
    MID = 'mid'
    MODEL_NAME = 'model_name'
    MODEL_AUTHOR = 'model_author'
    MODEL_KEYWORDS = 'model_keywords'
    MODEL_SKEYWORDS = 'model_skeywords'
    MODEL_FILE = 'model_file'
    UPDATED = 'updated'

    # Each entry is [sort key, menu label, SortExpression].
    _SORT_OPTIONS = [
        [UPDATED, 'modified', search.SortExpression(
            expression=UPDATED,
            direction=search.SortExpression.DESCENDING, default_value=1)],
        [MODEL_AUTHOR, 'model author', search.SortExpression(
            expression=MODEL_AUTHOR,
            direction=search.SortExpression.ASCENDING, default_value='')],
        [MODEL_NAME, 'model name', search.SortExpression(
            expression=MODEL_NAME,
            direction=search.SortExpression.ASCENDING, default_value='zzz')]
    ]

    # Lazily built caches derived from _SORT_OPTIONS.
    _SORT_MENU = None
    _SORT_DICT = None

    def getName(self):
        """Return the value of the model name field."""
        return self.getFieldVal(self.MODEL_NAME)

    def getAuthor(self):
        """Return the value of the model author field."""
        return self.getFieldVal(self.MODEL_AUTHOR)

    def getKeywords(self):
        """Return the value of the model keywords field."""
        return self.getFieldVal(self.MODEL_KEYWORDS)

    def getMID(self):
        """Return the value of the model id field."""
        return self.getFieldVal(self.MID)

    def getSKeywords(self):
        """Return the value of the structured keywords field."""
        return self.getFieldVal(self.MODEL_SKEYWORDS)

    def getFile(self):
        """Return the value of the model file field."""
        return self.getFieldVal(self.MODEL_FILE)

    @classmethod
    def getSortMenu(cls):
        """Return the list of (sort key, label) pairs, building it on first use."""
        if not cls._SORT_MENU:
            cls._buildSortMenu()
        return cls._SORT_MENU

    @classmethod
    def getSortDict(cls):
        """Return the sort key -> SortExpression dict, building it on first use."""
        if not cls._SORT_DICT:
            cls._buildSortDict()
        return cls._SORT_DICT

    @classmethod
    def _buildSortMenu(cls):
        """Build the sort menu used for model search.

        The menu is a 'relevance' entry followed by one (key, label) pair per
        entry of _SORT_OPTIONS.
        """
        res = [(elt[0], elt[1]) for elt in cls._SORT_OPTIONS]
        cls._SORT_MENU = [('relevance', 'relevance')] + res

    @classmethod
    def _buildSortDict(cls):
        """Build a dict that maps sort option keywords to their corresponding
        SortExpressions."""
        # Build the dict fully before publishing it, so readers never observe
        # a partially filled cache.
        cls._SORT_DICT = {elt[0]: elt[2] for elt in cls._SORT_OPTIONS}

    @classmethod
    def getDocFromMid(cls, mid):
        """Fetch the document whose doc id equals `mid`, or None.

        NOTE(review): _createDocument builds documents without a doc_id, so
        their ids are auto-generated rather than set to the mid. This lookup
        therefore only succeeds when a document's doc id happens to equal
        `mid` -- confirm whether that is intended.
        """
        return cls.getDoc(mid)

    @classmethod
    def _buildCoreModelFields(
            cls, mid, name, author, keywords, skeywords, fileInfo):
        """Construct the 'core' document field list common to all models.

        Every argument is stored in a TextField under the matching field name.
        """
        fields = [
            search.TextField(name=cls.MID, value=mid),
            # The 'updated' field is always set to the current date.
            search.DateField(name=cls.UPDATED,
                             value=datetime.datetime.now().date()),
            search.TextField(name=cls.MODEL_NAME, value=name),
            search.TextField(name=cls.MODEL_AUTHOR, value=author),
            search.TextField(name=cls.MODEL_KEYWORDS, value=keywords),
            search.TextField(name=cls.MODEL_SKEYWORDS, value=skeywords),
            search.TextField(name=cls.MODEL_FILE, value=fileInfo),
        ]
        return fields

    @classmethod
    def _createDocument(
            cls, mid, name, author, keywords, skeywords, fileInfo):
        """Create a Document object from the given params.

        The document is only constructed here, not indexed.
        """
        # construct the document fields from the params
        resfields = cls._buildCoreModelFields(
            mid=mid, name=name, author=author, keywords=keywords,
            skeywords=skeywords, fileInfo=fileInfo)
        # No doc_id is passed, so the id is left to be auto-generated when the
        # document is indexed; the mid is stored only as a field.
        return search.Document(fields=resfields)

    @classmethod
    def buildModel(cls, params):
        """Index a model document and create/update its datastore entity.

        Args:
          params: dict with keys 'mid', 'name', 'fileInfo', and the sequences
            of strings 'author', 'tags' and 'structuredTags' (each joined
            with single spaces before indexing).

        Returns:
          The models.ModelInfo entity keyed by the new document's id.

        Raises:
          errors.OperationFailedError: if the document could not be indexed.
        """
        doc = cls._createDocument(
            params['mid'], params['name'], ' '.join(params['author']),
            ' '.join(params['tags']), ' '.join(params['structuredTags']),
            params['fileInfo'])
        put_results = cls.add(doc)
        # add() returns None when the put raised search.Error, so test for a
        # missing/empty result rather than relying on IndexError alone.
        if not put_results:
            raise errors.OperationFailedError('could not index document')
        doc_id = put_results[0].id
        logging.debug(
            'got new doc id %s for product: %s', doc_id, params['mid'])

        # now update the entity
        def _tx():
            # Update the entity if one already exists under this doc id,
            # otherwise create it. No explicit put() is issued here;
            # presumably update_core/create persist the entity -- verify
            # against models.ModelInfo.
            model = models.ModelInfo.get_by_id(doc_id)
            if model:  # update
                model.update_core(params, doc_id)
            else:  # create new entity
                model = models.ModelInfo.create(params, doc_id)
            return model

        model = ndb.transaction(_tx)
        logging.debug('prod: %s', model)
        return model