1 """Base classes for match providers.
2
3 They are used by business objects to give
4 phrasewheels the ability to guess phrases.
5
6 Copyright (C) GNUMed developers
7 license: GPL
8 """
9 __version__ = "$Revision: 1.34 $"
10 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>, I.Haywood <ihaywood@gnu.org>, S.J.Tan <sjtan@bigpond.com>"
11
12
13 import string, types, time, sys, re as regex, logging
14
15
16
17 from Gnumed.pycommon import gmPG2
18
19
# Module-level logger, registered under the name 'gm.ui'.
_log = logging.getLogger('gm.ui')
# Record this module's revision string in the log when the module is loaded.
_log.info(__version__)
22
23
# Regular expression matching runs of characters that are to be ignored.
# The double quote must sit *inside* the character class: appended after
# the closing "]+", it would turn the pattern into "a run of ignored
# characters followed by a literal double quote", which hardly ever matches.
default_ignored_chars = r"[?!.'\(){}\[\]<>~#*$%^_" + '"]+'
# Regular expression matching runs of characters that separate words.
default_word_separators = '[- \t=+&:@]+'
26
28 """Base class for match providing objects.
29
30 Match sources might be:
31 - database tables
32 - flat files
33 - previous input
34 - config files
35 - in-memory list created on the fly
36 """
37 print_queries = False
38
45
46
47
49 """Return matches according to aFragment and matching thresholds.
50
51 FIXME: design decision: we dont worry about data source changes
52 during the lifetime of a MatchProvider
53 FIXME: append _("*get all items*") on truncation
54 """
55
56 if aFragment is None:
57 raise ValueError, 'Cannot find matches without a fragment.'
58
59
60 if aFragment == u'*':
61 return self.getAllMatches()
62
63
64 tmpFragment = aFragment.lower()
65
66 if self.__ignored_chars is not None:
67 tmpFragment = self.__ignored_chars.sub('', tmpFragment)
68
69 if self.__word_separators is not None:
70 tmpFragment = u' '.join(self.__word_separators.split(tmpFragment))
71
72 lngFragment = len(tmpFragment)
73
74
75 if lngFragment >= self.__threshold_substring:
76 return self.getMatchesBySubstr(tmpFragment)
77 elif lngFragment >= self.__threshold_word:
78 return self.getMatchesByWord(tmpFragment)
79 elif lngFragment >= self.__threshold_phrase:
80 return self.getMatchesByPhrase(tmpFragment)
81 else:
82 return (False, [])
83
85 raise NotImplementedError
86
88 raise NotImplementedError
89
91 raise NotImplementedError
92
94 raise NotImplementedError
95
96
97
99 """Set match location thresholds.
100
101 - the fragment passed to getMatches() must contain at least this many
102 characters before it triggers a match search at:
103 1) phrase_start - start of phrase (first word)
104 2) word_start - start of any word within phrase
105 3) in_word - _inside_ any word within phrase
106 """
107
108 if aSubstring < aWord:
109 _log.error('Setting substring threshold (%s) lower than word-start threshold (%s) does not make sense. Retaining original thresholds (%s:%s, respectively).' % (aSubstring, aWord, self.__threshold_substring, self.__threshold_word))
110 return False
111 if aWord < aPhrase:
112 _log.error('Setting word-start threshold (%s) lower than phrase-start threshold (%s) does not make sense. Retaining original thresholds (%s:%s, respectively).' % (aSubstring, aWord, self.__threshold_word, self.__threshold_phrase))
113 return False
114
115
116 self.__threshold_phrase = aPhrase
117 self.__threshold_word = aWord
118 self.__threshold_substring = aSubstring
119
120 return True
121
127
129 if self.__word_separators is None:
130 return None
131 return self.__word_separators.pattern
132
133 word_separators = property(_get_word_separators, _set_word_separators)
134
140
142 if self.__ignored_chars is None:
143 return None
144 return self.__ignored_chars.pattern
145
146 ignored_chars = property(_get_ignored_chars, _set_ignored_chars)
147
def set_context(self, context=None, val=None):
    """Remember a value that gives matches some context.

    Whether the value is honoured is up to the concrete matching
    code; which names and values make sense depends on what is
    being matched.

    <context> -- the *placeholder* key *inside* the context
                 definition, not the context *definition* key
    <val> -- the value to store under that key

    Returns False (storing nothing) when <context> is None,
    True otherwise.
    """
    key_given = context is not None
    if key_given:
        self._context_vals[context] = val
    return key_given
162
def unset_context(self, context=None):
    """Forget the value stored for the context placeholder <context>.

    A placeholder that was never set is silently ignored.
    """
    # pop() with a default tolerates missing keys
    self._context_vals.pop(context, None)
168
169
170
172 """Match provider where all possible options can be held
173 in a reasonably sized, pre-allocated list.
174 """
176 """aSeq must be a list of dicts. Each dict must have the keys (data, label, weight)
177 """
178 if not type(aSeq) in [types.ListType, types.TupleType]:
179 _log.error('fixed list match provider argument must be a list or tuple of dicts')
180 raise TypeError('fixed list match provider argument must be a list or tuple of dicts')
181
182 self.__items = aSeq
183 cMatchProvider.__init__(self)
184
185
186
187
188
189
190
191
193 """Return matches for aFragment at start of phrases."""
194 matches = []
195
196 for item in self.__items:
197
198 if string.find(string.lower(item['label']), aFragment) == 0:
199 matches.append(item)
200
201 if len(matches) == 0:
202 return (False, [])
203
204 matches.sort(self.__cmp_items)
205 return (True, matches)
206
208 """Return matches for aFragment at start of words inside phrases."""
209 matches = []
210
211 for item in self.__items:
212 pos = string.find(string.lower(item['label']), aFragment)
213
214 if pos == 0:
215 matches.append(item)
216
217 elif pos > 0:
218
219 if (item['label'])[pos-1] == ' ':
220 matches.append(item)
221
222 if len(matches) == 0:
223 return (False, [])
224
225 matches.sort(self.__cmp_items)
226 return (True, matches)
227
229 """Return matches for aFragment as a true substring."""
230 matches = []
231
232 for item in self.__items:
233 if string.find(string.lower(item['label']), aFragment) != -1:
234 matches.append(item)
235
236 if len(matches) == 0:
237 return (False, [])
238
239 matches.sort(self.__cmp_items)
240 return (True, matches)
241
243 """Return all items."""
244 matches = self.__items
245
246 if len(matches) == 0:
247 return (False, [])
248
249 matches.sort(self.__cmp_items)
250 return (True, matches)
251
253 """items must be a list of dicts. Each dict must have the keys (data, label, weight)"""
254 self.__items = items
255
257 """Compare items based on weight."""
258 if item1['weight'] == item2['weight']:
259 return 0
260
261
262 if item1['weight'] < item2['weight']:
263 return 1
264 if item1['weight'] > item2['weight']:
265 return -1
266
268 """Match provider which searches matches
269 in the results of a function call.
270 """
def __init__(self, get_candidates=None):
    """Set up a provider that asks a callable for its match candidates.

    <get_candidates> -- callable invoked without arguments whenever
                        candidates are needed; documented as having to
                        return a list of strings
                        NOTE(review): the matching code elsewhere in this
                        file reads candidate['label'], which suggests dicts
                        with a 'label' key rather than plain strings --
                        confirm against callers.

    Raises ValueError (after logging the problem) if no callable is given.
    """
    if get_candidates is None:
        problem = 'must define function to retrieve match candidates list'
        _log.error(problem)
        raise ValueError(problem)

    self._get_candidates = get_candidates
    cMatchProvider.__init__(self)
279
280
281
282
283
284
285
286
288 """Return matches for aFragment at start of phrases."""
289 print "getting phrase matches"
290 matches = []
291 candidates = self._get_candidates()
292
293 for candidate in candidates:
294
295 if aFragment.startswith(candidate['label'].lower()):
296 matches.append(candidate)
297
298 if len(matches) == 0:
299 return (False, [])
300
301 matches.sort(self.__cmp_candidates)
302 return (True, matches)
303
305 """Return matches for aFragment at start of words inside phrases."""
306 print "getting word matches"
307 matches = []
308 candidates = self._get_candidates()
309
310 for candidate in candidates:
311 pos = candidate['label'].lower().find(aFragment)
312
313
314
315
316 if (pos == 0) or (candidate['label'][pos-1] == ' '):
317 matches.append(candidate)
318
319 if len(matches) == 0:
320 return (False, [])
321
322 matches.sort(self.__cmp_candidates)
323 return (True, matches)
324
326 """Return matches for aFragment as a true substring."""
327 matches = []
328 candidates = self._get_candidates()
329
330 for candidate in candidates:
331 if candidate['label'].lower().find(aFragment) != -1:
332
333 matches.append(candidate)
334
335 if len(matches) == 0:
336 return (False, [])
337
338 matches.sort(self.__cmp_candidates)
339 return (True, matches)
340
342 """Return all candidates."""
343 return self._get_candidates()
344
346 """naive ordering"""
347 return 0
348
349
350
351
352
353
354
355
357 """Match provider which searches matches
358 in possibly several database tables.
359
360 queries:
361 - a list of unicode strings
362 - each string is a query
363 - each string must contain: "... where <column> %(fragment_condition)s ..."
364 - each string can contain in the where clause: "... %(<context_key>)s ..."
365 - each query must return (data, label)
366
367 context definitions to be used in the queries
368 example: {'ctxt_country': {'where_part': 'and country = %(country)s', 'placeholder': 'country'}}
369 """
def __init__(self, queries=None, context=None):
    """Set up a provider that runs SQL queries to find matches.

    <queries> -- a list of query strings; anything that is not a list
                 (such as a single query string) is wrapped into a
                 one-element list
    <context> -- dict of context definitions to be used in the queries;
                 None means "no context definitions"
    """
    # isinstance() rather than an exact type comparison so that list
    # subclasses are used as they are instead of being wrapped again
    if not isinstance(queries, list):
        queries = [queries]
    self._queries = queries

    self._context = {} if context is None else context

    # arguments handed to the database layer alongside each query
    self._args = {}
    cMatchProvider.__init__(self)
383
384
385
386
387
388
389
390
392 """Return matches for aFragment at start of phrases."""
393
394 fragment_condition = u"ILIKE %(fragment)s"
395 self._args['fragment'] = u"%s%%" % aFragment
396
397 return self.__find_matches(fragment_condition)
398
400 """Return matches for aFragment at start of words inside phrases."""
401
402 fragment_condition = u"~* %(fragment)s"
403 aFragment = gmPG2.sanitize_pg_regex(expression = aFragment, escape_all = False)
404 self._args['fragment'] = u"( %s)|(^%s)" % (aFragment, aFragment)
405
406 return self.__find_matches(fragment_condition)
407
409 """Return matches for aFragment as a true substring."""
410
411 fragment_condition = u"ILIKE %(fragment)s"
412 self._args['fragment'] = u"%%%s%%" % aFragment
413
414 return self.__find_matches(fragment_condition)
415
419
421 matches = []
422 for query in self._queries:
423 where_fragments = {'fragment_condition': fragment_condition}
424
425 for context_key, context_def in self._context.items():
426 try:
427 placeholder = context_def['placeholder']
428 where_part = context_def['where_part']
429 self._args[placeholder] = self._context_vals[placeholder]
430
431 where_fragments[context_key] = where_part
432 if self.print_queries:
433 print placeholder
434 print where_part
435 print self._context_vals[placeholder]
436 except KeyError:
437
438 where_fragments[context_key] = u''
439
440 cmd = query % where_fragments
441
442 if self.print_queries:
443 print self.__class__.__name__
444 print self._context_vals
445 print self._args
446 print cmd
447
448 try:
449 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': self._args}])
450 except:
451 _log.exception('[%s]: error running match provider SQL, dropping query', self.__class__.__name__)
452 idx = self._queries.index(query)
453 del self._queries[idx]
454 break
455
456
457 if len(rows) == 0:
458 continue
459
460 for row in rows:
461 matches.append({'data': row[0], 'label': row[1], 'weight': 0})
462
463 return (True, matches)
464
465 return (False, [])
466
if __name__ == '__main__':
    # Nothing happens when this file is run as a script.
    pass
469
470
471