1 """Base classes for match providers.
2
3 They are used by business objects to give
4 phrasewheels the ability to guess phrases.
5
6 Copyright (C) GNUMed developers
7 license: GPL v2 or later
8 """
9 __version__ = "$Revision: 1.34 $"
10 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>, I.Haywood <ihaywood@gnu.org>, S.J.Tan <sjtan@bigpond.com>"
11
12
13 import re as regex, logging
14
15
16
17 from Gnumed.pycommon import gmPG2
18
19
# Module-level logger (channel 'gm.ui') used for the error reporting in this file.
_log = logging.getLogger('gm.ui')
# Record the module revision string in the log once, at import time.
_log.info(__version__)
22
23
24
25
# Regular expression (one character class, repeated) describing the
# characters to be ignored in a search fragment.
# The double quote has to live *inside* the character class: appending
# it after the closing "]+" yields a pattern which only matches a run
# of ignored characters that is immediately followed by a literal '"',
# so ordinary punctuation on its own would never match.
default_ignored_chars = "[?!.'\\(){}\\[\\]<>~#*$%^_\"]+"
27
28
29
30
# Regular expression matching a run of word separator characters:
# hyphen, space, tab, '=', '+', '&', ':' and '@'.
default_word_separators = "[- \t=+&:@]+"
32
34 """Base class for match providing objects.
35
36 Match sources might be:
37 - database tables
38 - flat files
39 - previous input
40 - config files
41 - in-memory list created on the fly
42 """
43 print_queries = False
44
52
53
54
56 """Return matches according to aFragment and matching thresholds.
57
58 FIXME: design decision: we dont worry about data source changes
59 during the lifetime of a MatchProvider
60 FIXME: append _("*get all items*") on truncation
61 """
62
63 if aFragment is None:
64 raise ValueError, 'Cannot find matches without a fragment.'
65
66
67 if aFragment == u'*':
68 return self.getAllMatches()
69
70
71 tmpFragment = aFragment.lower()
72
73 if self.__ignored_chars is not None:
74 tmpFragment = self.__ignored_chars.sub('', tmpFragment)
75
76 if self.__word_separators is not None:
77 tmpFragment = u' '.join(self.__word_separators.split(tmpFragment))
78
79 lngFragment = len(tmpFragment)
80
81
82 if lngFragment >= self.__threshold_substring:
83 return self.getMatchesBySubstr(tmpFragment)
84 elif lngFragment >= self.__threshold_word:
85 return self.getMatchesByWord(tmpFragment)
86 elif lngFragment >= self.__threshold_phrase:
87 return self.getMatchesByPhrase(tmpFragment)
88 else:
89 return (False, [])
90
92 raise NotImplementedError
93
95 raise NotImplementedError
96
98 raise NotImplementedError
99
101 raise NotImplementedError
102
103
104
def setThresholds(self, aPhrase = 1, aWord = 3, aSubstring = 5):
    """Set match location thresholds.

    The fragment passed to getMatches() must contain at least this
    many characters before it triggers a match search at:

    1) phrase_start - start of phrase (first word)   -> aPhrase
    2) word_start - start of any word within phrase  -> aWord
    3) in_word - _inside_ any word within phrase     -> aSubstring

    The values must satisfy aPhrase <= aWord <= aSubstring.

    Returns True if the thresholds were stored. Returns False, after
    logging an error and leaving the current thresholds untouched,
    if the requested ordering does not make sense.
    """
    # sanity checks
    if aSubstring < aWord:
        _log.error('Setting substring threshold (%s) lower than word-start threshold (%s) does not make sense. Retaining original thresholds (%s:%s, respectively).' % (aSubstring, aWord, self.__threshold_substring, self.__threshold_word))
        return False
    if aWord < aPhrase:
        # report the two values actually being compared here
        # (word-start vs. phrase-start), in the order the message names them
        _log.error('Setting word-start threshold (%s) lower than phrase-start threshold (%s) does not make sense. Retaining original thresholds (%s:%s, respectively).' % (aWord, aPhrase, self.__threshold_word, self.__threshold_phrase))
        return False

    # now actually reassign thresholds
    self.__threshold_phrase = aPhrase
    self.__threshold_word = aWord
    self.__threshold_substring = aSubstring

    return True
128
134
136 if self.__word_separators is None:
137 return None
138 return self.__word_separators.pattern
139
140 word_separators = property(_get_word_separators, _set_word_separators)
141
147
149 if self.__ignored_chars is None:
150 return None
151 return self.__ignored_chars.pattern
152
153 ignored_chars = property(_get_ignored_chars, _set_ignored_chars)
154
def set_context (self, context=None, val=None):
    """Store a value providing context information for matches.

    Whether the matching code makes use of the value depends on
    its exact implementation. Which names and values are meaningful
    depends on what is being matched.

    <context> -- the *placeholder* key *inside* the context
                 definition, not the context *definition* key
    <val> -- the value to remember under that key

    Returns False (storing nothing) if <context> is None,
    True otherwise.
    """
    if context is not None:
        self._context_vals[context] = val
        return True
    return False
169
def unset_context(self, context=None):
    """Forget the context value stored under <context>.

    A key which has no stored value is silently ignored.
    """
    # pop() with a default never raises KeyError for a missing key
    self._context_vals.pop(context, None)
175
176
177
179 """Match provider where all possible options can be held
180 in a reasonably sized, pre-allocated list.
181 """
183 """aSeq must be a list of dicts. Each dict must have the keys (data, label, weight)
184 """
185 if not type(aSeq) in [type(None), type([]), type(())]:
186 _log.error('fixed list match provider argument must be a list/tuple of dicts/None')
187 raise TypeError('fixed list match provider argument must be a list/tuple of dicts/None')
188
189 self.__items = aSeq
190 cMatchProvider.__init__(self)
191
192
193
194
195
196
197
198
200 """Return matches for aFragment at start of phrases."""
201 matches = []
202
203 for item in self.__items:
204
205 if item['list_label'].lower().startswith(aFragment.lower()):
206 matches.append(item)
207
208 if len(matches) == 0:
209 return (False, [])
210
211 matches.sort(self.__cmp_items)
212 return (True, matches)
213
215 """Return matches for aFragment at start of words inside phrases."""
216 matches = []
217
218 for item in self.__items:
219 item_label = item['list_label'].lower()
220 fragment_pos = item_label.find(aFragment.lower())
221
222 if fragment_pos == 0:
223 matches.append(item)
224
225 elif fragment_pos > 0:
226
227 if item_label[fragment_pos-1] == u' ':
228 matches.append(item)
229
230 if len(matches) == 0:
231 return (False, [])
232
233 matches.sort(self.__cmp_items)
234 return (True, matches)
235
237 """Return matches for aFragment as a true substring."""
238 matches = []
239
240 for item in self.__items:
241 if item['list_label'].lower().find(aFragment.lower()) != -1:
242 matches.append(item)
243
244 if len(matches) == 0:
245 return (False, [])
246
247 matches.sort(self.__cmp_items)
248 return (True, matches)
249
251 """Return all items."""
252 matches = self.__items
253
254 if len(matches) == 0:
255 return (False, [])
256
257 matches.sort(self.__cmp_items)
258 return (True, matches)
259
261 """items must be a list of dicts. Each dict must have the keys (data, list_label, weight)"""
262 self.__items = items
263
265 """Compare items based on weight."""
266 if item1['weight'] == item2['weight']:
267 return 0
268
269
270 if item1['weight'] < item2['weight']:
271 return 1
272 if item1['weight'] > item2['weight']:
273 return -1
274
276 """Match provider which searches matches
277 in the results of a function call.
278 """
def __init__(self, get_candidates = None):
    """Set up a provider which obtains its candidates from a callable.

    get_candidates -- callable invoked without arguments by the
        matching methods; they index each returned candidate
        with ['list_label']

    Raises ValueError if no callable is given.
    """
    if get_candidates is None:
        problem = 'must define function to retrieve match candidates list'
        _log.error(problem)
        raise ValueError(problem)

    # remember the source of candidates before running the base class setup
    self._get_candidates = get_candidates
    cMatchProvider.__init__(self)
287
288
289
290
291
292
293
294
296 """Return matches for aFragment at start of phrases."""
297 matches = []
298 candidates = self._get_candidates()
299
300 for candidate in candidates:
301
302 if aFragment.startswith(candidate['list_label'].lower()):
303 matches.append(candidate)
304
305 if len(matches) == 0:
306 return (False, [])
307
308 matches.sort(self.__cmp_candidates)
309 return (True, matches)
310
312 """Return matches for aFragment at start of words inside phrases."""
313 matches = []
314 candidates = self._get_candidates()
315
316 for candidate in candidates:
317 pos = candidate['list_label'].lower().find(aFragment)
318
319
320
321
322 if (pos == 0) or (candidate['list_label'][pos-1] == u' '):
323 matches.append(candidate)
324
325 if len(matches) == 0:
326 return (False, [])
327
328 matches.sort(self.__cmp_candidates)
329 return (True, matches)
330
332 """Return matches for aFragment as a true substring."""
333 matches = []
334 candidates = self._get_candidates()
335
336 for candidate in candidates:
337 if candidate['list_label'].lower().find(aFragment) != -1:
338
339 matches.append(candidate)
340
341 if len(matches) == 0:
342 return (False, [])
343
344 matches.sort(self.__cmp_candidates)
345 return (True, matches)
346
348 """Return all candidates."""
349 return self._get_candidates()
350
352 """naive ordering"""
353 return 0
354
355
356
357
358
359
360
361
363 """Match provider which searches matches
364 in possibly several database tables.
365
366 queries:
367 - a list of unicode strings
368 - each string is a query
369 - each string must contain: "... where <column> %(fragment_condition)s ..."
370 - each string can contain in the where clause: "... %(<context_key>)s ..."
371 - each query must return (data, label)
372
373 context definitions to be used in the queries
374 example: {'ctxt_country': {'where_part': 'and country = %(country)s', 'placeholder': 'country'}}
375 """
def __init__(self, queries = None, context = None):
    """Set up a provider searching for matches by running SQL queries.

    queries -- one query or a list of queries; anything which is
        not a list is wrapped into a one-element list
    context -- dict of context definitions to be used in the
        queries, an empty dict is used if None is passed
    """
    # isinstance() rather than an exact type comparison so that
    # instances of list subclasses are not wrapped into another list
    if not isinstance(queries, list):
        queries = [queries]
    self._queries = queries

    if context is None:
        context = {}
    self._context = context

    # query arguments, filled in by the matching methods
    self._args = {}
    cMatchProvider.__init__(self)
389
390
391
392
393
394
395
396
398 """Return matches for aFragment at start of phrases."""
399
400 fragment_condition = u"ILIKE %(fragment)s"
401 self._args['fragment'] = u"%s%%" % aFragment
402
403 return self._find_matches(fragment_condition)
404
406 """Return matches for aFragment at start of words inside phrases."""
407
408 fragment_condition = u"~* %(fragment)s"
409 aFragment = gmPG2.sanitize_pg_regex(expression = aFragment, escape_all = False)
410 self._args['fragment'] = u"( %s)|(^%s)" % (aFragment, aFragment)
411
412 return self._find_matches(fragment_condition)
413
415 """Return matches for aFragment as a true substring."""
416
417 fragment_condition = u"ILIKE %(fragment)s"
418 self._args['fragment'] = u"%%%s%%" % aFragment
419
420 return self._find_matches(fragment_condition)
421
425
427 if self.print_queries:
428 print "----------------------"
429 matches = []
430 for query in self._queries:
431 where_fragments = {'fragment_condition': fragment_condition}
432
433 for context_key, context_def in self._context.items():
434 try:
435 placeholder = context_def['placeholder']
436 where_part = context_def['where_part']
437 self._args[placeholder] = self._context_vals[placeholder]
438
439 where_fragments[context_key] = where_part
440 if self.print_queries:
441 print "ctxt ph:", placeholder
442 print "ctxt where:", where_part
443 print "ctxt val:", self._context_vals[placeholder]
444 except KeyError:
445
446 where_fragments[context_key] = u''
447
448 cmd = query % where_fragments
449
450 if self.print_queries:
451 print "class:", self.__class__.__name__
452 print "ctxt:", self._context_vals
453 print "args:", self._args
454 print "query:", cmd
455
456 try:
457 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': self._args}])
458 except:
459 _log.exception('[%s]: error running match provider SQL, dropping query', self.__class__.__name__)
460 idx = self._queries.index(query)
461 del self._queries[idx]
462 break
463
464
465 if len(rows) == 0:
466 continue
467
468 for row in rows:
469 match = {'weight': 0}
470
471 try:
472 match['data'] = row['data']
473 except KeyError:
474 match['data'] = row[0]
475
476 try:
477 match['list_label'] = row['list_label']
478 except KeyError:
479 match['list_label'] = row[1]
480
481
482 try:
483 match['field_label'] = row['field_label']
484
485 except KeyError:
486
487 try:
488 match['field_label'] = row[2]
489
490 except IndexError:
491 match['field_label'] = match['list_label']
492
493
494
495
496
497
498 matches.append(match)
499
500 return (True, matches)
501
502
503 return (False, [])
504
505 if __name__ == '__main__':
506 pass
507
508
509