1 """Base classes for match providers.
2
3 They are used by business objects to give
4 phrasewheels the ability to guess phrases.
5
6 Copyright (C) GNUMed developers
7 license: GPL
8 """
9
10
11
12 __version__ = "$Revision: 1.34 $"
13 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>, I.Haywood <ihaywood@gnu.org>, S.J.Tan <sjtan@bigpond.com>"
14
15
16 import string, types, time, sys, re as regex, logging
17
18
19
20 from Gnumed.pycommon import gmPG2
21
22
23 _log = logging.getLogger('gm.ui')
24 _log.info(__version__)
25
26
27 default_ignored_chars = "[?!.'\\(){}\[\]<>~#*$%^_]+" + '"'
28 default_word_separators = '[- \t=+&:@]+'
29
31 """Base class for match providing objects.
32
33 Match sources might be:
34 - database tables
35 - flat files
36 - previous input
37 - config files
38 - in-memory list created on the fly
39 """
40 print_queries = False
41
48
49
50
52 """Return matches according to aFragment and matching thresholds.
53
54 FIXME: design decision: we dont worry about data source changes
55 during the lifetime of a MatchProvider
56 FIXME: append _("*get all items*") on truncation
57 """
58
59 if aFragment is None:
60 raise ValueError, 'Cannot find matches without a fragment.'
61
62
63 if aFragment == u'*':
64 return self.getAllMatches()
65
66
67 tmpFragment = aFragment.lower()
68
69 if self.__ignored_chars is not None:
70 tmpFragment = self.__ignored_chars.sub('', tmpFragment)
71
72 if self.__word_separators is not None:
73 tmpFragment = u' '.join(self.__word_separators.split(tmpFragment))
74
75 lngFragment = len(tmpFragment)
76
77
78 if lngFragment >= self.__threshold_substring:
79 return self.getMatchesBySubstr(tmpFragment)
80 elif lngFragment >= self.__threshold_word:
81 return self.getMatchesByWord(tmpFragment)
82 elif lngFragment >= self.__threshold_phrase:
83 return self.getMatchesByPhrase(tmpFragment)
84 else:
85 return (False, [])
86
88 raise NotImplementedError
89
91 raise NotImplementedError
92
94 raise NotImplementedError
95
97 raise NotImplementedError
98
99
100
def setThresholds(self, aPhrase = 1, aWord = 3, aSubstring = 5):
    """Set match location thresholds.

    The fragment passed to getMatches() must contain at least this
    many characters before it triggers a match search at:

    1) phrase_start - start of phrase (first word)     -> aPhrase
    2) word_start   - start of any word within phrase  -> aWord
    3) in_word      - _inside_ any word within phrase  -> aSubstring

    The values must satisfy aPhrase <= aWord <= aSubstring.

    Returns True if the thresholds were stored. Returns False, after
    logging an error and leaving the current thresholds untouched,
    if the ordering above is violated.
    """
    # sanity checks
    if aSubstring < aWord:
        _log.error(
            'Setting substring threshold (%s) lower than word-start threshold (%s) does not make sense. Retaining original thresholds (%s:%s, respectively).',
            aSubstring,
            aWord,
            self.__threshold_substring,
            self.__threshold_word
        )
        return False

    if aWord < aPhrase:
        # log the two values this check actually compares (word vs. phrase)
        _log.error(
            'Setting word-start threshold (%s) lower than phrase-start threshold (%s) does not make sense. Retaining original thresholds (%s:%s, respectively).',
            aWord,
            aPhrase,
            self.__threshold_word,
            self.__threshold_phrase
        )
        return False

    # only store anything once all values are known to be consistent
    self.__threshold_phrase = aPhrase
    self.__threshold_word = aWord
    self.__threshold_substring = aSubstring

    return True
124
130
132 if self.__word_separators is None:
133 return None
134 return self.__word_separators.pattern
135
136 word_separators = property(_get_word_separators, _set_word_separators)
137
143
145 if self.__ignored_chars is None:
146 return None
147 return self.__ignored_chars.pattern
148
149 ignored_chars = property(_get_ignored_chars, _set_ignored_chars)
150
def set_context(self, context=None, val=None):
    """Store a context value which matching code may take into account.

    Whether and how the value is used is up to the concrete match
    provider; names and values depend on what is being matched.

    context -- the *placeholder* key *inside* a context definition,
        not the key of the context *definition* itself
    val -- the value to remember for that placeholder

    Returns False (storing nothing) if <context> is None, else True.
    """
    if context is not None:
        self._context_vals[context] = val
        return True
    return False
165
def unset_context(self, context=None):
    """Forget the value stored for the context placeholder <context>.

    A placeholder for which no value is stored is silently ignored.
    """
    # pop() with a default swallows the missing-key case just like
    # catching KeyError around "del" does
    self._context_vals.pop(context, None)
171
172
173
175 """Match provider where all possible options can be held
176 in a reasonably sized, pre-allocated list.
177 """
179 """aSeq must be a list of dicts. Each dict must have the keys (data, label, weight)
180 """
181 if not type(aSeq) in [types.ListType, types.TupleType]:
182 _log.error('fixed list match provider argument must be a list or tuple of dicts')
183 raise TypeError('fixed list match provider argument must be a list or tuple of dicts')
184
185 self.__items = aSeq
186 cMatchProvider.__init__(self)
187
188
189
190
191
192
193
194
196 """Return matches for aFragment at start of phrases."""
197 matches = []
198
199 for item in self.__items:
200
201 if string.find(string.lower(item['label']), aFragment) == 0:
202 matches.append(item)
203
204 if len(matches) == 0:
205 return (False, [])
206
207 matches.sort(self.__cmp_items)
208 return (True, matches)
209
211 """Return matches for aFragment at start of words inside phrases."""
212 matches = []
213
214 for item in self.__items:
215 pos = string.find(string.lower(item['label']), aFragment)
216
217 if pos == 0:
218 matches.append(item)
219
220 elif pos > 0:
221
222 if (item['label'])[pos-1] == ' ':
223 matches.append(item)
224
225 if len(matches) == 0:
226 return (False, [])
227
228 matches.sort(self.__cmp_items)
229 return (True, matches)
230
232 """Return matches for aFragment as a true substring."""
233 matches = []
234
235 for item in self.__items:
236 if string.find(string.lower(item['label']), aFragment) != -1:
237 matches.append(item)
238
239 if len(matches) == 0:
240 return (False, [])
241
242 matches.sort(self.__cmp_items)
243 return (True, matches)
244
246 """Return all items."""
247 matches = self.__items
248
249 if len(matches) == 0:
250 return (False, [])
251
252 matches.sort(self.__cmp_items)
253 return (True, matches)
254
256 """items must be a list of dicts. Each dict must have the keys (data, label, weight)"""
257 self.__items = items
258
260 """Compare items based on weight."""
261 if item1['weight'] == item2['weight']:
262 return 0
263
264
265 if item1['weight'] < item2['weight']:
266 return 1
267 if item1['weight'] > item2['weight']:
268 return -1
269
271 """Match provider which searches matches
272 in the results of a function call.
273 """
def __init__(self, get_candidates = None):
    """Set up a provider which gets its candidates from a function.

    get_candidates -- callable taking no arguments, invoked by the
        match methods each time candidates are needed.
        NOTE(review): the match methods index every candidate with
        ['label'], so the callable presumably has to return a list
        of dicts rather than of plain strings -- confirm with callers.

    Raises ValueError if no function is passed in.
    """
    if get_candidates is None:
        _log.error('must define function to retrieve match candidates list')
        # ArgumentError is not a Python builtin and is not imported at the
        # top of this module, so raising it would only yield a NameError
        raise ValueError('must define function to retrieve match candidates list')

    self._get_candidates = get_candidates
    cMatchProvider.__init__(self)
282
283
284
285
286
287
288
289
291 """Return matches for aFragment at start of phrases."""
292 print "getting phrase matches"
293 matches = []
294 candidates = self._get_candidates()
295
296 for candidate in candidates:
297
298 if aFragment.startswith(candidate['label'].lower()):
299 matches.append(candidate)
300
301 if len(matches) == 0:
302 return (False, [])
303
304 matches.sort(self.__cmp_candidates)
305 return (True, matches)
306
308 """Return matches for aFragment at start of words inside phrases."""
309 print "getting word matches"
310 matches = []
311 candidates = self._get_candidates()
312
313 for candidate in candidates:
314 pos = candidate['label'].lower().find(aFragment)
315
316
317
318
319 if (pos == 0) or (candidate['label'][pos-1] == ' '):
320 matches.append(candidate)
321
322 if len(matches) == 0:
323 return (False, [])
324
325 matches.sort(self.__cmp_candidates)
326 return (True, matches)
327
329 """Return matches for aFragment as a true substring."""
330 matches = []
331 candidates = self._get_candidates()
332
333 for candidate in candidates:
334 if candidate['label'].lower().find(aFragment) != -1:
335
336 matches.append(candidate)
337
338 if len(matches) == 0:
339 return (False, [])
340
341 matches.sort(self.__cmp_candidates)
342 return (True, matches)
343
345 """Return all candidates."""
346 return self._get_candidates()
347
349 """naive ordering"""
350 return 0
351
352
353
354
355
356
357
358
360 """Match provider which searches matches
361 in possibly several database tables.
362
363 queries:
364 - a list of unicode strings
365 - each string is a query
366 - each string must contain: "... where <column> %(fragment_condition)s ..."
367 - each string can contain in the where clause: "... %(<context_key>)s ..."
368
369 context definitions to be used in the queries
370 example: {'ctxt_country': {'where_part': 'and country = %(country)s', 'placeholder': 'country'}}
371 """
def __init__(self, queries = None, context = None):
    """Set up a provider which matches by running SQL queries.

    queries -- one query string or a list/tuple of query strings,
        a single query is wrapped into a one-element list
    context -- dict of context definitions, defaults to an empty dict
    """
    if isinstance(queries, (list, tuple)):
        # Keep a private copy: a query failing at runtime is deleted from
        # self._queries, which must not alter the sequence of the caller.
        # isinstance() also accepts tuples and list subclasses, which an
        # exact type comparison would wrap as one single (bogus) query.
        self._queries = list(queries)
    else:
        self._queries = [queries]

    if context is None:
        context = {}
    self._context = context

    # arguments handed to the database along with each query
    self._args = {}
    cMatchProvider.__init__(self)
385
386
387
388
389
390
391
392
394 """Return matches for aFragment at start of phrases."""
395 fragment_condition = u"ilike %(fragment)s"
396 self._args['fragment'] = u"%s%%" % aFragment
397 return self.__find_matches(fragment_condition)
398
400 """Return matches for aFragment at start of words inside phrases."""
401 fragment_condition = u"~* %(fragment)s"
402 aFragment = gmPG2.sanitize_pg_regex(expression = aFragment, escape_all = False)
403 self._args['fragment'] = u"( %s)|(^%s)" % (aFragment, aFragment)
404 return self.__find_matches(fragment_condition)
405
407 """Return matches for aFragment as a true substring."""
408 fragment_condition = u"ilike %(fragment)s"
409 self._args['fragment'] = u"%%%s%%" % aFragment
410 return self.__find_matches(fragment_condition)
411
415
417 matches = []
418 for query in self._queries:
419 where_fragments = {'fragment_condition': fragment_condition}
420
421 for context_key, context_def in self._context.items():
422 try:
423 placeholder = context_def['placeholder']
424 where_part = context_def['where_part']
425 self._args[placeholder] = self._context_vals[placeholder]
426
427 where_fragments[context_key] = where_part
428 except KeyError:
429
430 where_fragments[context_key] = u''
431
432 cmd = query % where_fragments
433
434 if self.print_queries:
435 print self.__class__.__name__
436 print self._context_vals
437 print self._args
438 print cmd
439
440 try:
441 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': self._args}])
442 except:
443 _log.exception('Error running match provider SQL, dropping query.')
444 idx = self._queries.index(query)
445 del self._queries[idx]
446 break
447
448
449 if len(rows) == 0:
450 continue
451
452 for row in rows:
453 matches.append({'data': row[0], 'label': row[1], 'weight': 0})
454
455 return (True, matches)
456
457 return (False, [])
458
459 if __name__ == '__main__':
460 pass
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607