1
2 """ATC/DDD handling code.
3
4 http://who.no
5
6 license: GPL
7 """
8
9
10
11 __version__ = "$Revision: 1.7 $"
12 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
13
14 import sys, codecs, logging, csv, re as regex, os.path
15
16
17 if __name__ == '__main__':
18 sys.path.insert(0, '../../')
19 from Gnumed.pycommon import gmPG2, gmTools, gmCfg2
20
21
22 _log = logging.getLogger('gm.atc')
23 _log.info(__version__)
24
25 _cfg = gmCfg2.gmCfgData()
26
28
29 _log.debug('substance <%s>, ATC <%s>', substance, atc)
30
31 if atc is not None:
32 if atc.strip() == u'':
33 atc = None
34
35 if atc is None:
36 atcs = text2atc(text = substance, fuzzy = False)
37 if len(atcs) == 0:
38 _log.debug(u'no ATC found, aborting')
39 return atc
40 if len(atcs) > 1:
41 _log.debug(u'non-unique ATC mapping, aborting')
42 return atc
43 atc = atcs[0][0].strip()
44
45 args = {'atc': atc, 'term': substance.strip()}
46 queries = [
47 {'cmd': u"UPDATE ref.substance_in_brand SET atc_code = %(atc)s WHERE description = %(term)s AND atc_code IS NULL",
48 'args': args},
49 {'cmd': u"UPDATE clin.consumed_substance SET atc_code = %(atc)s WHERE description = %(term)s AND atc_code IS NULL",
50 'args': args},
51 {'cmd': u"UPDATE ref.branded_drug SET atc_code = %(atc)s WHERE description = %(term)s AND atc_code IS NULL",
52 'args': args}
53 ]
54 gmPG2.run_rw_queries(queries = queries)
55
56 return atc
57
def text2atc(text=None, fuzzy=False):
    """Look up ATC codes matching a term.

    The term is compared against ref.v_atc.term and against the
    description column of ref.substance_in_brand, ref.branded_drug and
    clin.consumed_substance. Only entries carrying a non-NULL ATC code
    are considered, and each ATC code is reported at most once.

    text -- the term to search for; surrounding whitespace is ignored.
    fuzzy -- if true, do a case-insensitive substring match (ilike
        with the term wrapped in %...%); otherwise require a
        case-insensitive match on the whole term.

    Returns the rows handed back by gmPG2.run_ro_queries(), ordered by
    ATC code. Each row has the columns atc_code, is_group_code and
    pk_data_source; the latter two are NULL for rows that do not come
    from ref.v_atc. Returns an empty list if text is None.
    """
    # The signature advertises None as the default, so treat it as
    # "nothing to look up" rather than failing on None.strip().
    if text is None:
        return []

    text = text.strip()

    if fuzzy:
        # substring match: wrap the term in SQL wildcards
        args = {'term': u'%%%s%%' % text}
        cmd = u"""
            SELECT DISTINCT ON (atc_code) *
            FROM (
                SELECT atc as atc_code, is_group_code, pk_data_source
                FROM ref.v_atc
                WHERE term ilike %(term)s AND atc IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.substance_in_brand
                WHERE description ilike %(term)s AND atc_code IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.branded_drug
                WHERE description ilike %(term)s AND atc_code IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM clin.consumed_substance
                WHERE description ilike %(term)s AND atc_code IS NOT NULL
            ) as tmp
            ORDER BY atc_code
        """
    else:
        # whole-term match: compare lower-cased on both sides
        args = {'term': text.lower()}
        cmd = u"""
            SELECT DISTINCT ON (atc_code) *
            FROM (
                SELECT atc as atc_code, is_group_code, pk_data_source
                FROM ref.v_atc
                WHERE lower(term) = %(term)s AND atc IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.substance_in_brand
                WHERE lower(description) = %(term)s AND atc_code IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM ref.branded_drug
                WHERE lower(description) = %(term)s AND atc_code IS NOT NULL
                    UNION
                SELECT atc_code, null, null
                FROM clin.consumed_substance
                WHERE lower(description) = %(term)s AND atc_code IS NOT NULL
            ) as tmp
            ORDER BY atc_code
        """

    # run_ro_queries() hands back a (rows, column index) pair; the
    # index is not needed because get_col_idx is False.
    rows, _unused_idx = gmPG2.run_ro_queries(
        queries=[{'cmd': cmd, 'args': args}],
        get_col_idx=False
    )

    _log.debug(u'term: %s => ATCs: %s (fuzzy: %s)', text, rows, fuzzy)

    return rows
114
116 cmd = u"""
117 SELECT DISTINCT ON (atc) ddd, unit
118 FROM ref.atc
119 WHERE
120 code = %(atc)s
121 AND
122 ddd IS NOT NULL
123 """
124 args = {'atc': atc.strip()}
125 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}], get_col_idx = False)
126
127 _log.debug(u'ATC: %s => DDD: %s', atc, rows)
128
129 return rows
130
132 cmd = u'select * from ref.v_atc order by %s' % order_by
133 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd}], get_col_idx = False)
134 return rows
135
137
138
139 _cfg.add_file_source(source = 'atc', file = cfg_fname, encoding = 'utf8')
140
141 data_fname = os.path.join (
142 os.path.dirname(cfg_fname),
143 _cfg.get(group = 'atc', option = 'data file', source_order = [('atc', 'return')])
144 )
145 version = _cfg.get(group = 'atc', option = 'version', source_order = [('atc', 'return')])
146 lang = _cfg.get(group = 'atc', option = 'language', source_order = [('atc', 'return')])
147 desc = _cfg.get(group = 'atc', option = 'description', source_order = [('atc', 'return')])
148 url = _cfg.get(group = 'atc', option = 'url', source_order = [('atc', 'return')])
149 name_long = _cfg.get(group = 'atc', option = 'long name', source_order = [('atc', 'return')])
150 name_short = _cfg.get(group = 'atc', option = 'short name', source_order = [('atc', 'return')])
151
152 _cfg.remove_source(source = 'atc')
153
154 _log.debug('importing ATC version [%s] (%s) from [%s]', version, lang, data_fname)
155
156 args = {'ver': version, 'desc': desc, 'url': url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}
157
158
159 queries = [
160 {
161 'cmd': u"""delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
162 'args': args
163 }, {
164 'cmd': u"""
165 insert into ref.data_source (name_long, name_short, version, description, lang, source) values (
166 %(name_long)s,
167 %(name_short)s,
168 %(ver)s,
169 %(desc)s,
170 %(lang)s,
171 %(url)s
172 )""",
173 'args': args
174 }, {
175 'cmd': u"""select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
176 'args': args
177 }
178 ]
179 rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
180 data_src_pk = rows[0][0]
181 _log.debug('ATC data source record created, pk is #%s', data_src_pk)
182
183
184 csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
185 atc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = ",", quotechar = '"')
186
187
188 curs = conn.cursor()
189 cmd = u"""delete from ref.atc_staging"""
190 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
191 curs.close()
192 conn.commit()
193 _log.debug('ATC staging table emptied')
194
195
196 curs = conn.cursor()
197 cmd = u"""insert into ref.atc_staging values (%s, %s, %s, %s, %s, %s)"""
198 first = False
199 for loinc_line in atc_reader:
200
201 if not first:
202 first = True
203 continue
204
205 if loinc_line[0] + loinc_line[1] + loinc_line[2] + loinc_line[3] + loinc_line[4] == u'':
206 continue
207
208 comment = u''
209 ddd_val = u''
210 unit = u''
211 adro = u''
212
213
214 if regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*$', loinc_line[4]):
215 ddd_val, unit, adro = regex.split('\s', loinc_line[4])
216
217 elif regex.match('\d{,3},\d{,3}\s.{1,2}\s.(,.)*\s.+$', loinc_line[4]):
218 ddd_val, unit, adro, comment = regex.split('\s', loinc_line[4], 3)
219
220 elif regex.match('\d{,3}\s.{1,2}\s.(,.)*$', loinc_line[4]):
221 ddd_val, unit, adro = regex.split('\s', loinc_line[4])
222
223 elif regex.match('\d{,3}\s.{1,2}\s.(,.)*\s.+$', loinc_line[4]):
224 ddd_val, unit, adro, comment = regex.split('\s', loinc_line[4], 3)
225
226 else:
227 comment = loinc_line[4]
228
229 args = [
230 loinc_line[0].strip(),
231 loinc_line[2],
232 ddd_val.replace(',', '.'),
233 unit,
234 adro,
235 comment
236 ]
237
238 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])
239
240 curs.close()
241 conn.commit()
242 csv_file.close()
243 _log.debug('ATC staging table loaded')
244
245
246 curs = conn.cursor()
247 args = {'src_pk': data_src_pk}
248 cmd = u"""
249 insert into ref.atc (
250 fk_data_source,
251 code,
252 term,
253 comment,
254 ddd,
255 unit,
256 administration_route
257 ) select
258 %(src_pk)s,
259 atc,
260 name,
261 nullif(comment, ''),
262 nullif(ddd, '')::numeric,
263 nullif(unit, ''),
264 nullif(adro, '')
265
266 from
267 ref.atc_staging
268 """
269
270 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])
271
272 curs.close()
273 conn.commit()
274 _log.debug('transfer from ATC staging table to real ATC table done')
275
276
277 curs = conn.cursor()
278 cmd = u"""delete from ref.atc_staging"""
279 gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
280 curs.close()
281 conn.commit()
282 _log.debug('ATC staging table emptied')
283
284 return True
285
286
287
288 if __name__ == "__main__":
289
290 if len(sys.argv) == 1:
291 sys.exit()
292
293 if sys.argv[1] != 'test':
294 sys.exit()
295
296 from Gnumed.pycommon import gmLog2
297 from Gnumed.pycommon import gmI18N
298
299 gmI18N.activate_locale()
300
301
302
305
307 print 'searching ATC code for:', sys.argv[2]
308 print ' ', text2atc(sys.argv[2])
309 print ' ', text2atc(sys.argv[2], True)
310
312 print "searching for DDD on ATC:", sys.argv[2]
313 print atc2ddd(atc = sys.argv[2])
314
319
320
321
322 test_atc2ddd()
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349