1
2 """LOINC handling code.
3
4 http://loinc.org
5
6 license: GPL
7 """
8
9
10
11 __version__ = "$Revision: 1.7 $"
12 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
13
14 import sys, codecs, logging, csv
15
16
17 if __name__ == '__main__':
18 sys.path.insert(0, '../../')
19 from Gnumed.pycommon import gmPG2, gmTools
20
21
# Module-level logger for the LOINC code; the module revision string is
# written to the log once, at import time.
_log = logging.getLogger('gm.loinc')
_log.info(__version__)
24
# Values stored with the LOINC row in ref.data_source on import.
origin_url = u'http://loinc.org'
# "\xae" is the registered-trademark sign; the escape keeps the literal
# independent of the encoding this source file is saved in.
name_long = u'LOINC\xae (Logical Observation Identifiers Names and Codes)'
name_short = u'LOINC'

# Encoding used when reading the original LOINC distribution file.
file_encoding = 'latin1'
# A line containing this text is the last line of the license/header part
# of the distribution file; everything after it is treated as data.
license_delimiter = u'Clip Here for Data'
# A license line starting with this tag carries the LOINC version after it.
version_tag = u'LOINC(R) Database Version'

# Column names of one LOINC data row, in file order.  The number of entries
# determines how many placeholders the staging-table INSERT is built with.
loinc_fields = u"""
    LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP
    RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST STATUS
    MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY MOLAR_MASS
    CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME FINAL NAACCR_ID
    CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT SURVEY_QUEST_SRC
    UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 SHORTNAME ORDER_OBS
    CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID EXTERNAL_COPYRIGHT_NOTICE
    EXAMPLE_UNITS INPC_PERCENTAGE LONG_COMMON_NAME
""".split()
33
34
36
37 cmd = u"""
38 select coalesce (
39 (select term
40 from ref.v_coded_terms
41 where
42 coding_system = 'LOINC'
43 and
44 code = %(loinc)s
45 and
46 lang = i18n.get_curr_lang()
47 ),
48 (select term
49 from ref.v_coded_terms
50 where
51 coding_system = 'LOINC'
52 and
53 code = %(loinc)s
54 and
55 lang = 'en_EN'
56 ),
57 (select term
58 from ref.v_coded_terms
59 where
60 coding_system = 'LOINC'
61 and
62 code = %(loinc)s
63 )
64 )
65 """
66 args = {'loinc': loinc}
67 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}])
68
69 return [ r[0] for r in rows ]
70
72
73 _log.debug('splitting LOINC source file [%s]', input_fname)
74
75 if license_fname is None:
76 license_fname = gmTools.get_unique_filename(prefix = 'loinc_license', suffix = '.txt')
77 _log.debug('LOINC header: %s', license_fname)
78
79 if data_fname is None:
80 data_fname = gmTools.get_unique_filename(prefix = 'loinc_data', suffix = '.csv')
81 _log.debug('LOINC data: %s', data_fname)
82
83 loinc_file = codecs.open(input_fname, 'rU', encoding = file_encoding, errors = 'replace')
84 out_file = codecs.open(license_fname, 'w', encoding = 'utf8', errors = 'replace')
85
86 for line in loinc_file:
87
88 if license_delimiter in line:
89 out_file.write(line)
90 out_file.close()
91 out_file = codecs.open(data_fname, 'w', encoding = 'utf8', errors = 'replace')
92 continue
93
94 out_file.write(line)
95
96 out_file.close()
97
98 return data_fname, license_fname
99
101
102 csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
103 first_line = csv_file.readline()
104 sniffer = csv.Sniffer()
105 if sniffer.has_header(first_line):
106 pass
107
109
def get_version(license_fname=None):
    """Extract the LOINC version from a license/header file.

    NOTE(review): the ``def`` line was missing from the source listing; the
    name and the ``license_fname`` keyword are taken from the call in
    loinc_import(), the ``None`` default is an assumption -- confirm.

    license_fname -- path of the UTF-8 encoded license file

    Returns the text following ``version_tag`` on the first line starting
    with that tag (stripped of surrounding whitespace), or None if no such
    line exists.
    """
    in_file = codecs.open(license_fname, 'rU', encoding='utf8', errors='replace')
    try:
        for line in in_file:
            if line.startswith(version_tag):
                return line[len(version_tag):].strip()
    finally:
        # close the file even if reading/decoding fails half-way
        in_file.close()

    return None
120
def _register_loinc_data_source(license_fname, version, lang):
    """(Re-)create the ref.data_source row for this LOINC version, return its pk."""
    # the complete license text is stored as the description of the source
    in_file = codecs.open(license_fname, 'rU', encoding='utf8', errors='replace')
    try:
        desc = in_file.read()
    finally:
        in_file.close()

    args = {
        'ver': version,
        'desc': desc,
        'url': origin_url,
        'name_long': name_long,
        'name_short': name_short,
        'lang': lang
    }
    queries = [{
        # drop a pre-existing row for the same version first
        'cmd': u"""delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
        'args': args
    }, {
        'cmd': u"""
            insert into ref.data_source (name_long, name_short, version, description, lang, source) values (
                %(name_long)s,
                %(name_short)s,
                %(ver)s,
                %(desc)s,
                %(lang)s,
                %(url)s
            )""",
        'args': args
    }, {
        'cmd': u"""select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
        'args': args
    }]
    rows, idx = gmPG2.run_rw_queries(queries=queries, return_data=True)
    return rows[0][0]


def _run_and_commit(conn, queries):
    """Run *queries* on a fresh cursor of *conn*, then commit."""
    curs = conn.cursor()
    try:
        gmPG2.run_rw_queries(link_obj=curs, queries=queries)
    finally:
        # do not leak the cursor if a query fails; the commit below is
        # only reached on success
        curs.close()
    conn.commit()


def _load_loinc_staging(conn, loinc_reader):
    """Insert every data row delivered by *loinc_reader* into ref.loinc_staging."""
    # one positional placeholder per LOINC field: "%s, %s, ..., %s"
    placeholders = u', '.join([u'%s'] * len(loinc_fields))
    cmd = u"""insert into ref.loinc_staging values (%s)""" % placeholders

    curs = conn.cursor()
    try:
        header_skipped = False
        for loinc_line in loinc_reader:
            # the first row holds the column names, not data
            if not header_skipped:
                header_skipped = True
                continue
            gmPG2.run_rw_queries(link_obj=curs, queries=[{'cmd': cmd, 'args': loinc_line}])
    finally:
        curs.close()
    conn.commit()


def _transfer_staging_to_loinc(conn, data_src_pk):
    """Copy all rows from ref.loinc_staging into ref.loinc, linked to *data_src_pk*."""
    # Empty strings are turned into NULL.  "term" is the long common name
    # or, failing that, the ":"-joined six main LOINC axes.
    cmd = u"""
        insert into ref.loinc (
            fk_data_source,

            term,

            code,
            comment,
            component,
            property,
            time_aspect,
            system,
            scale_type,
            method_type,
            related_names_1_old,
            grouping_class,
            loinc_internal_source,
            dt_last_change,
            change_type,
            answer_list,
            code_status,
            maps_to,
            scope,
            normal_range,
            ipcc_units,
            reference,
            exact_component_synonym,
            molar_mass,
            grouping_class_type,
            formula,
            species,
            example_answers,
            acs_synonyms,
            base_name,
            final,
            naa_ccr_id,
            code_table,
            is_set_root,
            panel_elements,
            survey_question_text,
            survey_question_source,
            units_required,
            submitted_units,
            related_names_2,
            short_name,
            order_obs,
            cdisc_common_tests,
            hl7_field_subfield_id,
            external_copyright_notice,
            example_units,
            inpc_percentage,
            long_common_name
        )

        select

            %(src_pk)s,

            coalesce (
                nullif(long_common_name, ''),
                (
                    coalesce(nullif(component, '') || ':', '') ||
                    coalesce(nullif(property, '') || ':', '') ||
                    coalesce(nullif(time_aspect, '') || ':', '') ||
                    coalesce(nullif(system, '') || ':', '') ||
                    coalesce(nullif(scale_type, '') || ':', '') ||
                    coalesce(nullif(method_type, '') || ':', '')
                )
            ),

            nullif(loinc_num, ''),
            nullif(comments, ''),
            nullif(component, ''),
            nullif(property, ''),
            nullif(time_aspect, ''),
            nullif(system, ''),
            nullif(scale_type, ''),
            nullif(method_type, ''),
            nullif(related_names_1_old, ''),
            nullif(class, ''),
            nullif(source, ''),
            nullif(dt_last_change, ''),
            nullif(change_type, ''),
            nullif(answer_list, ''),
            nullif(status, ''),
            nullif(map_to, ''),
            nullif(scope, ''),
            nullif(normal_range, ''),
            nullif(ipcc_units, ''),
            nullif(reference, ''),
            nullif(exact_component_synonym, ''),
            nullif(molar_mass, ''),
            nullif(class_type, '')::smallint,
            nullif(formula, ''),
            nullif(species, ''),
            nullif(example_answers, ''),
            nullif(acs_synonyms, ''),
            nullif(base_name, ''),
            nullif(final, ''),
            nullif(naa_ccr_id, ''),
            nullif(code_table, ''),
            nullif(is_set_root, '')::boolean,
            nullif(panel_elements, ''),
            nullif(survey_question_text, ''),
            nullif(survey_question_source, ''),
            nullif(units_required, ''),
            nullif(submitted_units, ''),
            nullif(related_names_2, ''),
            nullif(short_name, ''),
            nullif(order_obs, ''),
            nullif(cdisc_common_tests, ''),
            nullif(hl7_field_subfield_id, ''),
            nullif(external_copyright_notice, ''),
            nullif(example_units, ''),
            nullif(inpc_percentage, ''),
            nullif(long_common_name, '')

        from
            ref.loinc_staging
    """
    _run_and_commit(conn, [{'cmd': cmd, 'args': {'src_pk': data_src_pk}}])


def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
    """Import a split LOINC distribution into the database.

    data_fname -- tab-separated, UTF-8 encoded LOINC data file whose first
        row is a header row
    license_fname -- UTF-8 encoded license/header file; its text becomes
        the description of the data source and, if *version* is None, the
        version is detected from it via get_version()
    version -- LOINC version string, auto-detected when None
    conn -- database connection used for the staging and ref.loinc tables;
        each step is committed on it separately
    lang -- language code stored with the data source

    Steps: register the data source, empty ref.loinc_staging, load the data
    file into it, transfer the staged rows into ref.loinc, and empty the
    staging table again.

    Returns True on success.

    Raises ValueError if *conn* is None or the version cannot be detected.
    """
    # fail before touching the database: without this check the data source
    # row would be written and only then would conn.cursor() blow up
    if conn is None:
        raise ValueError('cannot import LOINC without a database connection')

    if version is None:
        version = get_version(license_fname=license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    data_src_pk = _register_loinc_data_source(license_fname, version, lang)
    _log.debug('data source record created, pk is #%s', data_src_pk)

    empty_staging = [{'cmd': u"""delete from ref.loinc_staging"""}]

    csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
    try:
        loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter="\t", quotechar='"')

        _run_and_commit(conn, empty_staging)
        _log.debug('staging table emptied')

        _load_loinc_staging(conn, loinc_reader)
    finally:
        # close the data file even if loading fails half-way
        csv_file.close()
    _log.debug('staging table loaded')

    _transfer_staging_to_loinc(conn, data_src_pk)
    _log.debug('transfer from staging table to real table done')

    _run_and_commit(conn, empty_staging)
    _log.debug('staging table emptied')

    return True
325
326
327
if __name__ == "__main__":

    # gmLog2 is not referenced below; presumably imported for its side
    # effects (log setup) -- TODO confirm
    from Gnumed.pycommon import gmLog2
    from Gnumed.pycommon import gmI18N

    gmI18N.activate_locale()

    # Only run the self-test when invoked as "<script> test".
    # NOTE(review): test_loinc_import() is not defined anywhere in the
    # visible source -- confirm it exists before relying on this path.
    if len(sys.argv) > 1 and sys.argv[1] == 'test':
        test_loinc_import()
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452