1
2 """LOINC handling code.
3
4 http://loinc.org
5
6 license: GPL v2 or later
7 """
8
9 __version__ = "$Revision: 1.7 $"
10 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
11
12 import sys, codecs, logging, csv
13
14
15 if __name__ == '__main__':
16 sys.path.insert(0, '../../')
17 from Gnumed.pycommon import gmPG2, gmTools
18
19
20 _log = logging.getLogger('gm.loinc')
21 _log.info(__version__)
22
# Where LOINC is published; stored as the "source" of the data source record.
origin_url = u'http://loinc.org'

# Encoding used when reading the distributed LOINC source file.
file_encoding = 'latin1'

# A line containing this marker ends the license part of the source file;
# everything after it is treated as data.
license_delimiter = u'Clip Here for Data'

# A license-file line starting with this tag carries the version string
# in the remainder of the line.
version_tag = u'LOINC(R) Database Version'

# Names under which the imported release is registered as a data source.
name_long = u'LOINC® (Logical Observation Identifiers Names and Codes)'
name_short = u'LOINC'

# Field names of one LOINC data row, in file order. The number of entries
# determines how many placeholders the staging table insert uses.
loinc_fields = (
    u"LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP "
    u"RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST "
    u"STATUS MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY "
    u"MOLAR_MASS CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME "
    u"FINAL NAACCR_ID CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT "
    u"SURVEY_QUEST_SRC UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 "
    u"SHORTNAME ORDER_OBS CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID "
    u"EXTERNAL_COPYRIGHT_NOTICE EXAMPLE_UNITS INPC_PERCENTAGE "
    u"LONG_COMMON_NAME"
).split()
31
32
34
35 cmd = u"""
36 select coalesce (
37 (select term
38 from ref.v_coded_terms
39 where
40 coding_system = 'LOINC'
41 and
42 code = %(loinc)s
43 and
44 lang = i18n.get_curr_lang()
45 ),
46 (select term
47 from ref.v_coded_terms
48 where
49 coding_system = 'LOINC'
50 and
51 code = %(loinc)s
52 and
53 lang = 'en_EN'
54 ),
55 (select term
56 from ref.v_coded_terms
57 where
58 coding_system = 'LOINC'
59 and
60 code = %(loinc)s
61 )
62 )
63 """
64 args = {'loinc': loinc}
65 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}])
66
67 return [ r[0] for r in rows ]
68
70
71 _log.debug('splitting LOINC source file [%s]', input_fname)
72
73 if license_fname is None:
74 license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt')
75 _log.debug('LOINC header: %s', license_fname)
76
77 if data_fname is None:
78 data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv')
79 _log.debug('LOINC data: %s', data_fname)
80
81 loinc_file = codecs.open(input_fname, 'rU', encoding = file_encoding, errors = 'replace')
82 out_file = codecs.open(license_fname, 'w', encoding = 'utf8', errors = 'replace')
83
84 for line in loinc_file:
85
86 if license_delimiter in line:
87 out_file.write(line)
88 out_file.close()
89 out_file = codecs.open(data_fname, 'w', encoding = 'utf8', errors = 'replace')
90 continue
91
92 out_file.write(line)
93
94 out_file.close()
95
96 return data_fname, license_fname
97
99
100 csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
101 first_line = csv_file.readline()
102 sniffer = csv.Sniffer()
103 if sniffer.has_header(first_line):
104 pass
105
107
108 in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace')
109
110 version = None
111 for line in in_file:
112 if line.startswith(version_tag):
113 version = line[len(version_tag):].strip()
114 break
115
116 in_file.close()
117 return version
118
def _empty_loinc_staging(conn):
    """Delete all rows from ref.loinc_staging and commit on *conn*."""
    curs = conn.cursor()
    try:
        gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': u"""delete from ref.loinc_staging"""}])
    finally:
        # release the cursor even if the delete fails
        curs.close()
    conn.commit()
    _log.debug('staging table emptied')


def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
    """Import one LOINC release into the database.

    Steps, in order:

    1. Determine the release version: use *version* if given, otherwise
       ask get_version() to read it from the license file.
    2. Replace the ref.data_source row for this (name_short, version)
       pair, storing the full license file text as its description.
    3. Empty ref.loinc_staging and insert every row of *data_fname*
       except the first one (presumably the column header line).
    4. Copy the staged rows into ref.loinc, mapping empty strings to
       NULL, then empty the staging table again.

    Arguments:
        data_fname: tab-delimited LOINC data file, read as UTF-8.
        license_fname: license text file, read as UTF-8.
        version: LOINC version string, detected from the license file
            if None.
        conn: database connection used for the staging and transfer
            steps; its cursor() and commit() methods are called.
        lang: value stored in the "lang" column of ref.data_source.

    Returns:
        True once all steps have completed.

    Raises:
        ValueError: if no version was passed in and none could be
            detected from the license file.
    """
    if version is None:
        version = get_version(license_fname = license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    # the complete license text becomes the data source description
    in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace')
    try:
        desc = in_file.read()
    finally:
        in_file.close()

    args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}

    # (re)create the data source record and fetch its primary key
    # NOTE(review): these queries are not handed *conn*; which connection
    # they run on is decided inside gmPG2.run_rw_queries - confirm.
    queries = [{
        'cmd': u"""delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
        'args': args
    }, {
        'cmd': u"""
            insert into ref.data_source (name_long, name_short, version, description, lang, source) values (
                %(name_long)s,
                %(name_short)s,
                %(ver)s,
                %(desc)s,
                %(lang)s,
                %(url)s
            )""",
        'args': args
    }, {
        'cmd': u"""select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
        'args': args
    }]
    rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
    data_src_pk = rows[0][0]
    _log.debug('data source record created, pk is #%s', data_src_pk)

    # load the raw data file into the staging table
    csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
    try:
        loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')

        # start from a clean staging table
        _empty_loinc_staging(conn)

        # one positional placeholder per LOINC field
        placeholders = u', '.join([u'%s'] * len(loinc_fields))
        insert_cmd = u"""insert into ref.loinc_staging values (%s)""" % placeholders

        curs = conn.cursor()
        try:
            header_skipped = False
            for loinc_line in loinc_reader:
                # the first row of the file is not data
                if not header_skipped:
                    header_skipped = True
                    continue
                gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': insert_cmd, 'args': loinc_line}])
        finally:
            curs.close()
        conn.commit()
    finally:
        # do not leak the file handle if reading or inserting fails
        csv_file.close()
    _log.debug('staging table loaded')

    # transfer from the staging table into the real table;
    # empty strings in the staging table become NULL
    args = {'src_pk': data_src_pk}
    cmd = u"""
        insert into ref.loinc (
            fk_data_source,

            term,

            code,
            comment,
            component,
            property,
            time_aspect,
            system,
            scale_type,
            method_type,
            related_names_1_old,
            grouping_class,
            loinc_internal_source,
            dt_last_change,
            change_type,
            answer_list,
            code_status,
            maps_to,
            scope,
            normal_range,
            ipcc_units,
            reference,
            exact_component_synonym,
            molar_mass,
            grouping_class_type,
            formula,
            species,
            example_answers,
            acs_synonyms,
            base_name,
            final,
            naa_ccr_id,
            code_table,
            is_set_root,
            panel_elements,
            survey_question_text,
            survey_question_source,
            units_required,
            submitted_units,
            related_names_2,
            short_name,
            order_obs,
            cdisc_common_tests,
            hl7_field_subfield_id,
            external_copyright_notice,
            example_units,
            inpc_percentage,
            long_common_name
        )

        select

            %(src_pk)s,

            coalesce (
                nullif(long_common_name, ''),
                (
                    coalesce(nullif(component, '') || ':', '') ||
                    coalesce(nullif(property, '') || ':', '') ||
                    coalesce(nullif(time_aspect, '') || ':', '') ||
                    coalesce(nullif(system, '') || ':', '') ||
                    coalesce(nullif(scale_type, '') || ':', '') ||
                    coalesce(nullif(method_type, '') || ':', '')
                )
            ),

            nullif(loinc_num, ''),
            nullif(comments, ''),
            nullif(component, ''),
            nullif(property, ''),
            nullif(time_aspect, ''),
            nullif(system, ''),
            nullif(scale_type, ''),
            nullif(method_type, ''),
            nullif(related_names_1_old, ''),
            nullif(class, ''),
            nullif(source, ''),
            nullif(dt_last_change, ''),
            nullif(change_type, ''),
            nullif(answer_list, ''),
            nullif(status, ''),
            nullif(map_to, ''),
            nullif(scope, ''),
            nullif(normal_range, ''),
            nullif(ipcc_units, ''),
            nullif(reference, ''),
            nullif(exact_component_synonym, ''),
            nullif(molar_mass, ''),
            nullif(class_type, '')::smallint,
            nullif(formula, ''),
            nullif(species, ''),
            nullif(example_answers, ''),
            nullif(acs_synonyms, ''),
            nullif(base_name, ''),
            nullif(final, ''),
            nullif(naa_ccr_id, ''),
            nullif(code_table, ''),
            nullif(is_set_root, '')::boolean,
            nullif(panel_elements, ''),
            nullif(survey_question_text, ''),
            nullif(survey_question_source, ''),
            nullif(units_required, ''),
            nullif(submitted_units, ''),
            nullif(related_names_2, ''),
            nullif(short_name, ''),
            nullif(order_obs, ''),
            nullif(cdisc_common_tests, ''),
            nullif(hl7_field_subfield_id, ''),
            nullif(external_copyright_notice, ''),
            nullif(example_units, ''),
            nullif(inpc_percentage, ''),
            nullif(long_common_name, '')

        from
            ref.loinc_staging
    """
    curs = conn.cursor()
    try:
        gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])
    finally:
        curs.close()
    conn.commit()
    _log.debug('transfer from staging table to real table done')

    # the staging table is scratch space only, leave it empty
    _empty_loinc_staging(conn)

    return True
323
324
325
326 if __name__ == "__main__":
327
328 from Gnumed.pycommon import gmLog2
329 from Gnumed.pycommon import gmI18N
330
331 gmI18N.activate_locale()
332
333
334
337
340
341 if (len(sys.argv)) > 1 and (sys.argv[1] == 'test'):
342
343 test_loinc_import()
344
345
346