Package Gnumed :: Package business :: Module gmLOINC
[frames | no frames]

Source Code for Module Gnumed.business.gmLOINC

  1  # -*- coding: utf8 -*- 
  2  """LOINC handling code. 
  3   
  4  http://loinc.org 
  5   
  6  license: GPL v2 or later 
  7  """ 
  8  #============================================================ 
  9  __version__ = "$Revision: 1.7 $" 
 10  __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>" 
 11   
 12  import sys, codecs, logging, csv 
 13   
 14   
 15  if __name__ == '__main__': 
 16          sys.path.insert(0, '../../') 
 17  from Gnumed.pycommon import gmPG2, gmTools 
 18   
 19   
# Module-level logger; the module revision is logged once at import time.
_log = logging.getLogger('gm.loinc')
_log.info(__version__)

# Stored as the "source" of the ref.data_source record created by loinc_import().
origin_url = u'http://loinc.org'
# Encoding used by split_LOINCDBTXT() to decode the raw LOINC download.
file_encoding = 'latin1'                        # encoding is empirical
# A line containing this text ends the license part of the raw download;
# split_LOINCDBTXT() writes everything after it to the data file.
license_delimiter = u'Clip Here for Data'
# get_version() takes the text following this prefix as the LOINC version.
version_tag = u'LOINC(R) Database Version'
# Long and short name stored in the ref.data_source record by loinc_import().
name_long = u'LOINC® (Logical Observation Identifiers Names and Codes)'
name_short = u'LOINC'

# Field names of one LOINC data row. loinc_import() only uses the NUMBER of
# entries, to build one SQL placeholder per column of ref.loinc_staging.
loinc_fields = u"LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST STATUS MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY MOLAR_MASS CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME FINAL NAACCR_ID CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT SURVEY_QUEST_SRC UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 SHORTNAME ORDER_OBS CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID EXTERNAL_COPYRIGHT_NOTICE EXAMPLE_UNITS INPC_PERCENTAGE LONG_COMMON_NAME".split()
 31   
 32  #============================================================ 
def loinc2info(loinc=None):
    """Look up the term stored for a LOINC code.

    The SQL picks, in this order of preference, the term in the language
    reported by i18n.get_curr_lang(), the term in 'en_EN', or the term
    regardless of language.

    loinc -- the LOINC code to look up in ref.v_coded_terms

    Returns a list holding the first column of every row the query yields.
    """
    query = u"""
        select coalesce (
            (select term
            from ref.v_coded_terms
            where
                coding_system = 'LOINC'
                    and
                code = %(loinc)s
                    and
                lang = i18n.get_curr_lang()
            ),
            (select term
            from ref.v_coded_terms
            where
                coding_system = 'LOINC'
                    and
                code = %(loinc)s
                    and
                lang = 'en_EN'
            ),
            (select term
            from ref.v_coded_terms
            where
                coding_system = 'LOINC'
                    and
                code = %(loinc)s
            )
        )
    """
    lookup = {'cmd': query, 'args': {'loinc': loinc}}
    rows, idx = gmPG2.run_ro_queries(queries = [lookup])

    terms = []
    for row in rows:
        terms.append(row[0])
    return terms
68 #============================================================
def split_LOINCDBTXT(input_fname=None, data_fname=None, license_fname=None):
    """Split a raw LOINC download into a license file and a data file.

    Every line up to and including the one containing `license_delimiter`
    is written to the license file; all following lines are written to the
    data file. Input is decoded as `file_encoding`, output is UTF-8.

    input_fname -- path of the raw LOINC file to split
    data_fname -- target for the data part; a unique name is generated via
        gmTools.get_unique_filename() when None
    license_fname -- target for the license part; generated likewise when None

    Returns the tuple (data_fname, license_fname).
    """
    _log.debug('splitting LOINC source file [%s]', input_fname)

    if license_fname is None:
        license_fname = gmTools.get_unique_filename(prefix = 'loinc_license-', suffix = '.txt')
    _log.debug('LOINC header: %s', license_fname)

    if data_fname is None:
        data_fname = gmTools.get_unique_filename(prefix = 'loinc_data-', suffix = '.csv')
    _log.debug('LOINC data: %s', data_fname)

    # Plain 'r' rather than 'rU': codecs.open() always opens the file in
    # binary mode when given an encoding, so 'U' never had any effect, and
    # Python >= 3.11 rejects it with ValueError.
    loinc_file = codecs.open(input_fname, 'r', encoding = file_encoding, errors = 'replace')
    try:
        out_file = codecs.open(license_fname, 'w', encoding = 'utf8', errors = 'replace')
        try:
            for line in loinc_file:
                out_file.write(line)
                if license_delimiter in line:
                    # the delimiter line itself still belongs to the license,
                    # everything after it goes to the data file
                    out_file.close()
                    out_file = codecs.open(data_fname, 'w', encoding = 'utf8', errors = 'replace')
        finally:
            # closes whichever output file is current, also on errors
            out_file.close()
    finally:
        loinc_file.close()

    return data_fname, license_fname
97 #============================================================
def map_field_names(data_fname='loinc_data.csv'):
    """Inspect the first line of a LOINC data file (unfinished stub).

    Reads the first line of the UTF-8 file and asks csv.Sniffer whether it
    looks like a header row. The answer is not acted upon yet.

    data_fname -- path of the LOINC data file

    Returns None.
    """
    # Plain 'r' rather than 'rU': codecs.open() always opens the file in
    # binary mode when given an encoding, so 'U' never had any effect, and
    # Python >= 3.11 rejects it with ValueError.
    csv_file = codecs.open(data_fname, 'r', 'utf8', 'replace')
    try:
        first_line = csv_file.readline()
    finally:
        # only the first line is needed, release the handle right away
        csv_file.close()

    sniffer = csv.Sniffer()
    if sniffer.has_header(first_line):
        # TODO: mapping of field names not implemented yet
        pass
105 #============================================================
def get_version(license_fname='loinc_license.txt'):
    """Extract the LOINC version from a license file.

    Scans the UTF-8 file for the first line starting with `version_tag`.

    license_fname -- path of the license file to scan

    Returns the remainder of that line with surrounding whitespace stripped,
    or None if no line starts with `version_tag`.
    """
    # Plain 'r' rather than 'rU': codecs.open() always opens the file in
    # binary mode when given an encoding, so 'U' never had any effect, and
    # Python >= 3.11 rejects it with ValueError.
    in_file = codecs.open(license_fname, 'r', encoding = 'utf8', errors = 'replace')
    try:
        for line in in_file:
            if line.startswith(version_tag):
                return line[len(version_tag):].strip()
    finally:
        # also runs on the early return and on read errors
        in_file.close()

    return None
118 #============================================================
def _empty_loinc_staging(conn):
    """Delete all rows from ref.loinc_staging and commit on *conn*."""
    curs = conn.cursor()
    try:
        cmd = u"""delete from ref.loinc_staging"""
        gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
    finally:
        curs.close()
    conn.commit()
    _log.debug('staging table emptied')


def _load_loinc_staging(conn, loinc_reader):
    """Insert every row but the first from *loinc_reader* into ref.loinc_staging and commit."""
    curs = conn.cursor()
    try:
        # one positional placeholder per entry in loinc_fields
        cmd = u"""insert into ref.loinc_staging values (%s%%s)""" % (u'%s, ' * (len(loinc_fields) - 1))
        first_row_skipped = False
        for loinc_line in loinc_reader:
            # the first row is not imported (presumably the column header -- TODO confirm)
            if not first_row_skipped:
                first_row_skipped = True
                continue
            gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
    finally:
        curs.close()
    conn.commit()


def _transfer_loinc_staging(conn, data_src_pk):
    """Copy ref.loinc_staging into ref.loinc, linked to data source *data_src_pk*, and commit."""
    args = {'src_pk': data_src_pk}
    # Empty strings from the staging table become NULL. "term" is the long
    # common name or, lacking that, the non-empty name parts joined by ':'.
    cmd = u"""
        insert into ref.loinc (
            fk_data_source,

            term,

            code,
            comment,
            component,
            property,
            time_aspect,
            system,
            scale_type,
            method_type,
            related_names_1_old,
            grouping_class,
            loinc_internal_source,
            dt_last_change,
            change_type,
            answer_list,
            code_status,
            maps_to,
            scope,
            normal_range,
            ipcc_units,
            reference,
            exact_component_synonym,
            molar_mass,
            grouping_class_type,
            formula,
            species,
            example_answers,
            acs_synonyms,
            base_name,
            final,
            naa_ccr_id,
            code_table,
            is_set_root,
            panel_elements,
            survey_question_text,
            survey_question_source,
            units_required,
            submitted_units,
            related_names_2,
            short_name,
            order_obs,
            cdisc_common_tests,
            hl7_field_subfield_id,
            external_copyright_notice,
            example_units,
            inpc_percentage,
            long_common_name
        )

        select

            %(src_pk)s,

            coalesce (
                nullif(long_common_name, ''),
                (
                    coalesce(nullif(component, '') || ':', '') ||
                    coalesce(nullif(property, '') || ':', '') ||
                    coalesce(nullif(time_aspect, '') || ':', '') ||
                    coalesce(nullif(system, '') || ':', '') ||
                    coalesce(nullif(scale_type, '') || ':', '') ||
                    coalesce(nullif(method_type, '') || ':', '')
                )
            ),

            nullif(loinc_num, ''),
            nullif(comments, ''),
            nullif(component, ''),
            nullif(property, ''),
            nullif(time_aspect, ''),
            nullif(system, ''),
            nullif(scale_type, ''),
            nullif(method_type, ''),
            nullif(related_names_1_old, ''),
            nullif(class, ''),
            nullif(source, ''),
            nullif(dt_last_change, ''),
            nullif(change_type, ''),
            nullif(answer_list, ''),
            nullif(status, ''),
            nullif(map_to, ''),
            nullif(scope, ''),
            nullif(normal_range, ''),
            nullif(ipcc_units, ''),
            nullif(reference, ''),
            nullif(exact_component_synonym, ''),
            nullif(molar_mass, ''),
            nullif(class_type, '')::smallint,
            nullif(formula, ''),
            nullif(species, ''),
            nullif(example_answers, ''),
            nullif(acs_synonyms, ''),
            nullif(base_name, ''),
            nullif(final, ''),
            nullif(naa_ccr_id, ''),
            nullif(code_table, ''),
            nullif(is_set_root, '')::boolean,
            nullif(panel_elements, ''),
            nullif(survey_question_text, ''),
            nullif(survey_question_source, ''),
            nullif(units_required, ''),
            nullif(submitted_units, ''),
            nullif(related_names_2, ''),
            nullif(short_name, ''),
            nullif(order_obs, ''),
            nullif(cdisc_common_tests, ''),
            nullif(hl7_field_subfield_id, ''),
            nullif(external_copyright_notice, ''),
            nullif(example_units, ''),
            nullif(inpc_percentage, ''),
            nullif(long_common_name, '')

        from
            ref.loinc_staging
    """
    curs = conn.cursor()
    try:
        gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': args}])
    finally:
        curs.close()
    conn.commit()


def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
    """Import a split LOINC release into the database.

    Steps, in order:
    1. detect the version via get_version() unless given
    2. replace the ref.data_source record for this name/version, using the
       content of the license file as its description
    3. empty ref.loinc_staging, then load the data file into it
    4. copy the staging rows into ref.loinc, then empty the staging table

    data_fname -- tab-delimited, double-quote-quoted UTF-8 data file
    license_fname -- UTF-8 license file
    version -- LOINC version; read from the license file when None
    conn -- connection object offering cursor() and commit(); used for all
        staging/transfer statements. The data source record is written by
        gmPG2.run_rw_queries() without an explicit link object.
    lang -- language stored in the data source record

    Returns True.
    Raises ValueError if the version was not given and cannot be detected.
    """
    if version is None:
        version = get_version(license_fname = license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    # Plain 'r' rather than 'rU' here and below: codecs.open() always opens
    # the file in binary mode when given an encoding, so 'U' never had any
    # effect, and Python >= 3.11 rejects it with ValueError.
    in_file = codecs.open(license_fname, 'r', encoding = 'utf8', errors = 'replace')
    try:
        desc = in_file.read()
    finally:
        in_file.close()

    args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}

    # create data source record
    queries = [{
        'cmd': u"""delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
        'args': args
    }, {
        'cmd': u"""
            insert into ref.data_source (name_long, name_short, version, description, lang, source) values (
                %(name_long)s,
                %(name_short)s,
                %(ver)s,
                %(desc)s,
                %(lang)s,
                %(url)s
            )""",
        'args': args
    }, {
        'cmd': u"""select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
        'args': args
    }]
    rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
    data_src_pk = rows[0][0]
    _log.debug('data source record created, pk is #%s', data_src_pk)

    # import data
    csv_file = codecs.open(data_fname, 'r', 'utf8', 'replace')
    try:
        loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')

        # clean out staging area
        _empty_loinc_staging(conn)

        # from file into staging table
        _load_loinc_staging(conn, loinc_reader)
    finally:
        # release the data file even if loading fails half-way
        csv_file.close()
    _log.debug('staging table loaded')

    # from staging table to real table
    _transfer_loinc_staging(conn, data_src_pk)
    _log.debug('transfer from staging table to real table done')

    # clean out staging area
    _empty_loinc_staging(conn)

    return True
#============================================================
# main
#------------------------------------------------------------
# Manual self-test, only executed when the module is run as a script:
#
#     python gmLOINC.py test [<LOINC source file>]
#
if __name__ == "__main__":

    # gmLog2 is not referenced below; presumably imported for its side
    # effects on logging -- TODO confirm
    from Gnumed.pycommon import gmLog2
    from Gnumed.pycommon import gmI18N

    gmI18N.activate_locale()
#    gmDateTime.init()

    #--------------------------------------------------------
    # Splits the file named by the second command line argument; the
    # output file names are generated by split_LOINCDBTXT().
    def test_loinc_split():
        split_LOINCDBTXT(input_fname = sys.argv[2])
    #--------------------------------------------------------
    # NOTE(review): this leaves data_fname, license_fname and conn at their
    # default of None although loinc_import() opens both files and calls
    # conn.cursor() -- as written this call cannot complete; confirm the
    # intended arguments.
    def test_loinc_import():
        loinc_import(version = '2.26')
    #--------------------------------------------------------
    # tests only run when the first command line argument is "test"
    if (len(sys.argv)) > 1 and (sys.argv[1] == 'test'):
        #test_loinc_split()
        test_loinc_import()

#============================================================