1 """GNUmed German XDT parsing objects.
2
3 This encapsulates some of the XDT data into
4 objects for easy access.
5 """
6
7
8
9 __version__ = "$Revision: 1.33 $"
10 __author__ = "K.Hilbert, S.Hilbert"
11 __license__ = "GPL"
12
import codecs
import datetime as pyDT
import io
import linecache
import logging
import os.path
import re as regex
import sys
import time


import mx.DateTime as mxDT
17
18
19 if __name__ == '__main__':
20 sys.path.insert(0, '../../')
21 from Gnumed.pycommon import gmDateTime, gmTools
22 from Gnumed.business import gmXdtMappings, gmPerson
23
24
25 _log = logging.getLogger('gm.xdt')
26 _log.info(__version__)
27
28
33
35
def determine_xdt_encoding(filename=None, default_encoding=None):
    """Return the character set declared inside an xDT file.

    The file is read line by line until the first line whose field ID
    (characters 3-6 of the line) is listed in
    ``gmXdtMappings._charset_fields``. The single character following
    the field ID is then translated via
    ``gmXdtMappings._map_field2charset``.

    Args:
        filename: path of the xDT file to inspect.
        default_encoding: value handed back when the file contains no
            charset field.

    Returns:
        The mapped encoding, or ``default_encoding`` when none was found.

    Raises:
        KeyError: if a charset field carries a value that is missing
            from the mapping.
    """
    file_encoding = None

    # The real encoding is what we are trying to find out, so decode as
    # UTF-8 and silently drop whatever does not decode; only the field ID
    # and the one-character value are inspected. The context manager
    # closes the file even when the mapping lookup raises.
    with io.open(filename, mode='rt', encoding='utf8', errors='ignore') as xdt_file:
        for line in xdt_file:
            field = line[3:7]
            if field not in gmXdtMappings._charset_fields:
                continue
            _log.debug('found charset field [%s] in <%s>', field, filename)
            val = line[7:8]
            file_encoding = gmXdtMappings._map_field2charset[field][val]
            _log.debug('encoding in file is "%s" (%s)', file_encoding, val)
            break

    if file_encoding is None:
        _log.debug('no encoding found in <%s>, assuming [%s]', filename, default_encoding)
        return default_encoding

    return file_encoding
54
56
57 _map_id2name = {
58 '3101': 'lastnames',
59 '3102': 'firstnames',
60 '3103': 'dob',
61 '3110': 'gender',
62 '3106': 'zipurb',
63 '3107': 'street',
64 '3112': 'zip',
65 '3113': 'urb',
66 '8316': 'source'
67 }
68
69 needed_fields = (
70 '3101',
71 '3102'
72 )
73
74 interesting_fields = _map_id2name.keys()
75
76 data = {}
77
78
79 if encoding is None:
80 encoding = determine_xdt_encoding(filename=filename)
81
82 xdt_file = codecs.open(filename=filename, mode='rU', encoding=encoding)
83
84 for line in xdt_file:
85
86
87
88
89
90 line = line.replace('\015','')
91 line = line.replace('\012','')
92
93
94 field = line[3:7]
95
96 if field in interesting_fields:
97 try:
98 already_seen = data[_map_id2name[field]]
99 break
100 except KeyError:
101 data[_map_id2name[field]] = line[7:]
102
103 xdt_file.close()
104
105
106 if len(data) < len(needed_fields):
107 raise ValueError('insufficient patient data in XDT file [%s], found only: %s' % (filename, data))
108
109 from Gnumed.business import gmPerson
110 dto = gmPerson.cDTO_person()
111
112 dto.firstnames = data['firstnames']
113 dto.lastnames = data['lastnames']
114
115
116
117 try:
118 dob = time.strptime(data['dob'], gmTools.coalesce(dob_format, '%d%m%Y'))
119 dto.dob = pyDT.datetime(dob.tm_year, dob.tm_mon, dob.tm_mday, tzinfo = gmDateTime.gmCurrentLocalTimezone)
120 except KeyError:
121 dto.dob = None
122
123 try:
124 dto.gender = gmXdtMappings.map_gender_xdt2gm[data['gender'].lower()]
125 except KeyError:
126 dto.gender = None
127
128 dto.zip = None
129 try:
130 dto.zip = regex.match('\d{5}', data['zipurb']).group()
131 except KeyError: pass
132 try:
133 dto.zip = data['zip']
134 except KeyError: pass
135
136 dto.urb = None
137 try:
138 dto.urb = regex.sub('\d{5} ', '', data['zipurb'])
139 except KeyError: pass
140 try:
141 dto.urb = data['urb']
142 except KeyError: pass
143
144 try:
145 dto.street = data['street']
146 except KeyError:
147 dto.street = None
148
149 try:
150 dto.source = data['source']
151 except KeyError:
152 dto.source = None
153
154 return dto
155
157
158 - def __init__(self, filename=None, encoding=None, override_encoding=False):
180
182
183 if self.__header is not None:
184 return self.__header
185
186 ldt_file = codecs.open(filename = self.filename, mode = 'rU', encoding = self.encoding)
187 self.__header = []
188 for line in ldt_file:
189 length, field, content = line[:3], line[3:7], line[7:].replace('\015','').replace('\012','')
190
191 if field == u'8000':
192 if content in [u'8202']:
193 break
194 self.__header.append(line)
195
196 ldt_file.close()
197 return self.__header
198
199 header = property(_get_header, lambda x:x)
200
202
203 if self.__tail is not None:
204 return self.__tail
205
206 ldt_file = codecs.open(filename = self.filename, mode = 'rU', encoding = self.encoding)
207 self.__tail = []
208 in_tail = False
209 for line in ldt_file:
210 if in_tail:
211 self.__tail.append(line)
212 continue
213
214 length, field, content = line[:3], line[3:7], line[7:].replace('\015','').replace('\012','')
215
216
217 if field == u'8000':
218 if content not in [u'8221']:
219 continue
220 in_tail = True
221 self.__tail.append(line)
222
223 ldt_file.close()
224 return self.__tail
225
226 tail = property(_get_tail, lambda x:x)
227
229
230 ldt_file = codecs.open(filename = self.filename, mode = 'rU', encoding = self.encoding)
231 out_file = None
232
233 in_patient = False
234 for line in ldt_file:
235
236 if in_patient:
237 out_file.write(line)
238 continue
239
240 length, field, content = line[:3], line[3:7], line[7:].replace('\015','').replace('\012','')
241
242
243 if field == u'8000':
244
245 if content == u'8202':
246 in_patient = True
247 if out_file is not None:
248 out_file.write(u''.join(self.tail))
249 out_file.close()
250
251 out_file.write(u''.join(self.header))
252 else:
253 in_patient = False
254 if out_file is not None:
255 out_file.write(u''.join(self.tail))
256 out_file.close()
257
258 if out_file is not None:
259 if not out_file.closed:
260 out_file.write(u''.join(self.tail))
261 out_file.close()
262
263 ldt_file.close()
264
265
266
268 pat_ids = []
269 pat_names = []
270 pats = {}
271
272
273 for line in fileinput.input(aFile):
274
275 line = line.replace('\015','')
276 line = line.replace('\012','')
277
278 field = line[3:7]
279
280 if field == '3000':
281 pat_id = line[7:]
282 if pat_id not in pat_ids:
283 pat_ids.append(pat_id)
284 continue
285
286 if field == '3101':
287 pat_name = line [7:]
288 if pat_name not in pat_names:
289 pat_names.append(pat_name)
290 pats[pat_id] = pat_name
291 continue
292 fileinput.close()
293
294 _log.debug("patients found: %s" % len(pat_ids))
295 return pats
296
def get_pat_files(aFile, ID, name, patdir=None, patlst=None):
    """Look up the files recorded for one patient in the patient list.

    Args:
        aFile: not used by this function.
        ID: patient ID; forms the first half of the group key.
        name: patient name; forms the second half of the group key.
        patdir: passed through unchanged as the first element of the result.
        patlst: object whose ``get(aGroup=..., anOption=...)`` returns the
            "files" option stored under the group ``"<ID>:<name>"``.

    Returns:
        The two-element list ``[patdir, files]``.
    """
    _log.debug("getting files for patient [%s:%s]", ID, name)
    files = patlst.get(aGroup="%s:%s" % (ID, name), anOption="files")
    _log.debug("%s => %s", patdir, files)
    return [patdir, files]
302
304 content=[]
305 lineno = []
306
307
308
309 content = []
310 record_start_lines = []
311
312
313 for line in fileinput.input(aFile):
314 strippedline = line.replace('\015','')
315 strippedline = strippedline.replace('\012','')
316
317 if strippedline[3:7] == '8000':
318 record_start_lines.append(fileinput.filelineno())
319
320
321 for aline in record_start_lines:
322
323 line = linecache.getline(aFile,aline+2)
324
325 strippedline = line.replace('\015','')
326 strippedline = strippedline.replace('\012','')
327
328 field = strippedline[3:7]
329
330 if field == '3000':
331 ID = strippedline[7:]
332 line = linecache.getline(aFile,aline+3)
333
334 strippedline = line.replace('\015','')
335 strippedline = strippedline.replace('\012','')
336
337 field = strippedline[3:7]
338 if field == '3101':
339 name = strippedline [7:]
340 startline=aline
341 endline=record_start_lines[record_start_lines.index(aline)+1]
342 _log.debug("reading from%s" %str(startline)+' '+str(endline) )
343 for tmp in range(startline,endline):
344 content.append(linecache.getline(aFile,tmp))
345 _log.debug("reading %s"%tmp )
346 hashes = check_for_previous_records(ID,name,patlst)
347
348 data_hash = md5.new()
349 map(data_hash.update, content)
350 digest = data_hash.hexdigest()
351 if digest not in hashes:
352 pat_dir = cfg.get("xdt-viewer", "export-dir")
353 file = write_xdt_pat_data(content, pat_dir)
354 add_file_to_patlst(ID, name, patlst, file, ahash)
355 content = []
356 else:
357 continue
358
359 fileinput.close()
360 patlst.store()
361 return 1
362
def get_rand_fname(aDir):
    """Return a fresh file name (without its directory part) for ``aDir``.

    The name is obtained from ``gmTools.get_unique_filename`` with an
    empty prefix and a ``.YYYYmmdd-HHMMSS`` suffix built from the current
    local time.

    Args:
        aDir: directory handed to ``get_unique_filename`` as ``tmp_dir``.

    Returns:
        The last path component of the generated file name.
    """
    timestamp = time.strftime(".%Y%m%d-%H%M%S", time.localtime())
    tmpname = gmTools.get_unique_filename(prefix='', suffix=timestamp, tmp_dir=aDir)
    return os.path.basename(tmpname)
def write_xdt_pat_data(data, aDir):
    """Write the record for one patient to a new file.

    Args:
        data: iterable of strings; each is written to the file as is.
        aDir: directory in which the new file is created.

    Returns:
        The name (without directory) of the file that was written.
    """
    # Generate the name once so that the file written and the name
    # reported to the caller are guaranteed to be the same.
    fname = get_rand_fname(aDir)
    with open(os.path.join(aDir, fname), "w") as pat_file:
        pat_file.writelines(data)
    return fname
374
def check_for_previous_records(ID, name, patlst):
    """Return the hashes of all files already recorded for a patient.

    The patient is identified by the group key ``"<ID>:<name>"``. When
    that group does not exist yet it is created with an empty "files"
    option.

    Args:
        ID: patient ID.
        name: patient name.
        patlst: patient list object providing ``getGroups()``,
            ``get(aGroup=..., anOption=...)`` and
            ``set(aGroup=..., anOption=..., aValue=..., aComment=...)``.

    Returns:
        A list with the hash part of every ``"<file>:<hash>"`` entry.

    Raises:
        ValueError: if an entry contains no ``:`` separator.
    """
    anIdentity = "%s:%s" % (ID, name)

    if anIdentity not in patlst.getGroups():
        _log.debug("identity not yet in list")
        patlst.set(aGroup=anIdentity, anOption='files', aValue=[], aComment='')

    hashes = []
    for file_def in patlst.get(aGroup=anIdentity, anOption="files"):
        # Entries are stored as "<file>:<hash>"; split on the LAST colon
        # so that file names containing a colon are handled.
        _fname, ahash = file_def.rsplit(':', 1)
        hashes.append(ahash)

    return hashes
389
def add_file_to_patlst(ID, name, patlst, new_files, ahash):
    """Record newly written file(s) for a patient in the patient list.

    Each file is appended to the patient's "files" option as
    ``"<file>:<hash>"`` and the option is stored back.

    Args:
        ID: patient ID.
        name: patient name.
        patlst: patient list object providing
            ``get(aGroup=..., anOption=...)`` and
            ``set(aGroup=..., anOption=..., aValue=..., aComment=...)``.
        new_files: a single file name or an iterable of file names.
        ahash: hash stored alongside every file name.
    """
    anIdentity = "%s:%s" % (ID, name)
    files = patlst.get(aGroup=anIdentity, anOption="files")

    # A bare file name must be treated as ONE file; iterating it directly
    # would add one entry per character.
    try:
        text_types = (basestring,)
    except NameError:
        text_types = (str,)
    if isinstance(new_files, text_types):
        new_files = [new_files]

    files.extend("%s:%s" % (fname, ahash) for fname in new_files)
    _log.debug("files now there : %s", files)
    patlst.set(aGroup=anIdentity, anOption="files", aValue=files, aComment="")
397
398
399
if __name__ == "__main__":
    # Manual self-test: print header and tail of the LDT file given as
    # the first command line argument.
    from Gnumed.pycommon import gmI18N, gmLog2

    root_log = logging.getLogger()
    root_log.setLevel(logging.DEBUG)
    _log = logging.getLogger('gm.xdt')

    gmI18N.activate_locale()
    gmI18N.install_domain()
    gmDateTime.init()

    if len(sys.argv) < 2:
        sys.exit('usage: %s <LDT file>' % sys.argv[0])

    ldt = cLDTFile(filename=sys.argv[1])
    # Single-argument print() calls parse on both Python 2 and Python 3.
    print("header:")
    for line in ldt.header:
        print(line.encode('utf8', 'replace'))
    print("tail:")
    for line in ldt.tail:
        print(line.encode('utf8', 'replace'))
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551