* Simple text input reader. Imported from pyqmc.utils.text_input .
This commit is contained in:
211
iofmt/text_input.py
Normal file
211
iofmt/text_input.py
Normal file
@@ -0,0 +1,211 @@
|
||||
#!/usr/bin/python
|
||||
# $Id: text_input.py,v 1.1 2010-09-27 19:54:05 wirawan Exp $
|
||||
#
|
||||
# wpylib.iofmt.text_input module
|
||||
# Quick-n-dirty text input utilities
|
||||
#
|
||||
# Wirawan Purwanto
|
||||
# Created: 20090601
|
||||
#
|
||||
# Routines put here are commonly used in my own scripts.
|
||||
# They are not necessarily suitable for general-purpose uses; evaluate
|
||||
# your needs and see if they can meet them as well.
|
||||
#
|
||||
# 20090601: Created as pyqmc.utils.text_input .
|
||||
# 20100927: Moved to wpylib.iofmt.text_input .
|
||||
#
|
||||
# TODO
|
||||
# - book-keep the line number. Also note superfile must have its own line
|
||||
# number keeping.
|
||||
#
|
||||
"""
|
||||
Simple text-based input reader.
|
||||
|
||||
This module is part of wpylib project.
|
||||
"""
|
||||
|
||||
import re
|
||||
import numpy
|
||||
|
||||
from wpylib.file.file_utils import open_input_file
|
||||
|
||||
|
||||
class text_input(object):
    """Text input reader with support for UNIX-style comment marker (#) and
    standard field separation (tabs and whitespaces).

    Used for quick and dirty data reading (iterating only once in forward
    direction without the need of rewinding or skipping).
    This object can be treated like an input file, e.g. used as an iterator,
    etc.  Plain iteration yields comment-stripped lines (see `next_line`).

    To support more fancy options (e.g., rewinding), use "superize=1" when
    creating the instance.

    Attributes set by the constructor:
      file                  -- the object returned by open_input_file()
      field_filtering_proc  -- callable applied to every record/line before
                               it is returned (identity by default)
      skip_blank_lines      -- when true (default), records that are empty
                               after comment removal are skipped
    """

    def __init__(self, fname, **opts):
        """Opens `fname` through open_input_file() and applies options.

        The "superize" keyword, when true, is forwarded to
        open_input_file(); every other keyword is handed to set_options().
        """
        # "superize" belongs to open_input_file, not to set_options.  It is
        # always removed from opts so that an explicit superize=0 is not
        # rejected later as an unknown option.
        superize = opts.pop("superize", 0)
        if superize:
            open_opts = {"superize": superize}
        else:
            open_opts = {}
        self.file = open_input_file(fname, **open_opts)
        # field_filtering_proc can be used to filter unwanted fields, or do
        # some additional transformations before final feed to the main
        # iteration.
        self.field_filtering_proc = lambda flds: flds
        # Default fancy options:
        self.skip_blank_lines = True
        if opts:
            self.set_options(**opts)

    def __del__(self):
        # self.file is missing if open_input_file() raised in __init__.
        fobj = getattr(self, "file", None)
        if fobj is not None:
            fobj.close()

    def __iter__(self):
        return self

    def _iter_lines(self, maxcount=None):
        """Returns an iterator over self, limited to at most `maxcount`
        items when `maxcount` is not None."""
        if maxcount is None:
            return iter(self)
        from itertools import islice
        # A negative count reads nothing instead of raising.
        return islice(self, max(maxcount, 0))

    def next_rec(self):
        """Returns the next record as a list of whitespace-separated fields
        (after comment removal and field_filtering_proc).

        Raises StopIteration when the underlying file is exhausted.
        """
        while True:
            line = next(self.file)
            fields = self.field_filtering_proc(line.split("#", 1)[0].split())
            if len(fields) > 0 or not self.skip_blank_lines:
                return fields

    def next_line(self):
        """Returns the next line as a single string with the comment and
        trailing whitespace removed (NOT split into fields).

        Raises StopIteration when the underlying file is exhausted.
        """
        while True:
            line = next(self.file)
            # NOTE(review): field_filtering_proc receives a string here, not
            # a list of fields; kept as-is for backward compatibility.
            text = self.field_filtering_proc(line.split("#", 1)[0].rstrip())
            if len(text) > 0 or not self.skip_blank_lines:
                return text

    # Do NOT touch the "next" field below unless you know what you're doing:
    next = next_line

    def __next__(self):
        # Python 3 iterator protocol; delegates to "next" so that a subclass
        # rebinding "next" (e.g. to next_rec) is honored there as well.
        return self.next()

    def seek_text(self, regex=None, match=None):
        """Seeks the file until a particular piece of text is encountered.
        We ignore all comments.

        The `regex' argument can be either a regex string or a standard
        python regular expression object.  Alternatively, `match' can be a
        callable taking the line and returning a true value on a hit.

        Returns the matching line.  Raises StopIteration if the end of the
        file is reached without a match, and ValueError if neither `regex'
        nor `match' is given.
        """
        if regex:
            # Anything exposing .search() is treated as an already-compiled
            # pattern; everything else (str, unicode) gets compiled.
            if hasattr(regex, "search"):
                pattern = regex
            else:
                pattern = re.compile(regex)
            match_proc = pattern.search
        else:
            match_proc = match
        if match_proc is None:
            raise ValueError("seek_text requires either `regex' or `match'")

        while True:
            line = self.next_line()
            if match_proc(line):
                return line

    def read_floats(self, *cols, **kwd):
        """Quickly reads a set of floats from a text file.
        Returns a numpy array of the values in double precision.

        Example usage:
            >>> arr = text_input("/tmp/file.txt").read_floats(0, 2, 3)
        to read columns 1, 3, and 4 of the text file /tmp/file.txt, while
        disregarding comments.

        Additional keyword options:
        * maxcount: maximum number of records to be read
        """
        rows = []
        for line in self._iter_lines(kwd.get("maxcount")):
            vals = line.split()
            rows.append([float(vals[col]) for col in cols])
        # finally convert them to a numpy ndarray:
        return numpy.array(rows)

    def read_items(self, *col_desc, **kwd):
        """Quickly reads a set of items from records of whitespace-separated
        fields in a text file.
        Returns a structured numpy array of the values read.

        Example usage:

            >>> arr = text_input("/tmp/file.txt").read_items(0, (2, int), (3, "S10", "Atom"))

        reads columns 1 (as floats, by default), 3 (as integers), and 4 (as
        strings of max length of 10, which field is named "Atom") from the
        text file /tmp/file.txt, while disregarding comments.

        Each column descriptor is either an integer column index or a tuple
        (column, [datatype, [name]]).  If the tuple contains the third
        field, it is used as the name of the field; otherwise the fields are
        named f0, f1, f2, ....

        Additional keyword options:
        * deftype: default datatype
        * maxcount: maximum number of records to be read

        Raises ValueError for a descriptor tuple whose length is not 1-3.

        TODO: Needs ability to read in complex data.
        """
        deftype = kwd.get("deftype", float)

        # cols holds the column indices to extract; flds the matching
        # (name, datatype) pairs forming the structured dtype.
        cols = []
        flds = []
        for (i, desc) in enumerate(col_desc):
            if isinstance(desc, (int, numpy.integer)):
                desc = (desc,)
            if not 1 <= len(desc) <= 3:
                raise ValueError("Invalid column descriptor: %r" % (desc,))
            cols.append(desc[0])
            if len(desc) >= 2:
                dtype = desc[1]
            else:
                dtype = deftype
            if len(desc) == 3:
                name = desc[2]
            else:
                name = 'f' + str(i)
            flds.append((name, dtype))

        rows = []
        for line in self._iter_lines(kwd.get("maxcount")):
            vals = line.split()
            rows.append(tuple([vals[col] for col in cols]))
        # finally convert them to a numpy ndarray:
        return numpy.array(rows, dtype=flds)

    # Sets fancy options
    def set_options(self, **opts):
        """Sets the reader options given as keywords.

        Recognized options: expand_errorbar, skip_blank_lines.
        Returns self.  Raises ValueError on an unrecognized option.
        """
        for (o, v) in opts.items():
            if o == "expand_errorbar":
                self.expand_errorbar(v)
            elif o == "skip_blank_lines":
                self.skip_blank_lines = v
            else:
                raise ValueError("Invalid option: %s" % (o,))
        return self

    # Option for errorbar expansion:
    def expand_errorbar(self, v=True):
        """Enables or disables errorbar expansion.  Returns self."""
        if v:
            self.opt_expand_errorbar = True
            self.field_filtering_proc = self.expand_errorbar_hook
        else:
            self.opt_expand_errorbar = False
            self.field_filtering_proc = lambda flds: flds
        return self

    def expand_errorbar_hook(self, F):
        # A hook for field_filtering_proc for expanding errorbars.
        # NOTE(review): still imports from pyqmc although this module was
        # moved to wpylib -- confirm pyqmc is meant to remain a dependency.
        from pyqmc.stats.errorbar import expand
        return expand(F, flatten=True)
|
||||
Reference in New Issue
Block a user