* Added my tools to parse/process textual table, from Cr2 project.
This commit is contained in:
@@ -128,10 +128,10 @@ class text_input(object):
|
||||
return self.next_(self)
|
||||
|
||||
def seek_text(self, regex=None, match=None):
|
||||
'''Seeks the file until a particular piece text is encountered.
|
||||
"""Seeks the file until a particular piece text is encountered.
|
||||
We ignore all comments.
|
||||
The `regex' argument can be either a regex string or a standard python
|
||||
regular expression object.'''
|
||||
regular expression object."""
|
||||
|
||||
if regex:
|
||||
if isinstance(regex, basestring):
|
||||
@@ -349,3 +349,93 @@ def tail(filename, maxlines):
|
||||
pass
|
||||
return out[-maxlines:]
|
||||
|
||||
|
||||
|
||||
# More tools for extracting data from table-like text stream/string.
|
||||
|
||||
tbl_filter_num1_rx = re.compile('^\s*[-+]?(?:[0-9]+|[0-9]+\.|\.[0-9]+|[0-9]+\.[0-9]+)(?:[EeDd][-+]?[0-9]+)?')
|
||||
def tbl_filter_num1(flds, col=0, **args):
|
||||
"""Simple filter function: given a list of splitted text in `flds`,
|
||||
if the col-th field of the row is a numerical
|
||||
string, then it is a valid row; otherwise we will ignore this row.
|
||||
"""
|
||||
return tbl_filter_num1_rx.match(flds[col])
|
||||
|
||||
|
||||
def filter_table_text(T, filter=tbl_filter_num1, filter_args={}):
|
||||
"""Filters out irrelevant text (junk) from the table by commenting them out.
|
||||
Using the default filter, we assume that the target column (default==0)
|
||||
is a numerical value (usually a geometry value or a similar parameter).
|
||||
|
||||
Input:
|
||||
* T = a text table (a multi-line string, with the linebreaks)
|
||||
* filter = a filter function
|
||||
* filter_args = dict-style arguments for the filter function."""
|
||||
Lines = T.splitlines()
|
||||
for (i,L) in enumerate(Lines):
|
||||
F = L.split()
|
||||
if len(F) == 0:
|
||||
pass
|
||||
elif not F[0].startswith("#") and not filter(F, **filter_args):
|
||||
Lines[i] = "#" + L
|
||||
return "\n".join(Lines)
|
||||
|
||||
|
||||
class tbl_filter_num1_limited_range(object):
|
||||
"""Fancy filtering: Assume that the first column is numerical
|
||||
(e.g., rbond); and only include rows where this `rbond` fall
|
||||
within a given range.
|
||||
"""
|
||||
def __init__(self, rmin, rmax, col=0):
|
||||
self.rmin, self.rmax = rmin, rmax
|
||||
self.col = col
|
||||
def __call__(self, flds, **args):
|
||||
if tbl_filter_num1_rx.match(flds[self.col]):
|
||||
r = float(flds[0])
|
||||
return self.rmin <= r <= self.rmax
|
||||
else:
|
||||
return False
|
||||
def mk_table_filter(self):
|
||||
return lambda T: filter_table_text(T,filter=self)
|
||||
@classmethod
|
||||
def create(cls, rmin, rmax, col=0):
|
||||
o = cls(rmin, rmax, col=col)
|
||||
func = o.mk_table_filter()
|
||||
func.__name__ = "%s.create(%.4f,%.4f,%d)" \
|
||||
% (cls.__name__, rmin, rmax, col)
|
||||
return func
|
||||
|
||||
|
||||
def read_table(F, maps={}):
|
||||
"""Reads in a 2-D table from a text stream.
|
||||
Returns a list of lists containing the table content, in each cell by
|
||||
default as a string, unless a mapping function is provided (for simple
|
||||
data conversion only).
|
||||
|
||||
This is a legacy tool. It appears that numpy.genfromtxt can do what
|
||||
this tool can do, and better.
|
||||
You should probably check if numpy.genfromtxt can do the required job
|
||||
before using read_table/read_table_text provided in this module.
|
||||
"""
|
||||
rows = []
|
||||
comment_char = "#"
|
||||
for L in F:
|
||||
L = L.split(comment_char,1)[0]
|
||||
flds = L.split()
|
||||
if len(flds) == 0:
|
||||
continue
|
||||
if maps:
|
||||
for i in xrange(len(flds)):
|
||||
if i in maps:
|
||||
flds[i] = maps[i](flds[i])
|
||||
rows.append(flds)
|
||||
return rows
|
||||
|
||||
|
||||
def read_table_text(txt, maps={}):
|
||||
"""Reads in a 2-D table from a text stream.
|
||||
The text (as a whole string) is given in the txt argument.
|
||||
"""
|
||||
from StringIO import StringIO
|
||||
return read_table(StringIO(txt), maps)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user