* text_input.read_items(): added options `end_line_match' and `last_line_match'
(mutually exclusive options) to allow the dataset reading to end upon encountering a certain text pattern (or a more complicated match, if we specify a function for the option value). These options can be used together with the `maxcount' option; whichever condition is met first (maxcount records have been read, or end/last_line_match finds a match) will end the reading of the dataset.
This commit is contained in:
@@ -27,10 +27,25 @@ This module is part of wpylib project.
|
||||
import re
|
||||
import numpy
|
||||
|
||||
from wpylib.sugar import zip_gen
|
||||
from wpylib.file.file_utils import open_input_file
|
||||
from wpylib.py import make_unbound_instance_method
|
||||
import wpylib.py.im_weakref
|
||||
|
||||
def make_match_proc(match):
    """Make matching procedure: simple string becomes regexp,
    regexp remains regexp, and other callable object is passed as is.

    Parameters:
    * match: one of
      - a pattern string, which is compiled with re.compile();
      - an object having a callable `search' attribute (e.g. a compiled
        regular expression);
      - anything else (typically a test subroutine accepting the text line
        as its single argument), which is handed back untouched.

    Returns a callable taking one text line. For the string and regexp cases
    the callable returns whatever .search(line) returns (a match object, or
    None if there is no match). Any other object is returned unchanged.
    """
    # `basestring' exists only on Python 2. Fall back to str/bytes so that the
    # string test does not raise NameError when run under Python 3.
    try:
        string_types = basestring  # noqa: F821 (Python 2 builtin)
    except NameError:
        string_types = (str, bytes)

    if isinstance(match, string_types):
        # Plain pattern text: compile once, hand out the bound search method.
        return re.compile(match).search
    if callable(getattr(match, "search", None)):
        # Regexp-like object: use its own search method directly.
        return match.search
    # Anything else is assumed to be a ready-made matching procedure.
    return match
|
||||
|
||||
|
||||
class text_input(object):
|
||||
'''Text input reader with support for UNIX-style comment marker (#) and
|
||||
standard field separation (tabs and whitespaces).
|
||||
@@ -167,6 +182,7 @@ class text_input(object):
|
||||
If the tuple contains the third field, it is used as the name of the field;
|
||||
otherwise the fields are named f0, f1, f2, ....
|
||||
|
||||
Preliminary ability to read in complex data has been added!
|
||||
Complex data (floating-point only) must be specified as a tuple of two columns
|
||||
containing the real and imaginary data, like this:
|
||||
((2, 3), complex, 'ampl')
|
||||
@@ -177,8 +193,13 @@ class text_input(object):
|
||||
Additional keyword options:
|
||||
* deftype: default datatype
|
||||
* maxcount: maximum number of records to be read
|
||||
* end_line_match: a regular expression or test subroutine accepting a
|
||||
single argument (i.e. the text line) marking the end boundary of the list
|
||||
to be read (i.e. one line past the list contents)
|
||||
* last_line_match: a regular expression or test subroutine accepting a
|
||||
single argument (i.e. the text line) marking the last element of the list
|
||||
to be read
|
||||
|
||||
TODO: Needs ability to read in complex data.
|
||||
"""
|
||||
deftype = kwd.get("deftype", float)
|
||||
|
||||
@@ -226,7 +247,28 @@ class text_input(object):
|
||||
cols = reg.cols
|
||||
flds = reg.flds
|
||||
get_fields = lambda vals : tuple([ filt(vals,col) for (filt,col) in cols ])
|
||||
|
||||
if "maxcount" in kwd:
|
||||
src_iter = zip_gen(xrange(kwd['maxcount']),self)
|
||||
else:
|
||||
src_iter = enumerate(self)
|
||||
# FIXME below: zip() evaluates the function before the loop, thus may
|
||||
# eat a lot of memory.
|
||||
if 'end_line_match' in kwd:
|
||||
rslt = []
|
||||
match = make_match_proc(kwd['end_line_match'])
|
||||
for (c,vals) in src_iter:
|
||||
if match(vals):
|
||||
break
|
||||
rslt.append(get_fields(vals.split()))
|
||||
elif 'last_line_match' in kwd:
|
||||
rslt = []
|
||||
match = make_match_proc(kwd['end_line_match'])
|
||||
for (c,vals) in src_iter:
|
||||
rslt.append(get_fields(vals.split()))
|
||||
if match(vals):
|
||||
break
|
||||
elif "maxcount" in kwd:
|
||||
#print "hello"
|
||||
rslt = [ get_fields(vals.split()) for (c,vals) in zip(xrange(kwd['maxcount']),self) ]
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user