* Committing all dirty work to CVS. We are migrating to GIT.
Beware: changes in this commit may or may not work properly.
This commit is contained in:
191
db/tables.py
Normal file
191
db/tables.py
Normal file
@@ -0,0 +1,191 @@
# $Id: tables.py,v 1.1 2011-10-06 19:14:47 wirawan Exp $
#
# wpylib.db.tables module
# Created: 20100223
# Wirawan Purwanto
#

"""Simple table accessors for sqlite database."""

||||
import sys
|
||||
import os
|
||||
import os.path
|
||||
import time
|
||||
|
||||
try:
|
||||
import sqlite3
|
||||
except:
|
||||
# For Python < 2.5:
|
||||
import pysqlite2.dbapi2 as sqlite3
|
||||
|
||||
|
||||
# Map Python value types onto the corresponding sqlite3 column type names.
dtype_map = {
    str: 'TEXT',
    int: 'INTEGER',
    float: 'REAL',
}

# Row factories usable with simple_table.row_kind():
simple_row_type = None            # default factory: rows come back as plain tuples
indexable_row_type = sqlite3.Row  # rows addressable by column name
|
||||
class simple_table(object):
    """Simple table accessor with no primary key.

    Wraps a single table inside an sqlite database.  `src_name` may be
    either a database file name or an already-open sqlite3.Connection.
    The table is created on construction if it does not yet exist.
    """

    # Columns automatically prepended to every table (none by default);
    # derived classes may override.
    dtypes_default = []

    # Python type -> sqlite column type, used when creating/adding columns.
    # FIX: methods referenced self.sqlite_dtype_map, but it was never
    # defined anywhere (only a module-level `dtype_map` existed), so table
    # creation raised AttributeError.  Define it on the class.
    sqlite_dtype_map = {
        str: 'TEXT',
        int: 'INTEGER',
        float: 'REAL',
    }

    def __init__(self, src_name, table_name, dtypes=None):
        """Opens the database and creates the table if necessary.

        src_name   -- database file name, or an open sqlite3.Connection.
        table_name -- name of the table to operate on.
        dtypes     -- iterable of (column_name, python_type) pairs defining
                      the table's columns (in addition to dtypes_default).
        """
        self.src_name = src_name
        self.table_name = table_name
        if isinstance(src_name, str):
            self.db = sqlite3.connect(src_name)
            self.dbc = self.db.cursor()
        elif isinstance(src_name, sqlite3.Connection):
            self.src_name = None
            self.db = src_name
            self.dbc = self.db.cursor()
        else:
            raise ValueError("Invalid src_name data type")
        self.db.text_factory = str
        self.sql_params = {
            'table_name': table_name,
        }
        self.debug = 1

        # NOTE: table/column names are spliced directly into the SQL text
        # (sqlite parameters cannot stand for identifiers), so they must
        # come from trusted input.
        # FIX: `list(dtypes)` crashed when dtypes was left at its None default.
        create_sql = """\
CREATE TABLE IF NOT EXISTS '%(table_name)s' (
""" \
            + ", ".join(["'%s' %s" % (dname, self.sqlite_dtype_map[dtyp])
                         for (dname, dtyp)
                         in self.dtypes_default + list(dtypes or [])
                        ]) \
            + """
);
"""
        self.exec_sql(create_sql)
        self.db.commit()

    def exec_sql(self, stmt, params=None):
        """Executes one SQL statement after %-interpolating sql_params
        (currently just the table name) into it.  Returns the cursor."""
        sql_stmt = stmt % self.sql_params
        if self.debug:
            print("--SQL::", sql_stmt.rstrip())
            if params:
                print("--val::", params)
        if params:
            return self.dbc.execute(sql_stmt, params)
        else:
            return self.dbc.execute(sql_stmt)

    def add_fields(self, dtypes):
        """Adds columns to the table.

        dtypes -- iterable of (column_name, python_type) pairs."""
        for (dname, dtyp) in dtypes:
            # Note: the '%' format binds before '+', so only the column
            # name/type fragment is interpolated here; the table name is
            # substituted later by exec_sql.
            self.exec_sql("ALTER TABLE '%(table_name)s' ADD COLUMN"
                          + " '%s' %s;" % (dname, self.sqlite_dtype_map[dtyp]))
        self.db.commit()

    def register_file(self, filename, replace=False, extra_values=None):
        """Registers a file: records its mtime, size, and content digest.

        The table must have a `filename' column plus the metadata columns
        listed below.  extra_values, if given, is an iterable of
        (field, value) pairs stored alongside the standard metadata.
        If replace is True and a record for this filename already exists,
        it is updated in place instead of inserted.
        """
        # NOTE(review): get_file_stats is not defined or imported in this
        # module view -- confirm where it comes from.
        filestats = get_file_stats(filename)
        fields = [
            ('md5sum', filestats['md5sum']),
            ('date', filestats['mdate']),
            ('time', filestats['mtime']),
            ('size', filestats['size']),
        ] + list(extra_values or [])  # FIX: extra_values may be None
        dnames = [dname for (dname, dval) in fields]
        dvals = [dval for (dname, dval) in fields]

        if replace:
            # Only UPDATE if the filename is already registered.
            count = [
                x for x in self.exec_sql(
                    "SELECT count(*) from '%(table_name)s' where filename = ?;",
                    (filename,)
                )
            ][0][0]
            if count == 0:
                replace = False

        if replace:
            # WARNING: This will replace all the occurences of the entry
            # with the same filename.  Replaceable insert is not intended
            # for tables with duplicate entries of the same filename.
            insert_sql = "UPDATE '%(table_name)s' SET " \
                + ', '.join(["'%s' = ?" % d for d in dnames]) \
                + " WHERE filename = ?;"
            vals = tuple(dvals + [filename])
        else:
            insert_sql = "INSERT INTO '%(table_name)s' (filename, " \
                + ", ".join(["'%s'" % d for d in dnames]) \
                + ") VALUES (?" + ',?' * (len(fields)) + ");"
            vals = tuple([filename] + dvals)
        self.exec_sql(insert_sql, vals)

    def flush(self):
        """Commits all pending changes to the database."""
        self.db.commit()

    def get_filenames(self):
        """Reads all the file names in the table to memory (sorted)."""
        return [
            rslt[0] for rslt in
            self.exec_sql("SELECT filename FROM '%(table_name)s' ORDER BY filename;")
        ]

    def __getitem__(self, filename):
        """Reads all the entries matching in the `filename' field.
        A '%' in filename triggers SQL LIKE pattern matching.

        FIX: the original signature was (self, **criteria) but the body
        used an undefined name `filename', so every lookup raised
        NameError; the signature now matches the documented behavior."""
        if filename.find("%") >= 0:
            sql_stmt = "SELECT * FROM '%(table_name)s' WHERE filename LIKE ?;"
        else:
            sql_stmt = "SELECT * FROM '%(table_name)s' WHERE filename = ?;"
        return [rslt for rslt in self.exec_sql(sql_stmt, (filename,))]

    def __setitem__(self, filename, newdata):
        """Updates the metadata on the filename.  Any field other than the
        filename can be updated; the filename serves as a unique key here.

        newdata can be a mapping, like this:

            A_file_table[filename] = {'date': 20041201, 'time': 122144}

        or an iterable of (field, value) tuples:

            A_file_table[filename] = [('date', 20041201), ('time', 122144)]
        """
        if hasattr(newdata, "keys"):
            # Mapping-like input (covers dict and dict look-alikes).
            dnames = list(newdata.keys())
            dvals = [newdata[k] for k in dnames]
        else:
            # Assuming an iterable with ('field', 'value') tuples.
            dnames = [dname for (dname, dval) in newdata]
            dvals = [dval for (dname, dval) in newdata]
        update_sql = "UPDATE '%(table_name)s' SET " \
            + ', '.join(["'%s' = ?" % d for d in dnames]) \
            + " WHERE filename = ?;"
        vals = tuple(dvals + [filename])
        self.exec_sql(update_sql, vals)

    def __contains__(self, filename):
        """Counts the number of record entries matching in the `filename'
        field.  Returns the count itself (truthy for `in`), so the same
        method doubles as count()."""
        if filename.find("%") >= 0:
            sql_stmt = "SELECT count(*) FROM '%(table_name)s' WHERE filename LIKE ?;"
        else:
            sql_stmt = "SELECT count(*) FROM '%(table_name)s' WHERE filename = ?;"
        return [rslt for rslt in self.exec_sql(sql_stmt, (filename,))][0][0]

    # Alias: count(filename) returns the number of matching records.
    count = __contains__

    def fields(self):
        """Returns the field names of the table of the latest query.
        (Valid only after a SELECT; cursor.description is None otherwise.)"""
        return [z[0] for z in self.dbc.description]

    def row_kind(self, kind=None):
        """Gets, and optionally sets, the row factory for query results
        (e.g. pass sqlite3.Row for name-indexable rows)."""
        if kind:
            self.db.row_factory = kind
            # We will reload the cursor to account for the new factory.
            self.dbc = self.db.cursor()
        return self.db.row_factory
||||
|
||||
|
||||
55
math/fft.py
55
math/fft.py
@@ -1,4 +1,4 @@
|
||||
# $Id: fft.py,v 1.1 2010-02-24 14:27:23 wirawan Exp $
|
||||
# $Id: fft.py,v 1.2 2011-10-06 19:14:48 wirawan Exp $
|
||||
#
|
||||
# wpylib.math.fft module
|
||||
# Created: 20100205
|
||||
@@ -37,7 +37,7 @@ The slice [gmin:gmax:gstep] will certainly result in an empty slice.
|
||||
To do this, we define two functions below.
|
||||
First, fft_grid_ranges1 generates the ranges for each dimension, then
|
||||
fft_grid_ranges itself generates all the combination of ranges (which cover
|
||||
all combinations of positive and ndgative frequency domains for all
|
||||
all combinations of positive and negative frequency domains for all
|
||||
dimensions.)
|
||||
|
||||
For a (5x8) FFT grid, we will have
|
||||
@@ -70,6 +70,57 @@ fft_grid_ranges = lambda Gmin, Gmax, Gstep : \
|
||||
all_combinations(fft_grid_ranges1(Gmin, Gmax, Gstep))
|
||||
|
||||
|
||||
class fft_grid(object):
    """A class describing a N-dimensional grid for plane wave
    (or real-space) basis.
    In this version, the grid is centered at (0,0,...) coordinate.
    To actually create a grid, use the new_dens() method.
    """
    # Default element type for arrays created by new_dens().
    dtype = complex

    def __init__(self, Gsize=None, Gmin=None, Gmax=None, dtype=None):
        """Creates a new grid descriptor.
        There are two possible methods, and you must choose either one for
        initialization:
        * Gsize = an N-dimensional array (list, tuple, ndarray) specifying
          the number of grid points in each dimension.
        or
        * Gmin, Gmax = a pair of N-dimensional arrays (list, tuple, ndarray)
          specifying the smallest (most negative) and largest (most positive)
          coordinates in each dimension.
          The grid size will be specified to fit this range.
        """
        from numpy import maximum
        # FIX: the original used `Gsize != None` etc.; since the docstring
        # allows ndarray arguments, `!= None` produces an elementwise bool
        # array and an ambiguous truth value.  Use identity tests instead.
        if Gsize is not None:
            self.Gsize = numpy.array(Gsize, dtype=int)
            # fft_grid_bounds is defined elsewhere in this module.
            (self.Gmin, self.Gmax) = fft_grid_bounds(self.Gsize)
        elif Gmin is not None and Gmax is not None:
            self.Gmin = numpy.array(Gmin, dtype=int)
            self.Gmax = numpy.array(Gmax, dtype=int)
            # Figure out the minimum grid size to fit this data:
            Gsize_min = abs(self.Gmin) * 2
            Gsize_max = abs(self.Gmax) * 2 + (abs(self.Gmax) % 2)
            Gsize_def = self.Gmax - self.Gmin + 1
            self.Gsize = maximum(maximum(Gsize_min, Gsize_max), Gsize_def)
        else:
            raise ValueError(
                "Either Gsize or (Gmin,Gmax) parameters have to be specified.")
        if dtype is not None:
            self.dtype = dtype
        self.ndim = len(self.Gsize)

    def new_dens(self, zero=False, dtype=None):
        """Creates a new N-dimensional array (grid).
        With zero=True the array is zero-filled; otherwise its contents
        are uninitialized."""
        if dtype is None:
            dtype = self.dtype
        if zero:
            return numpy.zeros(self.Gsize, dtype=dtype)
        else:
            return numpy.empty(self.Gsize, dtype=dtype)

    def check_index(self, G):
        """Check if an index is valid according to Gmin, Gmax boundary."""
        return numpy.all(self.Gmin <= G) and numpy.all(G <= self.Gmax)
|
||||
|
||||
def fft_r2g(dens):
|
||||
"""Do real-to-G space transformation.
|
||||
According to our covention, this transformation gets the 1/Vol prefactor."""
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# $Id: __init__.py,v 1.1 2011-07-14 19:00:59 wirawan Exp $
|
||||
# $Id: __init__.py,v 1.2 2011-10-06 19:14:49 wirawan Exp $
|
||||
#
|
||||
# wpylib.math.linalg main module
|
||||
# Created: 20110714
|
||||
@@ -13,6 +13,12 @@ already provided by numpy.
|
||||
"""
|
||||
|
||||
import numpy
|
||||
import numpy.linalg
|
||||
|
||||
# My favorites:
|
||||
from numpy import dot, trace
|
||||
from numpy.linalg import det, inv
|
||||
|
||||
|
||||
def matmul(*Mats):
|
||||
"""Do successive matrix product. For example,
|
||||
|
||||
13
math/stats/linear_regression.py
Normal file
13
math/stats/linear_regression.py
Normal file
@@ -0,0 +1,13 @@
|
||||
# $Id: linear_regression.py,v 1.1 2011-10-06 19:14:50 wirawan Exp $
|
||||
#
|
||||
# Module wpylib.math.stats.linear_regression
|
||||
#
|
||||
# Created: 20110414
|
||||
# Wirawan Purwanto
|
||||
#
|
||||
# Transcribed from my cp.inc's stats1.cpp
|
||||
|
||||
class linreg(object):
    """My standard recipe for linear regression.
    """
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# $Id: params_flat_test.py,v 1.2 2011-09-09 18:58:48 wirawan Exp $
|
||||
# $Id: params_flat_test.py,v 1.3 2011-10-06 19:14:51 wirawan Exp $
|
||||
# 20100930
|
||||
|
||||
from wpylib.params import flat as params
|
||||
@@ -55,6 +55,10 @@ def test2b(**_opts_):
|
||||
print "new deltau = ", p.deltau
|
||||
|
||||
|
||||
def dump_objects():
    """Inspect the contents of each parameter dict.  (Currently a stub.)"""
    return None
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
57
py/wrapper.py
Normal file
57
py/wrapper.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# $Id: wrapper.py,v 1.1 2011-10-06 19:15:05 wirawan Exp $
|
||||
#
|
||||
# wpylib.py.wrapper module
|
||||
# Created: 20110608
|
||||
# Wirawan Purwanto
|
||||
#
|
||||
# Wrapper base class.
|
||||
# Used for automatic wrapping of (especially) methods to
|
||||
# dispatch it to a host of object possibilities.
|
||||
#
|
||||
|
||||
|
||||
class wrapper_base(object):
    """Wrapper or proxy object to provide uniform API to other routines,
    etc.

    This class allows dirty tricks such as injecting external functions
    to accomplish certain required tasks in object-oriented manner.
    If using external procedure, it must be callable with "self" as
    its first argument.

    Reserved attributes:
    * _obj_ = the wrapped object
    * _procnames_[:] = method names to wrap automatically.
    * _obj_path_[:] = list of objects (instances) from which to look
      for the methods.
    * _set_obj_path_() = object method to define what objects to be
      included in the object path (_obj_path_).
    """

    def __init__(self, obj):
        """Creates a wrapper around obj."""
        self._obj_ = obj
        if hasattr(self, '_set_obj_path_'):
            # Let the derived class decide which objects to search.
            self._set_obj_path_()
        else:
            self._obj_path_ = [obj]

    def _autoset_proc_(self, procname, extproc=None):
        """Binds self.<procname> to the first matching implementation:
        a '<procname>_proc' method on the derived class takes priority,
        otherwise the objects in _obj_path_ are searched in order.

        FIX: dropped unused imports (make_unbound_method, weakref.ref)
        and the unused local `procname_`.
        """
        from wpylib.py.im_weakref import im_ref

        procname_proc = procname + '_proc'
        if hasattr(self, procname_proc):
            # In case the derived-class has the procedure, we will use that.
            setattr(self, procname, im_ref(getattr(self, procname_proc)))
        else:
            for o in self._obj_path_:
                if hasattr(o, procname):
                    setattr(self, procname, im_ref(getattr(o, procname)))
                    return
            # May implement a global fallback hook here?
            pass
|
||||
|
||||
Reference in New Issue
Block a user