1   
  2  """KhipuDB is the main interface to the database of khipus. 
  3   
  4  It contains code to query the database for various khipu properties, as well 
  5  as some code to interpret the information in the database or make it easier 
  6  to use.  For examples, see `Khipu.sorted_cords` and `Cord.numerical`. 
  7   
  8  A simple example of the use of KhipuDB:: 
  9   
 10   import KhipuDB 
 11   db = KhipuDB.KhipuDB ()  ## loads 'khipu.db' from wherever KhipuDB.py lives 
 12   db = KhipuDB.KhipuDB ('your_db') 
 13   ## not for faint of heart:  db.execute ('...arbitrary SQL command...') 
 14   khipu = db.khipu_with_investigator_num ('UR001') 
 15   print khipu.description ()  ## ASCII description of most text fields 
 16   print [cord.cord_length for cord in khipu.cords ()] 
 17   cord = khipu.cords () [11]  ## some random cord 
 18   print cord.short_string ()  ## 'S1 L9' 
 19   print cord.numerical ()  ## True 
 20   print cord.numerify ()  ## 19 
 21   
 22  """ 
 23  from __future__ import division 
 24  import cPickle as pickle, datetime, os, re 
 25   
 26   
 27  preferred_db = ['sqlite', 'mysql'] 
 28   
 29  """This allows the user to specify whether they prefer to use the sqlite or 
 30  mysql interface to the database. 
 31   
 32  Sqlite is local, but poorly indexed.  MySQL queries can run faster due 
 33  to better indexing, but the network imposes latency on each request. 
 34  """ 
 35   
 36  for dbmode in preferred_db: 
 37    try: 
 38      if dbmode == 'sqlite': 
 39        import sqlite3 as sqlite 
 40        assert sqlite.paramstyle == 'qmark' 
 41      elif dbmode == 'mysql': 
 42        import MySQLdb 
 43      else: 
 44        continue 
 45    except ImportError: 
 46      continue 
 47    else: 
 48      break 
 49  else: 
 50    raise ImportError ('Neither sqlite3 nor MySQL could be imported') 
 51   
 54   
 57      self.db = db 
 58      self.__dict__.update (data) 
  59    @classmethod 
 60 -  def make (Class, db, **fields): 
  61      c = db.cache.setdefault (Class.__name__, {}) 
 62      id = fields[Class.id_field] 
 63      if id not in c: 
 64        c[id] = Class (db, **fields) 
 65      return c[id] 
  66    @classmethod 
 68      c = db.cache.setdefault (Class.__name__, {}) 
 69      if id not in c: 
 70        all = Class.select (db, where = (Class.id_field, '=', id)) 
 71        if not all: return Class.none 
 72        c[id] = all[0] 
 73      return c[id] 
  74    @classmethod 
 75 -  def select (Class, db, join = None, where = None, order_by = None): 
  76      c = db.cache.setdefault (Class.__name__, {}) 
 77      if (join, where, order_by) not in c: 
 78        args = [] 
 79        query = 'SELECT ' + ', '.join ('`%s`' % field for field in Class.fields) 
 80        query += ' FROM `%s`' % Class.table 
 81        if join is not None: 
 82          query += ' JOIN `%s` ON `%s` = `%s`' % join 
 83        if where is not None: 
 84          def parse_where (where): 
 85            if isinstance (where, list): 
 86              return ' AND '.join (parse_where (piece for piece in where)) 
 87            elif isinstance (where, tuple): 
 88              if len (where) == 1: 
 89                return ' WHERE `%s`' % where[0] 
 90              elif len (where) == 2: 
 91                return ' WHERE `%s` %s' % (where[0], where[1]) 
 92              elif len (where) == 3: 
 93                args.append (where[2]) 
 94                return ' WHERE `%s` %s ?' % (where[0], where[1]) 
 95              else: 
 96                raise ValueError ('where too long: %r' % where) 
 97            else: 
 98              return ' WHERE %s' % where 
  99          query += parse_where (where) 
100        if order_by is not None: 
101          if isinstance (order_by, (list, tuple)): 
102            query += ' ORDER BY ' + ', '.join ('`%s`' % field for field in order_by) 
103          else: 
104            query += ' ORDER BY `%s`' % order_by 
105        c[join, where, order_by] = \ 
106          [Class.make (db, **dict ((Class.fields[i], row[i]) 
107                                   for i in range (len (Class.fields)))) 
108           for row in db.execute (query, args)] 
109      return c[join, where, order_by] 
111      d = dict (self.__dict__) 
112      if 'db' in d: del d['db'] 
113      return d 
 117      d = dict (self.__dict__) 
118      if 'db' in d: del d['db'] 
119      return '%s(%r, %s)' % (self.__class__, self.db, 
120        ', '.join ('%s=%r' % (key, value) for key, value in d.iteritems ())) 
 122      d = dict (self.__dict__) 
123      if 'db' in d: del d['db'] 
124      print '<%s %s>' % (self.__class__, 
125        ', '.join ('%s=%r' % (key, value) for key, value in d.iteritems ())) 
 126   
128 -  def __init__ (self, *args, **keywords): 
 131      return self.__class__.__name__ + '()' 
 133      return self.__class__.__name__ + '()' 
 136 -  def __init__ (self, table, key_field, value_field): 
 137      self.table = table 
138      self.key_field = key_field 
139      self.value_field = value_field 
 141      c = db.cache.setdefault (self.table, {}) 
142      if key not in c: 
143        answer = db.execute ('SELECT `%s` FROM `%s` WHERE `%s` = ?' % 
144                   (self.value_field, self.table, self.key_field), [key]) 
145        try: 
146          c[key] = answer.next () [0] 
147        except StopIteration: 
148          c[key] = None 
149      return c[key] 
 156 -class Khipu (DatabaseObject): 
 157    table = 'khipu_main' 
158    fields = ['investigator_num', 'khipu_id', 'museum_name', 'museum_num', 
159              'nickname', 'provenance', 'region', 'archive_num', 'museum_descr', 
160              'discovered_by', 'date_discovered', 'earliest_age', 'latest_age', 
161              'complete', 'duplicate_flag', 'duplicate_id'] 
162    id_field = 'khipu_id' 
166      complete = ('yes' if self.complete else 'no') 
167      if not self.duplicate_flag: 
168        duplicate = ['no'] 
169      else: 
170        duplicate = ['of %d' % self.duplicate_id] 
171       
172       
173      return '''KHIPU %s (internal id %s) 
174  Museum Name: %s 
175  Museum Number: %s 
176  Nickname: %s 
177   
178  Provenance: %s 
179  Region: %s 
180  Archive: %s - %s 
181  Museum Description: %s 
182   
183  Discovered By: %s 
184  Date Discovered: %s 
185  Age Range: %s -- %s 
186   
187  Complete: %s 
188  Duplicate: %s 
189   
190  %s 
191  Number of Cord Clusters: %s 
192  Number of Cords: %s 
193  ''' % (self.investigator_num, self.khipu_id, self.museum_name, self.museum_num, 
194         self.nickname or 'none', self.provenance, self.region, self.archive_num, 
195         self.archive_name (), self.museum_descr or 'none', 
196         self.discovered_by or 'unknown', 
197         (self.date_discovered if self.date_discovered != '0000-00-00' 
198                               else 'unknown'), 
199         self.earliest_age, self.latest_age, 
200         ('yes' if self.complete else 'no'), 
201         (self.duplicate_id if self.duplicate_flag else 'no'), 
202         self.primary_cord ().describe (), 
203         sum (1 for x in self.cord_clusters ()), 
204         sum (1 for x in self.cords ())) 
 206      return Cord.select (self.db, 
207        where = ('khipu_id', '=', self.khipu_id), order_by = 'cord_ordinal') 
 208       
214       
222      return self.db.execute ('SELECT MIN(`thickness`) FROM `cord` WHERE `thickness` != 0 AND `khipu_id` = ?', [self.khipu_id]).next ()[0] 
  227   
230  Khipu.NoneType = KhipuNone 
231  Khipu.none = Khipu.NoneType () 
232   
234    table = 'primary_cord' 
235    fields = ['pcord_id', 'structure', 'thickness', 
236               
237              'pcord_length', 'fiber', 'termination', 'beginning', 'twist'] 
238    id_field = 'pcord_id' 
246      return '''Primary Cord: 
247    Total Length: %s 
248    Beginning: %s - %s 
249    Termination: %s - %s 
250    Final Twist: %s 
251    Thickness: %s 
252    Fiber: %s - %s 
253  ''' % (self.pcord_length, self.beginning, self.beginning_descr (), 
254         self.termination, self.termination_descr (), self.twist, 
255         self.thickness, self.fiber, self.fiber_descr ()) 
 261      return CordCluster.select (self.db, 
262        join = ('cord', 'cord_cluster.cluster_id', '=', 'cord.cluster_id'), 
263        where = ('cord.pendant_from', '=', self.pcord_id), order_by = 'ordinal') 
 265      return Cord.select (self.db, 
266        where = ('pendant_from', '=', self.pcord_id), order_by = 'cord_ordinal') 
 272      return 'Primary Cord (%s) of Khipu %s (%s): length %s, beginning %s, termination %s, final twist %s, thickness %s, fiber %s' % (self.pcord_id, self.investigator_num (), self.khipu_id, self.pcord_length, self.beginning_descr (), self.termination_descr (), self.twist, self.thickness, self.fiber_descr ()) 
  273   
275    table = 'cord_cluster' 
276    fields = ['khipu_id', 'cord_id', 'cluster_id', 'ordinal', 'start_position', 
277              'end_position', 'num_cords', 'cluster_level', 'spacing', 
278               
279               
280             ] 
281    id_field = 'cluster_id' 
287      return Cord.select (self.db, where = ('cluster_id', '=', self.cluster_id), 
288                          order_by = 'cord_ordinal') 
 290      return url (self.khipu (), 'C%d' % self.cluster_id) 
 292      return '%s CordCluster (%s) of Khipu %s (%s): level %s from %s to %s spaced %s cm' % (self.num_cords, self.cluster_id, self.investigator_num (), self.khipu_id, self.cluster_level, self.start_position, self.end_position, self.spacing) 
  293   
294 -class Cord (DatabaseObject): 
 295    table = 'cord' 
296    fields = ['khipu_id', 'cord_id', 'twist_angle', 'thickness', 'fiber', 
297              'termination', 'twist', 'pendant_from', 
298               
299              'attachment_type', 
300              'cluster_id', 'cluster_ordinal', 'cord_ordinal', 'cord_length', 
301              'cord_level', 
302               
303             ] 
304    id_field = 'cord_id' 
316      return CordCluster.select (self.db, 
317        join = ('cord', 'cord_cluster.cluster_id', '=', 'cord.cluster_id'), 
318        where = ('cord.pendant_from', '=', self.cord_id), order_by = 'ordinal') 
 320      return Cord.select (self.db, 
321        where = ('pendant_from', '=', self.cord_id), order_by = 'cord_ordinal') 
 323      yield self 
324      for c in self.cords(): 
325        for cc in c.sorted_cords(): 
326          yield cc 
 328      return KnotCluster.select (self.db, 
329        where = ('cord_id', '=', self.cord_id), order_by = 'ordinal') 
 331      return Knot.select (self.db, 
332        where = ('cord_id', '=', self.cord_id), order_by = 'knot_ordinal') 
 334      cluster = self.cord_cluster () 
335      if cluster.num_cords == 1: 
336        assert self.cluster_ordinal == 1 
337        return cluster.start_position 
338      elif cluster.start_position == cluster.end_position: 
339        return cluster.start_position 
340      else: 
341        assert cluster.end_position 
342        return cluster.start_position + \ 
343          (cluster.end_position - cluster.start_position) * \ 
344          (self.cluster_ordinal - 1) / (cluster.num_cords - 1) 
 350      """Test whether the cord encodes a numerical value (approximate). 
351   
352      This function is inexact: it returns true for knot sequences that 
353      could be numbers, even if the knot spacing indicates otherwise. 
354      On the other hand, it has been conjectured that multiple numbers might 
355      be encoded on a single string, using the long knots as a separator; 
356      these sequences will be marked *non*-numerical by this function.""" 
357      return numerical_short_string (self.short_string ()) 
 359      """Convert the cord into a numerical value encodeed (approximate). 
360   
361      This function is *not* guaranteed to return the exact number encoded. 
362      We ignore "zeros" in the input string (which are encoded by gaps 
363      between knots in a fuzzy way, and therefore hard to make algorithmic), 
364      except for the last zero which we can detect because the ones digit is 
365      specially marked.  Thus the numbers 10, 100, and 1000 all return 10 
366      from `numerify`, although at least we can distinguish this equivalence 
367      group from 1 (because the ones digit is specially marked).  Similarly, 
368      the numbers 164, 1064, and 1604 all return 164 from `numerify`, 
369      although 1640 (and 16400) will return 1640. 
370   
371      Returns ``None`` if the string is not considered numerical 
372      according to `numerical`.""" 
373      return numerify_short_string (self.short_string ()) 
 375      return url (self.khipu (), 'c%d' % self.cord_id) 
 377      return '%s Cord (%s) of Khipu %s (%s): length %s, termination %s, final twist %s, thickness %s, fiber %s' % (self.short_string () or 'blank', self.cord_id, self.investigator_num (), self.khipu_id, self.cord_length, self.termination_descr (), self.twist or 'unknown', self.thickness, self.fiber_descr ()) 
 378   
380    table = 'knot_cluster' 
381    fields = ['cord_id', 'cluster_id', 'start_pos', 'end_pos', 'total_value', 
382              'ordinal', 'num_knots'] 
383    id_field = 'cluster_id' 
393      return Knot.select (self.db, 
394        where =  
395                 ('cluster_id', '=', self.cluster_id), 
396        order_by = 'knot_ordinal') 
 398      s = ''.join (knot.short_string () for knot in self.knots ()) 
399      if s.replace ('S', '') == '': 
400        return 'S%d' % len (s) 
401      return s 
 403      return url (self.khipu (), 'K%d' % self.cluster_id) 
  406   
407 -class Knot (DatabaseObject): 
 408    table = 'knot' 
409    fields = ['cord_id', 'type_code', 'direction', 'knot_value', 'cluster_id', 
410              'knot_id', 'knot_ordinal', 'cluster_ordinal', 'num_turns'] 
411    id_field = 'knot_id' 
423      cluster = self.knot_cluster () 
424      if cluster.num_knots == 1: 
425        assert self.cluster_ordinal == 1 
426        return cluster.start_pos 
427      elif cluster.start_pos == cluster.end_pos: 
428        return cluster.start_pos 
429      else: 
430        if not cluster.end_pos: return cluster.start_pos   
431        return cluster.start_pos + \ 
432          ((self.cluster_ordinal - 1) / (cluster.num_knots - 1)) * \ 
433          (cluster.end_pos - cluster.start_pos) 
 435      if self.num_turns: 
436        return '%s%d' % (self.type_code, self.num_turns) 
437      else: 
438        return self.type_code 
 440      return url (self.khipu (), 'k%d' % self.knot_id) 
  443   
446      if dbmode == 'sqlite' or filename is not None: 
447        if filename is None:  
448          filename = os.path.join (os.path.dirname (__file__), 'khipu.db') 
449        self.db = sqlite.connect (filename) 
450        self.dbmode = 'sqlite' 
451      else: 
452        import MySQLdb  
453        self.db = MySQLdb.connect(host='sql.mit.edu', user='khipu', 
454                                  passwd='c7bfe561', db='khipu+khipu') 
455        self.dbmode = 'mysql' 
456      self.filename = (filename if filename is not None else 'sql') + '.cache' 
457      self.load_cache () 
 461      if self.dbmode == 'sqlite': 
462        return self.db.execute (*args) 
463       
464      isSelect = len(args)>0 and args[0].lower().startswith('select') 
465      if len(args)>1: 
466        first = args[0].replace('?','%s') 
467        args = (first,) + args[1:] 
468      cursor = self.db.cursor() 
469      result = cursor.execute (*args) 
470      if isSelect: 
471        return iter(cursor.fetchall()) 
472      else: 
473        return result 
 475      return Khipu.select (self, order_by = 'investigator_num') 
 477      return Cord.select (self, order_by = 'cord_id') 
 481      return Knot.select (self, order_by = 'knot_id') 
 485      return iter (self.khipus ()) 
 487      return Khipu.select (self, where = ('investigator_num', '=', inum)) [0] 
 489      try: 
490        cache_file = open (self.filename, 'rb') 
491        self.cache = pickle.load (cache_file) 
492        if not self.cache: self.cache = {} 
493        cache_file.close () 
494      except (IOError, OSError, EOFError, pickle.PickleError, pickle.UnpicklingError): 
495        self.cache = {} 
496      fix_cache (self.cache, self) 
 498      cache_file = open (self.filename, 'wb') 
499      pickle.dump (self.cache, cache_file) 
500      cache_file.close () 
 503    if isinstance (cache, list): 
504      for x in cache: 
505        fix_cache (x, db) 
506    elif isinstance (cache, dict): 
507      for x in cache.itervalues (): 
508        fix_cache (x, db) 
509    elif isinstance (cache, (str, unicode, int, float)) or cache is None: 
510      pass 
511    else: 
512      cache.db = db 
 513   
514  numerical_re = r'^(S. )*(S.|L.|E)?$' 
515  numerical_rec = re.compile (numerical_re) 
517    """WARNING: Results are inexact; see `Cord.numerical` above.""" 
518    return bool (numerical_rec.search (short)) 
 519   
521    """WARNING: Results are inexact; see `Cord.numerify` above.""" 
522    if not numerical_short_string(short): 
523      return None 
524    short = re.sub (r'(S.)$', r'\1 L0', short) 
525    short = re.sub (r'E$', r'L1', short) 
526    short = re.sub (r'\s+', r'', short) 
527    short = re.sub (r'[SL]', r'', short) 
528    if short != '': 
529      return int (short) 
530    return 0 
 531   
532  url_map = { 
533    Cord: ("cords.html", "cords_by_short_string.html" ), 
534    Khipu: ("khipus.html", "khipus_by_short_string.html" ), 
535  } 
536   
537 -def url (kind, part = None):  
 538    if isinstance (kind, Khipu): 
539      kind = kind.investigator_num + '.svg' 
540    url = 'http://projects.csail.mit.edu/khipu/draw/' + kind 
541    if part is not None: url += '#' + part 
542    return url 
543   
545    """Prints out the Urton-style description string for the khipu with the 
546    investigator number given on the command line.""" 
547    import sys 
548    db = KhipuDB ('khipu.db') 
549    q = db.khipu_with_investigator_num (sys.argv[1]) 
550    print q.description () 
 551   
552  if __name__ == '__main__': main () 
553