Module KhipuDB
[hide private]
[frames] | no frames]

Source Code for Module KhipuDB

  1  #!/usr/bin/python2.5 
  2  """KhipuDB is the main interface to the database of khipus. 
  3   
  4  It contains code to query the database for various khipu properties, as well 
  5  as some code to interpret the information in the database or make it easier 
  6  to use.  For examples, see `Khipu.sorted_cords` and `Cord.numerical`. 
  7   
  8  A simple example of the use of KhipuDB:: 
  9   
 10   import KhipuDB 
 11   db = KhipuDB.KhipuDB ()  ## loads 'khipu.db' from wherever KhipuDB.py lives 
 12   db = KhipuDB.KhipuDB ('your_db') 
 13   ## not for faint of heart:  db.execute ('...arbitrary SQL command...') 
 14   khipu = db.khipu_with_investigator_num ('UR001') 
 15   print khipu.description ()  ## ASCII description of most text fields 
 16   print [cord.cord_length for cord in khipu.cords ()] 
 17   cord = khipu.cords () [11]  ## some random cord 
 18   print cord.short_string ()  ## 'S1 L9' 
 19   print cord.numerical ()  ## True 
 20   print cord.numerify ()  ## 19 
 21   
 22  """ 
 23  from __future__ import division 
 24  import cPickle as pickle, datetime, os, re 
 25   
 26  ## Set your database type preference.  mysql is remote. 
 27  preferred_db = ['sqlite', 'mysql'] 
 28  #preferred_db = ['mysql', 'sqlite'] 
 29  """This allows the user to specify whether they prefer to use the sqlite or 
 30  mysql interface to the database. 
 31   
 32  Sqlite is local, but poorly indexed.  MySQL queries can run faster due 
 33  to better indexing, but the network imposes latency on each request. 
 34  """ 
 35   
 36  for dbmode in preferred_db: 
 37    try: 
 38      if dbmode == 'sqlite': 
 39        import sqlite3 as sqlite 
 40        assert sqlite.paramstyle == 'qmark' 
 41      elif dbmode == 'mysql': 
 42        import MySQLdb 
 43      else: 
 44        continue 
 45    except ImportError: 
 46      continue 
 47    else: 
 48      break 
 49  else: 
 50    raise ImportError ('Neither sqlite3 nor MySQL could be imported') 
 51   
52 -class MissingPart (Exception): pass
53 -class MissingPrimaryCord (MissingPart): pass
54
55 -class DatabaseObject:
56 - def __init__ (self, db, **data):
57 self.db = db 58 self.__dict__.update (data)
59 @classmethod
60 - def make (Class, db, **fields):
61 c = db.cache.setdefault (Class.__name__, {}) 62 id = fields[Class.id_field] 63 if id not in c: 64 c[id] = Class (db, **fields) 65 return c[id]
66 @classmethod
67 - def with_id (Class, db, id):
68 c = db.cache.setdefault (Class.__name__, {}) 69 if id not in c: 70 all = Class.select (db, where = (Class.id_field, '=', id)) 71 if not all: return Class.none 72 c[id] = all[0] 73 return c[id]
74 @classmethod
75 - def select (Class, db, join = None, where = None, order_by = None):
76 c = db.cache.setdefault (Class.__name__, {}) 77 if (join, where, order_by) not in c: 78 args = [] 79 query = 'SELECT ' + ', '.join ('`%s`' % field for field in Class.fields) 80 query += ' FROM `%s`' % Class.table 81 if join is not None: 82 query += ' JOIN `%s` ON `%s` = `%s`' % join 83 if where is not None: 84 def parse_where (where): 85 if isinstance (where, list): 86 return ' AND '.join (parse_where (piece for piece in where)) 87 elif isinstance (where, tuple): 88 if len (where) == 1: 89 return ' WHERE `%s`' % where[0] 90 elif len (where) == 2: 91 return ' WHERE `%s` %s' % (where[0], where[1]) 92 elif len (where) == 3: 93 args.append (where[2]) 94 return ' WHERE `%s` %s ?' % (where[0], where[1]) 95 else: 96 raise ValueError ('where too long: %r' % where) 97 else: 98 return ' WHERE %s' % where
99 query += parse_where (where) 100 if order_by is not None: 101 if isinstance (order_by, (list, tuple)): 102 query += ' ORDER BY ' + ', '.join ('`%s`' % field for field in order_by) 103 else: 104 query += ' ORDER BY `%s`' % order_by 105 c[join, where, order_by] = \ 106 [Class.make (db, **dict ((Class.fields[i], row[i]) 107 for i in range (len (Class.fields)))) 108 for row in db.execute (query, args)] 109 return c[join, where, order_by]
110 - def __getstate__ (self):
111 d = dict (self.__dict__) 112 if 'db' in d: del d['db'] 113 return d
114 - def __setstate__ (self, d):
115 self.__dict__.update (d)
116 - def __repr__ (self):
117 d = dict (self.__dict__) 118 if 'db' in d: del d['db'] 119 return '%s(%r, %s)' % (self.__class__, self.db, 120 ', '.join ('%s=%r' % (key, value) for key, value in d.iteritems ()))
121 - def __str__ (self):
122 d = dict (self.__dict__) 123 if 'db' in d: del d['db'] 124 print '<%s %s>' % (self.__class__, 125 ', '.join ('%s=%r' % (key, value) for key, value in d.iteritems ()))
126
127 -class NoneType:
128 - def __init__ (self, *args, **keywords):
129 pass
130 - def __str__ (self):
131 return self.__class__.__name__ + '()'
132 - def __repr__ (self):
133 return self.__class__.__name__ + '()'
134
135 -class DescriptionTable:
136 - def __init__ (self, table, key_field, value_field):
137 self.table = table 138 self.key_field = key_field 139 self.value_field = value_field
140 - def lookup (self, db, key):
141 c = db.cache.setdefault (self.table, {}) 142 if key not in c: 143 answer = db.execute ('SELECT `%s` FROM `%s` WHERE `%s` = ?' % 144 (self.value_field, self.table, self.key_field), [key]) 145 try: 146 c[key] = answer.next () [0] 147 except StopIteration: 148 c[key] = None 149 return c[key]
150 151 archive_dc = DescriptionTable ('archive_dc', 'archive_num', 'archive_name') 152 beginning_dc = DescriptionTable ('beginning_dc', 'beg_cd', 'beginning_descr') 153 termination_dc = DescriptionTable ('termination_dc', 'term_cd', 'termination_descr') 154 fiber_dc = DescriptionTable ('fiber_dc', 'fiber_cd', 'fiber_descr') 155
156 -class Khipu (DatabaseObject):
157 table = 'khipu_main' 158 fields = ['investigator_num', 'khipu_id', 'museum_name', 'museum_num', 159 'nickname', 'provenance', 'region', 'archive_num', 'museum_descr', 160 'discovered_by', 'date_discovered', 'earliest_age', 'latest_age', 161 'complete', 'duplicate_flag', 'duplicate_id'] 162 id_field = 'khipu_id'
163 - def archive_name (self):
164 return archive_dc.lookup (self.db, self.archive_num)
165 - def description (self):
166 complete = ('yes' if self.complete else 'no') 167 if not self.duplicate_flag: 168 duplicate = ['no'] 169 else: 170 duplicate = ['of %d' % self.duplicate_id] 171 #data = tuple ((x if x and x != '0000-00-00' else 'none') 172 # for x in data) 173 return '''KHIPU %s (internal id %s) 174 Museum Name: %s 175 Museum Number: %s 176 Nickname: %s 177 178 Provenance: %s 179 Region: %s 180 Archive: %s - %s 181 Museum Description: %s 182 183 Discovered By: %s 184 Date Discovered: %s 185 Age Range: %s -- %s 186 187 Complete: %s 188 Duplicate: %s 189 190 %s 191 Number of Cord Clusters: %s 192 Number of Cords: %s 193 ''' % (self.investigator_num, self.khipu_id, self.museum_name, self.museum_num, 194 self.nickname or 'none', self.provenance, self.region, self.archive_num, 195 self.archive_name (), self.museum_descr or 'none', 196 self.discovered_by or 'unknown', 197 (self.date_discovered if self.date_discovered != '0000-00-00' 198 else 'unknown'), 199 self.earliest_age, self.latest_age, 200 ('yes' if self.complete else 'no'), 201 (self.duplicate_id if self.duplicate_flag else 'no'), 202 self.primary_cord ().describe (), 203 sum (1 for x in self.cord_clusters ()), 204 sum (1 for x in self.cords ()))
205 - def cords (self):
206 return Cord.select (self.db, 207 where = ('khipu_id', '=', self.khipu_id), order_by = 'cord_ordinal')
208 #select `cord_id`, `pendant_from`, `cluster_id` from `cord` where `khipu_id` == 1000023
209 - def sorted_cords (self):
210 return self.primary_cord().sorted_cords()
211 - def cord_clusters (self):
212 return CordCluster.select (self.db, 213 where = ('khipu_id', '=', self.khipu_id), order_by = 'ordinal')
214 #select `ordinal`, `num_cords`, `start_position`, `end_position`, `spacing`, `CLUSTER_LEVEL` from `cord_cluster` where `khipu_id` == 1000023
215 - def primary_cord (self):
216 try: 217 return PrimaryCord.select (self.db, 218 where = ('khipu_id', '=', self.khipu_id)) [0] 219 except (StopIteration, IndexError): 220 raise MissingPrimaryCord ('%s (%s)' % (self.investigator_num, self.khipu_id))
221 - def min_nonzero_thickness (self):
222 return self.db.execute ('SELECT MIN(`thickness`) FROM `cord` WHERE `thickness` != 0 AND `khipu_id` = ?', [self.khipu_id]).next ()[0]
223 - def nice_str (self):
224 return 'Khipu %s (%s)' % (self.investigator_num, self.khipu_id)
225 - def url (self):
226 return url (self)
227
228 -class KhipuNone (NoneType, Khipu):
229 investigator_num = 'None'
230 Khipu.NoneType = KhipuNone 231 Khipu.none = Khipu.NoneType () 232
233 -class PrimaryCord (DatabaseObject):
234 table = 'primary_cord' 235 fields = ['pcord_id', 'structure', 'thickness', 236 #'attached_to', ## always zero 237 'pcord_length', 'fiber', 'termination', 'beginning', 'twist'] 238 id_field = 'pcord_id'
239 - def beginning_descr (self):
240 return beginning_dc.lookup (self.db, self.beginning)
241 - def termination_descr (self):
242 return termination_dc.lookup (self.db, self.termination)
243 - def fiber_descr (self):
244 return fiber_dc.lookup (self.db, self.fiber)
245 - def describe (self):
246 return '''Primary Cord: 247 Total Length: %s 248 Beginning: %s - %s 249 Termination: %s - %s 250 Final Twist: %s 251 Thickness: %s 252 Fiber: %s - %s 253 ''' % (self.pcord_length, self.beginning, self.beginning_descr (), 254 self.termination, self.termination_descr (), self.twist, 255 self.thickness, self.fiber, self.fiber_descr ())
256 - def khipu (self):
257 return Khipu.with_id (self.db, self.khipu_id)
258 - def investigator_num (self):
259 return self.khipu ().investigator_num
260 - def cord_clusters (self):
261 return CordCluster.select (self.db, 262 join = ('cord', 'cord_cluster.cluster_id', '=', 'cord.cluster_id'), 263 where = ('cord.pendant_from', '=', self.pcord_id), order_by = 'ordinal')
264 - def cords (self):
265 return Cord.select (self.db, 266 where = ('pendant_from', '=', self.pcord_id), order_by = 'cord_ordinal')
267 - def sorted_cords (self):
268 for c in self.cords(): 269 for cc in c.sorted_cords(): 270 yield cc
271 - def nice_str (self):
272 return 'Primary Cord (%s) of Khipu %s (%s): length %s, beginning %s, termination %s, final twist %s, thickness %s, fiber %s' % (self.pcord_id, self.investigator_num (), self.khipu_id, self.pcord_length, self.beginning_descr (), self.termination_descr (), self.twist, self.thickness, self.fiber_descr ())
273
274 -class CordCluster (DatabaseObject):
275 table = 'cord_cluster' 276 fields = ['khipu_id', 'cord_id', 'cluster_id', 'ordinal', 'start_position', 277 'end_position', 'num_cords', 'cluster_level', 'spacing', 278 #'beg_cord', 'end_cord', 'grouping_class', 'beg_inv_cord', 279 #'end_inv_cord', 280 ] 281 id_field = 'cluster_id'
282 - def khipu (self):
283 return Khipu.with_id (self.db, self.khipu_id)
284 - def investigator_num (self):
285 return self.khipu ().investigator_num
286 - def cords (self):
287 return Cord.select (self.db, where = ('cluster_id', '=', self.cluster_id), 288 order_by = 'cord_ordinal')
289 - def url (self):
290 return url (self.khipu (), 'C%d' % self.cluster_id)
291 - def nice_str (self):
292 return '%s CordCluster (%s) of Khipu %s (%s): level %s from %s to %s spaced %s cm' % (self.num_cords, self.cluster_id, self.investigator_num (), self.khipu_id, self.cluster_level, self.start_position, self.end_position, self.spacing)
293
294 -class Cord (DatabaseObject):
295 table = 'cord' 296 fields = ['khipu_id', 'cord_id', 'twist_angle', 'thickness', 'fiber', 297 'termination', 'twist', 'pendant_from', 298 #'attached_to', 'attach_pos', 299 'attachment_type', 300 'cluster_id', 'cluster_ordinal', 'cord_ordinal', 'cord_length', 301 'cord_level', 302 #'cord_classification', 'investigator_cord_num' 303 ] 304 id_field = 'cord_id'
305 - def termination_descr (self):
306 return termination_dc.lookup (self.db, self.termination)
307 - def fiber_descr (self):
308 return fiber_dc.lookup (self.db, self.fiber)
309 - def khipu (self):
310 return Khipu.with_id (self.db, self.khipu_id)
311 - def investigator_num (self):
312 return self.khipu ().investigator_num
313 - def cord_cluster (self):
314 return CordCluster.with_id (self.db, self.cluster_id)
315 - def cord_clusters (self):
316 return CordCluster.select (self.db, 317 join = ('cord', 'cord_cluster.cluster_id', '=', 'cord.cluster_id'), 318 where = ('cord.pendant_from', '=', self.cord_id), order_by = 'ordinal')
319 - def cords (self):
320 return Cord.select (self.db, 321 where = ('pendant_from', '=', self.cord_id), order_by = 'cord_ordinal')
322 - def sorted_cords (self):
323 yield self 324 for c in self.cords(): 325 for cc in c.sorted_cords(): 326 yield cc
327 - def knot_clusters (self):
328 return KnotCluster.select (self.db, 329 where = ('cord_id', '=', self.cord_id), order_by = 'ordinal')
330 - def knots (self):
331 return Knot.select (self.db, 332 where = ('cord_id', '=', self.cord_id), order_by = 'knot_ordinal')
333 - def position (self): # returns a decimal when using MySQL backend
334 cluster = self.cord_cluster () 335 if cluster.num_cords == 1: 336 assert self.cluster_ordinal == 1 337 return cluster.start_position 338 elif cluster.start_position == cluster.end_position: 339 return cluster.start_position 340 else: 341 assert cluster.end_position 342 return cluster.start_position + \ 343 (cluster.end_position - cluster.start_position) * \ 344 (self.cluster_ordinal - 1) / (cluster.num_cords - 1)
345 - def short_string (self):
346 if not hasattr (self, 'short_string_'): 347 self.short_string_ = ' '.join (knot_cluster.short_string () for knot_cluster in self.knot_clusters ()) 348 return self.short_string_
349 - def numerical (self):
350 """Test whether the cord encodes a numerical value (approximate). 351 352 This function is inexact: it returns true for knot sequences that 353 could be numbers, even if the knot spacing indicates otherwise. 354 On the other hand, it has been conjectured that multiple numbers might 355 be encoded on a single string, using the long knots as a separator; 356 these sequences will be marked *non*-numerical by this function.""" 357 return numerical_short_string (self.short_string ())
358 - def numerify (self):
359 """Convert the cord into a numerical value encodeed (approximate). 360 361 This function is *not* guaranteed to return the exact number encoded. 362 We ignore "zeros" in the input string (which are encoded by gaps 363 between knots in a fuzzy way, and therefore hard to make algorithmic), 364 except for the last zero which we can detect because the ones digit is 365 specially marked. Thus the numbers 10, 100, and 1000 all return 10 366 from `numerify`, although at least we can distinguish this equivalence 367 group from 1 (because the ones digit is specially marked). Similarly, 368 the numbers 164, 1064, and 1604 all return 164 from `numerify`, 369 although 1640 (and 16400) will return 1640. 370 371 Returns ``None`` if the string is not considered numerical 372 according to `numerical`.""" 373 return numerify_short_string (self.short_string ())
374 - def url (self):
375 return url (self.khipu (), 'c%d' % self.cord_id)
376 - def nice_str (self):
377 return '%s Cord (%s) of Khipu %s (%s): length %s, termination %s, final twist %s, thickness %s, fiber %s' % (self.short_string () or 'blank', self.cord_id, self.investigator_num (), self.khipu_id, self.cord_length, self.termination_descr (), self.twist or 'unknown', self.thickness, self.fiber_descr ())
378
379 -class KnotCluster (DatabaseObject):
380 table = 'knot_cluster' 381 fields = ['cord_id', 'cluster_id', 'start_pos', 'end_pos', 'total_value', 382 'ordinal', 'num_knots'] 383 id_field = 'cluster_id'
384 - def cord (self):
385 return Cord.with_id (self.db, self.cord_id)
386 - def khipu_id (self):
387 return self.cord ().khipu_id
388 - def khipu (self):
389 return self.cord ().khipu ()
390 - def investigator_num (self):
391 return self.khipu ().investigator_num
392 - def knots (self):
393 return Knot.select (self.db, 394 where = #[('cord_id', '=', self.cord_id), 395 ('cluster_id', '=', self.cluster_id), 396 order_by = 'knot_ordinal')
397 - def short_string (self):
398 s = ''.join (knot.short_string () for knot in self.knots ()) 399 if s.replace ('S', '') == '': 400 return 'S%d' % len (s) 401 return s
402 - def url (self):
403 return url (self.khipu (), 'K%d' % self.cluster_id)
404 - def nice_str (self):
405 return '%s KnotCluster (%s) of Khipu %s (%s): from %s to %s' % (self.short_string (), self.cluster_id, self.investigator_num (), self.khipu_id, self.start_pos, self.end_pos)
406
407 -class Knot (DatabaseObject):
408 table = 'knot' 409 fields = ['cord_id', 'type_code', 'direction', 'knot_value', 'cluster_id', 410 'knot_id', 'knot_ordinal', 'cluster_ordinal', 'num_turns'] 411 id_field = 'knot_id'
412 - def cord (self):
413 return Cord.with_id (self.db, self.cord_id)
414 - def khipu_id (self):
415 return self.cord ().khipu_id
416 - def khipu (self):
417 return self.cord ().khipu ()
418 - def investigator_num (self):
419 return self.khipu ().investigator_num
420 - def knot_cluster (self):
421 return KnotCluster.with_id (self.db, self.cluster_id)
422 - def position (self):
423 cluster = self.knot_cluster () 424 if cluster.num_knots == 1: 425 assert self.cluster_ordinal == 1 426 return cluster.start_pos 427 elif cluster.start_pos == cluster.end_pos: 428 return cluster.start_pos 429 else: 430 if not cluster.end_pos: return cluster.start_pos ## xxx hack 431 return cluster.start_pos + \ 432 ((self.cluster_ordinal - 1) / (cluster.num_knots - 1)) * \ 433 (cluster.end_pos - cluster.start_pos)
434 - def short_string (self):
435 if self.num_turns: 436 return '%s%d' % (self.type_code, self.num_turns) 437 else: 438 return self.type_code
439 - def url (self):
440 return url (self.khipu (), 'k%d' % self.knot_id)
441 - def nice_str (self):
442 return '%s Knot (%s) of Khipu %s (%s): direction %s' % (self.short_string (), self.knot_id, self.investigator_num (), self.khipu_id, self.direction)
443
444 -class KhipuDB:
445 - def __init__ (self, filename=None):
446 if dbmode == 'sqlite' or filename is not None: 447 if filename is None: 448 filename = os.path.join (os.path.dirname (__file__), 'khipu.db') 449 self.db = sqlite.connect (filename) 450 self.dbmode = 'sqlite' 451 else: 452 import MySQLdb # if filename is None, might not have been imported 453 self.db = MySQLdb.connect(host='sql.mit.edu', user='khipu', 454 passwd='c7bfe561', db='khipu+khipu') 455 self.dbmode = 'mysql' 456 self.filename = (filename if filename is not None else 'sql') + '.cache' 457 self.load_cache ()
458 - def close (self):
459 self.db.close ()
460 - def execute (self, *args):
461 if self.dbmode == 'sqlite': 462 return self.db.execute (*args) 463 # otherwise, we need to munge the syntax a little bit for MySQL 464 isSelect = len(args)>0 and args[0].lower().startswith('select') 465 if len(args)>1: 466 first = args[0].replace('?','%s') 467 args = (first,) + args[1:] 468 cursor = self.db.cursor() 469 result = cursor.execute (*args) 470 if isSelect: 471 return iter(cursor.fetchall()) 472 else: 473 return result
474 - def khipus (self):
475 return Khipu.select (self, order_by = 'investigator_num')
476 - def cords (self):
477 return Cord.select (self, order_by = 'cord_id')
478 - def cord_clusters (self):
479 return CordCluster.select (self, order_by = 'cluster_id')
480 - def knots (self):
481 return Knot.select (self, order_by = 'knot_id')
482 - def knot_clusters (self):
483 return KnotCluster.select (self, order_by = 'cluster_id')
484 - def __iter__ (self):
485 return iter (self.khipus ())
486 - def khipu_with_investigator_num (self, inum):
487 return Khipu.select (self, where = ('investigator_num', '=', inum)) [0]
488 - def load_cache (self):
489 try: 490 cache_file = open (self.filename, 'rb') 491 self.cache = pickle.load (cache_file) 492 if not self.cache: self.cache = {} 493 cache_file.close () 494 except (IOError, OSError, EOFError, pickle.PickleError, pickle.UnpicklingError): 495 self.cache = {} 496 fix_cache (self.cache, self)
497 - def save_cache (self):
498 cache_file = open (self.filename, 'wb') 499 pickle.dump (self.cache, cache_file) 500 cache_file.close ()
501
502 -def fix_cache (cache, db):
503 if isinstance (cache, list): 504 for x in cache: 505 fix_cache (x, db) 506 elif isinstance (cache, dict): 507 for x in cache.itervalues (): 508 fix_cache (x, db) 509 elif isinstance (cache, (str, unicode, int, float)) or cache is None: 510 pass 511 else: 512 cache.db = db
513 514 numerical_re = r'^(S. )*(S.|L.|E)?$' 515 numerical_rec = re.compile (numerical_re)
516 -def numerical_short_string (short):
517 """WARNING: Results are inexact; see `Cord.numerical` above.""" 518 return bool (numerical_rec.search (short))
519
520 -def numerify_short_string (short):
521 """WARNING: Results are inexact; see `Cord.numerify` above.""" 522 if not numerical_short_string(short): 523 return None 524 short = re.sub (r'(S.)$', r'\1 L0', short) 525 short = re.sub (r'E$', r'L1', short) 526 short = re.sub (r'\s+', r'', short) 527 short = re.sub (r'[SL]', r'', short) 528 if short != '': 529 return int (short) 530 return 0
531 532 url_map = { 533 Cord: ("cords.html", "cords_by_short_string.html" ), 534 Khipu: ("khipus.html", "khipus_by_short_string.html" ), 535 } 536
537 -def url (kind, part = None): #, concordance = False):
538 if isinstance (kind, Khipu): 539 kind = kind.investigator_num + '.svg' 540 url = 'http://projects.csail.mit.edu/khipu/draw/' + kind 541 if part is not None: url += '#' + part 542 return url 543
544 -def main ():
545 """Prints out the Urton-style description string for the khipu with the 546 investigator number given on the command line.""" 547 import sys 548 db = KhipuDB ('khipu.db') 549 q = db.khipu_with_investigator_num (sys.argv[1]) 550 print q.description ()
551 552 if __name__ == '__main__': main () 553