1
2 """KhipuDB is the main interface to the database of khipus.
3
4 It contains code to query the database for various khipu properties, as well
5 as some code to interpret the information in the database or make it easier
6 to use. For examples, see `Khipu.sorted_cords` and `Cord.numerical`.
7
8 A simple example of the use of KhipuDB::
9
10 import KhipuDB
11 db = KhipuDB.KhipuDB () ## loads 'khipu.db' from wherever KhipuDB.py lives
12 db = KhipuDB.KhipuDB ('your_db')
13 ## not for faint of heart: db.execute ('...arbitrary SQL command...')
14 khipu = db.khipu_with_investigator_num ('UR001')
15 print khipu.description () ## ASCII description of most text fields
16 print [cord.cord_length for cord in khipu.cords ()]
17 cord = khipu.cords () [11] ## some random cord
18 print cord.short_string () ## 'S1 L9'
19 print cord.numerical () ## True
20 print cord.numerify () ## 19
21
22 """
23 from __future__ import division
24 import cPickle as pickle, datetime, os, re
25
26
27 preferred_db = ['sqlite', 'mysql']
28
29 """This allows the user to specify whether they prefer to use the sqlite or
30 mysql interface to the database.
31
32 Sqlite is local, but poorly indexed. MySQL queries can run faster due
33 to better indexing, but the network imposes latency on each request.
34 """
35
36 for dbmode in preferred_db:
37 try:
38 if dbmode == 'sqlite':
39 import sqlite3 as sqlite
40 assert sqlite.paramstyle == 'qmark'
41 elif dbmode == 'mysql':
42 import MySQLdb
43 else:
44 continue
45 except ImportError:
46 continue
47 else:
48 break
49 else:
50 raise ImportError ('Neither sqlite3 nor MySQL could be imported')
51
54
57 self.db = db
58 self.__dict__.update (data)
59 @classmethod
60 - def make (Class, db, **fields):
61 c = db.cache.setdefault (Class.__name__, {})
62 id = fields[Class.id_field]
63 if id not in c:
64 c[id] = Class (db, **fields)
65 return c[id]
66 @classmethod
68 c = db.cache.setdefault (Class.__name__, {})
69 if id not in c:
70 all = Class.select (db, where = (Class.id_field, '=', id))
71 if not all: return Class.none
72 c[id] = all[0]
73 return c[id]
74 @classmethod
75 - def select (Class, db, join = None, where = None, order_by = None):
76 c = db.cache.setdefault (Class.__name__, {})
77 if (join, where, order_by) not in c:
78 args = []
79 query = 'SELECT ' + ', '.join ('`%s`' % field for field in Class.fields)
80 query += ' FROM `%s`' % Class.table
81 if join is not None:
82 query += ' JOIN `%s` ON `%s` = `%s`' % join
83 if where is not None:
84 def parse_where (where):
85 if isinstance (where, list):
86 return ' AND '.join (parse_where (piece for piece in where))
87 elif isinstance (where, tuple):
88 if len (where) == 1:
89 return ' WHERE `%s`' % where[0]
90 elif len (where) == 2:
91 return ' WHERE `%s` %s' % (where[0], where[1])
92 elif len (where) == 3:
93 args.append (where[2])
94 return ' WHERE `%s` %s ?' % (where[0], where[1])
95 else:
96 raise ValueError ('where too long: %r' % where)
97 else:
98 return ' WHERE %s' % where
99 query += parse_where (where)
100 if order_by is not None:
101 if isinstance (order_by, (list, tuple)):
102 query += ' ORDER BY ' + ', '.join ('`%s`' % field for field in order_by)
103 else:
104 query += ' ORDER BY `%s`' % order_by
105 c[join, where, order_by] = \
106 [Class.make (db, **dict ((Class.fields[i], row[i])
107 for i in range (len (Class.fields))))
108 for row in db.execute (query, args)]
109 return c[join, where, order_by]
111 d = dict (self.__dict__)
112 if 'db' in d: del d['db']
113 return d
117 d = dict (self.__dict__)
118 if 'db' in d: del d['db']
119 return '%s(%r, %s)' % (self.__class__, self.db,
120 ', '.join ('%s=%r' % (key, value) for key, value in d.iteritems ()))
122 d = dict (self.__dict__)
123 if 'db' in d: del d['db']
124 print '<%s %s>' % (self.__class__,
125 ', '.join ('%s=%r' % (key, value) for key, value in d.iteritems ()))
126
128 - def __init__ (self, *args, **keywords):
131 return self.__class__.__name__ + '()'
133 return self.__class__.__name__ + '()'
134
136 - def __init__ (self, table, key_field, value_field):
137 self.table = table
138 self.key_field = key_field
139 self.value_field = value_field
141 c = db.cache.setdefault (self.table, {})
142 if key not in c:
143 answer = db.execute ('SELECT `%s` FROM `%s` WHERE `%s` = ?' %
144 (self.value_field, self.table, self.key_field), [key])
145 try:
146 c[key] = answer.next () [0]
147 except StopIteration:
148 c[key] = None
149 return c[key]
150
151 archive_dc = DescriptionTable ('archive_dc', 'archive_num', 'archive_name')
152 beginning_dc = DescriptionTable ('beginning_dc', 'beg_cd', 'beginning_descr')
153 termination_dc = DescriptionTable ('termination_dc', 'term_cd', 'termination_descr')
154 fiber_dc = DescriptionTable ('fiber_dc', 'fiber_cd', 'fiber_descr')
155
156 -class Khipu (DatabaseObject):
157 table = 'khipu_main'
158 fields = ['investigator_num', 'khipu_id', 'museum_name', 'museum_num',
159 'nickname', 'provenance', 'region', 'archive_num', 'museum_descr',
160 'discovered_by', 'date_discovered', 'earliest_age', 'latest_age',
161 'complete', 'duplicate_flag', 'duplicate_id']
162 id_field = 'khipu_id'
166 complete = ('yes' if self.complete else 'no')
167 if not self.duplicate_flag:
168 duplicate = ['no']
169 else:
170 duplicate = ['of %d' % self.duplicate_id]
171
172
173 return '''KHIPU %s (internal id %s)
174 Museum Name: %s
175 Museum Number: %s
176 Nickname: %s
177
178 Provenance: %s
179 Region: %s
180 Archive: %s - %s
181 Museum Description: %s
182
183 Discovered By: %s
184 Date Discovered: %s
185 Age Range: %s -- %s
186
187 Complete: %s
188 Duplicate: %s
189
190 %s
191 Number of Cord Clusters: %s
192 Number of Cords: %s
193 ''' % (self.investigator_num, self.khipu_id, self.museum_name, self.museum_num,
194 self.nickname or 'none', self.provenance, self.region, self.archive_num,
195 self.archive_name (), self.museum_descr or 'none',
196 self.discovered_by or 'unknown',
197 (self.date_discovered if self.date_discovered != '0000-00-00'
198 else 'unknown'),
199 self.earliest_age, self.latest_age,
200 ('yes' if self.complete else 'no'),
201 (self.duplicate_id if self.duplicate_flag else 'no'),
202 self.primary_cord ().describe (),
203 sum (1 for x in self.cord_clusters ()),
204 sum (1 for x in self.cords ()))
206 return Cord.select (self.db,
207 where = ('khipu_id', '=', self.khipu_id), order_by = 'cord_ordinal')
208
214
222 return self.db.execute ('SELECT MIN(`thickness`) FROM `cord` WHERE `thickness` != 0 AND `khipu_id` = ?', [self.khipu_id]).next ()[0]
227
230 Khipu.NoneType = KhipuNone
231 Khipu.none = Khipu.NoneType ()
232
234 table = 'primary_cord'
235 fields = ['pcord_id', 'structure', 'thickness',
236
237 'pcord_length', 'fiber', 'termination', 'beginning', 'twist']
238 id_field = 'pcord_id'
246 return '''Primary Cord:
247 Total Length: %s
248 Beginning: %s - %s
249 Termination: %s - %s
250 Final Twist: %s
251 Thickness: %s
252 Fiber: %s - %s
253 ''' % (self.pcord_length, self.beginning, self.beginning_descr (),
254 self.termination, self.termination_descr (), self.twist,
255 self.thickness, self.fiber, self.fiber_descr ())
261 return CordCluster.select (self.db,
262 join = ('cord', 'cord_cluster.cluster_id', '=', 'cord.cluster_id'),
263 where = ('cord.pendant_from', '=', self.pcord_id), order_by = 'ordinal')
265 return Cord.select (self.db,
266 where = ('pendant_from', '=', self.pcord_id), order_by = 'cord_ordinal')
272 return 'Primary Cord (%s) of Khipu %s (%s): length %s, beginning %s, termination %s, final twist %s, thickness %s, fiber %s' % (self.pcord_id, self.investigator_num (), self.khipu_id, self.pcord_length, self.beginning_descr (), self.termination_descr (), self.twist, self.thickness, self.fiber_descr ())
273
275 table = 'cord_cluster'
276 fields = ['khipu_id', 'cord_id', 'cluster_id', 'ordinal', 'start_position',
277 'end_position', 'num_cords', 'cluster_level', 'spacing',
278
279
280 ]
281 id_field = 'cluster_id'
287 return Cord.select (self.db, where = ('cluster_id', '=', self.cluster_id),
288 order_by = 'cord_ordinal')
290 return url (self.khipu (), 'C%d' % self.cluster_id)
292 return '%s CordCluster (%s) of Khipu %s (%s): level %s from %s to %s spaced %s cm' % (self.num_cords, self.cluster_id, self.investigator_num (), self.khipu_id, self.cluster_level, self.start_position, self.end_position, self.spacing)
293
294 -class Cord (DatabaseObject):
295 table = 'cord'
296 fields = ['khipu_id', 'cord_id', 'twist_angle', 'thickness', 'fiber',
297 'termination', 'twist', 'pendant_from',
298
299 'attachment_type',
300 'cluster_id', 'cluster_ordinal', 'cord_ordinal', 'cord_length',
301 'cord_level',
302
303 ]
304 id_field = 'cord_id'
316 return CordCluster.select (self.db,
317 join = ('cord', 'cord_cluster.cluster_id', '=', 'cord.cluster_id'),
318 where = ('cord.pendant_from', '=', self.cord_id), order_by = 'ordinal')
320 return Cord.select (self.db,
321 where = ('pendant_from', '=', self.cord_id), order_by = 'cord_ordinal')
323 yield self
324 for c in self.cords():
325 for cc in c.sorted_cords():
326 yield cc
328 return KnotCluster.select (self.db,
329 where = ('cord_id', '=', self.cord_id), order_by = 'ordinal')
331 return Knot.select (self.db,
332 where = ('cord_id', '=', self.cord_id), order_by = 'knot_ordinal')
334 cluster = self.cord_cluster ()
335 if cluster.num_cords == 1:
336 assert self.cluster_ordinal == 1
337 return cluster.start_position
338 elif cluster.start_position == cluster.end_position:
339 return cluster.start_position
340 else:
341 assert cluster.end_position
342 return cluster.start_position + \
343 (cluster.end_position - cluster.start_position) * \
344 (self.cluster_ordinal - 1) / (cluster.num_cords - 1)
350 """Test whether the cord encodes a numerical value (approximate).
351
352 This function is inexact: it returns true for knot sequences that
353 could be numbers, even if the knot spacing indicates otherwise.
354 On the other hand, it has been conjectured that multiple numbers might
355 be encoded on a single string, using the long knots as a separator;
356 these sequences will be marked *non*-numerical by this function."""
357 return numerical_short_string (self.short_string ())
359 """Convert the cord into a numerical value encodeed (approximate).
360
361 This function is *not* guaranteed to return the exact number encoded.
362 We ignore "zeros" in the input string (which are encoded by gaps
363 between knots in a fuzzy way, and therefore hard to make algorithmic),
364 except for the last zero which we can detect because the ones digit is
365 specially marked. Thus the numbers 10, 100, and 1000 all return 10
366 from `numerify`, although at least we can distinguish this equivalence
367 group from 1 (because the ones digit is specially marked). Similarly,
368 the numbers 164, 1064, and 1604 all return 164 from `numerify`,
369 although 1640 (and 16400) will return 1640.
370
371 Returns ``None`` if the string is not considered numerical
372 according to `numerical`."""
373 return numerify_short_string (self.short_string ())
375 return url (self.khipu (), 'c%d' % self.cord_id)
377 return '%s Cord (%s) of Khipu %s (%s): length %s, termination %s, final twist %s, thickness %s, fiber %s' % (self.short_string () or 'blank', self.cord_id, self.investigator_num (), self.khipu_id, self.cord_length, self.termination_descr (), self.twist or 'unknown', self.thickness, self.fiber_descr ())
378
380 table = 'knot_cluster'
381 fields = ['cord_id', 'cluster_id', 'start_pos', 'end_pos', 'total_value',
382 'ordinal', 'num_knots']
383 id_field = 'cluster_id'
393 return Knot.select (self.db,
394 where =
395 ('cluster_id', '=', self.cluster_id),
396 order_by = 'knot_ordinal')
398 s = ''.join (knot.short_string () for knot in self.knots ())
399 if s.replace ('S', '') == '':
400 return 'S%d' % len (s)
401 return s
403 return url (self.khipu (), 'K%d' % self.cluster_id)
406
407 -class Knot (DatabaseObject):
408 table = 'knot'
409 fields = ['cord_id', 'type_code', 'direction', 'knot_value', 'cluster_id',
410 'knot_id', 'knot_ordinal', 'cluster_ordinal', 'num_turns']
411 id_field = 'knot_id'
423 cluster = self.knot_cluster ()
424 if cluster.num_knots == 1:
425 assert self.cluster_ordinal == 1
426 return cluster.start_pos
427 elif cluster.start_pos == cluster.end_pos:
428 return cluster.start_pos
429 else:
430 if not cluster.end_pos: return cluster.start_pos
431 return cluster.start_pos + \
432 ((self.cluster_ordinal - 1) / (cluster.num_knots - 1)) * \
433 (cluster.end_pos - cluster.start_pos)
435 if self.num_turns:
436 return '%s%d' % (self.type_code, self.num_turns)
437 else:
438 return self.type_code
440 return url (self.khipu (), 'k%d' % self.knot_id)
443
446 if dbmode == 'sqlite' or filename is not None:
447 if filename is None:
448 filename = os.path.join (os.path.dirname (__file__), 'khipu.db')
449 self.db = sqlite.connect (filename)
450 self.dbmode = 'sqlite'
451 else:
452 import MySQLdb
453 self.db = MySQLdb.connect(host='sql.mit.edu', user='khipu',
454 passwd='c7bfe561', db='khipu+khipu')
455 self.dbmode = 'mysql'
456 self.filename = (filename if filename is not None else 'sql') + '.cache'
457 self.load_cache ()
461 if self.dbmode == 'sqlite':
462 return self.db.execute (*args)
463
464 isSelect = len(args)>0 and args[0].lower().startswith('select')
465 if len(args)>1:
466 first = args[0].replace('?','%s')
467 args = (first,) + args[1:]
468 cursor = self.db.cursor()
469 result = cursor.execute (*args)
470 if isSelect:
471 return iter(cursor.fetchall())
472 else:
473 return result
475 return Khipu.select (self, order_by = 'investigator_num')
477 return Cord.select (self, order_by = 'cord_id')
481 return Knot.select (self, order_by = 'knot_id')
485 return iter (self.khipus ())
487 return Khipu.select (self, where = ('investigator_num', '=', inum)) [0]
489 try:
490 cache_file = open (self.filename, 'rb')
491 self.cache = pickle.load (cache_file)
492 if not self.cache: self.cache = {}
493 cache_file.close ()
494 except (IOError, OSError, EOFError, pickle.PickleError, pickle.UnpicklingError):
495 self.cache = {}
496 fix_cache (self.cache, self)
498 cache_file = open (self.filename, 'wb')
499 pickle.dump (self.cache, cache_file)
500 cache_file.close ()
501
503 if isinstance (cache, list):
504 for x in cache:
505 fix_cache (x, db)
506 elif isinstance (cache, dict):
507 for x in cache.itervalues ():
508 fix_cache (x, db)
509 elif isinstance (cache, (str, unicode, int, float)) or cache is None:
510 pass
511 else:
512 cache.db = db
513
514 numerical_re = r'^(S. )*(S.|L.|E)?$'
515 numerical_rec = re.compile (numerical_re)
517 """WARNING: Results are inexact; see `Cord.numerical` above."""
518 return bool (numerical_rec.search (short))
519
521 """WARNING: Results are inexact; see `Cord.numerify` above."""
522 if not numerical_short_string(short):
523 return None
524 short = re.sub (r'(S.)$', r'\1 L0', short)
525 short = re.sub (r'E$', r'L1', short)
526 short = re.sub (r'\s+', r'', short)
527 short = re.sub (r'[SL]', r'', short)
528 if short != '':
529 return int (short)
530 return 0
531
532 url_map = {
533 Cord: ("cords.html", "cords_by_short_string.html" ),
534 Khipu: ("khipus.html", "khipus_by_short_string.html" ),
535 }
536
537 -def url (kind, part = None):
538 if isinstance (kind, Khipu):
539 kind = kind.investigator_num + '.svg'
540 url = 'http://projects.csail.mit.edu/khipu/draw/' + kind
541 if part is not None: url += '#' + part
542 return url
543
545 """Prints out the Urton-style description string for the khipu with the
546 investigator number given on the command line."""
547 import sys
548 db = KhipuDB ('khipu.db')
549 q = db.khipu_with_investigator_num (sys.argv[1])
550 print q.description ()
551
552 if __name__ == '__main__': main ()
553