#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function


## Serialization helpers
import itertools as it, operator as op, functools as ft
from struct import pack, unpack, calcsize
head = 'I' # struct format of the blob header: a single native unsigned int
hs = calcsize(head) # size of the packed header, in bytes

# Pre-bind the header format, so pack() takes only the header value(s)
#  and unpack() takes only the buffer to decode
pack = ft.partial(pack, head)
unpack = ft.partial(unpack, head)


def blob(data, *head):
	'''Return data prefixed with the packed header built from the head value(s).'''
	return pack(*head) + data

def blob_head(blob):
	'''Return the tuple of header values decoded from the first hs bytes of blob.'''
	return unpack(blob[:hs])

def blob_data(blob):
	'''Return the payload of blob - everything past the hs-byte header.'''
	return blob[hs:]


from bsddb import btopen
import fcntl

def backup(src, dst):
	'''Store or refresh a copy of the directory tree at src in the btree db file dst.

	The db is opened (created if missing) before the working directory is
	changed to src, and the tree is then walked from ".", so every key is a
	path relative to src:
	  - "<dir>/" is a directory record, its value is a data-less blob carrying the dir mtime
	  - "<dir>/" + NUL byte + "<name>" is a file record, its value is blob(contents, mtime)
	Keys with a \\1 byte in place of NUL denote subdir entries; this function
	recognizes them when pruning, but never writes them itself.

	A directory record is (re)written when it is missing or older than the
	directory on disk. File records are refreshed when the file on disk is
	newer and dropped when the file is gone. Files that have no record yet
	are stored only for directories whose record was just (re)written.

	Arguments:
	  src - directory to back up (becomes the process working directory)
	  dst - path to the db file

	Reads the module-level optz.delay to pause between directories.
	The db is synced and closed on the way out, including on errors.
	'''
	from os import stat
	from time import sleep
	import os

	db = btopen(dst, 'c') # must happen before chdir, so that relative dst still resolves
	try:
		os.chdir(src)
		fcntl.lockf(db.db.fd(), fcntl.LOCK_EX) # important, to prevent corruption due to concurrent access
		for root,dirs,files in os.walk('.'):
			# db key -> (mtime, fs path) for every file in this dir
			files = dict( (k, (int(stat(path).st_mtime), path))
				for k,path in ( (b'{0}/\0{1}'.format(root, name),
					b'{0}/{1}'.format(root, name) ) for name in files) )
			# db key of a subdir entry -> key of that subdir's own record
			# (name is rebound here, os.walk keeps using its own list)
			dirs = dict(( b'{0}/\1{1}'.format(root, name),
				 b'{0}/{1}/'.format(root, name) ) for name in dirs)

			root = b'{0}/'.format(root)
			ts = int(stat(root).st_mtime)
			# On btree, set_location() lands on the next key in sort order
			#  if root itself is missing, hence the "k != root" check below
			try: k,v = db.set_location(root)
			except KeyError: k,v_ts = b'',0
			else: v_ts, = blob_head(v)

			# Update root node, if necessary
			if k != root or ts > v_ts:
				db[root] = blob(b'', ts)
				root_updated = True
			else: root_updated = False

			# Check db files
			try: k,v = db.next()
			except KeyError: pass
			else:
				ks = len(root)
				while True:
					# Stop at the first key outside of this dir or inside of some subdir
					if not k.startswith(root) or b'/' in k[ks:]: break

					if k != root:
						kt = k[ks] # entry type marker: \0 - file, \1 - subdir
						if kt == b'\0':
							# File is gone from fs - drop the record, regardless of whether
							#  dir mtime has reflected that (files.pop would fail otherwise)
							if k not in files: del db[k] # deleted file
							else:
								(file_ts, path), (v_ts,) = files.pop(k), blob_head(v)
								if file_ts > v_ts:
									with open(path, 'rb') as src_file: data = src_file.read()
									db[k] = blob(data, file_ts) # update stored file
						elif root_updated and kt == b'\1' and k not in dirs: # deleted dir
							# Prune all the paths that are recorded under it
							# Subtree prefix is derived from the key itself (marker byte
							#  dropped, slash appended), as the dir is not in "dirs" anymore
							subroot = k[:ks] + k[ks+1:] + b'/'
							try: sk,sv = db.set_location(subroot)
							except KeyError: pass # nothing is stored at or past subroot
							else:
								while sk.startswith(subroot):
									del db[sk]
									try: sk,sv = db.next()
									except KeyError: break
							# Return db cursor to the previous location
							db.set_location(k)
							del db[k]
						else: break # subdir files ahead

					try: k,v = db.next()
					except KeyError: break

			# Store new files
			# New dirs will be stored when os.walk will recurse them
			if root_updated:
				for k,(file_ts,path) in files.iteritems():
					with open(path, 'rb') as src_file: data = src_file.read()
					db[k] = blob(data, file_ts)

			# Delay next iteration, if requested
			if optz.delay: sleep(optz.delay)

	finally:
		db.sync()
		db.close()


def extract(src, dst=None, verbose=False, dry_run=False):
	'''List and/or unpack the contents of the btree db file src.

	Keys are walked in db order. A key ending with "/" is a directory record
	(the top-level "./" is skipped); for any other key the part after the last
	"/" starts with a NUL byte for a file or with a \\1 byte for a subdir
	entry, which is ignored here, as the subdir has a record of its own.

	Arguments:
	  src - path to an existing db file, opened read-only under a shared lock
	  dst - directory to unpack into, required unless dry_run is set
	  verbose - print every directory key and file path encountered
	  dry_run - do not touch the filesystem (useful along with verbose)

	Directories are created with os.mkdir, so they must not exist under dst
	beforehand. Stored timestamps are applied as both atime and mtime.

	Raises ValueError if extraction is requested without dst.
	'''
	# Fail early and clearly, instead of a NameError on the first path to write
	if not dry_run and not dst:
		raise ValueError('Destination path is required to extract files.')

	from os.path import join
	import os

	if dst: abs_path = ft.partial(join, dst)

	# Read-only, so that a mistyped src is an error and not a new empty db
	db = btopen(src, 'r')
	try:
		fcntl.lockf(db.db.fd(), fcntl.LOCK_SH)
		# Creating entries inside a directory bumps its mtime, so the
		#  recorded dir timestamps are applied only after everything is unpacked
		dir_times = list()

		try: k,v = db.first()
		except KeyError: k = None # empty db
		while k is not None:
			if k.endswith(b'/'): # "root" dir - create
				if k != b'./':
					if verbose: print(k)
					if not dry_run:
						path = abs_path(k.rstrip(b'/'))
						os.mkdir(path)
						dir_times.append((path, blob_head(v)*2)) # (atime, mtime)
			else: # file or subdir
				path, name = k.rsplit(b'/', 1)
				if name.startswith(b'\0'): # file, (re)create
					path = join(path, name.lstrip(b'\0'))
					if verbose: print(path)
					if not dry_run:
						path = abs_path(path)
						# Close the file before utime, so that a late flush can't reset mtime
						with open(path, 'wb') as dst_file: dst_file.write(blob_data(v))
						os.utime(path, blob_head(v)*2)
				# \1 marker: subdir - will be created when it'll be "root"

			try: k,v = db.next()
			except KeyError: k = None

		for path,times in dir_times: os.utime(path, times)
	finally:
		db.close()


## Invocation
from optparse import OptionParser
# Command-line interface: "db" is always the first positional argument,
#  "path" is the directory to back up or to extract into
parser = OptionParser(usage='%prog [options] db [path]',
	description='Compare/backup path with lots of small files into berkley db.')
parser.add_option('-d', '--delay', type='float', default=0,
	help='Delay between fs iterations, to minimize the load (float, default: %defaults).')
parser.add_option('-x', '--extract', action='store_true',
	help='Unpack db to a specified existing path.')
parser.add_option('-l', '--list', action='store_true',
	help='List the paths in the db, can be combined with --extract.')
optz, argz = parser.parse_args()

# Validate positional arguments against the selected mode:
#  - backup (neither -x nor -l) and extraction (-x, with or without -l)
#    need both db and path
#  - listing alone (-l) needs db, path is accepted, but not used
if optz.extract or not optz.list:
	if len(argz) != 2: parser.error('Need exactly two arguments.')
elif len(argz) < 1: parser.error('Need at least one argument.')
elif len(argz) > 2: parser.error('Need at most two arguments.')

import sys
# backup() takes (src, dst), i.e. (path, db) - reverse of the command-line order
if not optz.extract and not optz.list: backup(*reversed(argz))
else: extract(*argz, verbose=optz.list, dry_run=not optz.extract)
