#-*-python-*-
import sys
import logging
from bson import ObjectId
from pylons import c
from allura import model as M
from forgesvn import model as SM

# Logger used for this script's per-tree diagnostic messages.
log = logging.getLogger('fix-svn-empty-trees')

# Database handle taken from the Allura model's main document session; the
# functions in this script query its ``repo_object`` collection directly.
db = M.main_doc_session.db

def main():
    c.project = M.Project.query.get()
    num_trees = db.repo_object.find(dict(type='tree', object_ids={'$size':0})).count()
    print '# Found %d empty tree objects' % num_trees
    sys.stdout.flush()
    for i, t in enumerate(tree_chunks()):
        if i % 10 == 0:
            print '# Visited %d trees' % (i, num_trees)
            sys.stdout.flush()
        tid = t['object_id']
        log.info('Examine empty tree %s: %r', tid, t['object_ids'])
        num_parents = get_parent_tree(tid).count()
        if num_parents > 1: continue
        if num_parents == 1:
            parent = get_parent_tree(tid).next()
        else:
            parent = None
        log.info('... parent is %s', parent)
        commit = dict((ci['object_id'], ci) for ci in get_commit(tid))
        if len(commit) > 1: continue
        if len(commit) == 1:
            commit = commit.values()[0]
        else:
            commit = None
        log.info('... commit is %s', commit)
        if commit is None: continue
        # if it's a svn commit, go ahead and delete the tree
        if ':' in commit['object_id']:
            repo_id, revno = commit['object_id'].split(':')
            repo = SM.Repository.query.get(_id=ObjectId(repo_id))
            if repo is not None:
                print '# Repo: %r(%s), revno: %s' % (repo_id, repo.fs_path, revno)
            print "db.repo_object.remove({'type': 'tree', 'object_id':'%s'})" % tid
        sys.stdout.flush()

def tree_chunks():
    """Yield every empty tree document, fetched one page at a time.

    Queries ``repo_object`` for documents with ``type == 'tree'`` and an
    ``object_ids`` array of size 0, reading ``pgsize`` documents per query
    via ``skip``/``limit``.  Iteration stops at the first empty page, or
    after 10000 pages at the latest.

    NOTE(review): no sort order is given to the query, so paging relies on
    the server returning documents in a stable order between queries.
    """
    pgsize = 100
    for chunk in xrange(10000):
        q = db.repo_object.find(dict(type='tree', object_ids={'$size':0}))
        # Was ``cunk*pgsize``: an undefined name that raised NameError on
        # the first page.
        q = q.skip(chunk*pgsize)
        q = q.limit(pgsize)
        r = list(q)
        if not r: break
        for t in r: yield t

def get_parent_tree(tid):
    """Query for tree objects that have *tid* among their ``object_ids``.

    Returns whatever ``db.repo_object.find`` returns for that query; callers
    in this script use it as a cursor (``count()``, ``next()``, iteration).
    """
    criteria = {'type': 'tree', 'object_ids.object_id': tid}
    return db.repo_object.find(criteria)

def get_commit(tid, max_count=2):
    """Yield up to *max_count* distinct commits that reference tree *tid*.

    Commits whose ``tree_id`` is *tid* are yielded first.  If fewer than
    *max_count* were found, each tree that contains *tid* (see
    ``get_parent_tree``) is searched recursively.

    Commits are de-duplicated by their ``object_id``, so a commit reachable
    through several parent trees is reported once.  The default limit of 2
    is enough for a caller to tell "exactly one commit" from "more than one".

    Yields nothing when *max_count* is not positive.
    """
    if max_count <= 0:
        return
    seen = set()
    for ci in _commit_candidates(tid, max_count):
        cid = ci['object_id']
        if cid in seen:
            continue
        seen.add(cid)
        # Yield before testing the limit: the earlier code counted and broke
        # out first in the parent branch, silently dropping the commit that
        # reached the limit.
        yield ci
        if len(seen) >= max_count:
            return

def _commit_candidates(tid, max_count):
    """Lazily yield direct commits of *tid*, then commits of its parent trees."""
    for ci in db.repo_object.find(dict(type='commit', tree_id=tid)):
        yield ci
    # The parent list is materialised up front, as in the original code,
    # before recursing into further queries.
    for parent in list(get_parent_tree(tid)):
        # Pass the full limit down: some of the child's results may be
        # duplicates of commits the caller has already seen.
        for ci in get_commit(parent['object_id'], max_count):
            yield ci

# Run the scan only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
