[7430fc]: / Allura / allura / scripts / reindex_projects.py  Maximize  Restore  History

Download this file

120 lines (103 with data), 4.9 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import unicode_literals
from __future__ import absolute_import
import argparse
import logging
from pymongo.errors import InvalidDocument
from tg import tmpl_context as c, app_globals as g
from allura.scripts import ScriptTask
from allura import model as M
from allura.tasks.index_tasks import add_projects
from allura.lib.utils import chunked_find, chunked_list
from allura.lib.exceptions import CompoundError
log = logging.getLogger(__name__)
class ReindexProjects(ScriptTask):
    """Reindex project records into Solr (for searching).

    The set of projects can be restricted by neighborhood, by exact
    shortname, or by a shortname regex (see :meth:`parser`).  Indexing is
    done inline by default, or queued as background tasks with ``--tasks``.
    """

    @classmethod
    def execute(cls, options):
        """Find matching projects and (re)index each of them.

        :param options: parsed arguments produced by :meth:`parser`.
        :return: an error message string when the neighborhood prefix is
            invalid; otherwise ``None``.
        """
        q_project = {}
        if options.nbhd:
            nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
            if not nbhd:
                return "Invalid neighborhood url prefix."
            q_project['neighborhood_id'] = nbhd._id
        if options.project:
            q_project['shortname'] = options.project
        elif options.project_regex:
            q_project['shortname'] = {'$regex': options.project_regex}

        for chunk in chunked_find(M.Project, q_project):
            project_ids = []
            for p in chunk:
                log.info('Reindex project %s', p.shortname)
                if options.dry_run:
                    continue
                c.project = p
                project_ids.append(p._id)
            try:
                # `id_chunk` (not `chunk`) so we don't shadow the outer
                # chunked_find loop variable.
                for id_chunk in chunked_list(project_ids, options.max_chunk):
                    if options.tasks:
                        cls._post_add_projects(id_chunk)
                    else:
                        add_projects(id_chunk)
            except CompoundError as err:
                log.exception('Error indexing projects:\n%r', err)
                log.error('%s', err.format_error())
            # Flush and clear the ORM session between chunks so the identity
            # map doesn't grow without bound over a large reindex.
            M.main_orm_session.flush()
            M.main_orm_session.clear()
        log.info('Reindex %s', 'queued' if options.tasks else 'done')

    @classmethod
    def _post_add_projects(cls, chunk):
        """
        Post task, recursively splitting and re-posting if the resulting
        mongo document is too large.
        """
        try:
            add_projects.post(chunk)
        except InvalidDocument as e:
            # there are many types of InvalidDocument, only recurse if its
            # expected to help
            if e.args[0].startswith('BSON document too large'):
                cls._post_add_projects(chunk[:len(chunk) // 2])
                cls._post_add_projects(chunk[len(chunk) // 2:])
            else:
                raise

    @classmethod
    def parser(cls):
        """Build and return the :class:`argparse.ArgumentParser` for this script."""
        parser = argparse.ArgumentParser(description='Reindex all project records into Solr (for searching)')
        parser.add_argument('-n', '--nbhd', action='store', default='', dest='nbhd',
                            help='Restrict reindex to a particular neighborhood, e.g. /p/.')
        parser.add_argument(
            '-p', '--project', action='store', default='', dest='project',
            help='Restrict update to a particular project. To specify a '
                 'subproject, use a slash: project/subproject.')
        parser.add_argument('--project-regex', action='store', default='',
                            dest='project_regex',
                            help='Restrict update to projects for which the shortname matches '
                                 'the provided regex.')
        parser.add_argument('--dry-run', action='store_true', dest='dry_run',
                            default=False, help='Log names of projects that would be reindexed, '
                            'but do not perform the actual reindex.')
        parser.add_argument('--tasks', action='store_true', dest='tasks',
                            help='Run each individual index operation as a background task.')
        parser.add_argument(
            '--max-chunk', dest='max_chunk', type=int, default=100 * 1000,
            help='Max number of artifacts to index in one Solr update command')
        return parser
def get_parser():
    """Return the argument parser for this script."""
    parser = ReindexProjects.parser()
    return parser
# Script entry point: delegate to the ScriptTask runner.
if __name__ == '__main__':
    ReindexProjects.main()