#!/usr/bin/env python3
# License: BSD 3 clause
'''
Clone all projects from GitLab and recreate them on Stash

:author: Dan Blanchard ([email protected])
:organization: ETS
:date: June 2014
'''

import argparse
import getpass
import itertools
import logging
import os
import re
import string
import subprocess
import sys
import tempfile

import stashy
from gitlab import Gitlab as GitLab


__version__ = '0.1.0'

# GitLab's REST API never returns more than 100 items per page, whatever
# ``per_page`` value is requested.
MAX_PAGE_SIZE = 100

# Outcomes reported by _transfer_repository()
_TRANSFERRED = 'transferred'
_CLONE_FAILED = 'clone_failed'
_EMPTY = 'empty'


def gen_all_results(method, *args, per_page=20, **kwargs):
    '''
    Generate every result of a paginated method, one item at a time.

    ``method`` is called with ``page=1``, ``page=2``, ... and the given
    ``per_page`` until it returns a falsy value or a page whose length is
    not exactly ``per_page``.

    :param method: Callable that accepts ``page`` and ``per_page`` keyword
                   arguments and returns a sized iterable of results (or a
                   falsy value on failure).
    :param args: Positional arguments passed through to ``method``.
    :param per_page: Number of results to request per page.
    :type per_page: int
    :param kwargs: Keyword arguments passed through to ``method``. Any
                   ``page`` entry is discarded, since paging is driven here.
    :returns: Generator over the individual results of all pages.
    '''
    kwargs.pop('page', None)
    for page_num in itertools.count(1):
        proj_page = method(*args, page=page_num, per_page=per_page, **kwargs)
        # proj_page will be False if method fails
        if not proj_page:
            return
        yield from proj_page
        # A page that is not completely full is the last one
        if len(proj_page) != per_page:
            return


def _status(message, end='\n'):
    '''Write a progress message to stderr and flush it immediately.'''
    print(message, end=end, file=sys.stderr, flush=True)


def _build_parser():
    '''Create the command-line argument parser for this script.'''
    parser = argparse.ArgumentParser(
        description='Transfer all projects/repositories from GitLab to '
                    'Stash. Note: This script assumes you have your SSH key '
                    'registered with both GitLab and Stash.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('gitlab_url',
                        help='The full URL to your GitLab instance.')
    parser.add_argument('stash_url',
                        help='The full URL to your Stash instance.')
    parser.add_argument('-p', '--password',
                        help='The password to use to authenticate if token '
                             'is not specified. If password and token are '
                             'both unspecified, you will be prompted to '
                             'enter a password.')
    parser.add_argument('-P', '--page_size',
                        help='When retrieving result from GitLab, how many '
                             'results should be included in a given page?.',
                        type=int, default=20)
    parser.add_argument('-s', '--verify_ssl',
                        help='Enable SSL certificate verification',
                        action='store_true')
    parser.add_argument('-S', '--skip_existing',
                        help='Do not update existing repositories and just '
                             'skip them.',
                        action='store_true')
    parser.add_argument('-t', '--token',
                        help='The private GitLab API token to use for '
                             'authentication. Either this or username and '
                             'password must be set.')
    parser.add_argument('-u', '--username',
                        help='The username to use for authentication, if '
                             'token is unspecified.')
    parser.add_argument('-v', '--verbose',
                        help='Print more status information. For every '
                             'additional time this flag is specified, '
                             'output gets more verbose.',
                        default=0, action='count')
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    return parser


def _make_project_key(proj_name, taken_keys):
    '''
    Derive a Stash project key from ``proj_name`` that is not yet taken.

    The key is built from the upper-case ASCII letters of the name (after
    title-casing an all-lower-case name), falling back to the first two
    letters of the name. On a collision a letter suffix is appended:
    ``KEYA`` ... ``KEYZ``, then ``KEYAA`` and so on.

    :param proj_name: GitLab namespace name to derive the key from.
    :type proj_name: str
    :param taken_keys: Keys that are already in use. Not modified.
    :type taken_keys: set of str
    :returns: A key that is not in ``taken_keys``.
    :rtype: str
    '''
    base = proj_name.title() if proj_name.islower() else proj_name
    base = re.sub(r'[^A-Z]', '', base)
    if len(base) < 2:
        base = re.sub(r'[^A-Za-z]', '', proj_name)[0:2].upper()
    if not base:
        # Name contains no ASCII letters at all; an empty key is unusable
        base = 'PROJ'
    if base not in taken_keys:
        return base
    for length in itertools.count(1):
        for letters in itertools.product(string.ascii_uppercase,
                                         repeat=length):
            candidate = base + ''.join(letters)
            if candidate not in taken_keys:
                return candidate


def _make_repo_name(name):
    '''
    Convert a GitLab project name into a Stash-compatible repository name.

    Repository names are limited to 128 characters. They must start with a
    letter or number and may contain spaces, hyphens, underscores and
    periods.

    :param name: GitLab project name.
    :type name: str
    :returns: Sanitised repository name.
    :rtype: str
    '''
    # Sanitise first so the leading-character check sees the final text;
    # otherwise a non-ASCII first letter would be turned into a leading space
    name = re.sub(r'[^A-Za-z0-9 _.-]', ' ', name)
    if not name[:1].isalnum():
        name = 'A ' + name
    return name[:128]


def _ssh_clone_url(stash_repo):
    '''
    Return the ``href`` of the clone link named ``ssh``, or None.

    :param stash_repo: Repository description as returned by Stash; its
                       ``['links']['clone']`` entry is searched.
    :type stash_repo: dict
    '''
    for clone_link in stash_repo['links']['clone']:
        if clone_link['name'] == 'ssh':
            return clone_link['href']
    return None


def _transfer_repository(source_url, dest_url):
    '''
    Mirror-clone ``source_url`` and mirror-push the result to ``dest_url``.

    The clone lives in a temporary directory that is removed afterwards.
    Git is run with ``cwd`` set to that directory, so the working directory
    of this process is never changed.

    :param source_url: URL of the repository to clone.
    :type source_url: str
    :param dest_url: URL of the repository to push to.
    :type dest_url: str
    :returns: ``_TRANSFERRED``, ``_CLONE_FAILED`` or ``_EMPTY``.
    :raises subprocess.CalledProcessError: If changing the remote or
                                           pushing fails.
    '''
    with tempfile.TemporaryDirectory() as temp_dir:
        _status('\nCloning GitLab repository...')
        try:
            subprocess.check_call(['git', 'clone', '--mirror', source_url,
                                   temp_dir])
        except subprocess.CalledProcessError:
            return _CLONE_FAILED

        # ``git log`` fails in a repository without any commits
        try:
            subprocess.check_call(['git', 'log', '--format=oneline', '-1'],
                                  cwd=temp_dir,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            return _EMPTY

        # Change remote to Stash and push
        _status('\nPushing repository to Stash...')
        subprocess.check_call(['git', 'remote', 'set-url', 'origin',
                               dest_url], cwd=temp_dir)
        subprocess.check_call(['git', 'push', '--mirror'], cwd=temp_dir)
        return _TRANSFERRED


def main(argv=None):
    '''
    Process the command line arguments and transfer all GitLab projects to
    Stash.

    For every GitLab project, the Stash project (named after the GitLab
    namespace) and repository are created when missing, the GitLab
    repository is mirror-cloned and then mirror-pushed to Stash. A summary
    is printed to stderr at the end.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    '''
    # Get command line arguments
    args = _build_parser().parse_args(argv)
    # Requesting more than GitLab serves per page would make the first page
    # look "short" and end the pagination early, so clamp the page size
    args.page_size = max(1, min(MAX_PAGE_SIZE, args.page_size))

    # Convert verbose flag to actual logging level
    log_levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    log_level = log_levels[min(args.verbose, 2)]
    # Make warnings from built-in warnings module get formatted more nicely
    logging.captureWarnings(True)
    logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - ' +
                                '%(message)s'), level=log_level)

    # Setup authenticated GitLab and Stash instances
    if args.token:
        git = GitLab(args.gitlab_url, token=args.token,
                     verify_ssl=args.verify_ssl)
    else:
        git = None
    # Stash always needs a username and password, even with a GitLab token
    if not args.username:
        print('Username: ', end="", file=sys.stderr)
        args.username = input('').strip()
    if not args.password:
        args.password = getpass.getpass('Password: ')
    stash = stashy.connect(args.stash_url, args.username, args.password)
    if git is None:
        git = GitLab(args.gitlab_url, verify_ssl=args.verify_ssl)
        git.login(args.username, args.password)

    _status('Retrieving existing Stash projects...', end="")
    # Iterate the project listing once and derive both lookups from it
    existing_projects = list(stash.projects)
    key_set = {proj['key'] for proj in existing_projects}
    names_to_keys = {proj['name']: proj['key'] for proj in existing_projects}
    _status('done')

    updated_projects = set()
    repo_to_slugs = {}
    failed_to_clone = set()
    transfer_count = 0
    skipped_count = 0

    _status('Processing GitLab projects...')
    for project in gen_all_results(git.getallprojects,
                                   per_page=args.page_size):
        _status('\n' + ('=' * 80) + '\n')
        proj_name = project['namespace']['name']

        # Create Stash project if it doesn't already exist
        if proj_name not in names_to_keys:
            key = _make_project_key(proj_name, key_set)
            key_set.add(key)

            # Actually add the project to Stash
            _status('Creating Stash project "%s" with key %s...' %
                    (proj_name, key), end="")
            stash.projects.create(key, proj_name)
            names_to_keys[proj_name] = key
            _status('done')
        else:
            key = names_to_keys[proj_name]

        stash_project = stash.projects[key]

        # Initialize mapping from repository names to slugs for later
        if key not in repo_to_slugs:
            repo_to_slugs[key] = {repo['name']: repo['slug'] for repo in
                                  stash_project.repos}

        # Create Stash-compatible name for repository
        repo_name = _make_repo_name(project['name'])

        # Add repository to Stash project if it's not already there
        if repo_name not in repo_to_slugs[key]:
            _status('Creating Stash repository "%s" in project "%s"...' %
                    (repo_name, proj_name), end="")
            stash_repo = stash_project.repos.create(repo_name)
            repo_to_slugs[key][repo_name] = stash_repo['slug']
            _status('done')
        elif args.skip_existing:
            _status('Skipping existing Stash repository "%s" in project "%s"' %
                    (repo_name, proj_name))
            skipped_count += 1
            continue
        else:
            _status('Updating existing Stash repository "%s" in project "%s"' %
                    (repo_name, proj_name))
            repo_slug = repo_to_slugs[key][repo_name]
            stash_repo = stash_project.repos[repo_slug].get()

        # Resolve the push target for *this* repository; never reuse a URL
        # left over from a previous iteration
        stash_repo_url = _ssh_clone_url(stash_repo)
        if stash_repo_url is None:
            _status('Stash repository "%s" in project "%s" has no SSH clone '
                    'URL, so skipping it.' % (repo_name, proj_name))
            skipped_count += 1
            continue

        outcome = _transfer_repository(project['ssh_url_to_repo'],
                                       stash_repo_url)
        if outcome == _CLONE_FAILED:
            _status('Failed to clone GitLab repository. This usually '
                    'happens when it does not exist.')
            failed_to_clone.add(project['name_with_namespace'])
            skipped_count += 1
            continue
        if outcome == _EMPTY:
            _status('Repository is empty, so skipping push to Stash.')
            skipped_count += 1
        else:
            transfer_count += 1

        updated_projects.add(proj_name)

    _status('\n' + ('=' * 35) + 'SUMMARY' + ('=' * 35))
    _status('{} repositories transferred.\n'.format(transfer_count))
    _status('{} repositories skipped.\n'.format(skipped_count))

    _status('Projects created/updated:')
    for proj in sorted(updated_projects):
        _status('\t' + proj)

    _status('Repositories that we could not clone:')
    for repo_name in sorted(failed_to_clone):
        _status('\t' + repo_name)


if __name__ == '__main__':
    main()