#!/usr/bin/env perl
###############################################################################
# Message filter script for "git filter-branch --msg-filter" which replaces
# all occurrences of svn revisions with the references to the git commits and
# removes git-svn-id metadata lines. This script is useful when doing the
# final (and irreversible) conversion of a git-svn mirror of svn repository to
# git. Notice that it should be used with "--date-order" git rev-list option
# (which can be passed to git filter-branch after "--") to ensure that the
# earlier commits are rewritten before the later ones which can reference
# them.
#
# Copyright (C) 2013 Vadim Zeitlin
###############################################################################

use strict;
use warnings;

use Cwd;

# Get the names of all svn branches.
# Collect the names of the remote-tracking (svn) branches.
#
# Runs "git branch -r --no-color" and strips the two-character prefix that
# git prints in front of every branch name.
#
# Returns a reference to a new array containing the branch names, without
# those starting with "tags" or ending with "trunk".
sub get_svn_branches
{
    my @names;

    for my $line (qx{git branch -r --no-color}) {
        my $name = $line;
        chomp $name;

        # Remove the two leading characters preceding the name itself.
        substr($name, 0, 2, '');

        # Tags are not really branches and trunk is searched separately by
        # rev2sha() before it asks for this list, so leave both out.
        next if $name =~ /(?:^tags|trunk$)/;

        push @names, $name;
    }

    return \@names;
}

# Convert svn revision number to (abbreviated) git SHA-1 commit ID.
#
# Parameter:
#   $rev - the svn revision number, digits only (without the "r" prefix).
#
# Returns:
#   - the first 7 characters of the SHA-1 of the corresponding git commit
#     (of the rewritten commit if a ".git-rewrite" directory exists in the
#     current directory);
#   - $rev unchanged, after a warning, if the revision is not found in trunk
#     nor in any of the branches returned by get_svn_branches();
#   - "r$rev", after a warning, if the commit was found but ".git-rewrite"
#     has no usable mapping for it.
sub rev2sha
{
    my ($rev) = @_;

    # GIT_COMMIT is not set when the script is run by hand for testing, avoid
    # "uninitialized" warnings in the diagnostics below in this case.
    my $commit = defined $ENV{GIT_COMMIT} ? $ENV{GIT_COMMIT}
                                          : '(unknown commit)';

    # Return the output of "git svn find-rev" for this revision in the given
    # ref, without the trailing new line, or an empty string if there is
    # none. The list form of open() is used to run git without the shell, so
    # that unusual characters in the ref name can't be misinterpreted.
    my $find_rev = sub {
        my ($ref) = @_;

        open my $pipe, '-|', 'git', 'svn', 'find-rev', "r$rev", $ref
            or return '';
        my $output = do { local $/; <$pipe> };
        close $pipe;

        return '' unless defined $output;

        chomp $output;
        return $output;
    };

    # Start by looking in the trunk assuming that most revisions will be found
    # there.
    my $sha = $find_rev->('svn/trunk');
    if (!$sha) {
        foreach my $branch (@{ get_svn_branches() }) {
            $sha = $find_rev->($branch);

            last if $sha;
        }
    }

    if (!$sha) {
        warn "\nRevision $rev not found in svn history " .
                "while rewriting $commit.\n";
        return $rev;
    }

    # If we're used as git filter-branch message filter, this revision could
    # have been already rewritten, so check if we shouldn't use the new SHA-1
    # for it.
    if (-d '.git-rewrite') {
        if (open my $fh, '<', ".git-rewrite/map/$sha") {
            my $mapped = <$fh>;
            close $fh;

            chomp $mapped if defined $mapped;

            # The map file can exist but be empty, don't replace the
            # revision with an empty string in this case.
            if (!defined $mapped || $mapped eq '') {
                warn "\nCommit $sha corresponding to r$rev has no rewritten " .
                        "commit while rewriting $commit.\n";
                return "r$rev";
            }

            $sha = $mapped;
        } else {
            warn "\nCommit $sha corresponding to r$rev not mapped yet " .
                    "while rewriting $commit.\n";
            return "r$rev";
        }
    }

    # Abbreviate it to a reasonably short, yet still usually unique, prefix.
    return substr($sha, 0, 7);
}

# Check that we have a valid GIT_DIR: this is taken care of automatically if
# we're being run from "git filter-branch" but set it to the current directory
# to allow testing the script manually too.
my $git_dir = defined $ENV{GIT_DIR} ? $ENV{GIT_DIR} : getcwd() . "/.git";

die "Must run from git svn repository.\n" unless -d "$git_dir/svn/refs/remotes/svn";

# Change to the root repository directory to allow accessing .git-rewrite
# inside rev2sha using relative path.
chdir "$git_dir/.."
    or die "Cannot change to directory $git_dir/..: $!\n";

# Get the entire commit message at once.
$_ = do { local $/; <STDIN> };
$_ = '' unless defined $_;

# The svn revision of the current commit, undefined if the message doesn't
# contain any git-svn-id line.
my ($current_rev) = m/^git-svn-id: [^ ]+@(\d+) /ms;

# Get rid of git-svn-id and everything following it.
s/^git-svn-id: .*//ms;

# This also leaves us with the blank line before it, drop it too.
chomp;

# Special hack for the artificial commits generated by cvs2svn: they have the
# same date as the previous commit and "git filter-branch ... -- --date-order"
# sorts them in the wrong order so that the commit referencing the previous
# one is processed first. Avoid this by this hack.
s{(This commit was generated by cvs2svn to compensate for changes in) r\d+,}
 {$1 the previous commit,};

# Finally translate all revision references to git commits. Two kinds of
# references are recognized:
#
#  - "r123", "rev 123" or "revision 123": the prefix must start at a word
#    boundary and the number must not be followed by another digit, by "_" or
#    by "." and a digit, to avoid false positives on things looking like
#    version numbers or identifiers. The whole reference, prefix included, is
#    replaced by the result of rev2sha().
#
#  - any run of 5 or more digits at the start of the text or after whitespace
#    (4 digits could be a year though), but only if it is less than the
#    revision of the commit being rewritten.
#
# Both kinds are handled in a single pass so that the text inserted for one
# reference, which can itself start with or consist of digits, is never
# examined again and mistaken for another revision number.
s{
    \b (?: r | rev\s* | revision\s* ) (\d+) (?! \d | _ | [.]\d )
  |
    (^|\s) (\d{5,}) \b
}{
    defined $1
        ? rev2sha($1)
        : $2 . (defined $current_rev && $3 < $current_rev ? rev2sha($3) : $3)
}egx;

print;
