Issue with Git LFS Synchronization Performance Compared to SVN + Rsync (original) (raw)

February 25, 2025, 4:39pm 1

We have a self-hosted Git LFS server that uses an AWS S3 bucket for storage, currently holding around 700 GB of data. I also have 20 client servers (read-only) that use a script to fetch data from this Git server.

πŸ”§ Current Architecture:

πŸ—οΈ Previous SVN Architecture:


We moved away from SVN because rsync took too long to synchronize the 700 GB of dataβ€”it checked every file before updating modified ones, leading to significant delays. By switching to Git LFS, the goal was to reduce sync time since Git should only fetch differences between versions.

:x: Issues We’re Facing:

Scripts Used:

:one: rsynchMotionRessources.sh (Takes more than 4 hours to run)

# Main script setup
MOTION_DIR=/Users/ec2-user/Motion_Design

useGit() {
    echo "Using Git"
    init() {
        git config --global pull.rebase true
        git config --global credential.helper store
    }

    init_submodules() {
        echo "Started pulling modules at $(date +'%H:%M:%S')"
        git submodule update --init --recursive || { echo "ERROR: Git submodule update failed"; exit 1; }
    }

    clone() {
        echo "Started cloning at $(date +'%H:%M:%S')"
        rm -rf "$MOTION_DIR"
        git clone --depth=1 "https://oauth2:$MOTION_TOKEN@$GITLAB_ENDPOINT/Motion/$REPO.git" "$MOTION_DIR" --progress || { echo "ERROR: Cloning failed"; exit 1; }
        cd "$MOTION_DIR" || exit 1
        init_submodules
    }

    pull() {
        rm -rf "$MOTION_DIR/.git/modules/Ressources/index.lock"
        rm -rf "$MOTION_DIR/.git/index.lock"
        
        cd "$MOTION_DIR" || exit
        git reset --hard
        git checkout main
        git pull -X theirs origin main
    }

    if [ ! -d "$MOTION_DIR/.git" ]; then
        clone
    else
        cd "$MOTION_DIR" || exit 1
        pull
    fi
}

useGit

revised_rsynchMotionRessources.sh (Only fetches LFS metadata, not the actual files)

MOTION_DIR=/Users/ec2-user/Motion_Design

useGit() {
    echo "Using Git"
    init() {
        git config --global pull.rebase true
        git config --global credential.helper store
    }

    init_submodules() {
        echo "Started pulling modules at $(date +'%H:%M:%S')"
        git submodule update --init --recursive || { echo "Error: Git submodule update failed"; exit 1; }
    }

    clone() {
        echo "Started cloning at $(date +'%H:%M:%S')"
        rm -rf "$MOTION_DIR"
        git clone --depth=1 "https://oauth2:$MOTION_TOKEN@$GITLAB_ENDPOINT/Motion/$REPO.git" "$MOTION_DIR" --progress || { echo "Error: Cloning failed"; exit 1; }
        cd "$MOTION_DIR" || exit 1
        init_submodules
    }

    pull() {
        echo "Started fetching at $(date +'%H:%M:%S')"
        export GIT_LFS_SKIP_SMUDGE=1
        git fetch --depth=1 origin main --progress || { echo "Error: Git fetch failed"; exit 1; }
        git reset --hard origin/main || { echo "Error: Git reset failed"; exit 1; }

        export CHANGED_FILES=$(git diff --name-status --diff-filter=AMRDCT origin/main HEAD)
        echo "Changed files: $CHANGED_FILES"
        IFS=$'\n' read -rd '' -a FILE_ARRAY <<< "$CHANGED_FILES"

        for file in "${FILE_ARRAY[@]}"; do
            file_status=$(echo "$file" | awk '{print $1}')
            old_file=$(echo "$file" | awk '{print $2}')
            new_file=$(echo "$file" | awk '{print $3}')

            case $file_status in
                R) git checkout origin/main -- "$new_file" || { echo "Error checking out renamed file $new_file"; exit 1; } ;;
                D) [ -f "$old_file" ] && git rm "$old_file" ;;
                A|M|C|T) git checkout origin/main -- "$old_file" ;;
            esac
        done
    }

    if [ ! -d "$MOTION_DIR/.git" ]; then
        clone
    else
        cd "$MOTION_DIR" || exit 1
        pull
    fi
}

useGit

Looking for Suggestions:

Any help or insights would be greatly appreciated! πŸš€