patx/gitman

improve repo uploads and large pushes (limited)

Commit 59b52d5 · patx · 2026-05-07T21:31:14Z

Changeset
59b52d57f0830fc473d4dff7bba5538b272c787f
Parents
5e3fa23589fbf454ead60c38c81944bbc110c501

View source at this commit

Comments

No comments yet.

Log in to comment

Diff

diff --git a/app.py b/app.py
index 15e4570..0b0ce34 100644
--- a/app.py
+++ b/app.py
@@ -69,9 +69,12 @@ SQLITE_BUSY_TIMEOUT_MS = 30_000
 MAX_FORM_BYTES = env_int("GITMAN_MAX_FORM_BYTES", 64 * 1024)
 MAX_IMPORT_BYTES = env_int("GITMAN_MAX_IMPORT_BYTES", 2 * 1024 * 1024 * 1024)
 IMPORT_UPLOAD_CHUNK_BYTES = 1024 * 1024
+IMPORT_UPLOAD_STALE_SECONDS = env_int("GITMAN_IMPORT_UPLOAD_STALE_SECONDS", 6 * 60 * 60, minimum=60)
 GIT_IMPORT_TIMEOUT_SECONDS = env_int("GITMAN_IMPORT_TIMEOUT_SECONDS", 3600, minimum=1)
 GIT_IMPORT_NICE = env_int("GITMAN_GIT_IMPORT_NICE", 10, minimum=0)
 GIT_IMPORT_PACK_THREADS = env_int("GITMAN_GIT_IMPORT_PACK_THREADS", 1, minimum=1)
+GIT_HTTP_NICE = env_int("GITMAN_GIT_HTTP_NICE", 10, minimum=0)
+GIT_HTTP_PACK_THREADS = env_int("GITMAN_GIT_HTTP_PACK_THREADS", 1, minimum=1)
 MAX_RENDER_BYTES = env_int("GITMAN_MAX_RENDER_BYTES", 256 * 1024)
 MAX_GIT_RESPONSE_BYTES = env_int("GITMAN_MAX_GIT_RESPONSE_BYTES", 256 * 1024 * 1024)
 PERF_LOG_THRESHOLD_MS = env_int("GITMAN_PERF_LOG_THRESHOLD_MS", 250, minimum=0)
@@ -1592,6 +1595,17 @@ def run_git_import(args, cwd=None, check=True):
         raise GitCommandError("Git import timed out.", 124) from exc
 
 
+def apply_git_config_env(env, git_config):
+    if not git_config:
+        return
+    base_count = int(env.get("GIT_CONFIG_COUNT", "0") or 0)
+    for offset, (key, value) in enumerate(git_config.items()):
+        index = base_count + offset
+        env[f"GIT_CONFIG_KEY_{index}"] = str(key)
+        env[f"GIT_CONFIG_VALUE_{index}"] = str(value)
+    env["GIT_CONFIG_COUNT"] = str(base_count + len(git_config))
+
+
 def repo_is_empty(path):
     return commit_count(path) == 0
 
@@ -1629,9 +1643,22 @@ def save_bundle_upload(upload, destination):
 def import_upload_chunks_dir():
     path = Path(tempfile.gettempdir()) / "gitman-import-chunks"
     path.mkdir(mode=0o700, parents=True, exist_ok=True)
+    cleanup_stale_upload_chunks(path)
     return path
 
 
+def cleanup_stale_upload_chunks(path):
+    if not IMPORT_UPLOAD_STALE_SECONDS:
+        return
+    cutoff = time.time() - IMPORT_UPLOAD_STALE_SECONDS
+    for chunk_path in path.glob("*.bundle"):
+        try:
+            if chunk_path.stat().st_mtime < cutoff:
+                chunk_path.unlink()
+        except OSError:
+            pass
+
+
 def parse_nonnegative_int(value, name):
     try:
         parsed = int(value)
@@ -1663,6 +1690,22 @@ def save_upload_chunk(source, destination, expected_size, offset):
     return written
 
 
+def discard_upload_chunk(source, expected_size):
+    remaining = expected_size
+    while remaining > 0:
+        chunk = source.read(min(IMPORT_UPLOAD_CHUNK_BYTES, remaining))
+        if not chunk:
+            break
+        remaining -= len(chunk)
+
+
+def import_complete_upload_chunk(repo, path, filename, chunk_path):
+    with chunk_path.open("rb") as bundle_file:
+        import_git_bundle(repo, path, StreamingUpload(filename, bundle_file))
+    updated_repo = get_repo(repo["owner_username"], repo["name"])
+    return render_repo_settings_page(updated_repo, path, notice="Git bundle imported.")
+
+
 def bundle_ref_names(bundle_path):
     completed = run_git_import(["bundle", "list-heads", str(bundle_path)])
     refs = []
@@ -3775,11 +3818,51 @@ def git_http_backend_executable():
     raise GitCommandError("git-http-backend executable was not found.")
 
 
-def git_http_backend_response(repo, auth_user):
+def spool_request_body(source, expected_size=0):
+    body_file = tempfile.TemporaryFile()
+    size = 0
+    remaining = expected_size if expected_size > 0 else None
+    try:
+        while remaining is None or remaining > 0:
+            read_size = IMPORT_UPLOAD_CHUNK_BYTES if remaining is None else min(IMPORT_UPLOAD_CHUNK_BYTES, remaining)
+            chunk = source.read(read_size)
+            if not chunk:
+                break
+            body_file.write(chunk)
+            size += len(chunk)
+            if remaining is not None:
+                remaining -= len(chunk)
+        body_file.seek(0)
+        if expected_size > 0 and size != expected_size:
+            body_file.close()
+            raise ValueError("Git request body was incomplete.")
+        return body_file, size
+    except Exception:
+        body_file.close()
+        raise
+
+
+def git_http_backend_response(repo, auth_user, on_success=None, buffer_response=False):
     mount = f"/git/{repo['owner_username']}/{repo['name']}"
     original_path = request.environ.get("PATH_INFO", request.path)
     rest = original_path[len(mount) :] if original_path.startswith(mount) else ""
-    body = request.body.read() if request.method == "POST" else b""
+    service = git_service_from_request()
+    debug_receive = service == "git-receive-pack"
+    body_file = None
+    body_size = 0
+    if request.method == "POST":
+        body_file, body_size = spool_request_body(request.environ["wsgi.input"], request_content_length())
+        if debug_receive:
+            log_perf(
+                "git-receive-request",
+                PERF_LOG_THRESHOLD_MS / 1000 if PERF_LOG_THRESHOLD_MS else 1,
+                (
+                    f"path={original_path} content_length={request_content_length()} "
+                    f"spooled={body_size} content_type={request.environ.get('CONTENT_TYPE', '')!r} "
+                    f"transfer_encoding={request.get_header('Transfer-Encoding', '')!r} "
+                    f"expect={request.get_header('Expect', '')!r}"
+                ),
+            )
 
     env = git_env()
     env.update(
@@ -3790,7 +3873,7 @@ def git_http_backend_response(repo, auth_user):
             "REQUEST_METHOD": request.method,
             "QUERY_STRING": request.query_string or "",
             "CONTENT_TYPE": request.environ.get("CONTENT_TYPE", ""),
-            "CONTENT_LENGTH": str(len(body)),
+            "CONTENT_LENGTH": str(body_size),
             "REMOTE_ADDR": request.environ.get("REMOTE_ADDR", ""),
         }
     )
@@ -3802,18 +3885,20 @@ def git_http_backend_response(repo, auth_user):
     content_encoding = request.get_header("Content-Encoding", "")
     if content_encoding:
         env["HTTP_CONTENT_ENCODING"] = content_encoding
+    apply_git_config_env(env, {"pack.threads": GIT_HTTP_PACK_THREADS})
 
     stderr_file = tempfile.TemporaryFile()
+    command = [git_http_backend_executable()]
+    if GIT_HTTP_NICE > 0:
+        command = ["nice", "-n", str(GIT_HTTP_NICE), *command]
     process = subprocess.Popen(
-        [git_http_backend_executable()],
-        stdin=subprocess.PIPE,
+        command,
+        stdin=body_file if body_file else subprocess.DEVNULL,
         stdout=subprocess.PIPE,
         stderr=stderr_file,
         env=env,
     )
     try:
-        process.stdin.write(body)
-        process.stdin.close()
         header_lines = []
         while True:
             line = process.stdout.readline()
@@ -3828,10 +3913,18 @@ def git_http_backend_response(repo, auth_user):
             stderr_file.seek(0)
             message = stderr_file.read().decode("utf-8", "replace").strip() or "Git HTTP backend failed."
             raise GitCommandError(message, process.returncode)
+        if debug_receive:
+            log_perf(
+                "git-receive-headers",
+                PERF_LOG_THRESHOLD_MS / 1000 if PERF_LOG_THRESHOLD_MS else 1,
+                f"returncode={process.poll()} headers={header_lines!r}",
+            )
     except Exception:
         process.kill()
         process.wait()
         stderr_file.close()
+        if body_file:
+            body_file.close()
         raise
 
     status_code = 200
@@ -3850,6 +3943,53 @@ def git_http_backend_response(repo, auth_user):
         else:
             headers[key] = value
 
+    if buffer_response:
+        body_chunks = []
+        response_size = 0
+        try:
+            while True:
+                chunk = process.stdout.read(1024 * 1024)
+                if not chunk:
+                    break
+                response_size += len(chunk)
+                if MAX_GIT_RESPONSE_BYTES and response_size > MAX_GIT_RESPONSE_BYTES:
+                    raise GitResponseTooLarge("Git response too large.")
+                body_chunks.append(chunk)
+            process.wait()
+            stderr_file.seek(0)
+            stderr = stderr_file.read().decode("utf-8", "replace").strip()
+            if debug_receive:
+                log_perf(
+                    "git-receive-complete",
+                    PERF_LOG_THRESHOLD_MS / 1000 if PERF_LOG_THRESHOLD_MS else 1,
+                    (
+                        f"returncode={process.returncode} status={status_code} "
+                        f"response_size={response_size} stderr={stderr!r}"
+                    ),
+                )
+            if process.returncode != 0:
+                log_perf(
+                    "git-http-backend-error",
+                    PERF_LOG_THRESHOLD_MS / 1000 if PERF_LOG_THRESHOLD_MS else 1,
+                    stderr or f"returncode={process.returncode}",
+                )
+                if not body_chunks:
+                    return HTTPResponse(
+                        (stderr or "Git HTTP backend failed.") + "\n",
+                        status=500,
+                        content_type="text/plain; charset=utf-8",
+                    )
+            if process.returncode == 0 and status_code < 400 and on_success:
+                on_success()
+            return HTTPResponse(body=b"".join(body_chunks), status=status_code, headers=headers)
+        finally:
+            if process.poll() is None:
+                process.kill()
+                process.wait()
+            stderr_file.close()
+            if body_file:
+                body_file.close()
+
     def stream_git_response():
         try:
             while True:
@@ -3858,11 +3998,26 @@ def git_http_backend_response(repo, auth_user):
                     break
                 yield chunk
             process.wait()
+            if process.returncode != 0:
+                stderr_file.seek(0)
+                stderr = stderr_file.read().decode("utf-8", "replace").strip()
+                log_perf(
+                    "git-http-backend-error",
+                    PERF_LOG_THRESHOLD_MS / 1000 if PERF_LOG_THRESHOLD_MS else 1,
+                    stderr or f"returncode={process.returncode}",
+                )
+            if process.returncode == 0 and status_code < 400 and on_success:
+                try:
+                    on_success()
+                except Exception as exc:
+                    log_perf("git-http-success-hook-error", PERF_LOG_THRESHOLD_MS / 1000 if PERF_LOG_THRESHOLD_MS else 1, str(exc))
         finally:
             if process.poll() is None:
                 process.kill()
                 process.wait()
             stderr_file.close()
+            if body_file:
+                body_file.close()
 
     return HTTPResponse(body=stream_git_response(), status=status_code, headers=headers)
 
@@ -4147,7 +4302,38 @@ def repo_settings_import_bundle_chunk(owner, repo_name):
 
     current_size = chunk_path.stat().st_size if chunk_path.exists() else 0
     if current_size != offset:
-        abort(409, "Upload chunk offset mismatch.")
+        if current_size < offset:
+            discard_upload_chunk(request.environ["wsgi.input"], chunk_size)
+            if offset + chunk_size >= total and not repo_is_empty(path):
+                updated_repo = get_repo(owner, repo_name)
+                return render_repo_settings_page(updated_repo, path, notice="Git bundle imported.")
+            return HTTPResponse(
+                "Upload chunk offset mismatch.\n",
+                status=409,
+                content_type="text/plain; charset=utf-8",
+            )
+        if current_size >= offset + chunk_size:
+            discard_upload_chunk(request.environ["wsgi.input"], chunk_size)
+            if current_size >= total:
+                if not repo_is_empty(path):
+                    updated_repo = get_repo(owner, repo_name)
+                    return render_repo_settings_page(updated_repo, path, notice="Git bundle imported.")
+                try:
+                    return import_complete_upload_chunk(repo, path, filename, chunk_path)
+                except UploadTooLarge as exc:
+                    abort(413, str(exc))
+                except (ValueError, GitCommandError) as exc:
+                    return HTTPResponse(f"{exc}\n", status=400, content_type="text/plain; charset=utf-8")
+            return HTTPResponse("OK\n", content_type="text/plain; charset=utf-8")
+        try:
+            with chunk_path.open("r+b") as target:
+                target.truncate(offset)
+        except OSError:
+            return HTTPResponse(
+                "Upload chunk offset mismatch.\n",
+                status=409,
+                content_type="text/plain; charset=utf-8",
+            )
 
     try:
         save_upload_chunk(request.environ["wsgi.input"], chunk_path, chunk_size, offset)
@@ -4157,10 +4343,7 @@ def repo_settings_import_bundle_chunk(owner, repo_name):
         if current_size != total:
             raise ValueError("Upload size mismatch.")
 
-        with chunk_path.open("rb") as bundle_file:
-            import_git_bundle(repo, path, StreamingUpload(filename, bundle_file))
-        updated_repo = get_repo(owner, repo_name)
-        return render_repo_settings_page(updated_repo, path, notice="Git bundle imported.")
+        return import_complete_upload_chunk(repo, path, filename, chunk_path)
     except UploadTooLarge as exc:
         abort(413, str(exc))
     except (ValueError, GitCommandError) as exc:
@@ -4768,14 +4951,22 @@ def git_http(owner, repo_name, git_path=""):
         clear_auth_failures("git", auth_user["username"])
         prepare_repo_for_receive(repo_path(owner, repo_name))
 
-    git_response = git_http_backend_response(repo, auth_user)
-    if is_write and getattr(git_response, "status_code", 200) < 400:
+    def after_successful_write():
         now = utcnow()
         with db_connect() as conn:
             conn.execute("UPDATE repositories SET updated_at = ? WHERE id = ?", (now, repo["id"]))
         mark_repo_indexing(repo["id"], repo_path(owner, repo_name))
         schedule_repo_metadata_refresh(repo["id"])
-    return git_response
+
+    try:
+        return git_http_backend_response(
+            repo,
+            auth_user,
+            on_success=after_successful_write if is_write else None,
+            buffer_response=is_write,
+        )
+    except ValueError as exc:
+        return HTTPResponse(f"{exc}\n", status=400, content_type="text/plain; charset=utf-8")
 
 
 @app.error(404)
diff --git a/templates/base.tpl b/templates/base.tpl
index 0e8329d..d51af42 100644
--- a/templates/base.tpl
+++ b/templates/base.tpl
@@ -410,6 +410,15 @@
         document.close();
       };
 
+      const responseMessage = async (response, fallback) => {
+        try {
+          const text = (await response.text()).trim();
+          return text || fallback;
+        } catch (error) {
+          return fallback;
+        }
+      };
+
       forms.forEach((form) => {
         form.addEventListener("submit", async (event) => {
           const input = form.querySelector("[data-import-bundle-file]");
@@ -464,7 +473,10 @@
                     body: file.slice(offset, end),
                   });
                   if (response.ok) break;
-                  lastError = new Error(await response.text());
+                  lastError = new Error(
+                    await responseMessage(response, "Upload failed.")
+                  );
+                  if (response.status >= 400 && response.status < 500) break;
                 } catch (error) {
                   lastError = error;
                 }
@@ -490,7 +502,10 @@
           } catch (error) {
             if (status) {
               status.className = "alert";
-              status.textContent = "Upload failed. Check your connection and try again.";
+              status.textContent =
+                error && error.message
+                  ? error.message
+                  : "Upload failed. Check your connection and try again.";
               status.hidden = false;
             }
             if (button) {
diff --git a/tests/test_app.py b/tests/test_app.py
index 4ab6ef9..7638fab 100644
--- a/tests/test_app.py
+++ b/tests/test_app.py
@@ -292,6 +292,55 @@ def post_bundle_import(client, owner, repo_name, content, filename="repo.bundle"
     )
 
 
+def post_bundle_import_chunks(client, owner, repo_name, content, filename="repo.bundle", chunk_size=10):
+    if hasattr(content, "read_bytes"):
+        content = content.read_bytes()
+    upload_id = "test-upload"
+    response = None
+    for offset in range(0, len(content), chunk_size):
+        chunk = content[offset : offset + chunk_size]
+        query = urlencode(
+            {
+                "filename": filename,
+                "upload_id": upload_id,
+                "offset": offset,
+                "total": len(content),
+            }
+        )
+        response = client.request(
+            "POST",
+            f"/{owner}/{repo_name}/settings/import-bundle/chunk?{query}",
+            headers={
+                "Content-Type": "application/octet-stream",
+                "X-CSRF-Token": client.csrf_token or "",
+            },
+            raw_body=chunk,
+        )
+        if response.status_code != 200 or offset + len(chunk) >= len(content):
+            return response
+    return response
+
+
+def post_bundle_import_chunk(client, owner, repo_name, chunk, offset, total, upload_id="test-upload"):
+    query = urlencode(
+        {
+            "filename": "repo.bundle",
+            "upload_id": upload_id,
+            "offset": offset,
+            "total": total,
+        }
+    )
+    return client.request(
+        "POST",
+        f"/{owner}/{repo_name}/settings/import-bundle/chunk?{query}",
+        headers={
+            "Content-Type": "application/octet-stream",
+            "X-CSRF-Token": client.csrf_token or "",
+        },
+        raw_body=chunk,
+    )
+
+
 def basic_auth(username, password):
     token = base64.b64encode(f"{username}:{password}".encode("utf-8")).decode("ascii")
     return {"Authorization": f"Basic {token}"}
@@ -827,6 +876,62 @@ def test_import_git_bundle_maps_origin_remote_branches(isolated_app, tmp_path):
     assert "feature.txt" in isolated_app.git_files(imported_path, "refs/heads/feature")
 
 
+def test_import_git_bundle_chunked_upload_preserves_refs(isolated_app, tmp_path):
+    owner = create_user("alice")
+    isolated_app.create_repository(owner, "chunked-import", "")
+    bundle_path = tmp_path / "repo.bundle"
+    nodes = create_bundle_with_refs(bundle_path)
+
+    client = WsgiClient(isolated_app.app)
+    login_client(client, "alice")
+    client.get("/alice/chunked-import/settings")
+
+    response = post_bundle_import_chunks(client, "alice", "chunked-import", bundle_path, chunk_size=64)
+
+    assert response.status_code == 200
+    assert "Git bundle imported." in response.text
+    imported_path = isolated_app.repo_path("alice", "chunked-import")
+    assert isolated_app.repo_has_revision(imported_path, nodes["main"])
+    assert isolated_app.repo_has_revision(imported_path, nodes["feature"])
+
+
+def test_import_git_bundle_chunked_upload_returns_specific_import_error(isolated_app):
+    owner = create_user("alice")
+    isolated_app.create_repository(owner, "empty", "")
+
+    client = WsgiClient(isolated_app.app)
+    login_client(client, "alice")
+    client.get("/alice/empty/settings")
+
+    response = post_bundle_import_chunks(client, "alice", "empty", b"not a git bundle")
+
+    assert response.status_code == 400
+    assert response.text == "Uploaded file is not a valid Git bundle.\n"
+
+
+def test_import_git_bundle_final_chunk_retry_after_completed_import_returns_success(isolated_app):
+    owner = create_user("alice")
+    isolated_app.create_repository(owner, "already-imported", "")
+    commit_file(isolated_app.repo_path("alice", "already-imported"), "README.md", "# Imported\n")
+
+    client = WsgiClient(isolated_app.app)
+    login_client(client, "alice")
+    client.get("/alice/already-imported/settings")
+
+    response = post_bundle_import_chunk(
+        client,
+        "alice",
+        "already-imported",
+        b"final",
+        offset=100,
+        total=105,
+    )
+
+    assert response.status_code == 200
+    assert "Git bundle imported." in response.text
+    assert "Upload chunk offset mismatch." not in response.text
+
+
 def test_import_git_bundle_rejects_invalid_upload_without_replacing_repo(isolated_app):
     owner = create_user("alice")
     isolated_app.create_repository(owner, "empty", "")