patx/gitman
Stream git clone responses; reduce memory/CPU usage for large uploads
Commit 5e3fa23 · patx · 2026-05-07T05:54:10Z
Comments
No comments yet.
Diff
diff --git a/app.py b/app.py
index e1a2ff8..15e4570 100644
--- a/app.py
+++ b/app.py
@@ -1,4 +1,5 @@
import base64
+from contextlib import contextmanager
import datetime as dt
import hashlib
import hmac
@@ -69,6 +70,8 @@ MAX_FORM_BYTES = env_int("GITMAN_MAX_FORM_BYTES", 64 * 1024)
MAX_IMPORT_BYTES = env_int("GITMAN_MAX_IMPORT_BYTES", 2 * 1024 * 1024 * 1024)
IMPORT_UPLOAD_CHUNK_BYTES = 1024 * 1024
GIT_IMPORT_TIMEOUT_SECONDS = env_int("GITMAN_IMPORT_TIMEOUT_SECONDS", 3600, minimum=1)
+GIT_IMPORT_NICE = env_int("GITMAN_GIT_IMPORT_NICE", 10, minimum=0)
+GIT_IMPORT_PACK_THREADS = env_int("GITMAN_GIT_IMPORT_PACK_THREADS", 1, minimum=1)
MAX_RENDER_BYTES = env_int("GITMAN_MAX_RENDER_BYTES", 256 * 1024)
MAX_GIT_RESPONSE_BYTES = env_int("GITMAN_MAX_GIT_RESPONSE_BYTES", 256 * 1024 * 1024)
PERF_LOG_THRESHOLD_MS = env_int("GITMAN_PERF_LOG_THRESHOLD_MS", 250, minimum=0)
@@ -292,13 +295,23 @@ def configure_db_connection(conn):
conn.execute("PRAGMA synchronous = NORMAL")
-def db_connect():
+def open_db_connection():
conn = sqlite3.connect(DB_PATH, timeout=SQLITE_BUSY_TIMEOUT_MS / 1000)
conn.row_factory = sqlite3.Row
configure_db_connection(conn)
return conn
+@contextmanager
+def db_connect():
+ conn = open_db_connection()
+ try:
+ with conn:
+ yield conn
+ finally:
+ conn.close()
+
+
def init_db():
ensure_dirs()
with db_connect() as conn:
@@ -1445,11 +1458,17 @@ def log_perf(label, elapsed_seconds, detail=""):
print(f"[gitman-perf] {label} took {elapsed_ms}ms{suffix}", file=sys.stderr, flush=True)
-def run_git(args, cwd=None, timeout=15, check=True, text=True):
+def run_git(args, cwd=None, timeout=15, check=True, text=True, nice_value=0, git_config=None):
env = git_env()
+ command = [git_executable(env)]
+ for key, value in (git_config or {}).items():
+ command.extend(["-c", f"{key}={value}"])
+ command.extend(args)
+ if nice_value > 0:
+ command = ["nice", "-n", str(nice_value), *command]
started = time.monotonic()
completed = subprocess.run(
- [git_executable(env), *args],
+ command,
cwd=cwd,
capture_output=True,
text=text,
@@ -1559,8 +1578,16 @@ def fork_repository(owner, source_repo, name, description):
def run_git_import(args, cwd=None, check=True):
+ git_config = {"pack.threads": GIT_IMPORT_PACK_THREADS}
try:
- return run_git(args, cwd=cwd, timeout=GIT_IMPORT_TIMEOUT_SECONDS, check=check)
+ return run_git(
+ args,
+ cwd=cwd,
+ timeout=GIT_IMPORT_TIMEOUT_SECONDS,
+ check=check,
+ nice_value=GIT_IMPORT_NICE,
+ git_config=git_config,
+ )
except subprocess.TimeoutExpired as exc:
raise GitCommandError("Git import timed out.", 124) from exc
@@ -1615,7 +1642,7 @@ def parse_nonnegative_int(value, name):
return parsed
-def save_upload_chunk(source, destination, expected_size):
+def save_upload_chunk(source, destination, expected_size, offset):
written = 0
with destination.open("ab") as target:
remaining = expected_size
@@ -1627,6 +1654,11 @@ def save_upload_chunk(source, destination, expected_size):
written += len(chunk)
remaining -= len(chunk)
if written != expected_size:
+ try:
+ with destination.open("r+b") as target:
+ target.truncate(offset)
+ except OSError:
+ pass
raise ValueError("Upload chunk was incomplete.")
return written
@@ -3764,34 +3796,43 @@ def git_http_backend_response(repo, auth_user):
)
if auth_user:
env["REMOTE_USER"] = auth_user["username"]
-
- completed = subprocess.run(
+ git_protocol = request.get_header("Git-Protocol", "")
+ if git_protocol:
+ env["HTTP_GIT_PROTOCOL"] = git_protocol
+ content_encoding = request.get_header("Content-Encoding", "")
+ if content_encoding:
+ env["HTTP_CONTENT_ENCODING"] = content_encoding
+
+ stderr_file = tempfile.TemporaryFile()
+ process = subprocess.Popen(
[git_http_backend_executable()],
- input=body,
- capture_output=True,
- timeout=60,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=stderr_file,
env=env,
)
- if MAX_GIT_RESPONSE_BYTES and len(completed.stdout) > MAX_GIT_RESPONSE_BYTES:
- return HTTPResponse(
- "Git response too large.\n",
- status=413,
- content_type="text/plain; charset=utf-8",
- )
- if completed.returncode != 0 and not completed.stdout:
- message = completed.stderr.decode("utf-8", "replace").strip() or "Git HTTP backend failed."
- raise GitCommandError(message, completed.returncode)
-
- raw = completed.stdout
- if b"\r\n\r\n" in raw:
- header_bytes, response_body = raw.split(b"\r\n\r\n", 1)
- header_lines = header_bytes.decode("latin-1").split("\r\n")
- elif b"\n\n" in raw:
- header_bytes, response_body = raw.split(b"\n\n", 1)
- header_lines = header_bytes.decode("latin-1").split("\n")
- else:
+ try:
+ process.stdin.write(body)
+ process.stdin.close()
header_lines = []
- response_body = raw
+ while True:
+ line = process.stdout.readline()
+ if not line:
+ break
+ stripped = line.rstrip(b"\r\n")
+ if not stripped:
+ break
+ header_lines.append(stripped.decode("latin-1"))
+
+ if not header_lines and process.poll() is not None and process.returncode != 0:
+ stderr_file.seek(0)
+ message = stderr_file.read().decode("utf-8", "replace").strip() or "Git HTTP backend failed."
+ raise GitCommandError(message, process.returncode)
+ except Exception:
+ process.kill()
+ process.wait()
+ stderr_file.close()
+ raise
status_code = 200
headers = {}
@@ -3809,7 +3850,21 @@ def git_http_backend_response(repo, auth_user):
else:
headers[key] = value
- return HTTPResponse(body=response_body, status=status_code, headers=headers)
+ def stream_git_response():
+ try:
+ while True:
+ chunk = process.stdout.read(1024 * 1024)
+ if not chunk:
+ break
+ yield chunk
+ process.wait()
+ finally:
+ if process.poll() is None:
+ process.kill()
+ process.wait()
+ stderr_file.close()
+
+ return HTTPResponse(body=stream_git_response(), status=status_code, headers=headers)
@app.route("/static/<filename:path>")
@@ -4095,7 +4150,7 @@ def repo_settings_import_bundle_chunk(owner, repo_name):
abort(409, "Upload chunk offset mismatch.")
try:
- save_upload_chunk(request.environ["wsgi.input"], chunk_path, chunk_size)
+ save_upload_chunk(request.environ["wsgi.input"], chunk_path, chunk_size, offset)
current_size = chunk_path.stat().st_size
if current_size < total:
return HTTPResponse("OK\n", content_type="text/plain; charset=utf-8")
@@ -4109,8 +4164,7 @@ def repo_settings_import_bundle_chunk(owner, repo_name):
except UploadTooLarge as exc:
abort(413, str(exc))
except (ValueError, GitCommandError) as exc:
- repo = get_repo(owner, repo_name) or repo
- return render_repo_settings_page(repo, path, error=str(exc))
+ return HTTPResponse(f"{exc}\n", status=400, content_type="text/plain; charset=utf-8")
finally:
try:
if chunk_path.exists() and chunk_path.stat().st_size >= total:
diff --git a/templates/base.tpl b/templates/base.tpl
index d9ecca8..0e8329d 100644
--- a/templates/base.tpl
+++ b/templates/base.tpl
@@ -437,7 +437,7 @@
}
try {
- const chunkSize = 16 * 1024 * 1024;
+ const chunkSize = 4 * 1024 * 1024;
const uploadId = (
crypto.randomUUID ? crypto.randomUUID() : `${Date.now()}-${Math.random()}`
).replace(/[^A-Za-z0-9._-]/g, "");
@@ -452,8 +452,9 @@
let response = null;
let lastError = null;
- for (let attempt = 1; attempt <= 4; attempt += 1) {
+ for (let attempt = 1; attempt <= 8; attempt += 1) {
try {
+ url.searchParams.set("retry", String(attempt));
response = await fetch(url.toString(), {
method: "POST",
headers: {
@@ -467,9 +468,9 @@
} catch (error) {
lastError = error;
}
- if (attempt < 4) {
- if (status) status.textContent = `Retrying upload chunk... ${attempt}/3`;
- await new Promise((resolve) => setTimeout(resolve, attempt * 1000));
+ if (attempt < 8) {
+ if (status) status.textContent = `Retrying upload chunk... ${attempt}/7`;
+ await new Promise((resolve) => setTimeout(resolve, Math.min(attempt * 2000, 15000)));
}
}
if (!response || !response.ok) throw lastError || new Error("Upload failed.");