patx/gitman

fix file uploads for large repo bundles

Commit 11a2984 · patx · 2026-05-07T03:16:12Z

Changeset
11a2984367e64b8ee8c485614362d8f59187c362
Parents
10a4b500c460fdcb8b542d5ce58d9cdbee039613

View source at this commit

Comments

No comments yet.

Log in to comment

Diff

diff --git a/README.md b/README.md
index aac786b..026d1a9 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ Pages-style static hosting is driven by the Git repository contents. A user site
 - `GITMAN_GIT_BINARY`: Git executable name or full path, default `git`
 - `GITMAN_PAGES_DOMAIN`: wildcard Pages domain, default `gitman.io`
 - `GITMAN_MAX_FORM_BYTES`: maximum browser form body size, default `65536`
-- `GITMAN_MAX_IMPORT_BYTES`: maximum Git bundle import upload size, default `5368709120`
+- `GITMAN_MAX_IMPORT_BYTES`: maximum Git bundle import upload size, default `2147483648`
 - `GITMAN_IMPORT_TIMEOUT_SECONDS`: maximum Git bundle verify/fetch time, default `3600`
 - `GITMAN_GUNICORN_TIMEOUT_SECONDS`: Gunicorn worker timeout, default `GITMAN_IMPORT_TIMEOUT_SECONDS + 300`
 - `GITMAN_MAX_RENDER_BYTES`: maximum file preview size, default `262144`
diff --git a/app.py b/app.py
index 6abf43f..cb1824f 100644
--- a/app.py
+++ b/app.py
@@ -64,7 +64,7 @@ DEBUG = env_bool("GITMAN_DEBUG")
 PASSWORD_ITERATIONS = 260_000
 SQLITE_BUSY_TIMEOUT_MS = 30_000
 MAX_FORM_BYTES = env_int("GITMAN_MAX_FORM_BYTES", 64 * 1024)
-MAX_IMPORT_BYTES = env_int("GITMAN_MAX_IMPORT_BYTES", 1 * 1024 * 1024 * 1024)
+MAX_IMPORT_BYTES = env_int("GITMAN_MAX_IMPORT_BYTES", 2 * 1024 * 1024 * 1024)
 IMPORT_UPLOAD_CHUNK_BYTES = 1024 * 1024
 GIT_IMPORT_TIMEOUT_SECONDS = env_int("GITMAN_IMPORT_TIMEOUT_SECONDS", 3600, minimum=1)
 MAX_RENDER_BYTES = env_int("GITMAN_MAX_RENDER_BYTES", 256 * 1024)
@@ -259,6 +259,9 @@ class StreamingUpload:
         self.file = file
 
 
+# Client-supplied upload ids become part of a temp-file name, so only a short
+# run of filename-safe characters is accepted. The pattern ends with \Z rather
+# than $ because $ also matches just before a trailing newline, which would let
+# an id such as "abc\n" pass validation.
+UPLOAD_ID_RE = re.compile(r"^[A-Za-z0-9._-]{1,80}\Z")
+
+
 def validate_startup_config():
     if not DEBUG and SECRET_KEY == DEFAULT_SECRET_KEY:
         raise RuntimeError("SECRET_KEY must be set to a non-default value when GITMAN_DEBUG is disabled.")
@@ -614,7 +617,13 @@ def is_repo_settings_multipart_request():
 
 def is_repo_settings_import_stream_request():
+    """Return True for octet-stream requests aimed at a bundle import endpoint."""
     path = request.environ.get("PATH_INFO", request.path) or ""
-    return path.endswith("/settings/import-bundle") and request_content_type().startswith("application/octet-stream")
+    # Both the single-request endpoint and the chunked endpoint stream raw bytes.
+    import_suffixes = ("/settings/import-bundle", "/settings/import-bundle/chunk")
+    if not path.endswith(import_suffixes):
+        return False
+    return request_content_type().startswith("application/octet-stream")
 
 
 def browser_post_size_limit():
@@ -1526,6 +1535,38 @@ def save_bundle_upload(upload, destination):
         raise ValueError("Uploaded bundle is empty.")
 
 
+def import_upload_chunks_dir():
+    """Return the directory holding partially uploaded bundles, creating it if needed.
+
+    The directory is ``gitman-import-chunks`` under the system temp directory
+    and is requested with mode 0o700 when it has to be created.
+    """
+    chunks_dir = Path(tempfile.gettempdir()).joinpath("gitman-import-chunks")
+    chunks_dir.mkdir(parents=True, exist_ok=True, mode=0o700)
+    return chunks_dir
+
+
+def parse_nonnegative_int(value, name):
+    """Parse ``value`` as an integer >= 0, aborting the request with 400 otherwise.
+
+    ``name`` is only used to build the "Invalid <name>." error message.
+    """
+    try:
+        number = int(value)
+    except (TypeError, ValueError):
+        # Unparseable input takes the same rejection path as a negative number.
+        number = -1
+    if number < 0:
+        abort(400, f"Invalid {name}.")
+    return number
+
+
+def save_upload_chunk(source, destination, expected_size):
+    """Append exactly ``expected_size`` bytes read from ``source`` to ``destination``.
+
+    ``source`` is any object with a ``read(n)`` method and ``destination`` is a
+    path object with ``open()``. Data is copied in pieces of at most
+    ``IMPORT_UPLOAD_CHUNK_BYTES``.
+
+    If the source ends early, or reading/writing fails part-way, the
+    destination is truncated back to the size it had on entry, so a retry at
+    the same offset still lines up with the file size.
+
+    Returns the number of bytes appended (always ``expected_size``).
+    Raises ``ValueError("Upload chunk was incomplete.")`` on a short read.
+    """
+    remaining = expected_size
+    with destination.open("ab") as target:
+        # Append mode positions the stream at end-of-file, so this is the
+        # size of the file before this chunk.
+        start = target.tell()
+        try:
+            while remaining > 0:
+                chunk = source.read(min(IMPORT_UPLOAD_CHUNK_BYTES, remaining))
+                if not chunk:
+                    break
+                target.write(chunk)
+                remaining -= len(chunk)
+            if remaining:
+                raise ValueError("Upload chunk was incomplete.")
+        except Exception:
+            # Drop the partial chunk; truncate() flushes buffered writes first.
+            target.truncate(start)
+            raise
+    return expected_size
+
+
 def bundle_ref_names(bundle_path):
     completed = run_git_import(["bundle", "list-heads", str(bundle_path)])
     refs = []
@@ -3677,6 +3718,67 @@ def repo_settings_import_bundle(owner, repo_name):
         return render_repo_settings_page(repo, path, error=str(exc))
 
 
+@app.post("/<owner>/<repo_name>/settings/import-bundle/chunk")
+def repo_settings_import_bundle_chunk(owner, repo_name):
+    """Accept one chunk of a Git bundle upload and import the bundle once complete.
+
+    Query parameters: ``upload_id`` (validated against ``UPLOAD_ID_RE``),
+    ``total`` (full bundle size), ``offset`` (where this chunk starts) and an
+    optional ``filename``. The request body is the raw chunk and must be sent
+    as ``application/octet-stream``.
+
+    Chunks are appended to one temp file per user/repo/upload id and must
+    arrive in order: ``offset`` has to equal the bytes stored so far, else the
+    request is rejected with 409. A chunk at offset 0 restarts the upload.
+
+    Responses:
+    - plain-text "OK" while more chunks are expected;
+    - the repository settings page (notice or error) after the final chunk;
+    - 400/403/404/409/413 via ``abort`` for invalid or rejected requests.
+      A chunk that fails to store is rolled back and answered with 400, so
+      the client sees a failed request and can resend the same offset.
+    """
+    user = require_login()
+    repo = get_repo(owner, repo_name)
+    if not repo:
+        abort(404, "Repository not found.")
+    if not user_owns_repo(user, repo):
+        abort(403, "Only the owner can update repository settings.")
+    if not request_content_type().startswith("application/octet-stream"):
+        abort(400, "Git bundle uploads must use application/octet-stream.")
+
+    path = repo_path(owner, repo_name)
+    filename = os.path.basename(request.query.get("filename", "repo.bundle")) or "repo.bundle"
+    upload_id = request.query.get("upload_id", "")
+    # fullmatch: the id ends up in a file name, so not even a trailing newline
+    # (which "$" tolerates under match()) may slip through.
+    if not UPLOAD_ID_RE.fullmatch(upload_id):
+        abort(400, "Invalid upload id.")
+
+    total = parse_nonnegative_int(request.query.get("total", ""), "total")
+    offset = parse_nonnegative_int(request.query.get("offset", ""), "offset")
+    chunk_size = request_content_length()
+    if total <= 0:
+        abort(400, "Upload is empty.")
+    if MAX_IMPORT_BYTES and total > MAX_IMPORT_BYTES:
+        abort(413, "Request body too large.")
+    if chunk_size <= 0 or offset + chunk_size > total:
+        abort(400, "Invalid upload chunk.")
+
+    chunks_dir = import_upload_chunks_dir()
+    chunk_path = chunks_dir / f"{user['id']}-{repo['id']}-{upload_id}.bundle"
+    # Offset 0 means the client is (re)starting this upload from scratch.
+    if offset == 0 and chunk_path.exists():
+        chunk_path.unlink()
+
+    current_size = chunk_path.stat().st_size if chunk_path.exists() else 0
+    if current_size != offset:
+        abort(409, "Upload chunk offset mismatch.")
+
+    try:
+        save_upload_chunk(request.environ["wsgi.input"], chunk_path, chunk_size)
+    except Exception as exc:
+        # Remove whatever part of this chunk reached the file so a retry at
+        # the same offset is accepted. Best effort: if the rollback itself
+        # fails, the retry is rejected with 409 by the check above.
+        try:
+            os.truncate(chunk_path, offset)
+        except OSError:
+            pass
+        if isinstance(exc, ValueError):
+            # Answer with an error status rather than a rendered page, so the
+            # client does not take the chunk for stored and move on.
+            abort(400, str(exc))
+        raise
+
+    try:
+        current_size = chunk_path.stat().st_size
+        if current_size < total:
+            return HTTPResponse("OK\n", content_type="text/plain; charset=utf-8")
+        if current_size != total:
+            raise ValueError("Upload size mismatch.")
+
+        with chunk_path.open("rb") as bundle_file:
+            import_git_bundle(repo, path, StreamingUpload(filename, bundle_file))
+        updated_repo = get_repo(owner, repo_name)
+        return render_repo_settings_page(updated_repo, path, notice="Git bundle imported.")
+    except UploadTooLarge as exc:
+        abort(413, str(exc))
+    except (ValueError, GitCommandError) as exc:
+        repo = get_repo(owner, repo_name) or repo
+        return render_repo_settings_page(repo, path, error=str(exc))
+    finally:
+        # Once every byte has arrived the temp file is no longer needed,
+        # whether the import succeeded or not. Cleanup is best effort.
+        try:
+            if chunk_path.exists() and chunk_path.stat().st_size >= total:
+                chunk_path.unlink()
+        except OSError:
+            pass
+
 @app.route("/<owner>/<repo_name>/settings", method=["GET", "POST"])
 def repo_settings(owner, repo_name):
     user = require_login()
diff --git a/templates/base.tpl b/templates/base.tpl
index e3d91cb..d9ecca8 100644
--- a/templates/base.tpl
+++ b/templates/base.tpl
@@ -423,6 +423,7 @@
           const csrf = form.querySelector('input[name="_csrf_token"]');
           const url = new URL(form.dataset.uploadUrl, window.location.origin);
           url.searchParams.set("filename", file.name || "repo.bundle");
+          if (file.size <= 0) return;
 
           if (status) {
             status.className = "muted";
@@ -436,15 +437,55 @@
           }
 
           try {
-            const response = await fetch(url.toString(), {
-              method: "POST",
-              headers: {
-                "Content-Type": "application/octet-stream",
-                "X-CSRF-Token": csrf ? csrf.value : "",
-              },
-              body: file,
-            });
-            replaceDocument(await response.text());
+            const chunkSize = 16 * 1024 * 1024;
+            const uploadId = (
+              crypto.randomUUID ? crypto.randomUUID() : `${Date.now()}-${Math.random()}`
+            ).replace(/[^A-Za-z0-9._-]/g, "");
+            let offset = 0;
+            let finalResponse = null;
+
+            while (offset < file.size) {
+              const end = Math.min(offset + chunkSize, file.size);
+              url.searchParams.set("upload_id", uploadId);
+              url.searchParams.set("offset", String(offset));
+              url.searchParams.set("total", String(file.size));
+
+              let response = null;
+              let lastError = null;
+              for (let attempt = 1; attempt <= 4; attempt += 1) {
+                try {
+                  response = await fetch(url.toString(), {
+                    method: "POST",
+                    headers: {
+                      "Content-Type": "application/octet-stream",
+                      "X-CSRF-Token": csrf ? csrf.value : "",
+                    },
+                    body: file.slice(offset, end),
+                  });
+                  if (response.ok) break;
+                  lastError = new Error(await response.text());
+                } catch (error) {
+                  lastError = error;
+                }
+                if (attempt < 4) {
+                  if (status) status.textContent = `Retrying upload chunk... ${attempt}/3`;
+                  await new Promise((resolve) => setTimeout(resolve, attempt * 1000));
+                }
+              }
+              if (!response || !response.ok) throw lastError || new Error("Upload failed.");
+
+              offset = end;
+              if (offset < file.size) {
+                if (status) {
+                  status.textContent = `Uploading Git bundle... ${Math.floor((offset / file.size) * 100)}%`;
+                }
+              } else {
+                finalResponse = response;
+              }
+            }
+
+            if (status) status.textContent = "Importing Git bundle...";
+            replaceDocument(await finalResponse.text());
           } catch (error) {
             if (status) {
               status.className = "alert";
diff --git a/templates/repo_settings.tpl b/templates/repo_settings.tpl
index dade147..4517b3a 100644
--- a/templates/repo_settings.tpl
+++ b/templates/repo_settings.tpl
@@ -28,7 +28,7 @@
   <h2>Import Git bundle</h2>
   <p class="muted">Create a bundle from the source repository, then upload it here.</p>
   <pre>git bundle create repo.bundle --all</pre>
-  <form method="post" enctype="multipart/form-data" data-import-bundle-form data-upload-url="/{{repo['owner_username']}}/{{repo['name']}}/settings/import-bundle">
+  <form method="post" enctype="multipart/form-data" data-import-bundle-form data-upload-url="/{{repo['owner_username']}}/{{repo['name']}}/settings/import-bundle/chunk">
     {{!csrf_field()}}
     <input type="hidden" name="action" value="import_bundle">
     <label>