diff options
| author | Pascal Dulieu <pascal@dulieu.uk> | 2026-03-24 16:52:00 +0000 |
|---|---|---|
| committer | Pascal Dulieu <pascal@dulieu.uk> | 2026-03-24 16:52:00 +0000 |
| commit | 7732450be95eca4840a05346988c42f3c3511004 (patch) | |
| tree | fcc4b9f8cb539a3569667ad21a1bee2dc0366dd4 | |
| -rw-r--r-- | README.md | 24 | ||||
| -rwxr-xr-x | remux_xdcam_av.py | 268 |
2 files changed, 292 insertions, 0 deletions
#!/usr/bin/env python3
"""
Remux Sony XDCAM-style separate-essence MXF folders: one *V##.MXF (MPEG-2 HD422)
plus *A##.MXF mono PCM tracks, into a single MXF with stream copy (no re-encode).

Ignores substream (*S##.MXF), proxy/metadata, XML/SMI, etc.

Run on a clip folder, or on a parent folder to process every subdirectory that contains
a full V+A essence set. The output is written next to the split essence files as
<prefix>_AV.mxf unless -o is given.

Requirements:
    FFmpeg (https://ffmpeg.org/) on PATH.

Usage:
    ./remux_xdcam_av.py /path/to/media/clip
    ./remux_xdcam_av.py /path/to/media

Options:
    -o /path/to/out.mxf     custom output path (single folder mode)
    -n                      print ffmpeg command(s) only
    --overwrite             replace existing output
    --no-follow-symlinks    do not traverse symlinked dirs
    --scan                  print folder discovery
"""

from __future__ import annotations

import argparse
import os
import re
import shlex
import subprocess
import sys
from collections.abc import Iterator
from pathlib import Path

# Track index: one to three digits (V1 / V01 / V001). Matching is case-insensitive.
VIDEO_RE = re.compile(r"^(.+?)V(\d{1,3})\.mxf$", re.IGNORECASE)
AUDIO_RE = re.compile(r"^(.+?)A(\d{1,3})\.mxf$", re.IGNORECASE)

# (clip prefix, video essence path, [(audio track index, audio essence path), ...])
ClipParts = tuple[str, Path, list[tuple[int, Path]]]


def discover(folder: Path) -> tuple[list[tuple[str, int, Path]], list[tuple[str, int, Path]]]:
    """Classify the *.mxf files directly inside *folder*.

    Returns:
        ``(videos, audios)``; each entry is ``(prefix, track_index, path)``.
        Files matching neither VIDEO_RE nor AUDIO_RE are ignored.
    """
    videos: list[tuple[str, int, Path]] = []
    audios: list[tuple[str, int, Path]] = []
    # Sorted so that discovery order (and therefore tie-breaking) is deterministic.
    for path in sorted(folder.glob("*.[Mm][Xx][Ff]")):
        video_match = VIDEO_RE.match(path.name)
        if video_match:
            videos.append((video_match.group(1), int(video_match.group(2)), path))
            continue
        audio_match = AUDIO_RE.match(path.name)
        if audio_match:
            audios.append((audio_match.group(1), int(audio_match.group(2)), path))
    return videos, audios


def pick_clip_videos(videos: list[tuple[str, int, Path]]) -> tuple[str, Path]:
    """Return ``(prefix, video_path)`` for the single video essence in *videos*.

    Raises:
        ValueError: if there is no video, more than one clip prefix, or more than
            one video file for the prefix.
    """
    if not videos:
        raise ValueError("No video essence file matching *V##.MXF (e.g. C0006V01.MXF).")
    by_prefix: dict[str, list[tuple[int, Path]]] = {}
    for prefix, idx, path in videos:
        by_prefix.setdefault(prefix, []).append((idx, path))
    if len(by_prefix) > 1:
        raise ValueError(
            "Multiple clip prefixes with video essence: "
            + ", ".join(sorted(by_prefix))
            + ". Put one clip per folder or process folders separately."
        )
    prefix, tracks = next(iter(by_prefix.items()))
    tracks = sorted(tracks, key=lambda item: item[0])
    if len(tracks) > 1:
        raise ValueError(
            f"Multiple video essence files for prefix {prefix!r}: "
            + ", ".join(p.name for _, p in tracks)
            + ". Expected a single V track per remux."
        )
    return prefix, tracks[0][1]


def _audios_for_prefix(
    audios: list[tuple[str, int, Path]], prefix: str
) -> list[tuple[int, Path]]:
    """Return the ``(track_index, path)`` audio entries of *prefix*, ordered by index."""
    return sorted(
        ((idx, path) for pfx, idx, path in audios if pfx == prefix),
        key=lambda item: item[0],
    )


def resolve_clip_required(folder: Path) -> ClipParts:
    """Resolve the clip in *folder*, exiting the program with a message on failure."""
    videos, audios = discover(folder)
    if not videos:
        sys.exit(f"No video essence file (*V##.MXF) in {folder}")
    try:
        prefix, video_path = pick_clip_videos(videos)
    except ValueError as e:
        sys.exit(f"{folder}: {e}")
    clip_audios = _audios_for_prefix(audios, prefix)
    if not clip_audios:
        sys.exit(f"No audio essence *A##.MXF for clip prefix {prefix!r} in {folder}.")
    return prefix, video_path, clip_audios


def try_resolve_clip(folder: Path) -> ClipParts | None:
    """Resolve the clip in *folder*, or return ``None`` if it is not a usable clip folder.

    A folder without any video essence is skipped silently; an ambiguous folder or
    one without matching audio is reported on stderr.
    """
    videos, audios = discover(folder)
    if not videos:
        return None
    try:
        prefix, video_path = pick_clip_videos(videos)
    except ValueError as e:
        print(f"Skipping {folder}: {e}", file=sys.stderr)
        return None
    clip_audios = _audios_for_prefix(audios, prefix)
    if not clip_audios:
        print(
            f"Skipping {folder}: no audio essence *A##.MXF for prefix {prefix!r}.",
            file=sys.stderr,
        )
        return None
    return prefix, video_path, clip_audios


def _walk_dirs(
    root: Path, *, follow_symlinks: bool
) -> Iterator[tuple[str, list[str], list[str]]]:
    """Yield ``os.walk`` triples below *root*, visiting each real directory once.

    ``os.walk(followlinks=True)`` does not remember visited directories, so a
    symlink that points at an ancestor would otherwise be re-entered over and over.
    Directories whose resolved path was already seen are pruned.
    """
    seen: set[str] = set()
    for dirpath, dirnames, filenames in os.walk(root, followlinks=follow_symlinks):
        real = os.path.realpath(dirpath)
        if real in seen:
            dirnames[:] = []  # in-place clear stops os.walk from descending further
            continue
        seen.add(real)
        yield dirpath, dirnames, filenames


def iter_clip_folders(root: Path, *, follow_symlinks: bool) -> list[tuple[Path, ClipParts]]:
    """Return ``(folder, parts)`` for every usable clip folder under *root*, sorted by path."""
    found: list[tuple[Path, ClipParts]] = []
    for dirpath, _, _ in _walk_dirs(root, follow_symlinks=follow_symlinks):
        folder = Path(dirpath)
        parts = try_resolve_clip(folder)
        if parts is not None:
            found.append((folder, parts))
    return sorted(found, key=lambda item: str(item[0]))


def scan_tree(root: Path, *, follow_symlinks: bool) -> None:
    """Print what is on disk: helps debug 'no clip folders found' (naming, symlinks, layout)."""
    print(f"Scanning: {root}", file=sys.stderr)
    print(f"follow_symlinks={follow_symlinks}", file=sys.stderr)
    dirs_seen = 0
    dirs_with_mxf: list[tuple[str, list[str]]] = []
    for dirpath, _, filenames in _walk_dirs(root, follow_symlinks=follow_symlinks):
        dirs_seen += 1
        mxf_files = sorted(f for f in filenames if f.lower().endswith(".mxf"))
        if mxf_files:
            dirs_with_mxf.append((dirpath, mxf_files))
    print(f"Directories visited: {dirs_seen}", file=sys.stderr)
    print(f"Directories containing .mxf: {len(dirs_with_mxf)}", file=sys.stderr)
    # Cap the listing so a huge media volume does not flood the terminal.
    for d, names in dirs_with_mxf[:200]:
        v_ok = sum(1 for n in names if VIDEO_RE.match(n))
        a_ok = sum(1 for n in names if AUDIO_RE.match(n))
        print(f" {d}", file=sys.stderr)
        print(f" files={len(names)} names_like_*V*#={v_ok} *A*#={a_ok}", file=sys.stderr)
        if v_ok == 0 or a_ok == 0:
            print(f" sample: {names[:8]}{'…' if len(names) > 8 else ''}", file=sys.stderr)
    if len(dirs_with_mxf) > 200:
        print(f" … {len(dirs_with_mxf) - 200} more directories with .mxf not shown", file=sys.stderr)


def build_cmd(parts: ClipParts, out: Path, *, overwrite: bool) -> list[str]:
    """Build the ffmpeg argument list that stream-copies the clip into *out*.

    Input 0 is the video essence; inputs 1..N are the audio essences in track order,
    each mapped as one audio stream of the output.
    """
    _, video_path, clip_audios = parts
    inputs: list[Path] = [video_path, *(path for _, path in clip_audios)]
    cmd: list[str] = ["ffmpeg", "-hide_banner", "-y" if overwrite else "-n"]
    for path in inputs:
        cmd += ["-i", str(path)]
    cmd += ["-map", "0:v:0"]
    for input_index in range(1, len(inputs)):
        cmd += ["-map", f"{input_index}:a:0"]
    cmd += ["-c", "copy", "-shortest", "-f", "mxf", str(out)]
    return cmd


def _format_cmd(cmd: list[str]) -> str:
    """Render *cmd* as one line that can be pasted into the platform's shell."""
    if os.name == "nt":
        return subprocess.list2cmdline(cmd)
    # list2cmdline uses Windows quoting rules; POSIX shells need shlex quoting.
    return shlex.join(cmd)


def run_remux(
    folder: Path,
    parts: ClipParts,
    output: Path | None,
    dry_run: bool,
    *,
    overwrite: bool,
) -> int:
    """Remux one clip (or just print the command when *dry_run* is set).

    Returns:
        The ffmpeg exit status; 0 when skipped or in dry-run mode; 127 when the
        ffmpeg executable cannot be found.
    """
    prefix, video_path, clip_audios = parts
    out = output if output is not None else folder / f"{prefix}_AV.mxf"

    print("Clip prefix:", prefix)
    print("Folder:", folder)
    print("Video:", video_path.name)
    print("Audio tracks (channel order):", ", ".join(f"A{a:02d}" for a, _ in clip_audios))
    print("Output:", out)
    if not dry_run and out.exists() and not overwrite:
        print()
        print(f"Skipping — output already exists: {out}", file=sys.stderr)
        print("Use --overwrite to replace it.", file=sys.stderr)
        return 0
    cmd = build_cmd(parts, out, overwrite=overwrite)
    if dry_run:
        print()
        print(_format_cmd(cmd))
        return 0
    try:
        completed = subprocess.run(cmd, check=False)
    except FileNotFoundError:
        print("ffmpeg not found: install FFmpeg and make sure it is on PATH.", file=sys.stderr)
        return 127
    return completed.returncode


def main() -> None:
    """Parse the command line and remux one clip folder or every clip folder under a root."""
    ap = argparse.ArgumentParser(
        description=(
            "Remux XDCAM folder (*V## + *A## MXF) to one MXF per clip, codecs copied. "
            "Give a clip folder, or a parent folder to scan recursively for clip folders."
        ),
    )
    ap.add_argument(
        "folder",
        type=Path,
        help="Clip folder with essence MXF files, or a parent folder to scan",
    )
    ap.add_argument(
        "-o",
        "--output",
        type=Path,
        help=(
            "Output MXF path (only when FOLDER is a single clip folder; "
            "not used when scanning subfolders)"
        ),
    )
    ap.add_argument(
        "-n",
        "--dry-run",
        action="store_true",
        help="Print ffmpeg command(s) and exit",
    )
    ap.add_argument(
        "--overwrite",
        action="store_true",
        help="Replace existing output MXF (default: skip if *_AV.mxf already exists)",
    )
    ap.add_argument(
        "--no-follow-symlinks",
        action="store_true",
        help=(
            "Do not walk into symlinked directories. By default symlinks are followed "
            "(needed on some volumes where clip folders point at the real media)."
        ),
    )
    ap.add_argument(
        "--scan",
        action="store_true",
        help="List directories that contain .mxf and how many match *V* / *A* patterns; then exit",
    )
    args = ap.parse_args()
    root = args.folder.expanduser().resolve()
    if not root.is_dir():
        sys.exit(f"Not a directory: {root}")

    follow_symlinks = not args.no_follow_symlinks

    if args.scan:
        scan_tree(root, follow_symlinks=follow_symlinks)
        raise SystemExit(0)

    # An explicit output path only makes sense for exactly one clip folder.
    if args.output is not None:
        parts = resolve_clip_required(root)
        raise SystemExit(
            run_remux(
                root,
                parts,
                args.output.expanduser().resolve(),
                args.dry_run,
                overwrite=args.overwrite,
            )
        )

    items = iter_clip_folders(root, follow_symlinks=follow_symlinks)
    if not items:
        sys.exit(
            f"No clip folders found under {root} (need *V#.MXF + matching *A#.MXF in the same folder).\n"
            f"Re-run with --scan on that path to list .mxf locations and naming; "
            f"by default symlinked folders are followed (avoid --no-follow-symlinks unless needed)."
        )

    exit_code = 0
    for position, (clip_dir, parts) in enumerate(items, start=1):
        if len(items) > 1:
            print()
            print("=" * 72)
            print(f"[{position}/{len(items)}] {clip_dir}")
            print("=" * 72)
        rc = run_remux(clip_dir, parts, None, args.dry_run, overwrite=args.overwrite)
        if rc != 0:
            # Keep going through the remaining clips; report the last failure status.
            exit_code = rc
    raise SystemExit(exit_code)


if __name__ == "__main__":
    main()
