#!/usr/bin/env python3
"""Simple VCF to CSV converter."""

from __future__ import annotations

import argparse
import csv
from pathlib import Path
from typing import Dict, Iterable, List, Sequence, Tuple

# Columns written when neither --fields nor --include-all is given.
DEFAULT_FIELDS: Sequence[str] = (
    "FN",
    "N",
    "NICKNAME",
    "ORG",
    "TITLE",
    "TEL",
    "EMAIL",
    "ADR",
    "URL",
    "BDAY",
    "NOTE",
)


def unfold_lines(lines: Iterable[str]) -> Iterable[str]:
    """Unfold RFC 6350 folded lines (continuations start with space or tab).

    Args:
        lines: Raw lines, with or without their trailing line terminators.

    Yields:
        Logical lines with every continuation appended to the line it
        continues. Blank input lines are never yielded; they only flush the
        line collected so far.
    """
    current: str | None = None
    for raw_line in lines:
        line = raw_line.rstrip("\r\n")
        if not line:
            # Blank lines terminate the current buffer.
            if current is not None:
                yield current
                current = None
            continue
        if line.startswith((" ", "\t")):
            if current is None:
                # Continuation with nothing to continue: keep its text as a
                # line of its own instead of dropping it.
                current = line.lstrip()
            else:
                # Only the single leading fold character is removed.
                current += line[1:]
        else:
            if current is not None:
                yield current
            current = line
    if current is not None:
        yield current


def parse_vcards(path: Path) -> List[Dict[str, List[str]]]:
    """Parse every vCard in *path* into a mapping of property name to values.

    Property parameters (``;TYPE=...``) and group prefixes (``item1.``) are
    dropped from the key, and keys are upper-cased. A property that occurs
    several times in one card contributes several values, in file order.
    Content outside a ``BEGIN:VCARD`` / ``END:VCARD`` pair and lines without
    a ``:`` separator are ignored.

    Args:
        path: Location of the ``.vcf`` file. It is decoded as UTF-8 (a
            leading byte-order mark is tolerated); undecodable bytes are
            replaced rather than raising.

    Returns:
        One dictionary per completed card, in file order.
    """
    entries: List[Dict[str, List[str]]] = []
    current: Dict[str, List[str]] | None = None
    # "utf-8-sig" strips a leading BOM; with plain "utf-8" the BOM stays glued
    # to the first "BEGIN:VCARD" and the first card is silently lost.
    with path.open("r", encoding="utf-8-sig", errors="replace") as handle:
        for line in unfold_lines(handle):
            if not line:
                continue
            # Compare markers without surrounding whitespace so that a
            # trailing space does not hide a card boundary.
            marker = line.strip().upper()
            if marker == "BEGIN:VCARD":
                current = {}
                continue
            if marker == "END:VCARD":
                if current is not None:
                    entries.append(current)
                current = None
                continue
            if current is None:
                continue
            if ":" not in line:
                continue
            prop, value = line.split(":", 1)
            name = prop.split(";", 1)[0]
            # RFC 6350 content lines may carry a "group." prefix
            # (e.g. "item1.EMAIL"); neither group nor name can contain ".",
            # so everything after the last dot is the property name.
            key = name.rpartition(".")[2].upper()
            current.setdefault(key, []).append(value)
    return entries


def determine_headers(
    cards: Sequence[Dict[str, List[str]]],
    fields: Sequence[str] | None,
    include_all: bool,
) -> List[Tuple[str, str]]:
    """Return (header, key) pairs for CSV writing.

    Args:
        cards: Parsed cards; only consulted when *include_all* is true.
        fields: Explicit field names. Blank entries are discarded and the
            rest are stripped and upper-cased.
        include_all: When true, use every key found in *cards* (sorted) and
            ignore *fields*.

    Returns:
        Pairs of CSV column header and card key; falls back to
        ``DEFAULT_FIELDS`` when neither option selects anything.

    Raises:
        ValueError: If *fields* is non-empty but contains only blank names.
    """
    if include_all:
        unique_keys = sorted({key for card in cards for key in card.keys()})
        return [(key, key) for key in unique_keys]
    if fields:
        cleaned = [field.strip() for field in fields if field.strip()]
        if not cleaned:
            raise ValueError("No valid field names provided")
        return [(field.upper(), field.upper()) for field in cleaned]
    return [(field, field) for field in DEFAULT_FIELDS]


def flatten_card(
    card: Dict[str, List[str]],
    headers: Sequence[Tuple[str, str]],
) -> Dict[str, str]:
    """Build one CSV row: multiple values are joined with " | ".

    Keys missing from *card* produce an empty string for that column.
    """
    return {header: " | ".join(card.get(key, [])) for header, key in headers}


def convert_vcf_to_csv(
    input_path: Path,
    output_path: Path,
    fields: Sequence[str] | None = None,
    include_all: bool = False,
    delimiter: str = ",",
) -> None:
    """Convert the vCards in *input_path* to a CSV file at *output_path*.

    Args:
        input_path: Source ``.vcf`` file.
        output_path: Destination CSV file; written as UTF-8.
        fields: Optional explicit column list (see ``determine_headers``).
        include_all: Emit a column for every property found in the input.
        delimiter: Column separator handed to ``csv.DictWriter``.

    Raises:
        SystemExit: If the input contains no vCards.
        ValueError: If *fields* holds only blank names.
    """
    cards = parse_vcards(input_path)
    if not cards:
        raise SystemExit(f"No vCards found in {input_path}")

    headers = determine_headers(cards, fields, include_all)

    # newline="" lets the csv module control line endings itself.
    with output_path.open("w", encoding="utf-8", newline="") as csv_file:
        writer = csv.DictWriter(
            csv_file,
            fieldnames=[header for header, _ in headers],
            delimiter=delimiter,
        )
        writer.writeheader()
        for card in cards:
            writer.writerow(flatten_card(card, headers))


def parse_args() -> argparse.Namespace:
    """Parse the command line (input, output, --fields, --include-all, --delimiter)."""
    parser = argparse.ArgumentParser(description="Convert VCF/vCard files to CSV")
    parser.add_argument("input", type=Path, help="Path to the .vcf file")
    parser.add_argument("output", type=Path, help="Destination .csv file")
    parser.add_argument(
        "--fields",
        # Turned into a list here; blank names are filtered out later.
        type=lambda value: value.split(","),
        help="Comma separated list of field names to include (default uses a sensible subset)",
    )
    parser.add_argument(
        "--include-all",
        action="store_true",
        help="Include every property encountered in the VCF (may include technical fields)",
    )
    parser.add_argument(
        "--delimiter",
        default=",",
        help="CSV delimiter (default: comma)",
    )
    return parser.parse_args()


def main() -> None:
    """Command-line entry point: parse arguments and run the conversion."""
    args = parse_args()
    convert_vcf_to_csv(
        input_path=args.input,
        output_path=args.output,
        fields=args.fields,
        include_all=args.include_all,
        delimiter=args.delimiter,
    )


if __name__ == "__main__":
    main()