JSON to YAML in Python: PyYAML, ruamel.yaml, and Conversion Patterns
Last updated:
Python is the go-to language for JSON-to-YAML conversion — two libraries cover nearly every use case. PyYAML handles the common path with a two-line conversion. ruamel.yaml handles the nuanced path when key order, comments, or YAML 1.2 compliance matter. This guide covers both, plus edge cases like multiline strings, float precision, batch file conversion, and the reverse direction.
Basic JSON to YAML with PyYAML
import json
import yaml
# From JSON string
json_str = '''
{
"name": "Alice",
"age": 30,
"address": {
"city": "London",
"country": "UK"
},
"tags": ["developer", "python", "devops"],
"active": true,
"score": null
}
'''
data = json.loads(json_str)
yaml_str = yaml.dump(data, default_flow_style=False, allow_unicode=True)
print(yaml_str)
# active: true
# address:
#   city: London
#   country: UK
# age: 30
# name: Alice
# score: null
# tags:
# - developer
# - python
# - devops
# From JSON file
with open('config.json') as f:
    data = json.load(f)
with open('config.yaml', 'w', encoding='utf-8') as f:
    yaml.dump(data, f, default_flow_style=False, allow_unicode=True)
yaml.dump() Options Reference
| Parameter | Default | Description |
|---|---|---|
| default_flow_style | False | False=block style, True=inline {a: b} |
| allow_unicode | False | True=preserve Unicode; False=escape as \uXXXX |
| indent | 2 | Indentation spaces (2 or 4 recommended) |
| sort_keys | True | Sort object keys alphabetically |
| width | 80 | Line wrap width |
| default_style | None | '\|' for literal blocks, '>' for folded |
| explicit_start | False | Add --- document start marker |
# Common option combinations
# Human-readable config output
yaml.dump(data,
default_flow_style=False,
allow_unicode=True,
sort_keys=False, # preserve dict insertion order (Python 3.7+)
indent=2)
# Kubernetes/Docker Compose style
yaml.dump(data,
default_flow_style=False,
allow_unicode=True,
sort_keys=True,
explicit_start=True) # adds "---" at top
# Output starts with:
# ---
# apiVersion: apps/v1
# ...
Preserve Key Order with ruamel.yaml
from ruamel.yaml import YAML
# ruamel.yaml preserves key order and comments
yaml = YAML()
yaml.default_flow_style = False
yaml.allow_unicode = True
yaml.indent(mapping=2, sequence=4, offset=2)
import json
from io import StringIO
json_str = '{"name": "Alice", "email": "alice@example.com", "age": 30}'
data = json.loads(json_str)
# Write to string (ruamel.yaml requires stream)
stream = StringIO()
yaml.dump(data, stream)
yaml_str = stream.getvalue()
print(yaml_str)
# name: Alice
# email: alice@example.com # order preserved! (PyYAML would sort: age, email, name)
# age: 30
# Write to file
with open('output.yaml', 'w', encoding='utf-8') as f:
    yaml.dump(data, f)
Handle Special Types and Edge Cases
import json
import yaml
from datetime import datetime
# JSON dates are strings — they convert to YAML strings (not YAML timestamps)
data = {
"created_at": "2026-01-15T10:30:00Z", # stays as string in YAML
"duration_ms": 1500.5,
"tags": [], # empty array → YAML: tags: []
"meta": {}, # empty object → YAML: meta: {}
"note": None, # JSON null → YAML: note: null
"ratio": 0.1 + 0.2, # IEEE 754 float → 0.30000000000000004 in YAML
}
# Round float precision issues
import math
def clean_floats(obj, ndigits=10):
    """Return a copy of ``obj`` with every float rounded to ``ndigits`` places.

    Walks dicts and lists recursively (the containers ``json.loads``
    produces) so that binary floating-point noise such as
    ``0.30000000000000004`` is not written to the YAML output.

    Args:
        obj: A scalar, list, or dict, typically the result of ``json.loads``.
        ndigits: Number of decimal places to keep; defaults to 10.

    Returns:
        Rounded floats, and new dicts/lists with the same keys and order.
        Any other value is returned unchanged. The input is not mutated.
    """
    if isinstance(obj, float):
        return round(obj, ndigits)
    if isinstance(obj, dict):
        return {key: clean_floats(value, ndigits) for key, value in obj.items()}
    if isinstance(obj, list):
        return [clean_floats(item, ndigits) for item in obj]
    # ints, strings, bools and None carry no float noise.
    return obj
yaml_str = yaml.dump(clean_floats(data), default_flow_style=False, allow_unicode=True)
# Multiline strings — use literal block style
data_with_text = {"description": "Line one\nLine two\nLine three"}
yaml_str = yaml.dump(data_with_text, default_flow_style=False)
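# description: 'Line one
#
#   Line two
#
#   Line three'
# Use default_style='|' for a literal block. Note that it applies to every
# scalar in the document: keys come out double-quoted and non-string values
# such as numbers are written with explicit tags.
yaml_str = yaml.dump(data_with_text, default_flow_style=False, default_style='|')
# "description": |-
#   Line one
#   Line two
#   Line three
YAML to JSON (Reverse)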
import yaml
import json
# Read YAML file and convert to JSON
with open('config.yaml', encoding='utf-8') as f:
    data = yaml.safe_load(f) # NEVER use yaml.load() — security risk
json_str = json.dumps(data, indent=2, ensure_ascii=False)
print(json_str)
# YAML string to JSON string
yaml_str = """
name: Alice
age: 30
tags:
- python
- devops
"""
data = yaml.safe_load(yaml_str)
print(json.dumps(data, indent=2))
# {
#   "name": "Alice",
#   "age": 30,
#   "tags": [
#     "python",
#     "devops"
#   ]
# }
# Watch out: YAML booleans — "yes", "no", "on", "off" are bool in YAML 1.1
# yaml.safe_load("active: yes") → {"active": True} (not "yes")
# JSON equivalent: {"active": true}
Batch Conversion and CLI Tools
# Batch convert all JSON files in a directory
import json
import yaml
from pathlib import Path
def json_dir_to_yaml(input_dir: str, output_dir: str) -> int:
    """Convert every ``*.json`` file under ``input_dir`` to YAML in ``output_dir``.

    The directory layout below ``input_dir`` is mirrored below ``output_dir``,
    with each file's suffix changed to ``.yaml``. Files that cannot be decoded
    as UTF-8 or parsed as JSON are reported on stdout and skipped, so one bad
    file does not abort the batch.

    Args:
        input_dir: Directory searched recursively for ``*.json`` files.
        output_dir: Directory that receives the ``.yaml`` files; created if
            it does not exist.

    Returns:
        The number of files successfully converted.
    """
    input_path = Path(input_dir)
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    converted = 0
    # sorted() makes the processing (and log) order reproducible across runs.
    for json_file in sorted(input_path.glob("**/*.json")):
        if not json_file.is_file():
            # A directory named "something.json" also matches the pattern.
            continue
        try:
            data = json.loads(json_file.read_text(encoding='utf-8'))
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            print(f"Skip {json_file}: {e}")
            continue
        relative = json_file.relative_to(input_path)
        yaml_file = output_path / relative.with_suffix('.yaml')
        # Create the mirrored sub-directory only once the file is known to
        # be convertible, so skipped files leave no empty directories behind.
        yaml_file.parent.mkdir(parents=True, exist_ok=True)
        yaml_file.write_text(
            yaml.dump(data, default_flow_style=False, allow_unicode=True),
            encoding='utf-8'
        )
        converted += 1
    return converted
count = json_dir_to_yaml('./configs-json', './configs-yaml')
print(f"Converted {count} files")
Command-line alternatives:
# yq (like jq for YAML) — convert JSON to YAML in one command
yq -P '.' config.json > config.yaml # -P = pretty (YAML block style)
cat config.json | yq -P '.' # from stdin
# Python one-liner
python3 -c "import sys,json,yaml; print(yaml.dump(json.load(sys.stdin), default_flow_style=False))" < input.json > output.yaml
# Install yq (macOS)
brew install yq
# Or: pip install yq (a different, Python-based wrapper around jq — it does not accept -P; use: yq -y '.' config.json > config.yaml)
JSON Schema to YAML Schema
# JSON Schema is valid JSON — converts cleanly to YAML
json_schema = {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "object",
"properties": {
"name": {"type": "string", "minLength": 1},
"age": {"type": "integer", "minimum": 0, "maximum": 150},
"email": {"type": "string", "format": "email"}
},
"required": ["name", "age"]
}
import yaml
schema_yaml = yaml.dump(json_schema, default_flow_style=False, allow_unicode=True, sort_keys=False)
# $schema: https://json-schema.org/draft/2020-12/schema
# type: object
# properties:
#   name:
#     type: string
#     minLength: 1
#   age:
#     type: integer
#     minimum: 0
#     maximum: 150
#   email:
#     type: string
#     format: email
# required:
# - name
# - age
# OpenAPI specs use YAML — convert JSON OpenAPI to YAML the same way
FAQ
How do I convert JSON to YAML in Python?
Install PyYAML with pip install pyyaml. Then import both libraries: import json and import yaml. Parse the JSON string with data = json.loads(json_string), or read a file with json.load(file). Convert to YAML with yaml.dump(data, default_flow_style=False). Add allow_unicode=True to preserve non-ASCII characters like accented letters without escaping. To write to a file, pass an open file object as the second argument inside a with block so the file is closed afterwards: with open("out.yaml", "w", encoding="utf-8") as f: yaml.dump(data, f, default_flow_style=False, allow_unicode=True). The entire conversion is two lines: parse JSON, then dump YAML.
What is the difference between PyYAML and ruamel.yaml?
PyYAML is simpler, faster, and more widely installed. It implements YAML 1.1, which means strings like "yes", "no", "on", and "off" are parsed as booleans. It sorts keys alphabetically by default and does not preserve comments. ruamel.yaml implements YAML 1.2, so only "true" and "false" are booleans. It preserves key insertion order, comments, and formatting on round-trips, making it better for editing existing YAML files. For most JSON-to-YAML conversions PyYAML is the right choice. Use ruamel.yaml when key order matters, when you are round-tripping YAML, or when you need YAML 1.2 compliance.
How do I preserve key order when converting JSON to YAML in Python?
Python 3.7+ dicts preserve insertion order natively, so the plain dict returned by json.loads() already keeps the key order of the source JSON. The issue is that yaml.dump() sorts keys alphabetically by default. To disable sorting, pass sort_keys=False to yaml.dump() (available since PyYAML 5.1). With sort_keys=False, keys appear in the same order as the source JSON. Alternatively, use ruamel.yaml which always preserves key order regardless of settings. For Python 3.6 and earlier, use json.loads() with object_pairs_hook=OrderedDict to guarantee order preservation.
What happens to JSON null, true, and false in YAML?
JSON null becomes YAML null (the literal word). JSON true becomes YAML true and JSON false becomes YAML false. JSON numbers convert to YAML integers or floats without change. JSON arrays become YAML sequences (block-style list items prefixed with - ). JSON strings that look like YAML keywords — such as the string "true" or the string "null" — are automatically quoted by yaml.dump() to prevent ambiguity, so no data is lost in the conversion.
How do I convert YAML to JSON in Python?
Use yaml.safe_load(yaml_string) or yaml.safe_load(file_object) to parse YAML to a Python dict. Then convert to JSON with json.dumps(data, indent=2). Never use yaml.load() without an explicit Loader argument — it can execute arbitrary Python code from untrusted YAML and is a known security vulnerability. yaml.safe_load() only constructs basic Python types and is always safe. Watch out for YAML 1.1 booleans: yaml.safe_load("active: yes") returns {"active": True}, which becomes {"active": true} in JSON.
How do I handle multiline strings in JSON to YAML conversion?
JSON strings containing \n newline characters convert to YAML quoted strings by default. yaml.dump() chooses the most compact representation automatically. For clean literal blocks — where newlines are preserved and the YAML reads naturally — use default_style='|' in yaml.dump(). Literal blocks (|) preserve newlines exactly. Folded blocks (>) replace newlines with spaces, useful for long prose text. To apply literal style only to specific values rather than all strings, use a custom YAML representer or switch to ruamel.yaml for finer-grained control.
Is there a command-line tool to convert JSON to YAML?
yq is the most popular CLI tool: install the Go-based yq with brew install yq (macOS), then convert with yq -P "." input.json > output.yaml (the -P flag outputs pretty block-style YAML). A separate Python package with the same name, installed with pip install yq, is a wrapper around jq and uses different flags: yq -y "." input.json > output.yaml. You can also use a Python one-liner: python3 -c "import sys,json,yaml; print(yaml.dump(json.load(sys.stdin), default_flow_style=False))" < input.json > output.yaml. For bulk conversion of many files, write a Python script using pathlib.Path.glob("*.json"). The jq tool can transform JSON before piping to yq for combined filter-and-convert workflows.
What are the size differences between JSON and YAML?
YAML is typically 5–15% larger than equivalent JSON for the same data. JSON is more compact especially for arrays of objects, because JSON uses braces and brackets as delimiters while YAML repeats keys and uses indentation. JSON strings require no extra whitespace. For machine-to-machine communication and API responses, prefer JSON for its compactness and universal parser support. For configuration files, Kubernetes manifests, Docker Compose files, and human-edited documents, prefer YAML for its readability and support for comments — the slight size increase is irrelevant for config files.
Definitions
- PyYAML
- The most widely used Python YAML library; implements YAML 1.1. Use yaml.safe_load() for parsing and yaml.dump() for serialization. Does not preserve key order by default — set sort_keys=False to disable alphabetical sorting.
- ruamel.yaml
- A Python YAML library implementing YAML 1.2. Preserves comments, key order, and formatting on round-trips. Slower than PyYAML but more accurate for editing existing YAML files without losing structure.
- block style
- YAML formatting where nested structures are written on separate indented lines — the human-readable default. Produced by default_flow_style=False in PyYAML. Each key-value pair gets its own line; nested objects are indented.
- flow style
- YAML formatting where structures are written on a single line using braces and brackets: {name: Alice, age: 30}. Similar in appearance to JSON. Produced by default_flow_style=True in PyYAML. Less readable for deeply nested data.
- safe_load()
- PyYAML function that parses only basic YAML types (dict, list, str, int, float, bool, None). Prevents arbitrary Python object construction from untrusted YAML, avoiding a well-known remote code execution vulnerability. Always prefer yaml.safe_load() over yaml.load().
Further reading and primary sources
- PyYAML Documentation — Full PyYAML API reference: yaml.dump() parameters, custom representers, and YAML 1.1 type resolution
- ruamel.yaml Documentation — ruamel.yaml guide: round-trip preservation, YAML 1.2, and comment handling
- YAML 1.2 Specification — The official YAML 1.2 spec: type system, scalars, sequences, mappings, and anchors
- JSON to Markdown (Jsonic) — Convert JSON data to Markdown tables and lists with Python, JavaScript, and CLI tools
- JSON Schema Patterns (Jsonic) — JSON Schema validation patterns: $ref, allOf, oneOf, conditionals, and custom formats