{
  "architectures": [
    "Emu3VisionVQModel"
  ],
  "attn_resolutions": [
    3
  ],
  "ch": 256,
  "ch_mult": [
    1,
    2,
    2,
    4
  ],
  "codebook_size": 32768,
  "double_z": false,
  "dropout": 0.0,
  "embed_dim": 4,
  "in_channels": 3,
  "model_type": "Emu3VisionVQ",
  "num_res_blocks": 2,
  "out_channels": 3,
  "temporal_downsample_factor": 4,
  "dtype": "float32",
  "z_channels": 4
}