{
  "init_args": [
    {
      "image_resolution": 224,
      "vision_layers": [
        3,
        4,
        23,
        3
      ],
      "vision_heads": 32,
      "vision_mlp_ratio": null,
      "vision_embed_dim": 64,
      "vision_patch_size": null,
      "vision_hidden_act": null,
      "max_text_length": 77,
      "vocab_size": 49408,
      "text_embed_dim": 512,
      "text_heads": 8,
      "text_layers": 12,
      "text_hidden_act": "quick_gelu",
      "projection_dim": 512,
      "initializer_range": 0.02,
      "logit_scale_init_value": 2.6592,
      "init_class": "CLIPModel"
    }
  ],
  "init_class": "CLIPForImageGeneration"
}