{"data":[{"id":"fastrouter/auto","name":"FastRouter: Auto","description":"fastrouter/auto is an enhanced meta-model that intelligently routes each prompt to the best-fit model from dozens of options — optimizing for quality, contextual relevance, and cost. You will be billed at the exact rate of the selected model, with full transparency in the Activity page and response metadata. With improved query understanding and a next-gen selection engine, Auto delivers sharper routing decisions and more consistent outputs — making it the most reliable way to get started.","created":1767260074,"context_length":1000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0","completion":"0","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"price":"As per provider"}},"top_provider":{"context_length":1000000,"max_completion_tokens":1000000,"is_moderated":false},"supported_parameters":[],"models_extra_params":{"category":null},"is_active":true,"creator":"FastRouter"},{"id":"openai/gpt-5.5-pro","name":"OpenAI: GPT-5.5 Pro","description":"OpenAI's GPT-5.5 Pro is a premium variant of the GPT-5.5 model, designed for higher-tier users (Pro, Business, Enterprise) handling demanding workloads like advanced research, 
business analysis, legal tasks, and deep reasoning with superior accuracy and depth. It builds on GPT-5.5's agentic capabilities but adds extended reasoning modes for complex, multi-step workflows.","created":1777051896,"context_length":1050000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00003","completion":"0.00018","request":"","image":"","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"0.00006","completion_more_than_272k_input":"0.00027","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{"prompt":"0.000015","completion":"0.00009"},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.00003","completion":"0.00018","prompt_more_than_272k_input":"0.00006","completion_more_than_272k_input":"0.00027"}},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"anthropic/claude-opus-4.7","name":"Anthropic: Claude Opus 4.7","description":"Opus 4.7 is Anthropic’s next-generation Opus model, designed for long-running, asynchronous agents. 
Building on the coding and agentic capabilities of Opus 4.6, it offers improved performance across complex tasks.","created":1776351100,"context_length":1000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Claude"},"pricing":{"prompt":"0.000005","completion":"0.000025","request":"","image":"","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.0000005","input_cache_write":"0.00000625","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","tool_choice","tools","verbosity"],"models_extra_params":{"category":null},"is_active":true,"creator":"Anthropic"},{"id":"anthropic/claude-sonnet-4.6","name":"Anthropic: Claude Sonnet 4.6","description":"Sonnet 4.6 is Anthropic’s strongest Sonnet-tier model to date, delivering near-frontier performance for coding, agentic workflows, and professional tasks. 
It’s especially good at iterative build cycles, navigating large codebases, managing end-to-end projects with memory, producing polished documents, and reliably using computers for web QA and workflow automation.","created":1771342990,"context_length":1000000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude"},"pricing":{"prompt":"0.000003","completion":"0.000015","request":"","image":"","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.0000003","input_cache_write":"0.00000375","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p","verbosity"],"models_extra_params":{"category":null},"is_active":true,"creator":"Anthropic"},{"id":"anthropic/claude-haiku-4.5","name":"Anthropic: Claude Haiku 4.5","description":"Claude Haiku 4.5 is Anthropic’s October 2025 release of its newest compact AI language model, designed to offer near-premium intelligence, ultra-fast responses, and low operational cost. 
Positioned as the lightweight sibling to Claude Sonnet 4.5 and Opus 4.1, Haiku 4.5 enables “near‑frontier‑level” performance suitable for enterprise-scale workloads, free-tier operations, and real-time applications.","created":1760547638,"context_length":200000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Claude"},"pricing":{"prompt":"0.000001","completion":"0.000005","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.0000001","input_cache_write":"0.00000125","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":200000,"max_completion_tokens":64000,"is_moderated":true},"supported_parameters":["include_reasoning","max_tokens","reasoning","stop","temperature","tool_choice","tools","top_k","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Anthropic"},{"id":"google/gemini-3.1-flash-lite-preview","name":"Google: Gemini 3.1 Flash Lite Preview","description":"Gemini 3.1 Flash-Lite Preview (gemini-3.1-flash-lite-preview) is Google's fastest and most cost-efficient multimodal model in the Gemini 3 series, launched in preview in March 2026 for high-volume, low-latency developer workloads like real-time translation, content moderation, and dynamic UI 
generation.","created":1772512673,"context_length":1048576,"architecture":{"modality":"text+image+file+audio+video-\u003etext","input_modalities":["text","image","video","file","audio"],"output_modalities":["text"],"tokenizer":"Gemini"},"pricing":{"prompt":"0.00000025","completion":"0.0000015","request":"","image":"0.00000025","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"0.0000015","input_cache_read":"0.000000025","input_cache_write":"0.00000008333333333333334","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Google"},{"id":"x-ai/grok-4.3","name":"xAI: Grok 4.3","description":"Grok-4.3 is xAI's advanced reasoning model, launched in late April 2026, designed for complex tasks like advanced logic, math, scientific analysis, and multi-step agentic 
workflows.","created":1777591821,"context_length":1000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Grok"},"pricing":{"prompt":"0.00000125","completion":"0.0000025","request":"","image":"","web_search":"0.005","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.0000002","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"0.0000025","completion_more_than_200k_input":"0.00005","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.00000125","completion":"0.0000025","prompt_more_than_200k_input":"0.0000025","completion_more_than_200k_input":"0.00005"}},"top_provider":{"context_length":1000000,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["frequency_penalty","include_reasoning","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"X-ai"},{"id":"openai/gpt-5.3-codex","name":"OpenAI: GPT-5.3-Codex","description":"GPT-5.3-Codex is OpenAI's most capable agentic coding model to date, combining frontier coding performance with general work automation capabilities—enabling developers and professionals to automate complex tasks across the entire software lifecycle, from code writing to infrastructure management and 
cybersecurity.","created":1771959164,"context_length":400000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00000175","completion":"0.000014","request":"","image":"","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.000000175","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"moonshotai/kimi-k2.6","name":"MoonshotAI: Kimi K2.6","description":"Moonshot AI's Kimi-K2.6 is a frontier-scale, open-source Mixture-of-Experts (MoE) multimodal model with 1 trillion total parameters (32B active), a 256K-262K token context window, and native support for text, image, and video inputs, excelling in long-horizon coding, agentic workflows, multi-agent orchestration, and complex software 
engineering.","created":1776699402,"context_length":262144,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.00000055","completion":"0.0000025","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.00000015","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":262144,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","max_tokens","min_p","presence_penalty","reasoning","reasoning_effort","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"MoonshotAI"},{"id":"deepseek/deepseek-v4-pro","name":"DeepSeek: DeepSeek V4 Pro","description":"DeepSeek-V4-Pro is a 1.6 trillion parameter Mixture-of-Experts (MoE) language model from DeepSeek AI, with 49 billion parameters activated per token and support for a 1 million token context window. \nIt features a hybrid attention architecture combining Compressed Sparse Attention (CSA) and Heavily Compressed Attention (HCA), which reduces inference FLOPs to 27% and KV cache to 10% of DeepSeek-V3.2 at 1M context, enabling 83.5% MRCR comprehension. 
The model was pre-trained on over 32 trillion tokens using the Muon optimizer and a two-stage post-training process involving SFT, RL with GRPO, and on-policy distillation for domain-specific expertise.","created":1777000679,"context_length":1048576,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek"},"pricing":{"prompt":"0.0000021","completion":"0.0000044","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.0000002","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":1048576,"max_completion_tokens":384000,"is_moderated":false},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"DeepSeek"},{"id":"openai/gpt-image-2","name":"OpenAI: GPT Image 2","description":"OpenAI's gpt-image-2 (also referred to as GPT Image 2 or ChatGPT Images 2.0) is a state-of-the-art text-to-image generation and editing model that transforms natural-language prompts into high-quality, photorealistic visuals with exceptional prompt fidelity, accurate text rendering, and advanced reasoning 
capabilities.","created":1776797528,"context_length":128000,"architecture":{"modality":"text+image-\u003eimage","input_modalities":["image","text"],"output_modalities":["image"],"tokenizer":"GPT"},"pricing":{"prompt":"0.000008","completion":"0.000030","request":"","image":"","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.000002","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["prompt","background","moderation","n","output_compression","output_format","quality","response_format","size","style"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"google/gemini-3.1-flash-image-preview","name":"Google: Nano Banana 2 (Gemini 3.1 Flash Image Preview)","description":"Google's gemini-3.1-flash-image-preview (codenamed Nano Banana 2) is a preview version of the Gemini 3.1 Flash Image model, optimized for high-speed image generation and editing with Pro-level quality, balancing performance, low latency, and cost 
efficiency.","created":1772119558,"context_length":65536,"architecture":{"modality":"text+image-\u003etext+image","input_modalities":["image","text"],"output_modalities":["image","text"],"tokenizer":"Gemini"},"pricing":{"prompt":"0.00000025","completion":"0.0000015","request":"","image":"0.00006","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":65536,"max_completion_tokens":65536,"is_moderated":false},"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Google"},{"id":"openai/gpt-5.5","name":"OpenAI: GPT-5.5","description":"OpenAI's GPT-5.5 is a large language model (LLM) released on April 23, 2026, codenamed \"Spud,\" positioned as the company's smartest model yet for complex real-world tasks like coding, research, data analysis, and agentic workflows. 
It features variants including GPT-5.5 Thinking and GPT-5.5 Pro (not available to free-tier users), with API access starting April 24 after safeguards implementation","created":1777051893,"context_length":1050000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.000005","completion":"0.000030","request":"","image":"","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.0000005","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"0.000010","completion_more_than_272k_input":"0.000045","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{"prompt":"0.0000025","completion":"0.000015","prompt_more_than_272k_input":"0.000005","completion_more_than_272k_input":"0.0000225","input_cache_read":"0.00000025","input_cache_read_more_than_272k_input":"0.00000050"},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.000005","completion":"0.000030","prompt_more_than_272k_input":"0.000010","completion_more_than_272k_input":"0.000045"}},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["include_reasoning","max_completion_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-5.4-nano","name":"OpenAI: GPT-5.4 Nano","description":"GPT-5.4 nano is OpenAI's most lightweight and cost-efficient model in the GPT-5.4 family, 
optimized for speed-critical, high-volume tasks like classification, data extraction, ranking, and sub-agent execution.\nIt prioritizes low latency and efficiency over deep reasoning, making it ideal for real-time systems, background tasks, distributed agent architectures, coding assistants, and multimodal applications involving images. The model supports text and image inputs (no audio or video) with text outputs, a 400,000-token context window, and up to 128,000 max output tokens; its knowledge cutoff is August 31, 2025.","created":1773748187,"context_length":400000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.0000002","completion":"0.00000125","request":"","image":"","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.00000002","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["include_reasoning","max_completion_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-5.4-mini","name":"OpenAI: GPT-5.4 Mini","description":"GPT-5.4 mini is OpenAI's compact, high-performance model in the GPT-5.4 family, 
balancing advanced reasoning, multimodal capabilities, and efficiency for high-volume workloads like coding, agent workflows, and production-scale applications. \nIt delivers significant improvements over GPT-5 mini in coding, reasoning, multimodal understanding, and tool use, running over 2x faster while approaching GPT-5.4 performance on benchmarks like SWE-Bench Pro and OSWorld-Verified. The model supports text and image inputs (no audio/video) with text outputs, a 400,000-token context window, up to 128,000 max output tokens, and an August 31, 2025 knowledge cutoff.","created":1773748178,"context_length":400000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00000075","completion":"0.0000045","request":"","image":"","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.000000075","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["include_reasoning","max_completion_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"anthropic/claude-opus-4.6","name":"Anthropic: Claude Opus 4.6","description":"Opus 4.6 is Anthropic’s 
new top-end Claude model, optimized for complex reasoning, coding, and high‑stakes professional work. It excels at agentic workflows, breaking down multi-step tasks, calling tools, and running for longer with fewer errors and less hand-holding. The model introduces a 1M‑token context window (in beta), stronger long-context retrieval, and up to 128k output tokens for large code and document workloads. It also adds “adaptive thinking” and adjustable effort levels, letting it dynamically choose when to think deeply versus respond quickly.","created":1770219050,"context_length":1000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Claude"},"pricing":{"prompt":"0.000005","completion":"0.000025","request":"","image":"","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.0000005","input_cache_write":"0.00000625","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":1000000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p","verbosity"],"models_extra_params":{"category":null},"is_active":true,"creator":"Anthropic"},{"id":"anthropic/claude-opus-4.5","name":"Anthropic: Claude Opus 4.5","description":"anthropic/claude-opus-4.5 
is Anthropic’s latest frontier reasoning model, optimized for complex software engineering, agentic workflows, and long-running computer-use tasks such as multi-step automation and tool use. It is positioned as the most capable model in the Claude Opus family, offering state-of-the-art performance on challenging coding and enterprise benchmarks while being significantly more efficient than earlier Opus versions.","created":1764010580,"context_length":200000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude"},"pricing":{"prompt":"0.000005","completion":"0.000025","request":"0","image":"0","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.0000005","input_cache_write":"0.00000625","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":200000,"max_completion_tokens":32000,"is_moderated":true},"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","stop","structured_outputs","temperature","tool_choice","tools","top_k","verbosity"],"models_extra_params":{"category":null},"is_active":true,"creator":"Anthropic"},{"id":"anthropic/claude-opus-4.1","name":"Anthropic: Claude Opus 4.1","description":"Claude Opus 4.1, a direct replacement for Opus 4, excels in real-world coding and agentic tasks with enhanced performance and precision. 
It autonomously handles complex, end-to-end development tasks, adapting to user styles while maintaining high-quality output, particularly in frontend code generation and intricate logic. With superior long-horizon task management and problem-solving, it serves as an ideal virtual collaborator for sustained reasoning and multi-step actions. Opus 4.1 also boosts AI agent capabilities, enabling accurate execution of complex tasks and shining in agentic search, research, content creation, and context management for comprehensive insights and high-quality summarization.","created":1754411591,"context_length":200000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Claude"},"pricing":{"prompt":"0.000015","completion":"0.000075","request":"0","image":"0.024","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.0000015","input_cache_write":"0.00001875","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000750","completion":"0.00003750","input_cache_read":"0.00000075","input_cache_write":"0.000009375"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":200000,"max_completion_tokens":32000,"is_moderated":true},"supported_parameters":["include_reasoning","max_tokens","reasoning","stop","temperature","tool_choice","tools"],"models_extra_params":{"category":null},"is_active":true,"creator":"Anthropic"},{"id":"anthropic/claude
-4.5-sonnet","name":"Anthropic: Claude Sonnet 4.5","description":"Claude Sonnet 4.5 is Anthropic’s most advanced Sonnet model to date, optimized for real-world agents and coding workflows. It delivers state-of-the-art performance on coding benchmarks such as SWE-bench Verified, with improvements across system design, code security, and specification adherence. The model is designed for extended autonomous operation, maintaining task continuity across sessions and providing fact-based progress tracking.\n\nSonnet 4.5 also introduces stronger agentic capabilities, including improved tool orchestration, speculative parallel execution, and more efficient context and memory management. With enhanced context tracking and awareness of token usage across tool calls, it is particularly well-suited for multi-context and long-running workflows. Use cases span software engineering, cybersecurity, financial analysis, research agents, and other domains requiring sustained reasoning and tool use.","created":1759161676,"context_length":200000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Claude"},"pricing":{"prompt":"0.000003","completion":"0.000015","request":"0","image":"0.0048","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.0000003","input_cache_write":"0.00000375","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"0.000006","completion_more_than_200k_input":"0.0000225","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000150","completion":"0.00000750","input_cache_read":"0.00000015","input_cache_write":"0.
000001875","input_cache_read_more_than_200k_input":"0.0000003","input_cache_write_more_than_200k_input":"0.00000375","prompt_more_than_200k_input":"0.000003","completion_more_than_200k_input":"0.000001125"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.000003","prompt_more_than_200k_input":"0.000006","completion":"0.000015","completion_more_than_200k_input":"0.0000225"}},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":false},"supported_parameters":["include_reasoning","max_tokens","reasoning","stop","temperature","tool_choice","tools"],"models_extra_params":{"category":null},"is_active":true,"creator":"Anthropic"},{"id":"anthropic/claude-sonnet-4-20250514","name":"Anthropic: Claude Sonnet 4","description":"Claude Sonnet 4 represents a significant advancement over Claude Sonnet 3.7, particularly in the area of coding. It delivers top-tier performance suitable for a wide range of AI applications, such as interactive AI assistants and large-scale automation tasks. The model excels in understanding nuanced instructions, adapting to context, self-correcting errors, and generating deep insights from complex information. 
Its strengths in coding, vision, and writing make it a versatile tool for many professional use cases.","created":1747930371,"context_length":200000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Claude"},"pricing":{"prompt":"0.000003","completion":"0.000015","request":"0","image":"0.0048","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.0000003","input_cache_write":"0.00000375","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"0.000006","completion_more_than_200k_input":"0.0000225","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000150","completion":"0.00000750","input_cache_read":"0.00000015","input_cache_write":"0.000001875"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.000003","prompt_more_than_200k_input":"0.000006","completion":"0.000015","completion_more_than_200k_input":"0.0000225"}},"top_provider":{"context_length":1000000,"max_completion_tokens":64000,"is_moderated":true},"supported_parameters":["max_tokens","temperature","stop","reasoning","include_reasoning","tools","tool_choice","top_p","top_k"],"models_extra_params":{"category":["Coding","Legal","Health","Creative Writing"]},"is_active":true,"creator":"Anthropic"},{"id":"anthropic/claude-3-5-haiku-20241022","name":"Anthropic: Claude 3.5 Haiku","description":"Claude 3.5 Haiku (October 2024 version) is Anthropic's most efficient model optimized for rapid response times and minimal latency. 
This specialized release delivers enhanced capabilities across coding accuracy, external tool utilization, and logical reasoning while maintaining the fastest performance profile in Anthropic's model lineup. Unlike its multimodal counterparts, this text-only variant prioritizes pure language processing efficiency for applications demanding maximum responsiveness. The model particularly excels in high-interaction scenarios like conversational interfaces, real-time code suggestions, structured data extraction, and content moderation workflows. Its balanced performance across both technical and general tasks makes it versatile for diverse business applications while its focus on efficiency ensures it remains cost-effective for large-scale deployments in time-sensitive environments across various industries.","created":1730678400,"context_length":200000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Claude"},"pricing":{"prompt":"0.0000008","completion":"0.000004","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.00000008","input_cache_write":"0.000001","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000040","completion":"0.00000200","input_cache_read":"0.00000004","input_cache_write":"0.00000050"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":200000,"max_completion_tokens":8192,"is_moderate
d":true},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","top_k","stop"],"models_extra_params":{"category":null},"is_active":true,"creator":"Anthropic"},{"id":"google/gemini-3.1-pro-preview","name":"Google: Gemini 3.1 Pro Preview","description":"Google's gemini-3.1-pro-preview (also called Gemini 3.1 Pro Preview) is a preview-stage, multimodal AI model from Google DeepMind, optimized for advanced reasoning, agentic workflows, software engineering, and complex problem-solving across text, images, video, audio, and PDFs. \nIt refines the Gemini 3 Pro series with improved thinking, token efficiency, factual consistency, and reliability for multi-step tasks, tool use, and long-horizon stability. Key specs include a 1M input token context window (1,048,576 tokens), 64K-65K output tokens, and support for capabilities like function calling, structured outputs, code execution, search grounding, and a new \"MEDIUM\" thinking level for balancing cost, speed, and performance.","created":1771509627,"context_length":1048576,"architecture":{"modality":"text+image+file+audio+video-\u003etext","input_modalities":["audio","file","image","text","video"],"output_modalities":["text"],"tokenizer":"Gemini"},"pricing":{"prompt":"0.000002","completion":"0.000012","request":"0","image":"0.00516","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0.0000035","input_cache_read":"0.00000031","input_cache_write":"0.000001625","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"0.000004","completion_more_than_200k_input":"0.000018","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"vi
deoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.000002","completion":"0.000012","prompt_more_than_200k_input":"0.000004","completion_more_than_200k_input":"0.000018"}},"top_provider":{"context_length":1048576,"max_completion_tokens":65536,"is_moderated":false},"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Google"},{"id":"google/gemini-3-flash-preview","name":"Google: Gemini 3 Flash Preview","description":"Google's gemini-3-flash-preview is a high-speed, cost-effective AI model from the Gemini 3 series, designed for agentic workflows, multi-turn chats, and coding tasks. It delivers near-Pro-level reasoning and tool use with lower latency than larger variants, supporting a 1M token context window and multimodal inputs like text, images, audio, video, and PDFs.","created":1765987078,"context_length":1048576,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image","file","video"],"output_modalities":["text"],"tokenizer":"Gemini"},"pricing":{"prompt":"0.0000005","completion":"0.000003","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.00000005","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_pr
ovider":{"context_length":1048576,"max_completion_tokens":65535,"is_moderated":false},"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Google"},{"id":"google/gemini-3-pro-image-preview","name":"Google: Gemini 3 Pro Image Preview (Nano Banana Pro)","description":"Gemini 3 Pro Image Preview is a high-end image generation and editing model in the Gemini 3 family (also known as Nano Banana Pro), built to produce studio-quality visuals with strong reasoning over complex prompts. It is optimized for detailed, multi-step creative workflows where you need both visual fidelity and precise control over content, layout, and text inside images.","created":1763653797,"context_length":65536,"architecture":{"modality":"text+image-\u003etext+image","input_modalities":["image","text"],"output_modalities":["image","text"],"tokenizer":"Gemini"},"pricing":{"prompt":"0.000002","completion":"0.000012","request":"0","image":"0.00012","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":65536,"max_completion_tokens":32768,"is_moderated":false},"supported_parameters":["max_tokens","response_format","seed","structured_outputs","tempera
ture","top_p","aspectRatio"],"models_extra_params":{"category":null},"is_active":true,"creator":"Google"},{"id":"x-ai/grok-4.20-beta","name":"xAI: Grok 4.20 Beta","description":"Grok 4.20 Beta is xAI’s newest flagship large language model that uses four specialized “agents” working together to answer complex questions more accurately and with deeper reasoning than prior Grok versions.","created":1773325354,"context_length":2000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Grok"},"pricing":{"prompt":"0.000002","completion":"0.000006","request":"","image":"","web_search":"0.005","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.0000002","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"0.000004","completion_more_than_200k_input":"0.000012","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.000002","completion":"0.000006","prompt_more_than_200k_input":"0.000004","completion_more_than_200k_input":"0.000012"}},"top_provider":{"context_length":2000000,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["include_reasoning","logprobs","max_tokens","reasoning","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"X-ai"},{"id":"x-ai/grok-4.1-fast","name":"xAI: Grok 4.1 Fast","description":"Grok 4.1 Fast by xAI is a state-of-the-art large multimodal 
language model, specifically designed for agentic tool-calling, long-context workflows, and production-grade low-latency inference. It features a massive 2 million token context window, making it ideal for complex, multi-hour conversations, deep research, and supporting tasks like customer support, code execution, and document retrieval.","created":1763587502,"context_length":2000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Grok"},"pricing":{"prompt":"0.00000020","completion":"0.00000050","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"0.00000040","completion_more_than_128k_input":"0.000001","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.00000020","completion":"0.00000050","prompt_more_than_128k_input":"0.00000040","completion_more_than_128k_input":"0.000001"}},"top_provider":{"context_length":2000000,"max_completion_tokens":30000,"is_moderated":false},"supported_parameters":["logprobs","max_tokens","response_format","seed","temperature","tool_choice","tools","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"X-ai"},{"id":"x-ai/grok-4-fast","name":"xAI: Grok 4 Fast","description":"Grok-4-Fast is xAI’s latest efficiency-focused generative AI model, designed to deliver high-speed responses with significant improvements in cost and latency over 
the original Grok 4. It features a unified architecture that seamlessly handles both reasoning and non-reasoning tasks; this allows it to produce quick replies for simple queries and dive deeply into complex coding or reasoning problems when needed. One of its standout capabilities is the very large 2 million token context window, enabling it to process lengthy documents, multi-hour transcripts, or entire codebases in one go. Grok-4-Fast achieves roughly 40% greater token efficiency than Grok 4, reducing computational cost by up to 98% in benchmark tasks while maintaining state-of-the-art performance in reasoning, search, and coding benchmarks such as GPQA Diamond and AIME 2025. The model supports advanced features like multimodal input (text, image, voice), tool and web search integration, and function calling for structured outputs, making it suitable for both enterprise automation and consumer apps. ","created":1758240090,"context_length":2000000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Grok"},"pricing":{"prompt":"0.00000020","completion":"0.00000050","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"0.00000040","completion_more_than_128k_input":"0.000001","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.00000020","completion":"0.00000050","prompt_more_than_128k_input":"0.00000040","com
pletion_more_than_128k_input":"0.000001"}},"top_provider":{"context_length":2000000,"max_completion_tokens":30000,"is_moderated":false},"supported_parameters":["logprobs","max_tokens","response_format","seed","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"X-ai"},{"id":"openai/gpt-5.2-codex","name":"OpenAI: GPT-5.2-Codex","description":"GPT-5.2-Codex is OpenAI's specialized coding model within the GPT-5.2 family, released December 18, 2025, optimized for software engineering tasks including multi-file project management, agentic workflows, and defensive cybersecurity.","created":1768409315,"context_length":400000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00000175","completion":"0.000014","request":"","image":"","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.000000175","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","stop","structured_outputs","tool_choice","tools","top_logprobs"],"models_extra_params":{"cat
egory":null},"is_active":true,"creator":"OpenAI"},{"id":"moonshotai/kimi-k2.5","name":"MoonshotAI: Kimi K2.5","description":"MoonshotAI's Kimi-K2.5 is a cutting-edge open-source multimodal large language model series from MoonshotAI, featuring a massive Mixture-of-Experts (MoE) architecture with 1 trillion total parameters—yet only 32 billion activated per token for highly efficient inference and low computational costs. It boasts 61 transformer layers, 64 attention heads, a 256K context window enabled by advanced MLA attention mechanisms, and SwiGLU activations, paired with a MoonViT vision encoder for native handling of images, videos, and cross-modal tasks like visual reasoning, UI mockup-to-code generation (e.g., React from screenshots), and agentic workflows.","created":1769487076,"context_length":262144,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.00000045","completion":"0.0000028","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":262144,"max_completion_tokens":65535,"is_moderated":false},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","r
esponse_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Moonshot AI"},{"id":"moonshotai/kimi-k2-thinking","name":"MoonshotAI: Kimi K2 Thinking","description":"MoonshotAI's Kimi-K2-Thinking is an open-source large language model designed for deep, step-by-step reasoning and robust tool use. It is built as a \"thinking agent\" that sets new standards for complex reasoning, agentic workflows, and coding, handling up to 200–300 consecutive tool calls while maintaining coherent, goal-directed behavior.","created":1762440622,"context_length":262144,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.0000006","completion":"0.0000025","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.00000015","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":262144,"max_completion_tokens":262144,"is_moderated":false},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"models_extra_params":
{"category":null},"is_active":true,"creator":"Moonshot AI"},{"id":"deepseek/deepseek-v3.2","name":"DeepSeek: DeepSeek V3.2","description":"DeepSeek-V3.2 is positioned as a next-generation “general-purpose + reasoning” model intended to be a daily driver at roughly frontier (GPT‑5-class) performance on broad tasks like coding, math, agents, and general chat. It is released in several variants (such as V3.2, V3.2-Exp, and V3.2-Speciale), sharing the same core architecture but targeting different trade‑offs between efficiency and maximum reasoning power.","created":1764594642,"context_length":163840,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek"},"pricing":{"prompt":"0.00000027","completion":"0.0000004","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.000000216","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":163840,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["frequency_penalty","include_reasoning","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"DeepSeek"},{"id":"deepseek/deepseek-v3.1","name":"DeepSeek: 
DeepSeek V3.1","description":"DeepSeek V3.1 is an advanced open-source hybrid AI model designed to balance powerful reasoning with high-speed efficiency. It uniquely supports two inference modes—\"Think\" for deep reasoning and \"Non-Think\" for direct, lightweight tasks—making it versatile across use cases. Built with a mixture-of-experts architecture, it scales to 685B parameters while activating only 37B per token, enabling cost-effective performance. With a 128K context window, it can handle large documents, codebases, and complex multi-step workflows. DeepSeek V3.1 is optimized for tool use, agent-based applications, and enterprise deployment through open weights and developer-friendly APIs.","created":1755779628,"context_length":163840,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek-v3.1"},"pricing":{"prompt":"0.00000027","completion":"0.000001","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":163840,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs
","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"DeepSeek"},{"id":"deepseek-ai/DeepSeek-Prover-V2-671B","name":"DeepSeek: DeepSeek Prover V2","description":"A specialized large language model designed specifically for formal theorem proving in Lean 4. Built with a recursive theorem proving pipeline powered by DeepSeek-V3, it decomposes complex problems into subgoals and synthesizes proofs into chain-of-thought reasoning processes for advanced mathematical tasks.","created":1746013094,"context_length":131072,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek"},"pricing":{"prompt":"0.0000005","completion":"0.00000218","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":131072,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","seed","top_k","min_p","repetition_penalty","logit_bias","response_format"],"models_extra_params":{"category":[]},"is_active":true,"creator":"DeepSeek"},{"id":"openai/gpt-image-1.5","name":"OpenAI: GPT Image 1.5","description":"OpenAI’s gpt-image-1.5 is a next‑generation image 
generation and editing model designed to create production‑quality visuals from text prompts and to edit existing images with fine control.","created":1765893957,"context_length":128000,"architecture":{"modality":"text-\u003eimage","input_modalities":["text"],"output_modalities":["image"],"tokenizer":"GPT-4 tokenizer","instruct_type":"instruct"},"pricing":{"prompt":"0.000008","completion":"0.000032","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.0000025","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":[{"high":"0.133","medium":"0.034","low":"0.009","hd":"","standard":"","dimension":"1024x1024"},{"high":"0.2","medium":"0.05","low":"0.013","hd":"","standard":"","dimension":"1024x1536"},{"high":"0.2","medium":"0.05","low":"0.013","hd":"","standard":"","dimension":"1536x1024"}],"priceToShow":{"prompt":"0.00001","completion":"0.00004","imageCost":[{"high":"0.133","medium":"0.034","low":"0.009","dimension":"1024x1024","x-key":"dimension"},{"high":"0.2","medium":"0.05","low":"0.013","dimension":"1024x1536","x-key":"dimension"},{"high":"0.2","medium":"0.05","low":"0.013","dimension":"1536x1024","x-key":"dimension"}]}},"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["prompt","background","moderation","n","output_compression","output_format","quality","response_format","size","style"],"models_extra_params":{"c
ategory":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-image-1-mini","name":"OpenAI: GPT Image 1: Mini","description":"OpenAI's gpt-image-1-mini is a natively multimodal image model optimized for affordability and efficiency, making it suitable for projects that need high-throughput AI image generation at a lower cost. This model is designed to handle both text and image inputs and can generate new images, perform targeted image editing (like inpainting), and use reference images for style or content guidance.","created":1760514415,"context_length":128000,"architecture":{"modality":"text-\u003eimage","input_modalities":["text"],"output_modalities":["image"],"tokenizer":"GPT-4 tokenizer","instruct_type":"instruct"},"pricing":{"prompt":"0.0000025","completion":"0.000008","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.00000025","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":[{"high":"0.036","medium":"0.011","low":"0.005","hd":"","standard":"","dimension":"1024x1024"},{"high":"0.052","medium":"0.015","low":"0.006","hd":"","standard":"","dimension":"1024x1536"},{"high":"0.052","medium":"0.015","low":"0.006","hd":"","standard":"","dimension":"1536x1024"}],"priceToShow":{"prompt":"0.0000025","completion":"0.000008","imageCost":[{"high":"0.036","medium":"0.011","low":"0.005","dimension":"1024x1024","x-key":"dimension"},{"high":"0.052","medium":"0.015","low"
:"0.006","dimension":"1024x1536","x-key":"dimension"},{"high":"0.052","medium":"0.015","low":"0.006","dimension":"1536x1024","x-key":"dimension"}]}},"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["prompt","background","moderation","n","output_compression","output_format","quality","response_format","size","style"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-image-1","name":"OpenAI: GPT-image-1","description":"GPT-image-1 is OpenAI's latest multimodal image generation model, released in April 2025, and represents a significant advancement over previous models like DALL-E. It can generate high-resolution images—up to 4096×4096 pixels—from natural language prompts, with improved fidelity and consistency in handling complex scenes and detailed instructions. Key features include robust text-to-image generation, reliable text rendering within images, and advanced editing capabilities such as image-to-image transformation and inpainting, allowing users to upload and modify existing images using text prompts.","created":1739402250,"context_length":128000,"architecture":{"modality":"text-\u003eimage","input_modalities":["text"],"output_modalities":["image"],"tokenizer":"GPT-4 
tokenizer","instruct_type":"instruct"},"pricing":{"prompt":"0.00001","completion":"0.00004","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.0000025","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":[{"high":"0.167","medium":"0.042","low":"0.011","hd":"","standard":"","dimension":"1024x1024"},{"high":"0.25","medium":"0.063","low":"0.016","hd":"","standard":"","dimension":"1024x1536"},{"high":"0.25","medium":"0.063","low":"0.016","hd":"","standard":"","dimension":"1536x1024"}],"priceToShow":{"prompt":"0.00001","completion":"0.00004","imageCost":[{"low":"0.011","high":"0.167","medium":"0.042","dimension":"1024x1024","x-key":"dimension"},{"low":"0.016","high":"0.25","medium":"0.063","dimension":"1024x1536","x-key":"dimension"},{"low":"0.016","high":"0.25","medium":"0.063","dimension":"1536x1024","x-key":"dimension"}]}},"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["prompt","background","moderation","n","output_compression","output_format","quality","response_format","size","style"],"models_extra_params":{"category":["Image Generation"]},"is_active":true,"creator":"OpenAI"},{"id":"google/gemini-2.5-flash-image","name":"Google: Gemini 2.5 Flash Image (Nano Banana)","description":"Google Gemini 2.5 Flash Image (often nicknamed \"nano-banana\") is Google's latest state-of-the-art multimodal model for image 
generation and editing, designed for both developers and enterprises. It enables users to create, blend, and edit images with natural language prompts and multi-image input, all while benefiting from Gemini's deep real-world understanding and conversational editing features.","created":1759870431,"context_length":32768,"architecture":{"modality":"text+image-\u003etext+image","input_modalities":["image","text"],"output_modalities":["image","text"],"tokenizer":"Gemini"},"pricing":{"prompt":"0.0000003","completion":"0.0000025","request":"0","image":"0.00003","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":32768,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["max_tokens","response_format","seed","structured_outputs","temperature","top_p","aspectRatio"],"models_extra_params":{"category":null},"is_active":true,"creator":"Google"},{"id":"Qwen/Qwen2.5-72B-Instruct","name":"Qwen: Qwen2.5 72B Instruct","description":"Alibaba's largest Qwen2.5 model featuring improved capabilities in coding, mathematics, and instruction following across more than 29 languages. 
With 72B parameters and 128K context support, it delivers top-tier performance for complex tasks requiring deep context understanding and sophisticated reasoning.","created":1726704000,"context_length":32768,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"chatml"},"pricing":{"prompt":"0.00000012","completion":"0.00000039","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":32768,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","top_k","repetition_penalty","response_format","structured_outputs","logit_bias","logprobs","top_logprobs","seed","min_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Qwen"},{"id":"Qwen/Qwen2.5-7B-Instruct","name":"Qwen: Qwen2.5 7B Instruct","description":"A compact yet capable 7B parameter model with enhanced knowledge, coding abilities, mathematical reasoning, and instruction following. 
Supports over 29 languages and 128K context windows, making it one of the most versatile small models available for deployment on resource-constrained systems while maintaining strong performance.","created":1729036800,"context_length":32768,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"chatml"},"pricing":{"prompt":"0.00000012","completion":"0.00000024","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":32768,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","top_k","repetition_penalty","logit_bias","min_p","response_format","seed"],"models_extra_params":{"category":null},"is_active":true,"creator":"Qwen"},{"id":"Qwen/Qwen2.5-Coder-32B-Instruct","name":"Qwen: Qwen2.5 Coder 32B Instruct","description":"A specialized 32B parameter model fine-tuned specifically for coding tasks with enhanced capabilities in code generation, debugging, and algorithmic reasoning. 
Maintains Qwen's traditional strengths in mathematics while offering improved performance across multiple programming languages and development tasks.","created":1731368400,"context_length":32768,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"chatml"},"pricing":{"prompt":"0.00000006","completion":"0.00000015","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":32768,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","top_k","repetition_penalty","logit_bias","min_p","response_format","seed","logprobs","top_logprobs","top_a"],"models_extra_params":{"category":null},"is_active":true,"creator":"Qwen"},{"id":"Qwen/Qwen3-14B","name":"Qwen: Qwen3 14B","description":"A versatile dense model offering powerful reasoning abilities paired with efficient dialogue processing. 
Developed by Alibaba, it supports extensive 32K context windows (extendable to 131K), features both thinking and non-thinking modes, and excels in coding, mathematics, and multilingual support across 119 languages and dialects.","created":1745876478,"context_length":40960,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":"0.00000012","completion":"0.00000024","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":40960,"max_completion_tokens":40960,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","reasoning","include_reasoning","stop","frequency_penalty","presence_penalty","repetition_penalty","response_format","top_k","seed","min_p","logit_bias","logprobs","top_logprobs"],"models_extra_params":{"category":[]},"is_active":true,"creator":"Qwen"},{"id":"Qwen/Qwen3-30B-A3B","name":"Qwen: Qwen3 30B A3B","description":"A Mixture-of-Experts model that activates only 3.3B of its 30.5B parameters per forward pass. Offers dual thinking modes: a detailed step-by-step reasoning mode for complex problems and a faster non-thinking mode for simpler queries. 
Despite its small active parameter count, it outperforms QwQ-32B that has 10 times more activated parameters.","created":1745878604,"context_length":40960,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":"0.00000008","completion":"0.00000029","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":40960,"max_completion_tokens":40960,"is_moderated":false},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","reasoning","include_reasoning","stop","frequency_penalty","presence_penalty","top_k","repetition_penalty","response_format","structured_outputs","logit_bias","logprobs","top_logprobs","seed","min_p"],"models_extra_params":{"category":[]},"is_active":true,"creator":"Qwen"},{"id":"ace-step/prompt-to-audio","name":"ACE Step: Prompt-to-Audio","description":"ACE-Step is an open-source model built to address the shortcomings of previous music generation systems by combining diffusion techniques, Sana’s Deep Compression AutoEncoder (DCAE), and a lightweight linear transformer. 
The ace-step/prompt-to-audio functionality is an interface within the ACE-Step music generation model that enables users to create audio directly from text prompts. This architecture allows it to produce high-quality, musically coherent audio efficiently and with a high degree of control. Users can input various prompt types -- including short tags, descriptive phrases, or full lyrics -- and the model interprets these inputs to generate corresponding music. With features like lyric editing and detailed control over song structure and style, the prompt-to-audio tool offers a powerful, flexible way to create music using natural language.","created":1739402400,"architecture":{"modality":"text-\u003eaudio","input_modalities":["text"],"output_modalities":["audio"],"tokenizer":"","instruct_type":"none"},"pricing":{"prompt":"0","completion":"0","request":"0","image":"","web_search":"0","citation":"","reasoning":"","duration":"0.005","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"duration":"0.005"}},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":true},"supported_parameters":["instrumental","number_of_steps","scheduler","guidance_type","granularity_scale","guidance_interval","guidance_interval_decay","guidance_scale","minimum_guidance_scale","tag_guidance_scale","lyric_guidance_scale"],"models_extra_params":{"category":["Audio 
Generation"]},"is_active":true,"creator":"ACE Step"},{"id":"anthropic/claude-opus-4-20250514","name":"Anthropic: Claude Opus 4","description":"Claude Opus 4 is Anthropic’s most capable model yet, excelling in long-term coding, research, and creative content generation. It enables autonomous agents, background code execution, and high-quality writing, making it a powerful tool for businesses and developers handling complex, large-scale tasks. With strong reasoning and adaptability, it's designed for advanced use across engineering, research, and creative industries.","created":1747931245,"context_length":200000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Claude"},"pricing":{"prompt":"0.000015","completion":"0.000075","request":"0","image":"0.024","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.0000015","input_cache_write":"0.00001875","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000750","completion":"0.00003750","input_cache_read":"0.00000075","input_cache_write":"0.000009375"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":200000,"max_completion_tokens":32000,"is_moderated":true},"supported_parameters":["max_tokens","temperature","stop","reasoning","include_reasoning","tools","tool_choice","top_p","top_k"],"models_extra_params":{"category":["Coding","Legal","Health","Creative 
Writing"]},"is_active":true,"creator":"Anthropic"},{"id":"black-forest-labs/flux-dev","name":"Black Forest Labs: FLUX Dev","description":"FLUX.1 [dev] is a 12 billion parameter text-to-image model developed by Black Forest Labs for high-quality image generation and editing from text prompts. It excels at generating photorealistic images with strong prompt adherence, real-world physics understanding (e.g., lighting, spatial relationships), and readable text rendering.","created":1724239509,"context_length":4096,"architecture":{"modality":"text-\u003eimage","input_modalities":["text"],"output_modalities":["image"],"tokenizer":""},"pricing":{"prompt":"0","completion":"0","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"price":"As per provider"}},"top_provider":{"context_length":4096,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["prompt","n","size"],"models_extra_params":{"category":null},"is_active":true,"creator":"BlackForestLabs"},{"id":"black-forest-labs/flux-kontext-pro","name":"Black Forest Labs: FLUX Kontext Pro","description":"FLUX.1 Kontext [pro] is an advanced image generation and editing model from Black Forest Labs that supports both text prompts and reference images for in-context 
modifications.","created":1748517909,"context_length":4096,"architecture":{"modality":"text-\u003eimage","input_modalities":["text"],"output_modalities":["image"],"tokenizer":""},"pricing":{"prompt":"0","completion":"0","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"price":"As per provider"}},"top_provider":{"context_length":4096,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["prompt","n","size"],"models_extra_params":{"category":null},"is_active":true,"creator":"BlackForestLabs"},{"id":"black-forest-labs/flux-pro-2.0","name":"Black Forest Labs: FLUX 2 Pro","description":"FLUX.2 Pro (also styled as Flux-2-Pro or FLUX.2 [pro]) is a production-grade AI image generation and editing model developed by Black Forest Labs, released in November 2025 as the flagship of the FLUX.2 family. 
It excels in creating high-quality, photorealistic images up to 4 megapixels from text prompts, with advanced controls for editing, multi-reference image use (up to 8 references), and precise outputs suitable for commercial workflows like product photography, UI prototyping, and brand design.","created":1764058653,"context_length":4096,"architecture":{"modality":"text-\u003eimage","input_modalities":["text"],"output_modalities":["image"],"tokenizer":""},"pricing":{"prompt":"0","completion":"0","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"price":"As per provider"}},"top_provider":{"context_length":4096,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["image","prompt","n","size"],"models_extra_params":{"category":null},"is_active":true,"creator":"BlackForestLabs"},{"id":"bytedance/seedance","name":"ByteDance: Seedance","description":"Seedance 1.0 Lite is a ByteDance model for streaming, multi-shot video generation. It supports both text and image inputs and can create 1080p videos with smooth, stable motion. The model is specifically designed for narrative content: it can natively produce multi-shot stories, keeping subjects and style consistent across scenes. 
Seedance Lite features precise prompt following and diverse style support, achieving high scores in motion quality and aesthetics. In practical terms, it empowers creators to generate coherent short films with cinematic detail on the fly.","created":1739403115,"context_length":8192,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"SeedanceTokenizer","instruct_type":"none"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"480P":"0.18","720P":"0.24","1080p":"0.54","default":"0.18","length":"5"},{"480P":"0.3","720P":"0.42","1080p":"0.72","default":"0.3","length":"10"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"480p":"0.18","720p":"0.24","1080p":"0.54","default":"0.18","length":"5 sec","x-key":"length"},{"480p":"0.3","720p":"0.42","1080p":"0.72","default":"0.3","length":"10 sec","x-key":"length"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["image","prompt","resolution","length","seed"],"models_extra_params":{"category":["Video Generation"]},"is_active":true,"creator":"ByteDance"},{"id":"bytedance/seedance-1.5-pro","name":"ByteDance: Seedance 1.5 Pro","description":"Seedance 1.5 Pro is an advanced, joint audio-video AI generation model from 
ByteDance Seed, designed to create high-fidelity, cinematic, 4–12 second clips with synchronized audio, dialogue, and sound effects in a single pass. Utilizing a dual-branch diffusion transformer (DB-DiT), it specializes in realistic human motion, emotional nuance, and precise lip-sync for storytelling, advertising, and short-form content.","created":1766454997,"context_length":400000,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"SeedanceTokenizer","instruct_type":"none"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"480P":"0.092","720P":"0.192","1080p":"0.456","default":"0.092","length":"4"},{"480P":"0.115","720P":"0.24","1080p":"0.57","default":"0.115","length":"5"},{"480P":"0.138","720P":"0.288","1080p":"0.684","default":"0.138","length":"6"},{"480P":"0.161","720P":"0.336","1080p":"0.798","default":"0.161","length":"7"},{"480P":"0.184","720P":"0.384","1080p":"0.912","default":"0.184","length":"8"},{"480P":"0.207","720P":"0.432","1080p":"1.026","default":"0.207","length":"9"},{"480P":"0.23","720P":"0.48","1080p":"1.14","default":"0.23","length":"10"},{"480P":"0.253","720P":"0.528","1080p":"1.254","default":"0.253","length":"11"},{"480P":"0.276","720P":"0.576","1080p":"1.368","default":"0.276","length":"12"}],"videoCostWithAudio":nul
l,"imageCost":null,"priceToShow":{"videoCost":[{"480p":"0.092","720p":"0.192","1080p":"0.456","default":"0.092","length":"4","x-key":"length"},{"480p":"0.115","720p":"0.24","1080p":"0.57","default":"0.115","length":"5","x-key":"length"},{"480p":"0.138","720p":"0.288","1080p":"0.684","default":"0.138","length":"6","x-key":"length"},{"480p":"0.161","720p":"0.336","1080p":"0.798","default":"0.161","length":"7","x-key":"length"},{"480p":"0.184","720p":"0.384","1080p":"0.912","default":"0.184","length":"8","x-key":"length"},{"480p":"0.207","720p":"0.432","1080p":"1.026","default":"0.207","length":"9","x-key":"length"},{"480p":"0.23","720p":"0.48","1080p":"1.14","default":"0.23","length":"10","x-key":"length"},{"480p":"0.253","720p":"0.528","1080p":"1.254","default":"0.253","length":"11","x-key":"length"},{"480p":"0.276","720p":"0.576","1080p":"1.368","default":"0.276","length":"12","x-key":"length"}]}},"top_provider":{"context_length":400000,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["image","prompt","resolution","length","seed","aspectRatio","generateAudio"],"models_extra_params":{"category":null},"is_active":true,"creator":"ByteDance"},{"id":"bytedance/seedance-pro","name":"ByteDance: Seedance Pro","description":"Seedance 1.0 Pro is the full version of the Seedance model (ByteDance) with professional-level capabilities. Like the Lite version, it creates 1080p multi-shot videos from text or images, but with enhanced storytelling features. Pro supports wide dynamic motion ranges for smooth large-scale movements and native multi-shot consistency (stable subjects and style across cuts). It also interprets diverse visual styles accurately. In benchmarks, Seedance Pro shows very high prompt adherence, motion quality, and aesthetic scores. 
It's intended for creating polished, multi-scene videos in applications like advertising or short films.","created":1739403116,"context_length":8192,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"SeedanceTokenizer","instruct_type":"none"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"480P":"0.18","1080p":"0.72","default":"0.18","length":"5"},{"480P":"0.36","1080p":"1.8","default":"0.36","length":"10"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"480p":"0.18","1080p":"0.72","default":"0.18","length":"5 sec","x-key":"length"},{"480p":"0.36","1080p":"1.8","default":"0.36","length":"10 sec","x-key":"length"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["image","prompt","resolution","length","seed"],"models_extra_params":{"category":["Video Generation"]},"is_active":true,"creator":"ByteDance"},{"id":"bytedance/seedream-4.0","name":"ByteDance: Seedream 4.0","description":"Seedream 4.0 is a next-generation AI image generation and editing model developed by ByteDance Seed, unifying text-to-image synthesis, image editing, and multi-image composition in a single architecture for high-resolution outputs up to 4K. 
It excels in emotional intelligence, atmospheric rendering, character consistency, and fast inference (as quick as 1.8 seconds per 2K image), supporting resolutions like 2048×2048, 1920×1080, and custom ratios.","created":1757425743,"context_length":4096,"architecture":{"modality":"text-\u003eimage","input_modalities":["text"],"output_modalities":["image"],"tokenizer":""},"pricing":{"prompt":"0","completion":"0","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"price":"As per provider"}},"top_provider":{"context_length":4096,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["image","prompt","n","size"],"models_extra_params":{"category":null},"is_active":true,"creator":"ByteDance"},{"id":"deepinfra/intfloat-e5-base-v2","name":"DeepInfra: intfloat/e5-base-v2","description":"Open-source E5-base-v2 embedding model (768-dim) fine-tuned for retrieval and semantic 
search.","created":1704902400,"context_length":512,"architecture":{"modality":"text-\u003evector","input_modalities":["text"],"output_modalities":["vector"],"tokenizer":""},"pricing":{"prompt":"0.00000010","completion":"0","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0","input_cache_write":"0","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":512,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":[],"models_extra_params":{"category":null},"is_active":true,"creator":"DeepInfra"},{"id":"deepseek-ai/DeepSeek-R1","name":"DeepSeek: R1","description":"A frontier-level reasoning model with 671B total parameters (37B activated per token) that delivers performance comparable to OpenAI's o1 across mathematics, coding, and reasoning tasks. 
Licensed under MIT for free commercial use, it pioneered cold-start data techniques before reinforcement learning to achieve breakthrough capabilities.","created":1737381095,"context_length":163840,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek","instruct_type":"deepseek-r1"},"pricing":{"prompt":"0.0000005","completion":"0.00000218","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":163840,"max_completion_tokens":163840,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","reasoning","include_reasoning","stop","frequency_penalty","presence_penalty","seed","top_k","logit_bias","logprobs","top_logprobs","repetition_penalty","response_format","structured_outputs","min_p","tools","tool_choice"],"models_extra_params":{"category":["Math","Health","Creative Writing"]},"is_active":true,"creator":"DeepSeek"},{"id":"deepseek-ai/DeepSeek-R1-Distill-Llama-70B","name":"DeepSeek: R1 Distill Llama 70B","description":"A streamlined model that distills DeepSeek R1's advanced reasoning abilities into the Llama-3.3-70B architecture. 
Achieves strong performance across various reasoning benchmarks while maintaining the efficiency and versatility of the Llama architecture for more practical deployment scenarios compared to the massive 671B R1 model.","created":1737663169,"context_length":131072,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Llama3","instruct_type":"deepseek-r1"},"pricing":{"prompt":"0.0000006","completion":"0.0000012","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","reasoning","include_reasoning","top_k","stop","frequency_penalty","presence_penalty","seed","logit_bias","logprobs","top_logprobs","min_p","repetition_penalty","response_format","structured_outputs"],"models_extra_params":{"category":[]},"is_active":true,"creator":"DeepSeek"},{"id":"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B","name":"DeepSeek: R1 Distill Qwen 32B","description":"A knowledge distillation model that transfers DeepSeek R1's powerful reasoning capabilities into the more efficient Qwen 2.5 32B architecture. 
Delivers exceptional performance on complex mathematical reasoning benchmarks like AIME and MATH while maintaining the efficiency advantages of a much smaller model.","created":1738194830,"context_length":131072,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen","instruct_type":"deepseek-r1"},"pricing":{"prompt":"0.00000027","completion":"0.00000027","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","reasoning","include_reasoning","stop","frequency_penalty","presence_penalty","repetition_penalty","response_format","top_k","seed","min_p","logit_bias"],"models_extra_params":{"category":[]},"is_active":true,"creator":"DeepSeek"},{"id":"deepseek-ai/DeepSeek-V3-0324","name":"DeepSeek: DeepSeek V3 0324","description":"A massive 685B parameter MoE model activating 37B parameters per token. Leveraging Multi-head Latent Attention and DeepSeekMoE architecture, it's trained on 14.8 trillion diverse tokens without requiring recovery from loss spikes. 
Matches leading closed-source models in performance while maintaining efficient training and inference.","created":1742824755,"context_length":163840,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"DeepSeek"},"pricing":{"prompt":"0.00000027","completion":"0.00000088","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":163840,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","presence_penalty","frequency_penalty","repetition_penalty","top_k","stop","tools","tool_choice","response_format","structured_outputs","logit_bias","logprobs","top_logprobs","seed","min_p"],"models_extra_params":{"category":["Legal","Creative Writing"]},"is_active":true,"creator":"DeepSeek"},{"id":"google/gemini-2.0-flash-001","name":"Google: Gemini 2.0 Flash","description":"Gemini 2.0 Flash serves as Google's versatile model designed for everyday applications. This solution provides robust general capabilities with real-time streaming support. 
It delivers enhanced performance compared to Gemini 1.5 Pro at reduced pricing, while offering substantially improved efficiency over Gemini 1.5 Flash, albeit at higher cost.","created":1738769413,"context_length":1048576,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Gemini"},"pricing":{"prompt":"0.0000001","completion":"0.0000004","request":"0","image":"0.0000258","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.000000025","input_cache_write":"0.0000001833","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":1048576,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","seed","response_format","structured_outputs"],"models_extra_params":{"category":null},"is_active":true,"creator":"Google"},{"id":"google/gemini-2.0-flash-lite-001","name":"Google: Gemini 2.0 Flash Lite","description":"Gemini 2.0 Flash Lite is Google's most cost-efficient model specifically optimized for high-volume text processing applications. This lightweight variant delivers comparable quality to larger Gemini models while maintaining extremely low token costs and minimal latency. 
The model provides significant performance improvements over Gemini 1.5 Flash in time-to-first-token metrics and offers simplified pricing with consistent per-token costs regardless of context length. It's particularly suitable for scaled production environments where cost optimization is critical, including chatbots, content generation systems, and text processing pipelines requiring high throughput at minimal expense.","created":1740506212,"context_length":1048576,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Gemini"},"pricing":{"prompt":"0.000000075","completion":"0.0000003","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":1048576,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","seed","response_format","structured_outputs"],"models_extra_params":{"category":null},"is_active":true,"creator":"Google"},{"id":"google/gemini-2.5-flash","name":"Google: Gemini 2.5 Flash","description":"Gemini 2.5 Flash is Google's hybrid reasoning model offering an optimal balance between performance, cost, and latency. 
It features full thinking capabilities that can be toggled on/off or fine-tuned using a thinking budget parameter (0-24576 tokens). This model excels at complex tasks requiring multi-step reasoning while maintaining the efficiency needed for high-volume and real-time applications. It outperforms comparable models in reasoning benchmarks while maintaining lower cost and latency profiles, making it ideal for enterprise deployments requiring both deep analysis and scalability.","created":1744914667,"context_length":1048576,"architecture":{"modality":"text+image-\u003etext","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"Gemini"},"pricing":{"prompt":"0.0000003","completion":"0.0000025","request":"0","image":"0.001238","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.000000075","input_cache_write":"0.0000003833","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"0.00000030","completion_more_than_200k_input":"0.0000025","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.00000030","prompt_more_than_200k_input":"0.00000030","completion":"0.0000025","completion_more_than_200k_input":"0.0000025"}},"top_provider":{"context_length":1048576,"max_completion_tokens":65535,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","tools","tool_choice","stop","response_format","structured_outputs"],"models_extra_params":{"category":["Math","Legal","Health","Creative 
Writing"]},"is_active":true,"creator":"Google"},{"id":"google/gemini-2.5-pro","name":"Google: Gemini 2.5 Pro ","description":"Gemini 2.5 Pro is Google's premiere thinking model built for enterprise-grade applications requiring sophisticated reasoning. It delivers top performance on critical benchmarks, featuring built-in reasoning capabilities that enable it to plan and analyze before responding to complex queries. The model maintains high accuracy across STEM domains with native multimodal processing and an extensive context window. This preview release allows early access to Google's most advanced model, which excels in code development, scientific problem-solving, and multi-step logical tasks while maintaining human-aligned responses. The preview designation indicates ongoing optimization before general availability.","created":1746578513,"context_length":1048576,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"Gemini"},"pricing":{"prompt":"0.00000125","completion":"0.00001","request":"0","image":"0.00516","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.00000031","input_cache_write":"0.000001625","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"0.00000250","completion_more_than_200k_input":"0.000015","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.00000125","prompt_more_than_200k_input":"0.00000250","completion":"0.000010","completion_more_than_200k_input":"0.000015"}},"top_provid
er":{"context_length":1048576,"max_completion_tokens":65535,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","tools","tool_choice","stop","seed","response_format","structured_outputs","reasoning","include_reasoning"],"models_extra_params":{"category":["Coding","Math","Legal","Health","Creative Writing"]},"is_active":true,"creator":"Google"},{"id":"google/gemini-embedding-001","name":"Google: gemini-embedding","description":"State-of-the-art Gemini embedding model (3072-dim) optimised for retrieval and semantic similarity.","created":1740787200,"context_length":2048,"architecture":{"modality":"text-\u003evector","input_modalities":["text"],"output_modalities":["vector"],"tokenizer":""},"pricing":{"prompt":"0.00000015","completion":"0","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0","input_cache_write":"0","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":2048,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":[],"models_extra_params":{"category":null},"is_active":true,"creator":"Google"},{"id":"google/gemma-4-31b-it","name":"Google: Gemma 4 31B","description":"google/gemma-4-31b-it is Google DeepMind’s instruction-tuned 31B Gemma 4 model, built for strong reasoning, coding, agentic workflows, and multimodal understanding. 
It supports text and image input, has a 256K context window, and is positioned as the dense, higher-quality counterpart to the 26B MoE variant.","created":1775148486,"context_length":262144,"architecture":{"modality":"text+image+video-\u003etext","input_modalities":["image","text","video"],"output_modalities":["text"],"tokenizer":"Gemma"},"pricing":{"prompt":"0.00000020","completion":"0.00000050","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":262144,"max_completion_tokens":131072,"is_moderated":false},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Google"},{"id":"google/imagen-4.0-generate-001","name":"Google: Imagen 4","description":"Imagen 4 is Google's flagship text-to-image generation model, designed for high-fidelity, photorealistic images from text prompts.\nIt supports generating up to 4 images per request at resolutions like 2K, with capabilities for diverse art styles (e.g., realism, watercolor, pixel art), improved spelling/typography, sharp clarity, and 
intricate details such as textures and lighting.","created":1755163830,"context_length":4096,"architecture":{"modality":"text-\u003eimage","input_modalities":["text"],"output_modalities":["image"],"tokenizer":""},"pricing":{"prompt":"0","completion":"0","request":"","image":"0.04","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"image":"0.04"}},"top_provider":{"context_length":4096,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["prompt","n","aspectRatio"],"models_extra_params":{"category":null},"is_active":true,"creator":"Google"},{"id":"google/veo2","name":"Google: Google Veo 2","description":"Google Veo 2 (2023) was an early release for high-quality video from images, using Gemini/Imagen tech. It excelled at placing objects realistically in 3D space and generating smooth camera moves. Veo 2 supported straightforward prompt-based generation but did not include advanced features like native audio. It produced relatively sharp, realistic clips, often at 720p. 
Veo 2 was well-regarded for its spatial coherence and served as a benchmark for later models.","created":1739403111,"context_length":8192,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"VeoTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"default":"2.4","length":"5"},{"default":"3.84","length":"8"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"default":"2.4","length":"5 sec","x-key":"length"},{"default":"3.84","length":"8 sec","x-key":"length"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["image","prompt","length","seed","aspectRatio"],"models_extra_params":{"category":["Video Generation"]},"is_active":true,"creator":"Google"},{"id":"google/veo3","name":"Google: Google Veo 3","description":"Veo 3 is Google's latest text/image-to-video model (Gemini/Imagen family). It improves on Veo 2 by integrating audio: users can add character dialogue, sound effects, and ambient audio to generated videos. This makes Veo 3 outputs more immersive and cinematic. The model produces higher-fidelity visuals and better spatial consistency than its predecessor. 
While primarily a research/closed model, Veo 3 demonstrates Google's focus on multimodal (audio+video) storytelling. In practice, it's ideal for narrative content; it yields polished scenes with sound design when properly prompted. Overall, Veo 3's standout features are its audio integration and enhanced image quality relative to earlier Google video models.","created":1739403003,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"VeoTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"default":"4","length":"8"}],"videoCostWithAudio":[{"default":"6","length":"8"}],"imageCost":null,"priceToShow":{"videoCost":[{"default":"4","length":"8 sec","x-key":"length"}],"videoCostWithAudio":[{"default":"6","length":"8 sec","x-key":"length"}]}},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":true},"supported_parameters":["image","prompt","resolution","length","seed","aspectRatio","generateAudio"],"models_extra_params":{"category":["Video Generation"]},"is_active":true,"creator":"Google"},{"id":"google/veo3-fast","name":"Google: Google Veo 3 Fast","description":"Veo 3 Fast is a speed-optimized version of Google DeepMind’s flagship Veo 3 video generation model, launched in mid-2025. 
Designed for rapid content creation, it produces 8-second, 720p videos—with synchronized dialogue, ambient sounds, music, and effects—in under a minute. Compared to the standard Veo 3, Veo 3 Fast is around 30% quicker and cuts compute costs by up to 80%. Optimized for efficiency, Veo 3 Fast accepts both text and image inputs and supports cinematic camera motion. It's especially well-suited for fast prototyping, social media, marketing, and educational applications where speed and affordability are more critical than ultra-high fidelity.","created":1739403003,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"VeoTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"default":"1.62","length":"8"}],"videoCostWithAudio":[{"default":"2.52","length":"8"}],"imageCost":null,"priceToShow":{"videoCost":[{"default":"1.62","length":"8 sec","x-key":"length"}],"videoCostWithAudio":[{"default":"2.52","length":"8 sec","x-key":"length"}]}},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":true},"supported_parameters":["image","prompt","resolution","length","seed","aspectRatio","generateAudio"],"models_extra_params":{"category":["Video 
Generation"]},"is_active":true,"creator":"Google"},{"id":"google/veo3.1","name":"Google: Google Veo 3.1","description":"Google Veo 3.1 is a preview model code in the Gemini API for Google's Veo 3.1, a state-of-the-art cinematic video generation engine optimized for professional-grade 4K output, natively synchronized audio, and complex camera movements with high temporal consistency.","created":1766454997,"context_length":400000,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"VeoTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"720P":"0.2","1080p":"0.2","4k":"0.4","default":"0.2","length":"1"}],"videoCostWithAudio":[{"720P":"0.4","1080p":"0.4","4k":"0.6","default":"0.4","length":"1"}],"imageCost":null,"priceToShow":{"videoCost":[{"720p":"0.2","1080p":"0.2","4k":"0.4","default":"0.2","length":"1 sec","x-key":"length"}],"videoCostWithAudio":[{"720p":"0.4","1080p":"0.4","4k":"0.6","default":"0.4","length":"1 
sec","x-key":"length"}]}},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["image","images","prompt","resolution","length","seed","aspectRatio","generateAudio"],"models_extra_params":{"category":null},"is_active":true,"creator":"Google"},{"id":"google/veo3.1-fast","name":"Google: Google Veo 3.1 Fast","description":"Google Veo 3.1 Fast is a generally available (GA) version of Google's Veo 3.1 Fast video generation model in Vertex AI and Gemini API, optimized for faster inference while delivering high-quality videos with synchronized native audio from text or image prompts.","created":1766454997,"context_length":400000,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"VeoTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"720P":"0.1","1080p":"0.1","4k":"0.3","default":"0.1","length":"1"}],"videoCostWithAudio":[{"720P":"0.15","1080p":"0.15","4k":"0.35","default":"0.15","length":"1"}],"imageCost":null,"priceToShow":{"videoCost":[{"720p":"0.1","1080p":"0.1","4k":"0.3","default":"0.1","length":"1 sec","x-key":"length"}],"videoCostWithAudio":[{"720p":"0.15","1080p":"0.15","4k":"0.35","default":"0.15","length":"1 
sec","x-key":"length"}]}},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["image","images","prompt","resolution","length","seed","aspectRatio","generateAudio"],"models_extra_params":{"category":null},"is_active":true,"creator":"Google"},{"id":"google/veo3.1-lite","name":"Google: Google Veo 3.1 Lite","description":"Google Veo 3.1 Lite, a high-efficiency, cost-effective video generation model in the Gemini API and Vertex AI, designed for developers building high-volume applications at under 50% the cost of Veo 3.1 Fast with equivalent speed.","created":1766454997,"context_length":400000,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"VeoTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"720P":"0.05","1080p":"0.08","default":"0.05","length":"1"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"720p":"0.05","1080p":"0.08","default":"0.05","length":"1 
sec","x-key":"length"}]}},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["image","prompt","resolution","length","seed","aspectRatio","generateAudio"],"models_extra_params":{"category":null},"is_active":true,"creator":"Google"},{"id":"kling-ai/kling-v1-6","name":"Kling AI: Kling V1.6","description":"Kling 1.6 (Dec 2024) was a major upgrade over Kling 1.5, reportedly boosting performance by ~195%. It features much better prompt understanding and physics, resulting in very natural movements and facial expressions in the generated videos. The image-to-video quality is significantly enhanced: videos are more vibrant and consistent, with refined lighting and shadows. Kling 1.6 offers dual modes (Standard and Professional) to balance speed vs. quality. Notably, these improvements were delivered with no extra cost to users, making it a highly attractive option for free AI video generation.","created":1739403103,"context_length":8192,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"KlingTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"std":"0.21","pro":"0.42","default":"0.21","length":"5"},{"std":"0.42","pro":"0.9","default":"0.21","length":"10"}],"vid
eoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"std":"0.21","pro":"0.42","default":"0.21","length":"5 sec","x-key":"length"},{"std":"0.42","pro":"0.9","default":"0.21","length":"10 sec","x-key":"length"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["image","prompt","length","mode"],"models_extra_params":{"category":["Video Generation"]},"is_active":true,"creator":"Kling AI"},{"id":"kling-ai/kling-v2","name":"Kling AI: Kling V2","description":"Kling 2.0 (2024) was Kling's image/video model aimed at cinematic output. It introduced higher resolution outputs and smoother camera movements than Kling 1.x. The model generated realistic motion and stable scenes, with a focus on high-quality visual polish. Compared to the later 2.1, Kling 2.0 was a solid general-purpose model but had fewer advanced features (e.g. no multi-frame generation). It supported both image and text prompts for 720p/1080p animations. 
In effect, Kling 2.0 provided film-like AI videos that set the stage for even more advanced models.","created":1739403110,"context_length":8192,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"KlingTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"default":"1.08","length":"5"},{"default":"2.16","length":"10"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"default":"1.08","length":"5 sec","x-key":"length"},{"default":"2.16","length":"10 sec","x-key":"length"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["image","prompt","length"],"models_extra_params":{"category":["Video Generation"]},"is_active":true,"creator":"Kling AI"},{"id":"kling-ai/kling-v2-1","name":"Kling AI: Kling V2.1","description":"Kling 2.1 is Kuaishou's advanced image-to-video model (2025). It transforms reference images (and optional text) into high-definition cinematic videos using a 3D spatiotemporal attention architecture. The model simulates real-world physics and delivers very natural, intricate motion and lifelike characters. 
It can produce multiple video variants from the same prompt and uses AI-assisted prompting to help users craft detailed instructions. In comparative tests, Kling 2.1 rivals top closed-source competitors. Overall, it marks a significant step up in realism, coherence, and prompt-following over Kling 2.0.","created":1739403002,"context_length":8192,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"KlingTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"std":"0.21","pro":"0.42","default":"0.21","length":"5"},{"std":"0.42","pro":"0.9","default":"0.42","length":"10"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"std":"0.21","pro":"0.42","default":"0.21","length":"5 sec","x-key":"length"},{"std":"0.42","pro":"0.9","default":"0.42","length":"10 sec","x-key":"length"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["image","prompt","length","mode"],"models_extra_params":{"category":["Video Generation"]},"is_active":true,"creator":"Kling AI"},{"id":"kling-ai/kling-v2-1-master","name":"Kling AI: Kling V2.1 Master","description":"Kling 2.1 Master is the premium version of Kling's 2.1 model, designed for professional use. 
It delivers top-tier image-to-video performance with exceptionally fluid motion and cinematic visuals. The model provides smoother playback and more dramatic action scenes compared to lower tiers. It offers very precise prompt adherence, allowing complex instructions to produce coherent, high-quality video. The high output quality comes with increased compute cost, but users gain a very polished, filmic look. In summary, Kling 2.1 Master produces advanced, dynamic animations with excellent detail and strict alignment to input prompts.","created":1739403001,"context_length":8192,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"KlingTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"default":"1.2","length":"5"},{"default":"2.4","length":"10"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"default":"1.2","length":"5 sec","x-key":"length"},{"default":"2.4","length":"10 sec","x-key":"length"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["image","prompt","length"],"models_extra_params":{"category":["Video Generation"]},"is_active":true,"creator":"Kling AI"},{"id":"kling-ai/kling-v3","name":"Kling AI: Kling 
V3","description":"Kling V3 is Kuaishou's latest AI video generation model, delivering high-quality, physics-accurate video synthesis with improved motion consistency, lighting realism, and creative control. It supports text-to-video and image-to-video workflows, generating clips up to 10 seconds at resolutions up to 1080p, ideal for cinematic storytelling, marketing content, and rapid prototyping.","created":1770299343,"context_length":400000,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"KlingTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"std":"0.24","pro":"0.36","default":"0.24","length":"1"},{"std":"0.48","pro":"0.72","default":"0.48","length":"2"},{"std":"0.72","pro":"1.08","default":"0.72","length":"3"},{"std":"0.96","pro":"1.44","default":"0.96","length":"4"},{"std":"1.2","pro":"1.8","default":"1.2","length":"5"},{"std":"1.44","pro":"2.16","default":"1.44","length":"6"},{"std":"1.68","pro":"2.52","default":"1.68","length":"7"},{"std":"1.92","pro":"2.88","default":"1.92","length":"8"},{"std":"2.16","pro":"3.24","default":"2.16","length":"9"},{"std":"2.4","pro":"3.6","default":"2.4","length":"10"},{"std":"2.64","pro":"3.96","default":"2.64","length":"11"},{"std":"2.88","pro":"4.32","default":"
2.88","length":"12"},{"std":"3.12","pro":"4.68","default":"3.12","length":"13"},{"std":"3.36","pro":"5.04","default":"3.36","length":"14"},{"std":"3.6","pro":"5.4","default":"3.6","length":"15"},{"std":"3.84","pro":"5.76","default":"3.84","length":"16"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"std":"0.24","pro":"0.36","default":"0.24","length":"1 sec","x-key":"length"},{"std":"0.48","pro":"0.72","default":"0.48","length":"2 sec","x-key":"length"},{"std":"0.72","pro":"1.08","default":"0.72","length":"3 sec","x-key":"length"},{"std":"0.96","pro":"1.44","default":"0.96","length":"4 sec","x-key":"length"},{"std":"1.2","pro":"1.8","default":"1.2","length":"5 sec","x-key":"length"},{"std":"1.44","pro":"2.16","default":"1.44","length":"6 sec","x-key":"length"},{"std":"1.68","pro":"2.52","default":"1.68","length":"7 sec","x-key":"length"},{"std":"1.92","pro":"2.88","default":"1.92","length":"8 sec","x-key":"length"},{"std":"2.16","pro":"3.24","default":"2.16","length":"9 sec","x-key":"length"},{"std":"2.4","pro":"3.6","default":"2.4","length":"10 sec","x-key":"length"},{"std":"2.64","pro":"3.96","default":"2.64","length":"11 sec","x-key":"length"},{"std":"2.88","pro":"4.32","default":"2.88","length":"12 sec","x-key":"length"},{"std":"3.12","pro":"4.68","default":"3.12","length":"13 sec","x-key":"length"},{"std":"3.36","pro":"5.04","default":"3.36","length":"14 sec","x-key":"length"},{"std":"3.6","pro":"5.4","default":"3.6","length":"15 sec","x-key":"length"},{"std":"3.84","pro":"5.76","default":"3.84","length":"16 sec","x-key":"length"}]}},"top_provider":{"context_length":400000,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["image","prompt","length","mode","generateAudio"],"models_extra_params":{"category":null},"is_active":true,"creator":"Kling AI"},{"id":"kling-ai/kling-video-o1","name":"Kling AI: Kling Video O1","description":"Kling Video O1 is the world's first unified multimodal AI video generation model 
developed by Kuaishou, offering an integrated platform for multiple video creation and editing tasks.","created":1764569103,"context_length":400000,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"KlingTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"pro":"0.3","default":"0.3","length":"3"},{"pro":"0.36","default":"0.36","length":"4"},{"pro":"0.48","default":"0.48","length":"5"},{"pro":"0.6","default":"0.6","length":"6"},{"pro":"0.72","default":"0.72","length":"7"},{"pro":"0.84","default":"0.84","length":"8"},{"pro":"0.96","default":"0.96","length":"9"},{"pro":"1.12","default":"1.12","length":"10"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"pro":"0.3","default":"0.3","length":"3 sec","x-key":"length"},{"pro":"0.36","default":"0.36","length":"4 sec","x-key":"length"},{"pro":"0.48","default":"0.48","length":"5 sec","x-key":"length"},{"pro":"0.6","default":"0.6","length":"6 sec","x-key":"length"},{"pro":"0.72","default":"0.72","length":"7 sec","x-key":"length"},{"pro":"0.84","default":"0.84","length":"8 sec","x-key":"length"},{"pro":"0.96","default":"0.96","length":"9 sec","x-key":"length"},{"pro":"1.12","default":"1.12","length":"10 
sec","x-key":"length"}]}},"top_provider":{"context_length":4096,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["image","images","aspectRatio","prompt","length","mode"],"models_extra_params":{"category":null},"is_active":true,"creator":"Kling AI"},{"id":"leonardo-ai/lucid-origin","name":"Leonardo.AI: Lucid Origin","description":"Lucid Origin is Leonardo AI's versatile text-to-image model optimized for exceptional prompt adherence, vibrant Full HD outputs, and diverse visual styles from photorealistic to stylized illustration.","created":1754393109,"context_length":4096,"architecture":{"modality":"text-\u003eimage","input_modalities":["text"],"output_modalities":["image"],"tokenizer":""},"pricing":{"prompt":"0","completion":"0","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"price":"As per provider"}},"top_provider":{"context_length":4096,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["prompt","n","size"],"models_extra_params":{"category":null},"is_active":true,"creator":"Leonardo.AI"},{"id":"leonardo-ai/lucid-realism","name":"Leonardo.AI: Lucid Realism","description":"Lucid Realism is a specialized text-to-image model from Leonardo AI optimized for hyperrealistic, cinematic photorealism with exceptional detail, natural lighting, and minimal 
generation errors.","created":1749122709,"context_length":4096,"architecture":{"modality":"text-\u003eimage","input_modalities":["text"],"output_modalities":["image"],"tokenizer":""},"pricing":{"prompt":"0","completion":"0","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"price":"As per provider"}},"top_provider":{"context_length":4096,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["prompt","n","size"],"models_extra_params":{"category":null},"is_active":true,"creator":"Leonardo.AI"},{"id":"leonardo-ai/phoenix","name":"Leonardo.AI: Phoenix","description":"Phoenix is Leonardo.AI's flagship foundational text-to-image model designed for exceptional prompt adherence and photorealistic results up to 5 
megapixels.","created":1718364309,"context_length":4096,"architecture":{"modality":"text-\u003eimage","input_modalities":["text"],"output_modalities":["image"],"tokenizer":""},"pricing":{"prompt":"0","completion":"0","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"price":"As per provider"}},"top_provider":{"context_length":4096,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["prompt","n","size"],"models_extra_params":{"category":null},"is_active":true,"creator":"Leonardo.AI"},{"id":"meta-llama/llama-3.1-8b-instant","name":"Meta: LLaMA 3.1 8B Instant","description":"Llama 3.1 8B Instruct is Meta's compact, efficiency-focused language model from the Llama 3.1 family. At just 8 billion parameters, this instruction-tuned variant delivers impressive performance with minimal computational requirements, making it ideal for resource-constrained environments and latency-sensitive applications. Despite its smaller size, the model maintains competitive performance in human evaluations against much larger models, particularly excelling in straightforward dialogue, content generation, and common instruction-following tasks. 
The model features improved inference speed and reduced deployment costs compared to larger variants while maintaining the core capabilities that make Llama models effective. It's particularly well-suited for mobile applications, edge devices, and scaled deployments where efficiency and performance balance are critical considerations.","created":1739401240,"context_length":8192,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"LLaMA3","instruct_type":"instruct"},"pricing":{"prompt":"0.00000005","completion":"0.00000008","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["max_tokens","temperature","top_p","top_k","min_p","tools","response_format"],"models_extra_params":{"category":null},"is_active":true,"creator":"Meta"},{"id":"meta-llama/llama-3.3-70b-versatile","name":"Meta: LLaMA 3.3 70B Versatile","description":"Meta's Llama-3.3-70B-Versatile is a 70-billion-parameter large language model optimized for a wide range of natural language processing tasks. 
It delivers high performance across various benchmarks while maintaining efficiency suitable for diverse applications.","created":1739401255,"context_length":131072,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"LLaMA3","instruct_type":"instruct"},"pricing":{"prompt":"0.00000059","completion":"0.00000079","request":"0","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":131072,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","frequency_penalty","presence_penalty"],"models_extra_params":{"category":null},"is_active":true,"creator":"Meta"},{"id":"meta-llama/llama-4-scout-17b-16e-instruct","name":"Meta: LLaMA 4 Scout 17B 16E Instruct","description":"Llama-4-Scout-17B-16E-Instruct is a multimodal, instruction-tuned language model developed by Meta, designed to handle long-context tasks efficiently. It features 17 billion active parameters with a total of 109 billion parameters across 16 experts, utilizing a mixture-of-experts (MoE) architecture. 
The model supports a context window of up to 10 million tokens, making it suitable for applications requiring extensive context, such as multi-document summarization and large codebase exploration.","created":1739401355,"context_length":32768,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"LLaMA4","instruct_type":"instruct"},"pricing":{"prompt":"0.00000011","completion":"0.00000034","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":32768,"max_completion_tokens":8192,"is_moderated":false},"supported_parameters":["max_tokens","temperature","seed"],"models_extra_params":{"category":null},"is_active":true,"creator":"Meta"},{"id":"minimax/minimax-m2.1","name":"MiniMax: MiniMax M2.1","description":"MiniMax M2.1 is a large language model from MiniMax optimized for agentic scenarios such as coding, tool use, and search, delivering strong reasoning performance and reliable tool calling at low cost; the later M2.5 series builds on 
it.","created":1766454997,"context_length":196608,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.00000027","completion":"0.00000095","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.0000000299999997","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":196608,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"MiniMax"},{"id":"minimax/minimax-m2.5","name":"MiniMax: MiniMax M2.5","description":"MiniMaxAI/MiniMax-M2.5 is a 229B-parameter Mixture-of-Experts (MoE) language model with 10B active parameters per token, designed as the world's first production-level model natively optimized for agentic scenarios like coding, tool use, search, and office 
productivity.","created":1770908502,"context_length":196608,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.0000003","completion":"0.0000012","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.00000003","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":196608,"max_completion_tokens":196608,"is_moderated":false},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","parallel_tool_calls","presence_penalty","reasoning","reasoning_effort","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"MiniMax"},{"id":"minimax/minimax-m2.5-highspeed","name":"MiniMax: MiniMax M2.5 Highspeed","description":"MiniMax-M2.5-HighSpeed (also called the Lightning variant) is the high-throughput version of MiniMax's 229B-parameter Mixture-of-Experts (MoE) model, delivering 100 tokens/second natively—roughly 2x faster than other frontier models—while maintaining identical capabilities to the standard 50 tokens/second 
version.","created":1770908502,"context_length":204800,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.0000006","completion":"0.0000024","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.00000003","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":204800,"max_completion_tokens":131072,"is_moderated":false},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","parallel_tool_calls","presence_penalty","reasoning","reasoning_effort","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Minimax"},{"id":"minimax/minimax-m2.7","name":"MiniMax: MiniMax M2.7","description":"MiniMax-M2.7 is a next-generation large language model designed for autonomous, real-world productivity with self-improvement capabilities. 
Created on March 18, 2026, it integrates advanced agentic capabilities through multi-agent collaboration, enabling it to plan, execute, and refine complex tasks across dynamic environments.","created":1773836697,"context_length":204800,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.0000003","completion":"0.0000012","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.00000006","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":204800,"max_completion_tokens":131072,"is_moderated":false},"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","temperature","tool_choice","tools","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Minimax"},{"id":"minimax/minimax-m2.7-highspeed","name":"MiniMax: MiniMax M2.7 Highspeed","description":"MiniMax-M2.7-HighSpeed is the ultra-fast variant of MiniMax's M2.7 flagship model, delivering approximately 100 tokens per second—3x faster than competitors like Claude Opus 4.6 (~33 tps) and GPT-5 (~40 tps)—while maintaining identical performance on complex tasks at a fraction of the 
cost.","created":1773836697,"context_length":204800,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.0000006","completion":"0.0000024","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.00000003","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":204800,"max_completion_tokens":131072,"is_moderated":false},"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","temperature","tool_choice","tools","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Minimax"},{"id":"mistralai/Mistral-7B-Instruct-v0.1","name":"Mistral: Mistral 7B Instruct","description":"A highly efficient 7.3B parameter model that delivers remarkable performance for its size class. 
Optimized for both speed and extensive context handling, it represents one of the most practical options for deployment in production systems with limited computational resources while maintaining strong capabilities.","created":1716768000,"context_length":32768,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":"0.000000028","completion":"0.000000054","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":32768,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","top_k","repetition_penalty","logit_bias","min_p","response_format","seed","logprobs","structured_outputs"],"models_extra_params":{"category":null},"is_active":true,"creator":"MistralAI"},{"id":"mistralai/Mistral-7B-Instruct-v0.3","name":"Mistral: Mistral 7B Instruct v0.3","description":"An enhanced version of Mistral's popular 7B model with expanded vocabulary and added function calling capabilities. 
Maintains the original's speed and context length optimization while adding new features that make it more versatile for practical application development and integration with external tools and APIs.","created":1716768000,"context_length":32768,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":"0.000000028","completion":"0.000000054","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":32768,"max_completion_tokens":16384,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","top_k","repetition_penalty","logit_bias","min_p","response_format","seed","logprobs","structured_outputs"],"models_extra_params":{"category":null},"is_active":true,"creator":"MistralAI"},{"id":"mistralai/Mistral-Nemo-Instruct-2407","name":"Mistral: Mistral Nemo","description":"A streamlined 12B parameter model developed in collaboration with NVIDIA featuring 128K context window support. 
Offers multilingual capabilities and integrated function calling, making it well-suited for deployment on NVIDIA hardware with optimized performance characteristics for real-time applications.","created":1721347200,"context_length":131072,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":"0.00000002","completion":"0.00000004","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":131072,"max_completion_tokens":65536,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","response_format","structured_outputs","seed","top_k","logit_bias","logprobs","top_logprobs","tools","tool_choice","repetition_penalty","min_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"MistralAI"},{"id":"mistralai/Mistral-Small-24B-Instruct-2501","name":"Mistral: Mistral Small 3","description":"A 24B parameter model optimized for speed and efficiency while rivaling models 3x its size like Llama 3.3 70B. Achieves 81% accuracy on MMLU benchmarks with impressive 150 tokens/second throughput on consumer hardware. 
Released under Apache 2.0 license, it serves as an excellent base for developing advanced reasoning capabilities.","created":1738255409,"context_length":28000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral"},"pricing":{"prompt":"0.00000005","completion":"0.00000008","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":28000,"max_completion_tokens":14000,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","repetition_penalty","response_format","top_k","seed","min_p","structured_outputs","logit_bias","logprobs"],"models_extra_params":{"category":[]},"is_active":true,"creator":"MistralAI"},{"id":"mistralai/Mixtral-8x7B-Instruct-v0.1","name":"Mistral: Mixtral 8x7B Instruct","description":"A sophisticated Mixture-of-Experts architecture with 8 experts totaling 47B parameters but activating only a fraction per forward pass. 
Fine-tuned specifically for conversational and instruction-following use cases, it delivers performance competitive with much larger dense models while maintaining significantly better computational efficiency.","created":1702166400,"context_length":32768,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Mistral","instruct_type":"mistral"},"pricing":{"prompt":"0.00000054","completion":"0.00000054","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":32768,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","top_k","repetition_penalty","logit_bias","min_p","response_format","seed","logprobs","top_logprobs"],"models_extra_params":{"category":null},"is_active":true,"creator":"MistralAI"},{"id":"openai/dall-e-2","name":"OpenAI: DALL·E 2","description":"DALL-E 2 is an advanced AI image generation model developed by OpenAI that creates original, high-resolution images and artwork from natural language prompts. 
It excels at translating detailed or imaginative text descriptions into visually compelling and realistic images, combining multiple concepts, attributes, and styles in a single output. Key features include improved image clarity, enhanced inpainting for editing specific parts of images, and better text-to-image matching, making it a versatile tool for creative applications such as marketing, design, and concept art.","created":1739402200,"architecture":{"modality":"text-\u003eimage","input_modalities":["text"],"output_modalities":["image"],"tokenizer":"","instruct_type":"none"},"pricing":{"prompt":"0.00001","completion":"0.00004","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.0000025","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":[{"high":"","medium":"","low":"","hd":"","standard":"0.02","dimension":"1024x1024"},{"high":"","medium":"","low":"","hd":"","standard":"0.018","dimension":"512x512"},{"high":"","medium":"","low":"","hd":"","standard":"0.016","dimension":"256x256"}],"priceToShow":{"imageCost":[{"standard":"0.02","dimension":"1024x1024","x-key":"dimension"},{"standard":"0.018","dimension":"512x512","x-key":"dimension"},{"standard":"0.016","dimension":"256x256","x-key":"dimension"}]}},"top_provider":{"context_length":0,"max_completion_tokens":0,"is_moderated":true},"supported_parameters":["prompt","n","quality","size"],"models_extra_params":{"category":["I
mage Generation"]},"is_active":true,"creator":"OpenAI"},{"id":"openai/dall-e-3","name":"OpenAI: DALL·E 3","description":"DALL-E 3 is OpenAI's advanced text-to-image generation model, designed to translate natural language prompts into highly detailed and accurate images. It offers significant improvements over previous versions, with a deeper understanding of nuance and context, allowing users to create images that closely match their descriptions—even for complex or imaginative scenarios.","created":1739402150,"architecture":{"modality":"text-\u003eimage","input_modalities":["text"],"output_modalities":["image"],"tokenizer":"","instruct_type":"none"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.0000025","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":[{"high":"","medium":"","low":"","hd":"0.08","standard":"0.04","dimension":"1024x1024"},{"high":"","medium":"","low":"","hd":"0.12","standard":"0.08","dimension":"1024x1792"},{"high":"","medium":"","low":"","hd":"0.12","standard":"0.08","dimension":"1792x1024"}],"priceToShow":{"imageCost":[{"hd":"0.08","standard":"0.04","dimension":"1024x1024","x-key":"dimension"},{"hd":"0.12","standard":"0.08","dimension":"1024x1792","x-key":"dimension"},{"hd":"0.12","standard":"0.08","dimension":"1792x1024","x-key":"dimension"}]}},"top_provider":{"context_length":0,"max_completion_tokens":0,"
is_moderated":true},"supported_parameters":["prompt","n","quality","size"],"models_extra_params":{"category":["Image Generation"]},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-3.5-turbo","name":"OpenAI: GPT-3.5 Turbo","description":"GPT-3.5 Turbo represents OpenAI's efficiency-optimized language model designed for maximum responsiveness in conversational and completion applications. This solution delivers optimal performance for real-time interactions, code generation, and content creation tasks with minimal latency. Despite its focus on processing speed, the model maintains impressive capabilities in understanding and generating both natural language and programming code across diverse domains. GPT-3.5 Turbo is specifically tuned for dialogue applications, making it particularly effective for chatbots, virtual assistants, and interactive systems requiring rapid responses. \nWith training data extending to September 2021, the model possesses comprehensive general knowledge while maintaining computational efficiency suitable for high-throughput applications. 
As OpenAI's most cost-effective and responsive production model, it provides an optimal balance of capability, performance, and affordability for applications where processing speed and deployment economics are critical considerations.","created":1685232000,"context_length":16385,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.0000005","completion":"0.0000015","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":16385,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","seed","logit_bias","logprobs","top_logprobs","response_format"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-3.5-turbo-0125","name":"OpenAI: GPT-3.5 Turbo 0125","description":"GPT-3.5 Turbo has been largely superseded by more advanced models in OpenAI's lineup. While still available for legacy applications and cost-sensitive implementations, its capabilities are now significantly outpaced by GPT-4o and the O-series models. 
The model remains suitable for basic conversational AI, content generation, and simple function-calling scenarios, but lacks the advanced reasoning, multilingual capabilities, and multimodal features of newer models. For applications requiring only core language processing without the need for cutting-edge performance, it continues to offer a cost-effective option.","created":1685232000,"context_length":16385,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.0000005","completion":"0.0000015","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":16385,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","seed","logit_bias","logprobs","top_logprobs","response_format"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-3.5-turbo-1106","name":"OpenAI: GPT-3.5 Turbo 16k (older v1106)","description":"An older GPT-3.5 Turbo model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. 
Training data: up to Sep 2021.","created":1699228800,"context_length":16385,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.000001","completion":"0.000002","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":16385,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","seed","logit_bias","logprobs","top_logprobs","response_format","structured_outputs"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-3.5-turbo-16k","name":"OpenAI: GPT-3.5 Turbo 16k","description":"This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost. 
Training data: up to Sep 2021.","created":1693180800,"context_length":16385,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.000003","completion":"0.000004","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":16385,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","seed","logit_bias","logprobs","top_logprobs","response_format"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-4","name":"OpenAI: GPT-4","description":"The original GPT-4 model has been replaced by more advanced versions in OpenAI's lineup. With knowledge cutoff from September 2021, it lacks awareness of more recent events and developments. GPT-4o now serves as OpenAI's primary multimodal model, offering similar capabilities with substantially improved speed, cost efficiency, and multilingual support. 
For applications requiring more advanced reasoning, the O-series models (o1, o3, o4) provide specialized capabilities for complex problem-solving in mathematics, science, coding, and other technical domains.","created":1685232000,"context_length":8191,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00003","completion":"0.00006","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":8191,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","seed","logit_bias","logprobs","top_logprobs","response_format"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-4-turbo","name":"OpenAI: GPT-4 Turbo","description":"GPT-4 Turbo has been superseded by GPT-4o as OpenAI's flagship multimodal model. While it still offers vision capabilities, JSON mode, and function calling, GPT-4o provides faster response times (2x faster), enhanced multilingual support, and improved visual understanding at 50% reduced cost. 
As of May 2025, GPT-4 Turbo is being phased out in favor of GPT-4o and the newer reasoning-focused O-series models.","created":1712620800,"context_length":128000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00001","completion":"0.00003","request":"0","image":"0.01445","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","seed","logit_bias","logprobs","top_logprobs","response_format"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-4.1","name":"OpenAI: GPT-4.1","description":"GPT-4.1 is OpenAI's latest flagship model showcasing significant advances in coding, instruction following, and long-context understanding. It processes up to 1 million tokens (approximately 750,000 words) in a single context window, with knowledge updated to June 2024. 
Performance benchmarks demonstrate substantial improvements over GPT-4o, including 54.6% completion on SWE-bench Verified coding tasks (21.4% increase) and 38.3% on MultiChallenge instruction following (10.5% increase). The model excels at real-world software engineering with reduced extraneous edits (from 9% to 2%), superior code exploration capabilities, and enhanced long-document comprehension. GPT-4.1 provides improved agentic reliability while maintaining a lower price point than its predecessors, making it particularly valuable for development environments, document analysis, and enterprise knowledge systems.","created":1744651385,"context_length":1047576,"architecture":{"modality":"text+image-\u003etext","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.000002","completion":"0.000008","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.0000005","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000100","completion":"0.00000400"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":1047576,"max_completion_tokens":32768,"is_moderated":true},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","web_search_options","seed","logit_bias","logprobs","top_logprobs","response_format","structured_outputs"],"models_extra_para
ms":{"category":["Coding","Legal","Health","Creative Writing"]},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-4.1-mini","name":"OpenAI: GPT-4.1 Mini","description":"GPT-4.1 Mini delivers GPT-4o-level performance with significantly improved efficiency metrics across latency and cost. This mid-sized model maintains the full 1 million token context window of its larger counterpart while achieving impressive benchmark scores: 45.1% on hard instruction evaluations, 35.8% on MultiChallenge, and 84.1% on IFEval. Despite its reduced parameter count, GPT-4.1 Mini demonstrates robust coding capabilities (31.6% on Aider's polyglot diff benchmark) and strong vision understanding. The model reduces latency by nearly half and costs 83% less than GPT-4o, making it ideal for interactive applications with tight performance constraints, high-throughput services, and cost-sensitive enterprise deployments requiring responsive AI capabilities without compromising on quality.","created":1744651381,"context_length":1047576,"architecture":{"modality":"text+image-\u003etext","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.0000004","completion":"0.0000016","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.0000001","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000020","completion":"0.00000080"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceT
oShow":null},"top_provider":{"context_length":1047576,"max_completion_tokens":32768,"is_moderated":true},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","web_search_options","seed","logit_bias","logprobs","top_logprobs","response_format","structured_outputs"],"models_extra_params":{"category":[]},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-4.1-nano","name":"OpenAI: GPT-4.1 Nano","description":"GPT-4.1 Nano is OpenAI's most efficient model optimized for maximum speed and minimum cost in the GPT-4.1 family. Released in April 2025, this compact model maintains the full 1 million token context window while delivering impressive benchmark performance: 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding—outperforming even GPT-4o mini. At just $0.10 per million input tokens and $0.40 per million output tokens, it represents OpenAI's most affordable option. The model excels in latency-critical applications including classification, autocompletion, information extraction, and high-throughput document processing where efficiency and cost considerations are 
paramount.","created":1744651369,"context_length":1047576,"architecture":{"modality":"text+image-\u003etext","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.0000001","completion":"0.0000004","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.000000025","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000005","completion":"0.00000020"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":1047576,"max_completion_tokens":32768,"is_moderated":true},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","seed","logit_bias","logprobs","top_logprobs","response_format","structured_outputs"],"models_extra_params":{"category":[]},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-4o","name":"OpenAI: GPT-4o","description":"GPT-4o (o for omni) is OpenAI's multimodal flagship model combining advanced language processing with sophisticated visual understanding. Released in 2024, this versatile solution processes both text and image inputs to generate high-quality text outputs across diverse applications. While maintaining the same intelligence level as GPT-4 Turbo, it delivers twice the processing speed and 50% greater cost efficiency. 
The model features significantly enhanced multilingual capabilities and improved visual analysis functions, making it particularly valuable for global applications requiring both text and image processing. As the foundation for OpenAI's consumer and enterprise offerings, GPT-4o provides a balanced combination of quality, speed, and affordability that makes advanced AI capabilities more accessible for diverse use cases.","created":1715558400,"context_length":128000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.0000025","completion":"0.00001","request":"0","image":"0.003613","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.00000125","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000125","completion":"0.00000500"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","web_search_options","seed","logit_bias","logprobs","top_logprobs","response_format","structured_outputs"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-4o-2024-05-13","name":"OpenAI: GPT-4o (2024-05-13)","description":"GPT-4o has established itself as OpenAI's primary 
multimodal model, integrating text, images, and audio processing in a single unified architecture. Since its initial release, it has received updates that further improve its rapid response capabilities (averaging 320ms), enhance its non-English language performance, and strengthen its visual understanding. Its multimodal design allows for natural, intuitive interactions across input types, making it particularly effective for applications requiring seamless switching between text, image, and audio comprehension. GPT-4o maintains competitive performance while offering significant speed and cost advantages over specialized models.","created":1715558400,"context_length":128000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.000005","completion":"0.000015","request":"0","image":"0.007225","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000250","completion":"0.00000750"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":128000,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","web_search_options","seed","logit_bias","logprobs","top_logprobs","response_format","structured_outputs"],"models_
extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-4o-2024-08-06","name":"OpenAI: GPT-4o (2024-08-06)","description":"GPT-4o (August 2024 version) enhances OpenAI's flagship multimodal model with sophisticated structured output capabilities. This specific release introduces advanced JSON schema support through the response_format parameter, enabling precise control over output structure and data formatting for applications requiring consistent, well-defined response patterns. The model maintains all core GPT-4o capabilities: processing both text and image inputs while generating high-quality text outputs with double the speed and 50% greater cost efficiency than GPT-4 Turbo. Additional refinements include improved non-English language processing and enhanced visual analysis capabilities. \nReleased in August 2024, this version particularly benefits developers building data-driven applications, APIs, and systems requiring predictable, structured information exchange with minimal post-processing. 
It represents an important evolution in OpenAI's efforts to make their models more useful for programmatic and enterprise applications requiring strict output formats alongside traditional natural language generation.","created":1722902400,"context_length":128000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.0000025","completion":"0.00001","request":"0","image":"0.003613","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.00000125","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","web_search_options","seed","logit_bias","logprobs","top_logprobs","response_format","structured_outputs"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-4o-2024-11-20","name":"OpenAI: GPT-4o (2024-11-20)","description":"GPT-4o (November 2024 version) enhances OpenAI's flagship multimodal model with specialized improvements in creative writing and document analysis. 
This updated release delivers more natural, engaging, and contextually tailored writing with improved relevance and readability across various content types. The model demonstrates significantly enhanced document processing capabilities, providing deeper insights and more comprehensive responses when working with uploaded files. While maintaining the core multimodal architecture that processes both text and image inputs, this version incorporates refinements that improve multilingual processing and visual understanding. The model continues to deliver the performance level of GPT-4 Turbo with double the processing speed and 50% greater cost efficiency, making it particularly valuable for content creation, document analysis, and multilingual applications requiring both quality and responsiveness.","created":1732127594,"context_length":128000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.0000025","completion":"0.00001","request":"0","image":"0.003613","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.00000125","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","p
resence_penalty","web_search_options","seed","logit_bias","logprobs","top_logprobs","response_format","structured_outputs"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-4o-mini","name":"OpenAI: GPT-4o-mini","description":"GPT-4o mini (o for omni) is OpenAI's efficient, cost-effective compact model designed for targeted applications. This solution processes both textual and visual inputs while generating text responses (including Structured Outputs). It is particularly well-suited for fine-tuning applications, and outputs from more advanced models like GPT-4o can be condensed to GPT-4o-mini to achieve comparable results with reduced expenses and response times.","created":1721260800,"context_length":128000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00000015","completion":"0.0000006","request":"0","image":"0.000217","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.000000075","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000008","completion":"0.00000030"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"supported_parameters":["max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","web_search_options","seed","logit_bias",
"logprobs","top_logprobs","response_format","structured_outputs","tools","tool_choice"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-4o-mini-2024-07-18","name":"OpenAI: GPT-4o-mini (2024-07-18)","description":"GPT-4o mini (July 2024 version) represents OpenAI's efficiency-focused multimodal model combining strong performance with minimal resource requirements. Released in July 2024, this model processes both text and image inputs while generating text outputs at a significantly reduced cost—60% cheaper than GPT-3.5 Turbo at just $0.15 per million input tokens and $0.60 per million output tokens. Despite its optimized size, the model achieves impressive benchmark results including an 82% score on MMLU and 87% on MGSM for mathematical reasoning, outperforming comparable small models like Gemini 1.5 Flash and Claude 3 Haiku. With a 128K token context window and exceptionally fast processing speed (202 tokens per second), this model is particularly valuable for high-throughput applications, real-time services, and cost-sensitive deployments requiring responsive multimodal capabilities without sacrificing core 
intelligence.","created":1721260800,"context_length":128000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00000015","completion":"0.0000006","request":"0","image":"0.007225","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.000000075","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","web_search_options","seed","logit_bias","logprobs","top_logprobs","response_format","structured_outputs"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-4o-mini-search-preview","name":"OpenAI: GPT-4o-mini Search Preview","description":"GPT-4o mini Search Preview offers a balanced approach to web-integrated AI, providing GPT-4o mini's efficiency with added real-time search capabilities. Released in March 2025, it delivers faster responses and lower costs than the full GPT-4o Search Preview model while still enabling applications to access current web information. 
The model uses specialized web search adaptation layers to efficiently process search results, supports the same geographic localization features as its larger counterpart, and is optimized for high-volume applications where speed and cost-effectiveness are priorities. It has been largely replaced by the GPT-4.1 family's mini variants released in April 2025.","created":1741818122,"context_length":128000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00000015","completion":"0.0000006","request":"0.0275","image":"0.000217","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"25","web_search_per_1000_requests_medium":"27","web_search_per_1000_requests_high":"30","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.00000015","completion":"0.0000006","web_search_per_1000_requests_low":"25","web_search_per_1000_requests_medium":"27","web_search_per_1000_requests_high":"30"}},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"supported_parameters":["web_search_options","max_tokens","response_format","structured_outputs"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-4o-realtime-preview-2024-12-17","name":"GPT-4o Realtime Preview (2024-12-17)","description":"This is a preview release of the GPT-4o Realtime model, capable of responding to audio and text inputs in realtime 
over WebRTC or a WebSocket interface. Supports a 32,000 token context window and up to 4,096 output tokens per response (knowledge cutoff: Oct 01, 2023).","created":1734393600,"context_length":32000,"architecture":{"modality":"text+audio-\u003etext+audio","input_modalities":["text","audio"],"output_modalities":["text","audio"],"tokenizer":"tiktoken"},"pricing":{"prompt":"0.000005","completion":"0.00002","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.0000025","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"0.00004","audio_output":"0.00008","cached_audio_input":"0.0000025","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":32000,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["temperature","max_output_tokens","modalities","voice","turn_detection","input_audio_format","output_audio_format"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-4o-realtime-preview-2025-06-03","name":"GPT-4o Realtime Preview (2025-06-03)","description":"This is a preview release of the GPT-4o Realtime model, capable of responding to audio and text inputs in realtime over WebRTC or a WebSocket interface. 
Supports a 32,000 token context window and up to 4,096 output tokens per response (knowledge cutoff: Oct 01, 2023).","created":1748908800,"context_length":32000,"architecture":{"modality":"text+audio-\u003etext+audio","input_modalities":["text","audio"],"output_modalities":["text","audio"],"tokenizer":"tiktoken"},"pricing":{"prompt":"0.000005","completion":"0.00002","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.0000025","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"0.00004","audio_output":"0.00008","cached_audio_input":"0.0000025","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":32000,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["temperature","max_output_tokens","modalities","voice","turn_detection","input_audio_format","output_audio_format"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-4o-search-preview","name":"OpenAI: GPT-4o Search Preview","description":"GPT-4o Search Preview represents OpenAI's integration of web search capabilities directly into the GPT-4o model architecture. Released in March 2025, it combines GPT-4o's advanced natural language processing with real-time web access, enabling users to receive up-to-date, factually grounded responses with live citation links. 
The model features geolocation-based search customization through the \"user_location\" parameter, supports structured JSON outputs, and maintains GPT-4o's multimodal capabilities. It serves as a specialized tool for applications requiring current information and has largely been succeeded by the more capable GPT-4.1 family of models released in April 2025.","created":1741817949,"context_length":128000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.0000025","completion":"0.00001","request":"0.035","image":"0.003613","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"30","web_search_per_1000_requests_medium":"35","web_search_per_1000_requests_high":"50","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.0000025","completion":"0.00001","web_search_per_1000_requests_low":"30","web_search_per_1000_requests_medium":"35","web_search_per_1000_requests_high":"50"}},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"supported_parameters":["web_search_options","max_tokens","response_format","structured_outputs"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-5","name":"OpenAI: GPT-5","description":"GPT‑5 is OpenAI’s flagship model, tailored for complex, multi-step reasoning and high-fidelity code generation. 
It combines superior world knowledge with streamlined “agentic” capabilities, enabling it to autonomously tackle tasks with minimal prompting. The model excels in logic-driven scenarios, debugging, and structured workflows—making it ideal for developers and professionals with high demands. With enhanced reasoning and smoother interaction, GPT‑5 offers users both intelligence and autonomy in one package.","created":1754587413,"context_length":400000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00000125","completion":"0.000010","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.000000125","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000063","completion":"0.000005","input_cache_read":"0.00000006"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["max_completion_tokens","response_format","seed","structured_outputs","tool_choice","tools"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-5-mini","name":"OpenAI: GPT-5-Mini","description":"GPT‑5 Mini offers a lighter and more cost-effective option, designed for everyday chat and instruction-following use cases. 
It retains much of GPT‑5’s reasoning power while running efficiently at lower compute costs, striking a balance between performance and affordability. Perfect for startups, students, or anyone mindful of resource budgets, GPT‑5 Mini makes advanced capabilities more accessible. Despite its compact size, it still packs solid intelligence for routine workflows and interactions.","created":1754587407,"context_length":400000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00000025","completion":"0.000002","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.000000025","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000013","completion":"0.000001","input_cache_read":"0.00000001"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_completion_tokens","presence_penalty","reasoning","response_format","seed","stop","structured_outputs","tool_choice","tools","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-5-nano","name":"OpenAI: GPT-5-Nano","description":"GPT‑5 Nano is optimized for ultra-fast performance and very low latency—ideal for 
real-time or embedded applications. Stripped down for speed, it excels at simple instruction-following and classification tasks, making it perfect for high-throughput API usage. With minimal overhead and rapid response times, GPT‑5 Nano is a great fit for lightweight environments or devices with limited compute capabilities. Yet, it preserves sufficient reasoning capability to remain practical across many everyday scenarios.","created":1754587402,"context_length":400000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00000005","completion":"0.0000004","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.000000005","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000003","completion":"0.00000020","input_cache_read":"0.00000000"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_completion_tokens","presence_penalty","reasoning","response_format","seed","stop","structured_outputs","tool_choice","tools","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-5.1","name":"OpenAI: GPT-5.1","description":"GPT-5.1, released in 
November 2025, is the latest flagship large language model from OpenAI, featuring significant upgrades in intelligence, reasoning, and user experience compared to its predecessor, GPT-5.","created":1763060305,"context_length":400000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00000125","completion":"0.00001","request":"0","image":"0","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.000000125","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_completion_tokens","presence_penalty","reasoning","response_format","seed","stop","structured_outputs","tool_choice","tools","top_logprobs"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-5.2","name":"OpenAI: GPT-5.2","description":"GPT-5.2 is OpenAI’s flagship frontier model in the GPT‑5 series, designed for the most demanding reasoning, coding, and long‑context workloads. 
It emphasizes stronger general intelligence, more reliable instruction following, and advanced safety compared with earlier 5.1 releases.","created":1765389775,"context_length":400000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00000175","completion":"0.000014","request":"0","image":"0","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.000000175","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["include_reasoning","max_completion_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-5.2-chat","name":"OpenAI: GPT-5.2 Chat","description":"GPT-5.2-Chat is a general-purpose large language model in the GPT‑5.2 family, optimized for fast, conversational use across everyday and professional tasks. 
It is designed to balance strong reasoning and reliability with low latency and efficient token usage.","created":1765389783,"context_length":128000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["file","image","text"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00000175","completion":"0.000014","request":"0","image":"0","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.000000175","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"supported_parameters":["max_tokens","response_format","seed","structured_outputs","tool_choice","tools"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-5.2-pro","name":"OpenAI: GPT-5.2 Pro","description":"GPT-5.2 Pro is OpenAI's flagship enterprise-grade reasoning model released on December 10, 2025, designed for sophisticated professional workflows requiring advanced problem-solving 
capabilities.","created":1765389780,"context_length":400000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.000021","completion":"0.000168","request":"","image":"","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":400000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["include_reasoning","max_tokens","reasoning","response_format","seed","structured_outputs","tool_choice","tools"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-5.3-chat","name":"OpenAI: GPT-5.3 Chat","description":"GPT-5.3-chat (also referred to as GPT-5.3 Instant or gpt-5.3-chat-latest) is OpenAI's latest fast-response ChatGPT model, optimized for smoother, more helpful everyday conversations with improved accuracy, reduced hallucinations, and fewer unnecessary 
refusals.","created":1772564061,"context_length":128000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00000175","completion":"0.000014","request":"","image":"","web_search":"0.1","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.000000175","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":128000,"max_completion_tokens":16384,"is_moderated":true},"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_completion_tokens","presence_penalty","response_format","seed","stop","structured_outputs","tool_choice","tools","top_logprobs"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-5.4","name":"OpenAI: GPT-5.4","description":"GPT-5.4 is the newest flagship model from OpenAI, merging the Codex and GPT product lines into one unified system. It supports a context window exceeding 1 million tokens — with up to 922K tokens for input and 128K for output — and accepts both text and image inputs. This allows it to handle high-context reasoning, code generation, and multimodal analysis all within a single workflow. The model shows notable improvements in areas such as coding, document comprehension, tool integration, and instruction adherence. 
It is built to serve as a reliable default for both general-purpose and software engineering tasks, capable of producing production-ready code, aggregating insights from diverse sources, and carrying out intricate multi-step processes with reduced iterations and improved token efficiency.","created":1772734352,"context_length":1050000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.0000025","completion":"0.000015","request":"","image":"","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.00000025","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"0.000005","completion_more_than_272k_input":"0.0000225","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.0000025","completion":"0.000015","prompt_more_than_272k_input":"0.000005","completion_more_than_272k_input":"0.0000225"}},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_completion_tokens","presence_penalty","reasoning","response_format","seed","stop","structured_outputs","tool_choice","tools","top_logprobs"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-5.4-pro","name":"OpenAI: GPT-5.4 Pro","description":"GPT-5.4 Pro is OpenAI's most advanced model, building on GPT-5.4's unified architecture with 
enhanced reasoning capabilities for complex, high-stakes tasks. It features a 1M+ token context window (922K input, 128K output) with support for text and image inputs. Optimized for step-by-step reasoning, instruction following, and accuracy, GPT-5.4 Pro excels at agentic coding, long-context workflows, and multi-step problem solving.","created":1772734366,"context_length":1050000,"architecture":{"modality":"text+image+file-\u003etext","input_modalities":["text","image","file"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00003","completion":"0.00018","request":"","image":"","web_search":"0.01","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"0.00006","completion_more_than_272k_input":"0.00027","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.00003","completion":"0.00018","prompt_more_than_272k_input":"0.00006","completion_more_than_272k_input":"0.00027"}},"top_provider":{"context_length":1050000,"max_completion_tokens":128000,"is_moderated":true},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","presence_penalty","reasoning","response_format","seed","stop","structured_outputs","tool_choice","tools","top_logprobs"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-oss-120b","name":"OpenAI: GPT OSS 120B","description":"GPT-OSS-120B is OpenAI’s most powerful open-weight model, designed for 
production-grade reasoning tasks with 117 billion parameters and a Mixture-of-Experts (MoE) architecture that activates 5.1 billion parameters per token. It uses a 36-layer transformer and supports long contexts (up to 128K tokens), excelling in areas like coding, math competitions, and health applications. The model offers strong tool-use capabilities (e.g., browsing, function calling) and performs on par with o4-mini in core reasoning benchmarks.","created":1754414231,"context_length":131072,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.00000015","completion":"0.00000075","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":131072,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["frequency_penalty","logit_bias","logprobs","include_reasoning","reasoning_effort","reasoning","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-oss-20b","name":"OpenAI: GPT OSS 20B","description":"GPT-OSS-20B is a lightweight, efficient 
open-weight model optimized for low-latency, on-device, and edge deployments. With 21 billion parameters and a similar MoE structure as its larger sibling, it supports 128K token contexts while running comfortably on 16GB consumer hardware. It matches or beats o3-mini on standard tasks and excels in coding, math, and health-related reasoning. ","created":1754414229,"context_length":131072,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.0000001","completion":"0.0000005","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":131072,"max_completion_tokens":32768,"is_moderated":false},"supported_parameters":["frequency_penalty","logit_bias","logprobs","include_reasoning","reasoning_effort","reasoning","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-realtime","name":"GPT Realtime","description":"This is OpenAI's general-availability realtime model, capable of responding to audio and text inputs in realtime over WebRTC, WebSocket, or SIP connections. 
Supports a 32,000 token context window and up to 4,096 output tokens per response (knowledge cutoff: Oct 01, 2023). Also supports image input (text/audio/image → text/audio).","created":1756339200,"context_length":32000,"architecture":{"modality":"text+audio+image-\u003etext+audio","input_modalities":["text","audio","image"],"output_modalities":["text","audio"],"tokenizer":"tiktoken"},"pricing":{"prompt":"0.000004","completion":"0.000016","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.0000005","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"0.000032","audio_output":"0.000064","cached_audio_input":"0.0000005","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":32000,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["temperature","max_output_tokens","modalities","voice","turn_detection","input_audio_format","output_audio_format"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-realtime-1.5","name":"OpenAI: GPT Realtime 1.5","description":"GPT Realtime 1.5 is OpenAI's flagship low-latency, speech-to-speech model optimized for live conversational systems, voice agents, and customer support, using persistent streaming sessions via the Realtime API 
(WebRTC/WebSocket).","created":1771829768,"context_length":32000,"architecture":{"modality":"text+audio+image-\u003etext+audio","input_modalities":["text","audio","image"],"output_modalities":["text","audio"],"tokenizer":"tiktoken"},"pricing":{"prompt":"0.000004","completion":"0.000016","request":"","image":"0.000005","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.0000004","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"0.000032","audio_output":"0.000064","cached_audio_input":"0.0000004","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":32000,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["temperature","max_output_tokens","modalities","voice","turn_detection","input_audio_format","output_audio_format"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/gpt-realtime-mini","name":"GPT Realtime Mini","description":"A cost-efficient version of GPT Realtime, capable of responding to audio and text inputs in realtime over WebRTC, WebSocket, or SIP connections. Supports a 32,000 token context window and up to 4,096 output tokens per response (knowledge cutoff: Oct 01, 2023). 
Also supports image input (text/audio/image → text/audio).","created":1759708800,"context_length":32000,"architecture":{"modality":"text+audio+image-\u003etext+audio","input_modalities":["text","audio","image"],"output_modalities":["text","audio"],"tokenizer":"tiktoken"},"pricing":{"prompt":"0.0000006","completion":"0.0000024","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"0.00000006","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"0.00001","audio_output":"0.00002","cached_audio_input":"0.0000003","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":32000,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["temperature","max_output_tokens","modalities","voice","turn_detection","input_audio_format","output_audio_format"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/o1","name":"OpenAI: o1","description":"OpenAI's o1 model family is its most advanced yet, built to reason more deeply through extended thought and large-scale reinforcement learning. 
Optimized for STEM tasks, o1 consistently achieves PhD-level accuracy on benchmarks in physics, chemistry, and biology.","created":1734459999,"context_length":200000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"GPT"},"pricing":{"prompt":"0.000015","completion":"0.00006","request":"0","image":"0.021675","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.0000075","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000750","completion":"0.00003000"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"supported_parameters":["tools","tool_choice","seed","max_completion_tokens","response_format","structured_outputs"],"models_extra_params":{"category":["Math","Legal","Health"]},"is_active":true,"creator":"OpenAI"},{"id":"openai/o3","name":"OpenAI: o3","description":"OpenAI o3 is an advanced reasoning model, setting new standards in coding, math, science, and visual understanding. It outperforms on benchmarks like Codeforces, SWE-bench (without custom scaffolding), and MMMU. Ideal for complex, nuanced problems, o3 excels at visual tasks and reduces major errors by 20% compared to o1 in real-world evaluations. 
It’s especially strong in programming, business, and creative ideation, with early users praising its ability to generate and critically assess novel ideas in biology, math, and engineering.","created":1744823457,"context_length":200000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.000002","completion":"0.000008","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.0000005","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000100","completion":"0.00000400"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"supported_parameters":["tools","tool_choice","seed","reasoning","max_completion_tokens","response_format","structured_outputs"],"models_extra_params":{"category":["Coding","Math","Legal","Health","Creative Writing"]},"is_active":true,"creator":"OpenAI"},{"id":"openai/o3-mini","name":"OpenAI: o3 Mini","description":"OpenAI o3-mini (January 2025 version) is a specialized reasoning model optimized for STEM domains while maintaining cost efficiency. 
This model features adjustable reasoning capabilities and demonstrates exceptional performance in science, mathematics, and coding tasks, with expert evaluators preferring its responses 56% of the time over previous versions and noting a 39% reduction in major errors on complex questions. Even at medium reasoning settings, o3-mini matches the larger o1 model's performance on challenging evaluations like AIME and GPQA while maintaining significantly lower latency and cost profiles. The model supports developer-focused features including function calling, structured outputs, and streaming capabilities, though it lacks vision processing functionality. This text-only design focuses computational resources on reasoning quality, making it particularly valuable for technical applications requiring accurate problem-solving without multimodal requirements.","created":1738351721,"context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.0000011","completion":"0.0000044","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.00000055","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000055","completion":"0.00000220"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"supported_parameters":["tools",
"tool_choice","seed","reasoning","max_completion_tokens","response_format","structured_outputs"],"models_extra_params":{"category":["Coding","Math"]},"is_active":true,"creator":"OpenAI"},{"id":"openai/o3-mini-high","name":"OpenAI: o3 Mini High","description":"OpenAI o3-mini-high is the enhanced reasoning variant of o3-mini with reasoning_effort permanently set = high for maximum problem-solving capabilities. Released in January 2025, this specialized configuration prioritizes thorough analysis and step-by-step reasoning over processing speed, making it ideal for complex STEM problems requiring in-depth analysis. While maintaining the same core architecture as standard o3-mini, this variant allocates additional computational resources to reasoning processes, resulting in higher accuracy on challenging problems in science, mathematics, and coding domains. The model maintains all developer-focused features of the standard version, including function calling, structured outputs, and streaming capabilities, while focusing its additional reasoning capacity on problem complexity rather than multimodal processing. 
This configuration is especially valuable for applications where solution quality and reasoning thoroughness outweigh processing speed considerations, such as educational tools, research assistants, and technical problem-solving systems requiring maximum accuracy in areas demanding rigorous analytical thinking.","created":1738351721,"context_length":200000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.0000011","completion":"0.0000044","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.00000055","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"supported_parameters":["tools","tool_choice","seed","reasoning","max_completion_tokens","response_format","structured_outputs"],"models_extra_params":{"category":["Coding"]},"is_active":true,"creator":"OpenAI"},{"id":"openai/o4-mini","name":"OpenAI: o4 Mini","description":"OpenAI o4-mini combines exceptional efficiency with sophisticated reasoning capabilities in a compact model from the o-series. Released in April 2025, this streamlined solution maintains powerful multimodal and agentic capabilities while significantly reducing computational requirements. 
The model achieves remarkable benchmark performance, including near-perfect scores on AIME with Python (99.5%) and competitive results on SWE-bench, outperforming its predecessor o3-mini and approaching the larger o3 model in several domains. Despite its optimized size, o4-mini excels in STEM tasks, visual problem-solving (MathVista, MMMU), and code editing applications. Its refined reinforcement learning architecture enables sophisticated capabilities like tool chaining, structured output generation, and multi-step task completion with minimal latency—often resolving complex problems in under a minute, making it ideal for high-throughput scenarios prioritizing both speed and quality.","created":1744820942,"context_length":200000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.0000011","completion":"0.0000044","request":"0","image":"0.0008415","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.000000275","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000055","completion":"0.00000220"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"supported_parameters":["tools","tool_choice","seed","reasoning","max_completion_tokens","response_format","structured_outputs"],"models_extra_params":{"category":["Coding","M
ath"]},"is_active":true,"creator":"OpenAI"},{"id":"openai/o4-mini-high","name":"OpenAI: o4 Mini High","description":"OpenAI o4-mini-high is the enhanced reasoning variant of o4-mini with reasoning_effort permanently set to \"high\" for maximum problem-solving capabilities. Released in April 2025, this configuration prioritizes thorough analysis and step-by-step reasoning over processing speed, making it ideal for complex STEM problems, mathematical proofs, and detailed code generation tasks. While maintaining the same model architecture as standard o4-mini, this variant allocates additional computational resources to reasoning processes, resulting in higher accuracy on complex problems with a corresponding increase in token usage and processing time. The model demonstrates exceptional performance on sophisticated benchmarks including AIME and achieves impressive results on visual reasoning tasks. This specialized configuration is particularly valuable for applications where solution quality and reasoning thoroughness outweigh speed considerations, such as educational tools, research assistance, and complex problem-solving systems requiring maximum 
accuracy.","created":1744824212,"context_length":200000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["image","text","file"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.0000011","completion":"0.0000044","request":"0","image":"0.0008415","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.000000275","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":200000,"max_completion_tokens":100000,"is_moderated":true},"supported_parameters":["tools","tool_choice","seed","reasoning","max_completion_tokens","response_format","structured_outputs"],"models_extra_params":{"category":["Coding"]},"is_active":true,"creator":"OpenAI"},{"id":"openai/omni-moderation-latest","name":"Omni Moderation (Latest)","description":"Omni Moderation (Latest) is a state-of-the-art moderation model designed for real-time classification of unsafe, offensive, or policy-violating content. It supports text moderation across multiple languages and content types, including user messages, document uploads, code, and social media content. 
The model is optimized for low false positives and fast ruling in both API and streaming workflows.","created":1739403200,"context_length":4096,"architecture":{"modality":"text-\u003eclassification","input_modalities":["text"],"output_modalities":["label"],"tokenizer":"OmniTokenizer","instruct_type":"none"},"pricing":{"prompt":"0","completion":"0","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":4096,"max_completion_tokens":1,"is_moderated":false},"per_request_limits":{},"supported_parameters":["input","image"],"models_extra_params":{"category":[]},"is_active":true,"creator":"OpenAI"},{"id":"openai/sora-2","name":"OpenAI: Sora 2","description":"Sora-2 is OpenAI's state-of-the-art AI video and audio generation model, released in September 2025, and marks a major leap in controllable, realistic, and physically accurate video synthesis. 
It transforms text prompts and images into cinematic HD videos with synchronized audio, realistic motion, and advanced world simulation—enabling users to generate scenes with perfect lip-sync, sound effects, and genuine continuity.","created":1759215188,"context_length":8192,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":""},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"720x1280":"0.10","1280x720":"0.10"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"unit":"sec","videoCost":[{"720x1280":"0.10","1280x720":"0.10","Seconds":"1","x-key":"Seconds"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["prompt","size","seconds"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/sora-2-pro","name":"OpenAI: Sora 2 Pro","description":"Sora-2 Pro is OpenAI’s premium offering for its latest AI-powered video and audio generation system, Sora 2, launched in October 2025. 
Sora-2 Pro is designed for professional creators and advanced users, giving them enhanced creative controls, access to longer and higher-quality generative video clips, and advanced editing tools.","created":1759304819,"context_length":8192,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":""},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"720x1280":"0.30","1280x720":"0.30","1024x1792":"0.50","1792x1024":"0.50"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"unit":"sec","videoCost":[{"720x1280":"0.30","1280x720":"0.30","1024x1792":"0.50","1792x1024":"0.50","Seconds":"1","x-key":"Seconds"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["prompt","size","seconds"],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/text-embedding-3-large","name":"OpenAI: text-embedding-3-large","description":"text-embedding-3-large is OpenAI's high-capacity embedding model for text, aimed at delivering the strongest semantic encoding. It produces 3072-dimensional embeddings and achieves the highest accuracy among OpenAI's embedding models (about 64.6% on the MTEB benchmark). 
Because of its size, it is slower and more costly – around $0.13 per 1M tokens – compared to smaller models. This makes it well-suited to use cases where embedding quality is critical, such as complex document search or analytics. In summary, text-embedding-3-large outperforms both text-embedding-3-small and the older ada-002 model in accuracy at the expense of higher computational cost and latency.","created":1706140800,"context_length":8192,"architecture":{"modality":"text-\u003evector","input_modalities":["text"],"output_modalities":["vector"],"tokenizer":"cl100k_base"},"pricing":{"prompt":"0.00000013","completion":"0","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0","input_cache_write":"0","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000007","completion":"0"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":8192,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":[],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/text-embedding-3-small","name":"OpenAI: text-embedding-3-small","description":"text-embedding-3-small is a compact OpenAI text embedding model designed for general-purpose semantic tasks (search, classification, similarity) with high throughput. It produces 1536-dimensional vectors and is optimized for efficiency. 
Its cost is very low (about $0.02 per 1M tokens), making it far cheaper than larger models, though its accuracy is correspondingly slightly lower (MTEB benchmark ~62.3%). The model is ideal for cost-sensitive or high-throughput scenarios where good semantic embeddings are needed but ultra-high accuracy is not critical. In practice, it still performs well on search and retrieval tasks, but trades off some accuracy and richness of representation relative to the larger embedding models.","created":1706140800,"context_length":8192,"architecture":{"modality":"text-\u003evector","input_modalities":["text"],"output_modalities":["vector"],"tokenizer":"cl100k_base"},"pricing":{"prompt":"0.00000002","completion":"0","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0","input_cache_write":"0","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000001","completion":"0"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":8192,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":[],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"openai/text-embedding-ada-002","name":"OpenAI: text-embedding-ada-002","description":"text-embedding-ada-002 (often called ada v2) is an earlier-generation OpenAI embedding model for general semantic tasks like text and code search, clustering, and classification. 
It produces 1536-dimensional embeddings and was initially state-of-the-art for tasks such as text search, code search, and sentence similarity. Its performance on benchmarks is moderate (around 61% on MTEB), and it costs about $0.10 per 1M tokens. Ada-002 is suited to general retrieval and similarity tasks but is generally considered a legacy model; newer v3 models (3-small and 3-large) have mostly superseded it for better efficiency or accuracy. In fact, documentation notes that ada-002 is primarily kept for legacy use cases, while most new applications use the 3-small or 3-large models for improved performance.","created":1706140800,"context_length":8192,"architecture":{"modality":"text-\u003evector","input_modalities":["text"],"output_modalities":["vector"],"tokenizer":"cl100k_base"},"pricing":{"prompt":"0.0000001","completion":"0","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0","input_cache_write":"0","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{"prompt":"0.00000005","completion":"0"},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":8192,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":[],"models_extra_params":{"category":null},"is_active":true,"creator":"OpenAI"},{"id":"perplexity/sonar","name":"Perplexity: Sonar","description":"Sonar is a lightweight, cost-effective non-reasoning search model built for delivering fast, grounded answers through 
real-time web search. With a context length of 128k, it is optimized for quick lookups and straightforward Q\u0026A tasks, making it ideal for users who need immediate, reliable information without extensive processing. Sonar provides answers backed by citations, ensuring transparency while maintaining speed and low cost. Typical use cases include summarizing books, TV shows, or movies, looking up definitions or quick facts, and browsing up-to-date content across categories like news, sports, health, and finance. Its efficiency and accuracy make it well-suited for everyday information needs.","created":1738013808,"context_length":127072,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.000001","completion":"0.000001","request":"0.005","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"5","web_search_per_1000_requests_medium":"8","web_search_per_1000_requests_high":"12","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.000001","completion":"0.000001","web_search_per_1000_requests_low":"5","web_search_per_1000_requests_high":"12","web_search_per_1000_requests_medium":"8"}},"top_provider":{"context_length":127072,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","web_search_options","top_k","frequency_penalty","presence_penalty"],"models_extra_params":{"category":[]},"is_a
ctive":true,"creator":"Perplexity"},{"id":"perplexity/sonar-deep-research","name":"Perplexity: Sonar Deep Research","description":"Sonar Deep Research is a powerful deep research and reasoning model designed for conducting thorough investigations and generating expert-level insights. With a context length of 128k, it excels at synthesizing information from hundreds of sources to produce highly detailed, structured reports. Ideal for exhaustive research tasks, Sonar Deep Research offers in-depth subject analysis and is capable of crafting content with precision and authority. Common use cases include writing white papers for industry thought leadership, developing comprehensive go-to-market (GTM) strategies, and creating advanced educational material for universities or training programs. It's the ideal tool for users who need depth, clarity, and rigor in their research output.","created":1741311246,"context_length":128000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":"deepseek-r1"},"pricing":{"prompt":"0.000002","completion":"0.000008","request":"0.005","image":"0","web_search":"0.005","citation":"0.000002","reasoning":"0.000003","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.000002","citation":"0.000002","reasoning":"0.000003","completion":"0.000008","web_search_per_
1000_requests":"5"}},"top_provider":{"context_length":128000,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","reasoning","include_reasoning","top_k","frequency_penalty","presence_penalty"],"models_extra_params":{"category":null},"is_active":true,"creator":"Perplexity"},{"id":"perplexity/sonar-pro","name":"Perplexity: Sonar Pro","description":"Sonar Pro is an advanced non-reasoning search model designed for handling complex, multi-step queries that demand deeper content understanding and high citation accuracy. With a large context length of 200k, it excels at retrieving and synthesizing information from extensive text sources. Sonar Pro delivers in-depth answers supported by twice as many citations as the standard Sonar model, thanks to its enhanced information retrieval architecture. Optimized for multi-step tasks, it’s particularly useful for applications such as academic literature reviews, competitor and industry research, and generating curated content like restaurant catalogs with reviews. 
Its ability to surface comprehensive, well-cited responses makes it a powerful tool for users needing detailed and reliable information.","created":1741312423,"context_length":200000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.000003","completion":"0.000015","request":"0.005","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"6","web_search_per_1000_requests_medium":"10","web_search_per_1000_requests_high":"14","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.000003","completion":"0.000015","web_search_per_1000_requests_low":"6","web_search_per_1000_requests_high":"14","web_search_per_1000_requests_medium":"10"}},"top_provider":{"context_length":200000,"max_completion_tokens":8000,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","web_search_options","top_k","frequency_penalty","presence_penalty"],"models_extra_params":{"category":null},"is_active":true,"creator":"Perplexity"},{"id":"perplexity/sonar-reasoning-pro","name":"Perplexity: Sonar Reasoning Pro","description":"Sonar Reasoning Pro is a high-performance reasoning model designed to tackle complex topics that require advanced multi-step thinking and deep content understanding. 
With a context length of 128k, it combines enhanced Chain-of-Thought (CoT) reasoning with a powerful information retrieval architecture to deliver well-structured, thoughtful responses. It provides twice as many citations on average compared to the standard Sonar Reasoning model, ensuring greater transparency and grounding. Ideal for tasks like competitive product analysis, exploring intricate scientific subjects, or creating detailed travel plans, Sonar Reasoning Pro excels in scenarios that demand both reasoning and reliable, well-sourced information.","created":1741313308,"context_length":128000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Other","instruct_type":"deepseek-r1"},"pricing":{"prompt":"0.000002","completion":"0.000008","request":"0.005","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"6","web_search_per_1000_requests_medium":"10","web_search_per_1000_requests_high":"14","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.000002","completion":"0.000008","web_search_per_1000_requests_low":"6","web_search_per_1000_requests_high":"14","web_search_per_1000_requests_medium":"10"}},"top_provider":{"context_length":128000,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","reasoning","include_reasoning","web_search_options","top_k","frequency_penalty","presence_penalty"],"models_extra_para
ms":{"category":null},"is_active":true,"creator":"Perplexity"},{"id":"pika/pika-v2-2","name":"Pika: Pika V2.2","description":"Pika 2.2 (early 2025) is a versatile video model supporting text-to-video and image-to-video. It generates dynamic 10-second clips at 1080p with improved realism and prompt responsiveness. New features include keyframe transitions (Pikaframes) that smoothly interpolate between start/end images, and various AI-powered effects (Pikaffects, Pikadditions, etc.) for creative control. Pika 2.2 delivers cinematic motion and camera effects, though very complex prompts can still produce occasional artifacts. Overall, it represents a major upgrade with higher resolution and fluidity over Pika 2.1, aimed at content creators and social media applications.","created":1739403112,"context_length":8192,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"PikaTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"720P":"0.21","1080p":"0.45","default":"0.21","length":"5"},{"720P":"0.6","1080p":"1.02","default":"0.6","length":"10"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"720p":"0.21","1080p":"0.45","default":"0.21","length":"5 
sec","x-key":"length"},{"720p":"0.6","1080p":"1.02","default":"0.6","length":"10 sec","x-key":"length"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["image","prompt","resolution","length","seed"],"models_extra_params":{"category":["Video Generation"]},"is_active":true,"creator":"Pika"},{"id":"pollo/pollo-v1-6","name":"Pollo: Pollo V1.6","description":"Pollo 1.6 is Pollo AI's flagship video model, optimized for efficiency. Described as \"better, cheaper, and faster,\" targeting high-quality cinematic outputs, it supports both image-to-video and text-to-video inputs. In practice, 1.6 delivers more realistic and creative animations than previous Pollo versions. It emphasizes speed and cost-effectiveness, making it accessible for a wide range of users. Overall, Pollo 1.6 aims to balance visual fidelity with fast generation, serving as a versatile general-purpose model.","created":1739402450,"context_length":8192,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"PolloTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"480P":"0.18","720P":"0.24","1080p":"0.54","default":"0.18","length":"5"},{"480P":"0.3","720P":"0.42","1080p":"0.72","default"
:"0.3","length":"10"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"480p":"0.18","720p":"0.24","1080p":"0.54","default":"0.18","length":"5 sec","x-key":"length"},{"480p":"0.3","720p":"0.42","1080p":"0.72","default":"0.3","length":"10 sec","x-key":"length"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["image","prompt","resolution","length","seed","aspectRatio"],"models_extra_params":{"category":["Video Generation"]},"is_active":true,"creator":"Pollo"},{"id":"qwen/qwen2.5-vl-32b-instruct","name":"Qwen: Qwen2.5 VL 32B Instruct","description":"Qwen2.5-VL-32B-Instruct is a state-of-the-art multimodal vision-language model from Alibaba’s Qwen team, designed to handle complex visual and textual tasks with 32 billion parameters. It supports long context lengths (up to 64K tokens for video) and features a Transformer-based architecture with enhanced ViT, dynamic resolution training, and advanced positional encoding for both images and videos. The model excels in object recognition, document parsing, structured data extraction, and long video understanding, including precise event localization. It also acts as a visual agent, capable of software control, tool integration, and autonomous multi-step task execution. 
Reinforced for strong mathematical reasoning, it delivers detailed, human-aligned responses across diverse use cases.","created":1742839838,"context_length":16384,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Qwen"},"pricing":{"prompt":"0.0000002","completion":"0.0000006","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":16384,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["frequency_penalty","logit_bias","logprobs","max_tokens","min_p","presence_penalty","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","top_k","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Qwen"},{"id":"qwen/qwen3-235b-a22b","name":"Qwen: Qwen3 235B A22B","description":"Qwen's flagship MoE model that achieves competitive results against top models like DeepSeek-R1, o1, and Gemini-2.5-Pro with only 22B of its 235B parameters activated per token. 
Built on 36 trillion training tokens across 119 languages, it features dual thinking modes and extended 131K context handling for complex reasoning tasks.","created":1745875757,"context_length":40960,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":"0.00000013","completion":"0.0000006","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":40960,"max_completion_tokens":40960,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","tools","tool_choice","reasoning","include_reasoning","structured_outputs","response_format","stop","frequency_penalty","presence_penalty","seed","top_k","min_p","repetition_penalty","logprobs","top_logprobs","logit_bias"],"models_extra_params":{"category":["Health"]},"is_active":true,"creator":"Qwen"},{"id":"qwen/qwen3-32b","name":"Qwen: Qwen3 32B","description":"Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. 
The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ","created":1745875945,"context_length":40960,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3","instruct_type":"qwen3"},"pricing":{"prompt":"0.00000029","completion":"0.00000059","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":40960,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","reasoning","include_reasoning","stop","frequency_penalty","presence_penalty","response_format","top_logprobs","logprobs","logit_bias","seed","tools","tool_choice","structured_outputs","top_k","min_p","repetition_penalty"],"models_extra_params":{"category":null},"is_active":true,"creator":"Qwen"},{"id":"qwen/qwen3-coder","name":"Qwen: Qwen3 Coder ","description":"Qwen3-Coder-480B-A35B-Instruct is a powerful code generation model from the Qwen3 series, designed for advanced agentic tasks such as coding and browser-based interaction. It matches the performance of models like Claude Sonnet. 
Built as a Mixture-of-Experts (MoE) model, it has 480 billion total parameters, with 35 billion active per forward pass. The model supports long-context understanding with a 256K token window (expandable to 1M using Yarn) and handles diverse coding tasks including function calling and tool integration. It also introduces a specialized function call format tailored for agentic coding on popular IDEs.","created":1753230546,"context_length":262144,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Qwen3"},"pricing":{"prompt":"0.0000004","completion":"0.0000016","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":262144,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","tools","tool_choice","stop","frequency_penalty","presence_penalty","repetition_penalty","response_format","top_k","seed","min_p","structured_outputs","logit_bias","logprobs","top_logprobs"],"models_extra_params":{"category":["Coding"]},"is_active":true,"creator":"Qwen"},{"id":"runway/runway-gen-3-turbo","name":"Runway: Runway Gen-3 Turbo","description":"Runway Gen-3 was Runway's early video model (circa 2023) for professional animation tasks. 
It can animate still images or generate text-to-video clips up to 10 seconds long. Gen-3 outputs were typically 720p (with upscaling options) and aimed for cinematic style. It emphasized creative control, allowing prompts to specify actions, camera moves, and visual styles. Users praised its consistency in characters and environments, though it could struggle with very complex scenes. In practice, Gen-3 served as a reliable workhorse for creative industries, producing high-quality, animated content and setting the stage for its successors.","created":1739403102,"context_length":8192,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"RunwayTokenizer","instruct_type":"none"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"720P":"0.3","default":"0.3","length":"5"},{"720P":"0.6","default":"0.6","length":"10"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"720p":"0.3","default":"0.3","length":"5 sec","x-key":"length"},{"720p":"0.6","default":"0.6","length":"10 sec","x-key":"length"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["image","prompt","length","seed","aspectRatio"],"models_extra_params":{"category":["Video 
Generation"]},"is_active":true,"creator":"Runway"},{"id":"runway/runway-gen-4-turbo","name":"Runway: Runway Gen-4 Turbo","description":"Runway Gen-4 Turbo is optimized for speed and cost. By default it outputs 720p video but supports 4K upscaling for higher quality. Turbo preserves much of Gen-4's motion realism—handling dynamic lighting and depth convincingly—while sacrificing some fine details in very complex scenes. This makes it ideal for rapid prototyping and scenarios where quick turnaround is needed. Overall, Gen-4 Turbo delivers visually impressive 720p clips with fluid action and is faster (and cheaper) than the original Gen-4, at the cost of slight quality tradeoffs.","created":1739403101,"context_length":8192,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"RunwayTokenizer","instruct_type":"none"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"720P":"0.3","default":"0.3","length":"5"},{"720P":"0.6","default":"0.6","length":"10"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"720p":"0.3","default":"0.3","length":"5 sec","x-key":"length"},{"720p":"0.6","default":"0.6","length":"10 
sec","x-key":"length"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["image","prompt","length","seed","aspectRatio"],"models_extra_params":{"category":["Video Generation"]},"is_active":true,"creator":"Runway"},{"id":"sarvam/bulbul:v2","name":"Sarvam: Bulbul V2","description":"Sarvam: Bulbul V2 is a text-to-speech (TTS) model from Sarvam AI, delivering real-time, natural-sounding speech in 11 Indian languages at a fraction of global costs.\nIt builds on prior versions with key enhancements including improved audio quality, 30+ speaker voices (expanded from six distinct voices in v1 for diverse use cases like professional or conversational tones), and adjustable speech speed from 0.5x to 2.0x for customized delivery.","created":1746597903,"context_length":1500,"architecture":{"modality":"text-\u003eaudio","input_modalities":["text"],"output_modalities":["audio"],"tokenizer":""},"pricing":{"prompt":"0.000016","completion":"0","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":1500,"max_completion_tokens":1500,"is_moderated":true},"supported_parameters":["target_language_code","speaker","pitch","pace","loudness","speech_sample_rate","enable_preprocessing","output_audio_codec","temperature","enable_cached
_responses"],"models_extra_params":{"category":null},"is_active":true,"creator":"Sarvam"},{"id":"sarvam/saaras:v3","name":"Sarvam: Saaras V3","description":"Saaras V3 is Sarvam AI's next-generation automatic speech recognition (ASR) model engineered for Indian languages and real-world speech conditions.\nSaaras V3 supports 23 languages (22 official Indian languages plus English) within a unified multilingual model. The model achieves approximately 19% Word Error Rate (WER) on the IndicVoices benchmark, improving significantly from its predecessor Saaras V2.5 which had ~22% WER. On the 10 most popular languages in the IndicVoices dataset, it achieves 19.3% WER, and performance advantages widen for lower-resource Indian languages.","created":1770794385,"context_length":2046,"architecture":{"modality":"audio-\u003etext","input_modalities":["audio"],"output_modalities":["text"],"tokenizer":""},"pricing":{"prompt":"","completion":"","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"0.000088","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"audio_input":"0.000088"}},"top_provider":{"context_length":2046,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["input_audio_codec","mode","language_code"],"models_extra_params":{"category":null},"is_active":true,"creator":"Sarvam"},{"id":"sarvam/sarvam-105b","name":"Sarvam: Sarvam 
105B","description":"Sarvam 105B is Sarvam AI's flagship open-source Mixture-of-Experts (MoE) large language model (LLM), trained from scratch as India's first competitive 105-billion-parameter model. It delivers state-of-the-art performance across Indian languages and excels in enterprise-grade applications, with strong capabilities in multi-step reasoning, mathematics, coding, knowledge retrieval, and instruction-following.","created":1771399185,"context_length":128000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":""},"pricing":{"prompt":"0","completion":"0","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":128000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["presence_penalty","tool_choice","tools","stop","temperature","top_p","reasoning_effort","n","reasoning","max_tokens","frequency_penalty","seed","include_reasoning"],"models_extra_params":{"category":null},"is_active":true,"creator":"Sarvam"},{"id":"sarvam/sarvam-30b","name":"Sarvam: Sarvam 30B","description":"Sarvam-30B is Sarvam AI's efficient, production-ready Mixture-of-Experts (MoE) large language model, released on February 18, 2026 under the Apache 2.0 open-source license. 
It's optimized for real-time deployment and multilingual understanding across India's 22 scheduled languages, while maintaining competitive global performance.","created":1771399185,"context_length":128000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":""},"pricing":{"prompt":"0","completion":"0","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":128000,"max_completion_tokens":128000,"is_moderated":false},"supported_parameters":["presence_penalty","tool_choice","tools","stop","temperature","top_p","reasoning_effort","n","reasoning","max_tokens","frequency_penalty","seed","include_reasoning"],"models_extra_params":{"category":null},"is_active":true,"creator":"Sarvam"},{"id":"vidu/vidu-q1","name":"Vidu: Vidu Q1","description":"Vidu Q1 (2025) is the newest model from Vidu AI (ByteDance) for text/image to video. It produces 1080p clips with outstanding realism; reviewers call its outputs \"arguably the most lifelike\" seen to date. The model features very sharp visuals, smooth transitions, and detailed scene composition. It excels at cinematic lighting and keeps characters consistent, even when following complex emotional or action-packed prompts. 
Built on an intuitive interface, Vidu Q1 balances ease of use with high-end results. In short, it specializes in generating high-fidelity videos with strong narrative coherence and rich visual effects.","created":1739403113,"context_length":8192,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"VIDUTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"1080p":"0.6","default":"0.6","length":"5"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"1080p":"0.6","default":"0.6","length":"5 sec","x-key":"length"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["image","prompt","resolution","length","seed"],"models_extra_params":{"category":["Video Generation"]},"is_active":true,"creator":"Vidu"},{"id":"vidu/vidu-v2-0","name":"Vidu: Vidu V2.0","description":"Vidu 2.0 is an improved model (early 2024) that focuses on image-to-video generation. It adds support for more sophisticated animations and produces higher-quality output than Vidu 1.x. The model is better at interpreting detailed visual cues, allowing more intricate camera moves and character actions. It typically outputs at HD resolution. 
Compared to its predecessor, Vidu 2.0 yields smoother, more polished videos with richer detail. It's commonly used for bringing concept art or still images to life in an animated clip.","created":1739403114,"context_length":8192,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":"VIDUTokenizer","instruct_type":"instruct"},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"720P":"0.18","1080p":"0.48","default":"0.18","length":"4"},{"720P":"0.48","default":"0.48","length":"8"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"720p":"0.18","1080p":"0.48","default":"0.18","length":"4 sec","x-key":"length"},{"720p":"0.48","default":"0.48","length":"8 sec","x-key":"length"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":false},"supported_parameters":["image","prompt","resolution","length","seed"],"models_extra_params":{"category":["Video Generation"]},"is_active":true,"creator":"Vidu"},{"id":"wanx/wan-v2-6","name":"Alibaba: Wan 2.6 ","description":"Wan 2.6 is a state-of-the-art, open-source multimodal AI video generation model developed by Alibaba Cloud (released around December 2025). 
It is designed to create high-fidelity, 1080p cinematic videos up to 15 seconds long from text, images, or reference videos.","created":1765823400,"context_length":400000,"architecture":{"modality":"text+image-\u003evideo","input_modalities":["text","image"],"output_modalities":["video"],"tokenizer":""},"pricing":{"prompt":"","completion":"","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":[{"480P":"0.12","720P":"0.18","1080p":"0.3","default":"0.18","length":"2"},{"480P":"0.18","720P":"0.3","1080p":"0.48","default":"0.3","length":"3"},{"480P":"0.18","720P":"0.36","1080p":"0.66","default":"0.36","length":"4"},{"480P":"0.24","720P":"0.42","1080p":"0.72","default":"0.42","length":"5"},{"480P":"0.3","720P":"0.54","1080p":"0.9","default":"0.54","length":"6"},{"480P":"0.36","720P":"0.66","1080p":"1.08","default":"0.66","length":"7"},{"480P":"0.36","720P":"0.78","1080p":"1.26","default":"0.78","length":"8"},{"480P":"0.42","720P":"0.84","1080p":"1.44","default":"0.84","length":"9"},{"480P":"0.42","720P":"0.72","1080p":"1.44","default":"0.72","length":"10"},{"480P":"0.48","720P":"1.02","1080p":"1.74","default":"1.02","length":"11"},{"480P":"0.54","720P":"1.14","1080p":"1.92","default":"1.14","length":"12"},{"480P":"0.6","720P":"1.26","1080p":"2.1","default":"1.26","length":"13"},{"480P":"0.66","720P":"1.32","1080p":"2.22","default":"1.32","length":"14"},{"480P":"0.66","7
20P":"1.44","1080p":"2.16","default":"1.44","length":"15"}],"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"videoCost":[{"720p":"0.18","1080p":"0.3","default":"0.18","length":"2","x-key":"length"},{"720p":"0.3","1080p":"0.48","default":"0.3","length":"3","x-key":"length"},{"720p":"0.36","1080p":"0.66","default":"0.36","length":"4","x-key":"length"},{"720p":"0.42","1080p":"0.72","default":"0.42","length":"5","x-key":"length"},{"720p":"0.54","1080p":"0.9","default":"0.54","length":"6","x-key":"length"},{"720p":"0.66","1080p":"1.08","default":"0.66","length":"7","x-key":"length"},{"720p":"0.78","1080p":"1.26","default":"0.78","length":"8","x-key":"length"},{"720p":"0.84","1080p":"1.44","default":"0.84","length":"9","x-key":"length"},{"720p":"0.72","1080p":"1.44","default":"0.72","length":"10","x-key":"length"},{"720p":"1.02","1080p":"1.74","default":"1.02","length":"11","x-key":"length"},{"720p":"1.14","1080p":"1.92","default":"1.14","length":"12","x-key":"length"},{"720p":"1.26","1080p":"2.1","default":"1.26","length":"13","x-key":"length"},{"720p":"1.32","1080p":"2.22","default":"1.32","length":"14","x-key":"length"},{"720p":"1.44","1080p":"2.16","default":"1.44","length":"15","x-key":"length"}]}},"top_provider":{"context_length":8192,"max_completion_tokens":4096,"is_moderated":true},"supported_parameters":["image","prompt","resolution","length","seed"],"models_extra_params":{"category":null},"is_active":true,"creator":"Alibaba"},{"id":"x-ai/grok-2-1212","name":"xAI: Grok 2 1212","description":"Grok 2 1212 is xAI's advanced language model released in December 2024 (hence 1212) featuring substantial improvements in three critical areas: response accuracy, instruction following fidelity, and multilingual capability. This comprehensive upgrade enhances the model's precision across factual recall, reasoning tasks, and domain-specific knowledge, particularly in technical and scientific fields. 
The improved instruction adherence provides developers with greater control and predictability in model behavior, allowing for more precise tuning to specific application requirements. Enhanced multilingual support expands the model's practical utility across diverse linguistic contexts, making it more accessible for global applications. The combination of these advancements makes Grok 2 1212 particularly valuable for developers requiring highly steerable AI systems capable of handling sophisticated tasks with reliable accuracy and consistent responses across multiple languages.","created":1734232814,"context_length":131072,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Grok"},"pricing":{"prompt":"0.000002","completion":"0.00001","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":131072,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","seed","logprobs","top_logprobs","response_format"],"models_extra_params":{"category":null},"is_active":true,"creator":"X-ai"},{"id":"x-ai/grok-2-vision-1212","name":"xAI: Grok 2 Vision 1212","description":"Grok 2 Vision 1212 brings advanced 
visual understanding, multilingual support, and refined instruction-following to power smarter, more intuitive image-based applications with enhanced reasoning and control.","created":1734237338,"context_length":32768,"architecture":{"modality":"text+image-\u003etext","input_modalities":["text","image"],"output_modalities":["text"],"tokenizer":"Grok"},"pricing":{"prompt":"0.000002","completion":"0.00001","request":"0","image":"0.0036","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":32768,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","seed","logprobs","top_logprobs","response_format"],"models_extra_params":{"category":null},"is_active":true,"creator":"X-ai"},{"id":"x-ai/grok-3-beta","name":"xAI: Grok 3 Beta","description":"Grok 3 Beta is xAI's flagship model featuring specialized domain expertise in finance, healthcare, law, and scientific fields. This enterprise-focused solution delivers exceptional performance in data extraction, code generation, and text summarization tasks. 
Released in 2025, the model particularly excels in structured tasks and achieves superior results on challenging benchmarks like GPQA, LCB, and MMLU-Pro, consistently outperforming the smaller Grok 3 Mini even when the latter operates at high thinking levels. Grok 3's sophisticated knowledge architecture makes it particularly valuable for professional and technical applications requiring deep contextual understanding in regulated industries. The model offers dual deployment options: a standard endpoint optimized for quality and a fast endpoint prioritizing throughput, allowing users to select the appropriate performance profile based on their specific application requirements.","created":1744240068,"context_length":131072,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Grok"},"pricing":{"prompt":"0.000005","completion":"0.000025","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.00000125","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":131072,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","stop","frequency_penalty","presence_penalty","seed","logprobs","top_logprobs","response_format"],"models_extra_params":{"category":["Math","Legal"]},"is_active":true,"creator":"X
-ai"},{"id":"x-ai/grok-3-mini-beta","name":"xAI: Grok 3 Mini Beta","description":"Grok 3 Mini Beta is xAI's lightweight reasoning model designed specifically for quantitative problem-solving with transparent thinking processes. Unlike conventional AI systems that generate immediate answers, this model implements explicit reasoning steps before responding, making it particularly effective for mathematics, puzzles, and logical reasoning tasks. The model defaults to low reasoning effort but can be configured with increased reasoning capacity (reasoning: { effort: high }) for more complex problems. With accessible thinking traces, users can examine the model's step-by-step problem-solving approach, providing valuable insights into its reasoning process. This beta release offers early access to xAI's reasoning capabilities through dual endpoint options: a standard endpoint optimized for quality and a fast endpoint prioritizing throughput for latency-sensitive applications.","created":1744240195,"context_length":131072,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Grok"},"pricing":{"prompt":"0.0000006","completion":"0.000004","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.00000015","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":131072,"max_compl
etion_tokens":0,"is_moderated":false},"supported_parameters":["tools","tool_choice","max_tokens","temperature","top_p","reasoning","include_reasoning","stop","seed","logprobs","top_logprobs","response_format"],"models_extra_params":{"category":[]},"is_active":true,"creator":"X-ai"},{"id":"x-ai/grok-4","name":"xAI: Grok 4","description":"Grok 4, launched by xAI in July 2025, represents a significant leap in large language model development. Building on earlier versions like Grok 3 and the original 2023–24 models, Grok 4 introduces several key enhancements aimed at performance, versatility, and real-world application. It features an extended context window of up to 256,000 tokens, enabling it to process long documents, codebases, or conversations with greater depth and continuity. The model demonstrates strong performance in advanced reasoning tasks, including mathematics and programming. Its architecture supports multimodal input, with capabilities for vision and voice currently in development, and introduces advanced voice interaction with faster response times and more natural 
flow.","created":1752087689,"context_length":256000,"architecture":{"modality":"text+image-\u003etext","input_modalities":["image","text"],"output_modalities":["text"],"tokenizer":"Grok"},"pricing":{"prompt":"0.000003","completion":"0.000015","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.00000075","input_cache_write":"","prompt_more_than_128k_input":"0.000006","completion_more_than_128k_input":"0.00003","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":{"prompt":"0.000003","completion":"0.000015","prompt_more_than_128k_input":"0.000006","completion_more_than_128k_input":"0.00003"}},"top_provider":{"context_length":256000,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","tools","tool_choice","structured_outputs","seed","logprobs","top_logprobs","response_format"],"models_extra_params":{"category":["Coding","Math","Legal","Health","Creative Writing"]},"is_active":true,"creator":"X-ai"},{"id":"x-ai/grok-code-fast-1","name":"xAI: Grok Code Fast 1","description":"Grok-Code-Fast-1 is a fast, cost-efficient reasoning model built for agentic coding. It can autonomously perform multi-step development tasks, issue tool calls, and refine solutions with transparent reasoning traces. Optimized for speed, it delivers near-instant suggestions that enable smooth, interactive coding workflows. 
With a 256K token context window, it handles entire codebases and large logs without losing comprehension, making it highly practical for real-world software development.","created":1756238927,"context_length":256000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Grok"},"pricing":{"prompt":"0.0000002","completion":"0.0000015","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.00000002","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":256000,"max_completion_tokens":10000,"is_moderated":false},"supported_parameters":["logprobs","max_tokens","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"X-ai"},{"id":"z-ai/glm-4.5","name":"Z.AI: GLM 4.5","description":"GLM-4.5 is the flagship model in Z.ai’s latest AI series, designed for advanced reasoning, coding, and intelligent agent tasks. It uses a Mixture of Experts (MoE) architecture with 355 billion total parameters and 32 billion active parameters per inference, offering both scale and efficiency. The model integrates features like Grouped-Query Attention with partial RoPE and a Multi-Token Prediction layer, boosting long-context reasoning and decoding speed. 
Its deep-and-narrow architecture and 96 attention heads enhance reasoning accuracy, even at the cost of training loss. Trained on 22 trillion tokens, including 8 trillion for domain-specific use, GLM-4.5 excels in complex, high-performance agentic applications.","created":1753471347,"context_length":131072,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.0000006","completion":"0.0000022","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":131072,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","tools","tool_choice","reasoning","include_reasoning","frequency_penalty","min_p","presence_penalty","repetition_penalty","seed","stop","top_k","response_format","logprobs","logit_bias","top_logprobs"],"models_extra_params":{"category":["Coding"]},"is_active":true,"creator":"Z.AI"},{"id":"z-ai/glm-4.5-air","name":"Z.AI: GLM 4.5 Air","description":"GLM-4.5-Air is a lightweight counterpart to GLM-4.5, built for more efficient deployment scenarios without sacrificing core capabilities. 
It uses the same Mixture of Experts design with 106 billion total parameters and 12 billion active parameters, striking a balance between performance and cost. Like its larger sibling, it includes Grouped-Query Attention, speculative decoding via Multi-Token Prediction, and a deep architecture tailored for reasoning tasks. With access to the same 22 trillion token training pipeline and reinforcement learning through Z.ai’s “Slime” system, GLM-4.5-Air is optimized for faster, resource-conscious inference across coding, reasoning, and agent workflows.","created":1753471258,"context_length":128000,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.0000002","completion":"0.0000011","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0.00000003","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":128000,"max_completion_tokens":96000,"is_moderated":false},"supported_parameters":["max_tokens","temperature","top_p","tools","tool_choice","reasoning","include_reasoning","stop","frequency_penalty","presence_penalty","repetition_penalty","response_format","top_k","seed","min_p"],"models_extra_params":{"category":[]},"is_active":true,"creator":"Z.AI"},{"id":"z-ai/glm-4.6","name":"Z.AI: GLM 4.6","description":"GLM-4.6 is the latest 
iteration in the GLM series by Zhipu AI, designed as a large language model with about 355 billion parameters in a Mixture of Experts (MoE) architecture. It is optimized for various complex tasks including real-world coding, long-context processing, advanced reasoning, intelligent agent applications, and refined writing. The model features an expanded context window of 200,000 tokens (up from 128,000 in GLM-4.5), allowing it to handle longer and more complex interactions such as extensive documents or multi-turn conversations.","created":1759235576,"context_length":202752,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.0000006","completion":"0.0000019","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":202752,"max_completion_tokens":202752,"is_moderated":false},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Z.AI"},{"id":"z-ai/glm-4.7","name":"Z.AI: GLM 
4.7","description":"GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.","created":1766378014,"context_length":202752,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.00000043","completion":"0.00000175","request":"0","image":"0","web_search":"0","citation":"","reasoning":"","duration":"","internal_reasoning":"0","input_cache_read":"0","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":202752,"max_completion_tokens":131072,"is_moderated":false},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Z.AI"},{"id":"z-ai/glm-5","name":"Z.AI: GLM 5","description":"GLM-5 is Z.ai’s flagship open-source foundation model, built for complex system design and long-horizon agent workflows. 
Aimed at expert developers, it delivers production-grade results on large-scale programming tasks and competes with top closed-source models. With strong agentic planning, deep backend reasoning, and iterative self-correction, GLM-5 goes beyond code generation to help design, build, and execute complete systems end-to-end.","created":1770829182,"context_length":204800,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.0000003","completion":"0.00000255","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":204800,"max_completion_tokens":131072,"is_moderated":false},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Z.AI"},{"id":"z-ai/glm-5.1","name":"Z.ai: GLM 5.1","description":"zai-org/GLM-5.1 is Z.AI's (formerly Zhipu AI) open-weight, instruction-tuned coding flagship model—a refreshed upgrade over GLM-5—excelling in agentic engineering, complex system programming, and long-horizon 
tasks with SOTA open-source performance approaching Claude Opus 4.6.","created":1775578025,"context_length":202752,"architecture":{"modality":"text-\u003etext","input_modalities":["text"],"output_modalities":["text"],"tokenizer":"Other"},"pricing":{"prompt":"0.00000140","completion":"0.00000440","request":"","image":"","web_search":"","citation":"","reasoning":"","duration":"","internal_reasoning":"","input_cache_read":"","input_cache_write":"","prompt_more_than_128k_input":"","completion_more_than_128k_input":"","prompt_more_than_200k_input":"","completion_more_than_200k_input":"","prompt_more_than_272k_input":"","completion_more_than_272k_input":"","web_search_per_1000_requests_low":"","web_search_per_1000_requests_medium":"","web_search_per_1000_requests_high":"","web_search_per_1000_requests":"","audio_output_per_minute":"","audio_input":"","audio_output":"","cached_audio_input":"","batchPrice":{},"flex_service_tier_pricing":{},"videoCost":null,"videoCostWithAudio":null,"imageCost":null,"priceToShow":null},"top_provider":{"context_length":202752,"max_completion_tokens":0,"is_moderated":false},"supported_parameters":["frequency_penalty","include_reasoning","logit_bias","logprobs","max_tokens","min_p","presence_penalty","reasoning","repetition_penalty","response_format","seed","stop","structured_outputs","temperature","tool_choice","tools","top_k","top_logprobs","top_p"],"models_extra_params":{"category":null},"is_active":true,"creator":"Z.AI"}]}