Configs in vLLM
Some details on Config loading in vLLM
1 Configs
hf_config: The config.json from HF
hf_text_config: The llm_config from config.json
# api_server.py -> run_server -> init_app -> serve_http
def init_app()
...
engine = AsyncLLMEngine.from_engine_args()
class AsyncLLMEngine:
def from_engine_args(...)
...
vllm_config = engine_args.create_engine_config(usage_context)
class EngineArgs:
def create_engine_config(self):
...
model_config = self.create_model_config()
return VllmConfig(
model_config=model_config,
...
)
def create_model_config(self):
...
return ModelConfig(...)
class ModelConfig:
def __post_init__(self):
...
hf_config = get_config(...) # read from the config registered at vllm/transformers_utils/configs/
# Read `auto_map` configs
self.hf_text_config = get_hf_text_config(self.hf_config)
def get_hf_text_config(config: PretrainedConfig):
...
## transformers lib
text_config = config.get_text_config()
# looking for decoder_possible_text_config_names = ("decoder", "generator", "text_config")
# if found, return
# otherwise return config so that text_config == config is True

2 Preprocessor_config
preprocessor_config is read by AutoImageProcessor; in vLLM the call is made in vllm.transformers_utils.processor.get_image_processors
processor = AutoImageProcessor.from_pretrained(
processor_name,
*args,
revision=revision,
trust_remote_code=trust_remote_code,
**kwargs)
3 Decorators
Multimodal registration decorator
@MULTIMODAL_REGISTRY.register_processor(
InternVLMultiModalProcessor,
info=InternVLProcessingInfo,
dummy_inputs=InternVLDummyInputsBuilder)
- Firstly, self.processing_info.get_allowed_mm_limits() is called from multimodal.profiler.MultiModalProfiler by profiler.get_mm_limits()
class InternVLProcessingInfo -> class BaseInternVLProcessingInfo -> class BaseProcessingInfo from vllm.multimodal.processing
class InternVLMultiModalProcessor -> class BaseInternVLMultiModalProcessor[InternVLProcessingInfo] -> class BaseMultiModalProcessor[_I] from vllm.multimodal.processing
class InternVLDummyInputsBuilder -> class BaseInternVLDummyInputsBuilder[InternVLProcessingInfo] -> class BaseDummyInputsBuilder[_I] from vllm.multimodal.profiling
_I = TypeVar("_I", bound=BaseInternVLProcessingInfo)
class InternVLProcessor -> class BaseInternVLProcessor(ABC)
- This is used at InternVLProcessingInfo.get_hf_processor

