Adding all project files
parent 6c9e127bdc
commit cd4316ad0f
42289 changed files with 8009643 additions and 0 deletions
140 venv/Lib/site-packages/accelerate/utils/ao.py Normal file
@@ -0,0 +1,140 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Needed utilities for torchao FP8 training.
"""

from functools import partial
from typing import TYPE_CHECKING, Callable, Optional

import torch

from .imports import is_torchao_available, torchao_required


if TYPE_CHECKING:
    if is_torchao_available():
        from torchao.float8.float8_linear import Float8LinearConfig


def find_first_last_linear_layers(model: torch.nn.Module):
    """
    Finds the first and last linear layer names in a model.

    This is needed during FP8 training to avoid instability issues by keeping the first and last layers unquantized.

    Ref: https://x.com/xariusrke/status/1826669142604141052
    """
    first_linear, last_linear = None, None
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):
            if first_linear is None:
                first_linear = name
            last_linear = name
    return first_linear, last_linear
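
# Illustrative sketch (toy model): `named_modules` yields names in registration order,
# so the helper above returns the names of the first and last `nn.Linear` layers:
#
#     >>> toy = torch.nn.Sequential(
#     ...     torch.nn.Linear(32, 64), torch.nn.ReLU(), torch.nn.Linear(64, 16)
#     ... )
#     >>> find_first_last_linear_layers(toy)
#     ('0', '2')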


def filter_linear_layers(module, fqn: str, layers_to_filter: list[str]) -> bool:
    """
    A filter function which checks that `module`:

    - is a `torch.nn.Linear` layer
    - has `in_features` and `out_features` divisible by 16
    - does not have a fully qualified name listed in `layers_to_filter`

    Returns `True` if the module should be converted to FP8 and `False` otherwise.

    Args:
        module (`torch.nn.Module`):
            The module to check.
        fqn (`str`):
            The fully qualified name of the layer.
        layers_to_filter (`list[str]`):
            The list of fully qualified layer names to skip (kept unquantized).
    """
    if isinstance(module, torch.nn.Linear):
        if module.in_features % 16 != 0 or module.out_features % 16 != 0:
            return False
    if fqn in layers_to_filter:
        return False
    return True
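
# Illustrative sketch (hypothetical layer name): via `functools.partial`, the same filter
# can exclude arbitrary layers by name while keeping the `(module, fqn)` signature that
# torchao's `module_filter_fn` expects:
#
#     >>> skip_head = partial(filter_linear_layers, layers_to_filter=["lm_head"])
#     >>> skip_head(torch.nn.Linear(64, 64), "lm_head")
#     False
#     >>> skip_head(torch.nn.Linear(30, 64), "encoder.proj")   # 30 is not divisible by 16
#     False
#     >>> skip_head(torch.nn.Linear(64, 64), "encoder.proj")
#     True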


def filter_first_and_last_linear_layers(module, fqn: str) -> bool:
    """
    A filter function which excludes the first and last linear layers from FP8 conversion and keeps every other
    linear layer.

    <Tip>

    For stability reasons, we skip the first and last linear layers. Otherwise the model may not train or converge
    properly.

    </Tip>

    Args:
        module (`torch.nn.Module`):
            The module to check.
        fqn (`str`):
            The fully qualified name of the layer.
    """
    first_linear, last_linear = find_first_last_linear_layers(module)
    return filter_linear_layers(module, fqn, layers_to_filter=[first_linear, last_linear])
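
# Illustrative sketch: the explicit form of this policy, built from the helpers above, is
# what `convert_model_to_fp8_ao` below constructs when it is passed `module_filter_func=None`:
#
#     >>> first, last = find_first_last_linear_layers(toy)   # `toy` from the sketch above
#     >>> keep_first_last = partial(filter_linear_layers, layers_to_filter=[first, last])
#     >>> keep_first_last(toy[0], "0")   # first linear layer stays out of FP8
#     False
#     >>> keep_first_last(toy[1], "1")   # non-Linear modules pass through
#     True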


@torchao_required
def has_ao_layers(model: torch.nn.Module):
    """Checks whether any submodule of `model` is already a torchao `Float8Linear` layer."""
    from torchao.float8.float8_linear import Float8Linear

    for name, module in model.named_modules():
        if isinstance(module, Float8Linear):
            return True
    return False


@torchao_required
def convert_model_to_fp8_ao(
    model: torch.nn.Module,
    config: Optional["Float8LinearConfig"] = None,
    module_filter_func: Optional[Callable] = filter_first_and_last_linear_layers,
):
    """
    Converts all `nn.Linear` layers in the model (except the first and last) to torchao's `Float8Linear` layer in
    place.

    Args:
        model (`torch.nn.Module`):
            The model to convert.
        config (`torchao.float8.Float8LinearConfig`, *optional*):
            The configuration for FP8 training. Recommended to utilize `torchao.float8.recipe_name_to_linear_config`
            to generate this. In general, the default config (what is passed when set to `None`) should be
            sufficient.
        module_filter_func (`Callable`, *optional*, defaults to `filter_first_and_last_linear_layers`):
            Optional function that takes in a module and its layer name and returns a boolean indicating whether the
            module should be converted to FP8. See `filter_first_and_last_linear_layers` for an example.

    Example:

    ```python
    from accelerate.utils.ao import convert_model_to_fp8_ao

    model = MyModel()
    model.to("cuda")
    convert_model_to_fp8_ao(model)

    model.train()
    ```
    """
    from torchao.float8 import convert_to_float8_training

    first_linear, last_linear = find_first_last_linear_layers(model)
    if module_filter_func is None:
        module_filter_func = partial(filter_linear_layers, layers_to_filter=[first_linear, last_linear])
    convert_to_float8_training(model, module_filter_fn=module_filter_func, config=config)
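
# Illustrative sketch: a hypothetical end-to-end call that overrides the default filter,
# e.g. to also keep an output projection (named "lm_head" purely for illustration) out of FP8:
#
#     >>> model = MyModel().to("cuda")   # `MyModel` as in the docstring example above
#     >>> keep = [*find_first_last_linear_layers(model), "lm_head"]
#     >>> convert_model_to_fp8_ao(
#     ...     model, module_filter_func=partial(filter_linear_layers, layers_to_filter=keep)
#     ... )
#     >>> model.train()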