team-8/llama.py

import json
import sys
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
from huggingface_hub import login
# -----------------------------
# CONFIG
# -----------------------------
LLAMA_MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
# -----------------------------
# GLOBALS (loaded only once)
# -----------------------------
tokenizer = None
gen_pipe = None

def init_model(hf_token: str):
    """
    Initialize the LLaMA model and build the text-generation pipeline.
    """
    global tokenizer, gen_pipe

    print("🔑 Logging in to HuggingFace...")
    login(hf_token)

    print("⚙️ Loading LLaMA model...")
    tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        LLAMA_MODEL_ID,
        device_map="auto",
        torch_dtype=torch.float16
    )
    gen_pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        device_map="auto"
    )
    print("✅ Model ready!")

def process_plant(plant_data, description=""):
    """
    Use LLaMA to analyze the future state of the plant.
    Returns the generated JSON as a string.
    """
    system_prompt = """You are a virtual agronomist specialized in predicting plant growth.
Your knowledge includes scientific agronomy data, crop physiology, and cultivation practices already known to you from your training.
You will receive structured JSON input describing the current state of a plant and its growing conditions.
Your task:
1. Base your reasoning ONLY on real agronomic knowledge you already have (do not speculate or invent).
2. If the JSON contains the **initial size or measurable parameters** (e.g. leaf area, stem height), you may predict their future evolution with approximate numerical values.
3. If such data are missing, DO NOT invent them — instead, give only qualitative descriptions.
4. Always consider crop-specific knowledge (e.g. tomato vs basil).
5. If you do not have reliable knowledge for the specific plant species in input, explicitly state this.
6. Return ONLY a JSON object with two fields: "diagnosis" and "future_aspect".
"""

    user_prompt = f"""
Plant data:
{json.dumps(plant_data, ensure_ascii=False, indent=2)}

Additional description:
"{description}"

Reply ONLY with the required JSON object.
"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    print("🌱 Generating description with LLaMA...")
    result = gen_pipe(
        prompt,
        max_new_tokens=200,
        temperature=0.4,
        return_full_text=False
    )
    text = result[0]["generated_text"]
    return text
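
# Optional sketch (not part of the original flow): since the model is asked to
# return a JSON object with the fields "diagnosis" and "future_aspect", a helper
# like the one below could parse and validate the raw reply before saving it.
# The function name and fallback behavior are assumptions, not something defined
# elsewhere in this project.
def parse_prediction(raw_text: str):
    """
    Try to extract the {"diagnosis": ..., "future_aspect": ...} object from the
    model reply. Returns a dict on success, or None if no valid JSON is found.
    """
    try:
        # The reply may contain extra text around the JSON object, so isolate
        # the first '{' ... last '}' span before parsing.
        start = raw_text.find("{")
        end = raw_text.rfind("}")
        if start == -1 or end == -1 or end <= start:
            return None
        parsed = json.loads(raw_text[start:end + 1])
        if "diagnosis" in parsed and "future_aspect" in parsed:
            return parsed
        return None
    except json.JSONDecodeError:
        return None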

def main():
    # -----------------------------
    # STEP 1: Initialize the model
    # -----------------------------
    init_model("hf_*****************")  # Insert your HF token here

    # -----------------------------
    # STEP 2: Run on the data
    # -----------------------------
    try:
        with open("plant_summary.json", "r", encoding="utf-8") as f:
            plant_data = json.load(f)

        output = process_plant(plant_data)
        print("\n=== Result ===")
        print(output)

        # 🔹 Save the model output to prediction.json
        # (the raw reply is serialized as a JSON string, not parsed into an object)
        with open("prediction.json", "w", encoding="utf-8") as out_f:
            json.dump(output, out_f, indent=2, ensure_ascii=False)
        print("✅ Output saved to prediction.json")
    except Exception as e:
        print(f"⚠️ Error reading plant_summary.json: {e}")


# Only when executed directly
if __name__ == "__main__":
    main()
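
# For reference, a hypothetical shape for plant_summary.json (the field names
# below are illustrative assumptions; the script only requires valid JSON, since
# the whole object is serialized into the prompt):
#
# {
#   "species": "tomato",
#   "stem_height_cm": 35,
#   "leaf_area_cm2": 120,
#   "conditions": {"temperature_c": 24, "humidity_pct": 60, "light_hours": 14}
# }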