import xml.etree.ElementTree as ET
from io import BytesIO

import cv2
import gradio as gr
import numpy as np
import requests
from PIL import Image


def parse_alto_xml(xml_file):
    """Parse the ALTO XML file to extract polygons and text content for each TextLine."""
    tree = ET.parse(xml_file)
    root = tree.getroot()
    ns = {"alto": "http://www.loc.gov/standards/alto/ns-v4#"}

    annotations = []
    transcriptions = {}

    for text_block in root.findall(".//alto:TextBlock", ns):
        for text_line in text_block.findall("alto:TextLine", ns):
            points = None
            shape = text_line.find("alto:Shape", ns)
            if shape is not None:
                polygon = shape.find("alto:Polygon", ns)
                if polygon is not None:
                    polygon_points = polygon.attrib["POINTS"]
                    points = [
                        tuple(map(int, point.split(",")))
                        for point in polygon_points.split()
                    ]
            if points is None:
                # No Shape/Polygon on this line: fall back to the
                # TextLine's bounding box (HPOS/VPOS/WIDTH/HEIGHT).
                hpos = int(text_line.attrib["HPOS"])
                vpos = int(text_line.attrib["VPOS"])
                width = int(text_line.attrib["WIDTH"])
                height = int(text_line.attrib["HEIGHT"])
                points = [
                    (hpos, vpos),
                    (hpos + width, vpos),
                    (hpos + width, vpos + height),
                    (hpos, vpos + height),
                ]

            content = " ".join(
                string.attrib["CONTENT"]
                for string in text_line.findall("alto:String", ns)
            )
            label = text_line.attrib["ID"]

            annotations.append((points, label))
            transcriptions[label] = content

    text_area_content = "\n".join(transcriptions[label] for label in transcriptions)

    return annotations, transcriptions, text_area_content


def visualize_polygons_on_image(
    image, annotations, alpha=0.5, include_reading_order=False
):
    """Visualize polygons on the image with an optional reading order overlay."""
    overlay = image.copy()
    for polygon, label in annotations:
        # Random fill colour per line; cast to plain ints because some
        # OpenCV versions reject numpy integer scalars as colour values.
        color = (
            int(np.random.randint(0, 255)),
            int(np.random.randint(0, 255)),
            int(np.random.randint(0, 255)),
        )
        cv2.fillPoly(overlay, [np.array(polygon, dtype=np.int32)], color)

        if include_reading_order:
            centroid = np.mean(np.array(polygon), axis=0).astype(int)
            cv2.putText(
                overlay,
                str(label),
                (int(centroid[0]), int(centroid[1])),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                (0, 0, 0),
                1,
                cv2.LINE_AA,
            )

    # Blend the filled polygons over the original image.
    return cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0)


def visualize(
    xml_file, image_source, image_id, uploaded_image, include_reading_order=False
):
    if image_source == "Use IIIF image":
        if not image_id:
            raise gr.Error("Please enter an Image ID.")
        image_url = f"https://iiifintern.ra.se/arkis!{image_id}/full/max/0/default.jpg"
        # Timeout so the app doesn't hang indefinitely on a slow IIIF server.
        response = requests.get(image_url, timeout=30)
        if response.status_code != 200:
            raise gr.Error(f"Failed to download image from {image_url}")
        image = np.array(Image.open(BytesIO(response.content)))
    else:
        if uploaded_image is None:
            raise gr.Error("Please upload an image.")
        image = uploaded_image

    annotations, transcriptions, text_area_content = parse_alto_xml(xml_file)
    annotated_image = visualize_polygons_on_image(
        image, annotations, include_reading_order=include_reading_order
    )
    return annotated_image, annotations, transcriptions, text_area_content


def get_transcription_from_coords(annotations, transcriptions, evt: gr.SelectData):
    """Get the transcription for the polygon clicked in the annotated image."""
    # Guard against clicks before any XML has been processed (empty state).
    if not annotations:
        return "No transcription available."
    x, y = evt.index[0], evt.index[1]
    for points, label in annotations:
        polygon = np.array(points, dtype=np.int32)
        if cv2.pointPolygonTest(polygon, (x, y), False) >= 0:
            return transcriptions.get(label, "No transcription available.")
    return "No transcription available."
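
# A minimal sketch of the ALTO v4 structure that parse_alto_xml() expects.
# The IDs, coordinates, and text below are illustrative, not from a real file:
#
#   <alto xmlns="http://www.loc.gov/standards/alto/ns-v4#">
#     <Layout>
#       <Page>
#         <PrintSpace>
#           <TextBlock>
#             <TextLine ID="line_1" HPOS="10" VPOS="20" WIDTH="200" HEIGHT="30">
#               <Shape><Polygon POINTS="10,20 210,20 210,50 10,50"/></Shape>
#               <String CONTENT="Hello"/>
#               <String CONTENT="world"/>
#             </TextLine>
#           </TextBlock>
#         </PrintSpace>
#       </Page>
#     </Layout>
#   </alto>
#
# For that input, parse_alto_xml("page.xml") would return:
#   annotations        -> [([(10, 20), (210, 20), (210, 50), (10, 50)], "line_1")]
#   transcriptions     -> {"line_1": "Hello world"}
#   text_area_content  -> "Hello world"
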
with gr.Blocks(title="XML Visualization App") as app:
    with gr.Tab("Visualize"):
        # State holds the parsed annotations/transcriptions between events.
        annotations_state = gr.State()
        transcriptions_state = gr.State()

        with gr.Row():
            with gr.Column():
                xml_input = gr.File(label="Upload ALTO XML File", file_types=[".xml"])
            with gr.Column():
                image_source = gr.Radio(
                    choices=["Use IIIF image", "Upload your own image"],
                    label="Image Source",
                    value="Use IIIF image",
                )
                image_id_input = gr.Textbox(
                    label="Image ID",
                    placeholder="Enter image ID (e.g., 30003365_00001)",
                    visible=True,
                )
                image_upload = gr.Image(
                    label="Upload Image", type="numpy", visible=False
                )
                include_reading_order_input = gr.Checkbox(label="Include Reading Order")
                process_button = gr.Button(
                    "Visualize ALTO", scale=0, variant="primary"
                )

        def update_image_source(choice):
            # Show only the input widget that matches the selected source.
            if choice == "Use IIIF image":
                return [gr.update(visible=True), gr.update(visible=False)]
            else:
                return [gr.update(visible=False), gr.update(visible=True)]

        image_source.change(
            update_image_source,
            inputs=image_source,
            outputs=[image_id_input, image_upload],
        )

        with gr.Row():
            with gr.Column(scale=3):
                annotated_image_output = gr.Image(
                    label="Annotated Image", interactive=True
                )
            with gr.Column(scale=2):
                transcription_output = gr.TextArea(
                    label="Transcription",
                    interactive=False,
                    show_copy_button=True,
                    lines=30,
                )
                transcription_selected = gr.Textbox(
                    label="Selected Polygon", interactive=False, show_copy_button=True
                )

        process_button.click(
            visualize,
            inputs=[
                xml_input,
                image_source,
                image_id_input,
                image_upload,
                include_reading_order_input,
            ],
            outputs=[
                annotated_image_output,
                annotations_state,
                transcriptions_state,
                transcription_output,
            ],
        )

        # Clicking a polygon in the annotated image looks up its transcription.
        annotated_image_output.select(
            get_transcription_from_coords,
            inputs=[annotations_state, transcriptions_state],
            outputs=transcription_selected,
        )

app.queue()
app.launch()
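
# Running locally (the filename "app.py" is an assumption; adjust to taste):
#   pip install gradio opencv-python-headless pillow requests numpy
#   python app.py
# Gradio serves on http://127.0.0.1:7860 by default.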