# Source code for gpp_client.domains.site_status

"""
Module for retrieving current Gemini site status information.
"""

__all__ = ["SiteStatusDomain"]

import re
from enum import Enum
from typing import Any, Literal, Optional

from bs4 import BeautifulSoup
from httpx import AsyncClient


class Site(str, Enum):
    """
    Gemini observatory sites for which status can be retrieved.

    Members are also ``str`` instances, so ``Site("north")`` resolves a
    lowercase site name and each member compares equal to its string value.
    """

    SOUTH = "south"
    NORTH = "north"


# Per-site settings read by ``SiteStatusDomain.get_by_id``:
#   src_url            - where the site status is fetched from.
#   gmos_url           - HTML page describing the GMOS configuration.
#   shutter_keyword    - key in the status payload holding the shutter string.
#   instrument_keyword - key in the status payload holding the instruments string.
#   validity_keyword   - key in the status payload reported as "validity".
SITE_CONFIG = {
    Site.NORTH: {
        # Fetched as an HTML page and parsed by element id; the keywords below
        # are the element ids extracted from that page.
        "src_url": "https://www.gemini.edu/sciops/schedules/obsStatus/GN_Instrument.html",
        "gmos_url": "https://www.gemini.edu/sciops/schedules/obsStatus/gmosN.html",
        "shutter_keyword": "shutter",
        "instrument_keyword": "inst",
        "validity_keyword": "update",
    },
    Site.SOUTH: {
        # Fetched and decoded as JSON; the keywords below are looked up as
        # keys of the decoded document.
        "src_url": "https://www.gemini.edu/sciops/schedules/obsStatus/too_GS.json",
        "gmos_url": "https://www.gemini.edu/sciops/schedules/obsStatus/gmosS.html",
        "shutter_keyword": "open",
        "instrument_keyword": "instruments",
        "validity_keyword": "valid",
    },
}


class SiteStatusDomain:
    """
    Domain for retrieving current Gemini site status information.
    """

    async def get_by_id(self, site_id: Literal["south", "north"]) -> dict[str, Any]:
        """
        Get the current site status payload for Gemini North or South.

        Parameters
        ----------
        site_id : Literal["south", "north"]
            The observatory site name (case-insensitive; surrounding
            whitespace is ignored).

        Returns
        -------
        dict[str, Any]
            A dictionary with the keys ``site``, ``validity``, ``available``,
            ``instruments``, ``comment``, ``shutter`` and ``gmos_config``.

        Raises
        ------
        ValueError
            If ``site_id`` does not name a known site.
        """
        # Normalising and constructing the enum doubles as validation.
        site = Site(site_id.strip().lower())
        settings = SITE_CONFIG[site]

        async with AsyncClient(follow_redirects=True) as http:
            if site != Site.NORTH:
                # Gemini South publishes its status as JSON.
                status = await self._fetch_json(http, settings["src_url"])
            else:
                # Gemini North only publishes an HTML page; scrape it.
                north_page = await self._fetch_webpage(http, settings["src_url"])
                status = _parse_gemini_north_webpage(north_page)
                # Add site manually to match Gemini South payload.
                status["Site"] = "Gemini North"

            gmos_page = await self._fetch_webpage(http, settings["gmos_url"])

        gmos_config = _parse_gmos_config_page(gmos_page)
        shutter = _parse_shutter(status.get(settings["shutter_keyword"]))
        instruments = _parse_instruments(status.get(settings["instrument_keyword"]))

        return {
            "site": status.get("Site"),
            "validity": status.get(settings["validity_keyword"]),
            "available": status.get("avail", ""),
            "instruments": instruments,
            "comment": status.get("comment"),
            "shutter": shutter,
            "gmos_config": gmos_config,
        }

    async def _fetch_json(self, client: AsyncClient, url: str) -> dict[str, Any]:
        """
        Fetch JSON content from the given URL.

        Parameters
        ----------
        client : AsyncClient
            An HTTP client instance.
        url : str
            The URL to retrieve.

        Returns
        -------
        dict[str, Any]
            Parsed JSON content.
        """
        reply = await client.get(url)
        # Surface HTTP error statuses instead of decoding an error body.
        reply.raise_for_status()
        return reply.json()

    async def _fetch_webpage(self, client: AsyncClient, url: str) -> str:
        """
        Fetch a webpage.

        Parameters
        ----------
        client : AsyncClient
            An HTTP client instance.
        url : str
            The url to fetch.

        Returns
        -------
        str
            The returned html.
        """
        reply = await client.get(url)
        # Surface HTTP error statuses instead of returning an error page.
        reply.raise_for_status()
        return reply.text
def _parse_gemini_north_webpage(html: str) -> dict[str, Any]:
    """
    Parse the Gemini North status HTML page to extract status values by ID.

    Parameters
    ----------
    html : str
        Raw HTML content from the Gemini North status page.

    Returns
    -------
    dict[str, Any]
        Dictionary of extracted values keyed by known element IDs. An ID that
        is not present in the page maps to ``None``.
    """
    soup = BeautifulSoup(html, "html.parser")

    ids_to_extract = ("update", "avail", "inst", "comment", "shutter")

    data: dict[str, Any] = {}
    for element_id in ids_to_extract:
        tag = soup.find(id=element_id)
        data[element_id] = tag.get_text(strip=True) if tag is not None else None

    return data


def _parse_gmos_config_page(html: str) -> Optional[dict[str, Any]]:
    """
    Parse the GMOS configuration HTML page.

    Parameters
    ----------
    html : str
        Raw HTML content.

    Returns
    -------
    dict[str, Any], optional
        Contains ``local_timestamp``, ``gratings`` and ``slits``, or ``None``
        when ``html`` is empty or only whitespace.
    """
    if not html or not html.strip():
        return None

    soup = BeautifulSoup(html, "html.parser")
    h3_tags = soup.find_all("h3")

    # The timestamp is whatever follows " at " in the first <h1>; it is only
    # accepted when the heading contains exactly one such separator.
    timestamp = None
    h1 = soup.find("h1")
    if h1:
        parts = h1.get_text(strip=True).split(" at ")
        if len(parts) == 2:
            timestamp = parts[1].strip()

    def collect_h5_until_next_h3(start_tag) -> list[str]:
        """Return the text of sibling <h5> tags up to the next sibling <h3>."""
        values: list[str] = []
        for sibling in start_tag.find_next_siblings():
            if sibling.name == "h3":
                break
            if sibling.name == "h5":
                values.append(sibling.get_text(strip=True))
        return values

    gratings: list[str] = []
    slits: list[str] = []

    # First <h3> section lists gratings, second lists slits; both sections
    # must be present for either list to be populated.
    if len(h3_tags) >= 2:
        gratings = collect_h5_until_next_h3(h3_tags[0])
        slits = collect_h5_until_next_h3(h3_tags[1])

    return {
        "local_timestamp": timestamp,
        "gratings": gratings,
        "slits": slits,
    }


# ISO (``YYYY-MM-DDTHH:MM:SS``) or slash (``YYYY/MM/DD HH:MM:SS``) datetime.
_SHUTTER_TIMESTAMP_RE = re.compile(
    r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}|\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}"
)


def _parse_shutter(raw: Optional[str]) -> Optional[dict[str, Any]]:
    """
    Parse the shutter status block.

    Parameters
    ----------
    raw : str, optional
        The raw shutter status string.

    Returns
    -------
    dict[str, Any], optional
        Parsed shutter ``state`` (first word, lowercased), ``timestamp`` (as
        string) and the untouched ``raw_string``; ``None`` when ``raw`` is
        empty or only whitespace.
    """
    if not raw or not raw.strip():
        return None

    # Remove quotes *before* trimming whitespace: for input such as
    # '" Open ..."' trimming first would leave a leading space behind the
    # removed quote and the anchored state match below would fail.
    raw_clean = raw.replace('"', "").strip()

    # Extract first word as state.
    state_match = re.match(r"(\w+)", raw_clean)
    state = state_match.group(1).lower() if state_match else None

    # Match ISO or slash-format datetime.
    timestamp_match = _SHUTTER_TIMESTAMP_RE.search(raw)

    return {
        "state": state,
        "timestamp": timestamp_match.group(0) if timestamp_match else None,
        "raw_string": raw,
    }


def _parse_instruments(raw: Optional[str]) -> Optional[dict[str, Any]]:
    """
    Parse the instruments block.

    Parameters
    ----------
    raw : str, optional
        The raw instruments string.

    Returns
    -------
    dict[str, Any], optional
        Whitespace-separated instrument names under ``available`` plus the
        untouched ``raw_string``; ``None`` when ``raw`` is empty or only
        whitespace.
    """
    if not raw or not raw.strip():
        return None

    return {
        "available": raw.strip().split(),
        "raw_string": raw,
    }