diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 0000000..6fe9008 --- /dev/null +++ b/tools/README.md @@ -0,0 +1,42 @@ +# 🛠️ Surya Tools + +Utility tools and scripts to support the Surya foundation model ecosystem. + +## Available Tools + +### 📡 SDO Data Fetcher + +Lightweight tool for fetching real-time and historical Solar Dynamics Observatory observations. + +**Location**: `tools/sdo_data_fetcher/` + +**Purpose**: +- Fetch latest SDO/AIA and HMI observations +- Fetch target-time SDO windows forward from a flare or active-region moment +- Download specific wavelengths for custom analysis +- Review all available wavelengths in a dependency-free local web UI +- Real-time solar activity monitoring +- Quick data exploration without full dataset downloads + +**Quick Start**: +```bash +cd tools/sdo_data_fetcher +pip install -r requirements.txt +python sdo_fetcher_v2.py --list +python sdo_web_ui.py +``` + +See [sdo_data_fetcher/README.md](sdo_data_fetcher/README.md) for detailed documentation. + +--- + +## Contributing New Tools + +We welcome additional tools that support the Surya ecosystem! Consider contributing: +- Data preprocessing utilities +- Visualization tools +- Custom dataset generators +- Analysis scripts +- Integration helpers + +Please follow the repository's contribution guidelines when adding new tools. 
diff --git a/tools/sdo_data_fetcher/README.md b/tools/sdo_data_fetcher/README.md new file mode 100644 index 0000000..0854f50 --- /dev/null +++ b/tools/sdo_data_fetcher/README.md @@ -0,0 +1,264 @@ +# 🌞 SDO Data Fetcher + +[![Python 3.7+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://www.python.org/downloads/) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![NASA SDO](https://img.shields.io/badge/NASA-SDO-red.svg)](https://sdo.gsfc.nasa.gov/) + +A powerful Python application to fetch real-time and historical solar images from NASA's **Solar Dynamics Observatory (SDO)**. Get solar data in seconds with support for all AIA wavelengths and HMI instruments! + +

+ SDO AIA 171 + SDO AIA 304 +

+ +## ✨ Features + +- 🔴 **Live Data** - Fetches the latest SDO observations with automatic provider fallback +- 🌈 **12 Sources** - All available SDO AIA channels plus HMI continuum and magnetogram +- 🕒 **Historical Target Times** - Fetch SDO imagery closest to a specific date/time through Helioviewer +- 🕹️ **Retro Web UI** - Dependency-free local Intel-blue console for reviewing flare windows +- 📊 **Auto Metadata** - Each image includes JSON metadata with observation details +- ⚔ **Redundant Sources** - Automatically falls back across LMSAL Sun Today, Stanford JSOC, NASA SDO, and Helioviewer +- 🎯 **CLI & Python API** - Use from command line or integrate into your code +- 📦 **Batch Downloads** - Get multiple wavelengths simultaneously +- 🔬 **Space Weather Ready** - Perfect for monitoring solar activity + +## 🚀 Quick Start + +### Installation + +```bash +# Clone the repository +git clone https://github.com/ep150de/sdo-data-fetcher.git +cd sdo-data-fetcher + +# Install dependencies +pip install -r requirements.txt +``` + +### Basic Usage + +```bash +# Get the latest AIA 171Å image (default) +python sdo_fetcher_v2.py + +# Prefer high-resolution rendered output first, then fallback +python sdo_fetcher_v2.py --source AIA_171 --provider auto_highres --width 4096 --image-type png + +# Force a specific provider +python sdo_fetcher_v2.py --source AIA_171 --provider lmsal + +# Get a specific wavelength +python sdo_fetcher_v2.py --source AIA_304 + +# Download multiple wavelengths +python sdo_fetcher_v2.py --multiple + +# Download all wavelengths from a target UTC time forward for 4 hours +python sdo_fetcher_v2.py --datetime "2026-02-06T12:30:00Z" --all --hours 4 --cadence 15 + +# Launch the local retro web UI +python sdo_web_ui.py + +# List all available sources +python sdo_fetcher_v2.py --list +``` + +After launching `sdo_web_ui.py`, open `http://127.0.0.1:8765` in a browser to use the web console. 
+ +## 📡 Available Data Sources + +| Source | Wavelength | Temperature | Best For | +|--------|------------|-------------|----------| +| **AIA_94** | 94 Å | ~6 MK | Hot flare plasma | +| **AIA_131** | 131 Å | ~10 MK | Flaring regions | +| **AIA_171** | 171 Å | ~0.6 MK | Quiet corona, coronal loops ⭐ | +| **AIA_193** | 193 Å | ~1.5 MK | Active regions | +| **AIA_211** | 211 Å | ~2 MK | Active regions | +| **AIA_304** | 304 Å | ~0.05 MK | Prominences, filaments | +| **AIA_335** | 335 Å | ~2.5 MK | Active regions | +| **AIA_1600** | 1600 Å | - | Upper photosphere | +| **AIA_1700** | 1700 Å | - | Temperature minimum | +| **AIA_4500** | 4500 Å | - | Visible light photosphere | +| **HMI_Continuum** | Visible | - | Solar surface | +| **HMI_Magnetogram** | - | - | Magnetic fields | + +## 💡 Usage Examples + +### Command Line + +```bash +# Monitor solar activity +python sdo_fetcher_v2.py --source AIA_193 + +# Review a flare or active region from a specific UTC time +python sdo_fetcher_v2.py --datetime "2026-02-06T12:30:00Z" --source AIA_131 --hours 2 --cadence 10 + +# Interpret a timezone-naive time as your local timezone, then convert to UTC +python sdo_fetcher_v2.py --datetime "2026-02-06T07:30:00" --timezone local --all --hours 3 --cadence 15 + +# Space weather check +python sdo_advanced_examples.py # Choose option 3 + +# Download full comparison set +python sdo_advanced_examples.py # Choose option 1 +``` + +### Python Code + +```python +from sdo_fetcher_v2 import SDOFetcher + +# Initialize fetcher +fetcher = SDOFetcher(output_dir="solar_images") + +# Download latest image +metadata = fetcher.get_latest_image_direct(source="AIA_171") + +if metadata: + print(f"Image saved: {metadata['filepath']}") + print(f"Provider: {metadata['provider_name']}") + print(f"Observation time: {metadata['observation_time']}") + +# Download multiple wavelengths +sources = ["AIA_171", "AIA_193", "AIA_304", "HMI_Magnetogram"] +results = fetcher.download_multiple(sources) + +# Download all 
wavelengths forward from a target time +manifest = fetcher.download_time_series( + sources=list(fetcher.SDO_SOURCES.keys()), + start_time="2026-02-06T12:30:00Z", + timezone_mode="utc", + hours=4, + cadence_minutes=15, +) +``` + +## 🕹️ Historical Solar Moment Web UI + +Run the dependency-free local web app: + +```bash +python sdo_web_ui.py +``` + +Open `http://127.0.0.1:8765` and enter: + +- Target date and time +- Timezone mode: `UTC` or `Local timezone` +- Forward-only duration in hours +- Sampling cadence in minutes +- Image width and format +- All wavelengths or selected wavelengths + +The UI downloads the closest available Helioviewer-rendered image for each selected SDO source at each sample time. Results are grouped by requested timestamp with links to each image and its JSON metadata. + +Historical fetching uses Helioviewer because the other providers in this repository are latest/browse feeds rather than arbitrary-time APIs. + +## 🔁 Redundant Live Data Providers + +The fetchers now support a provider chain for current imagery: + +- **LMSAL Sun Today** - Daily AIA and HMI browse images at `suntoday.lmsal.com` +- **Stanford JSOC** - Latest HMI browse products at `jsoc1.stanford.edu` +- **NASA SDO** - Latest public browse images at `sdo.gsfc.nasa.gov` +- **Helioviewer** - API-based rendered imagery fallback (and high-resolution first mode) + +Use `--provider auto` for browse-first fallback, `--provider auto_highres` for high-resolution-first fallback, or pick one explicitly with `--provider lmsal`, `--provider jsoc`, `--provider nasa`, or `--provider helioviewer`. + +For Helioviewer downloads (latest and historical), you can also request render settings with `--width` and `--image-type`. +If fallback lands on browse providers (LMSAL/JSOC/NASA), those settings are not applied and metadata marks `render_settings_applied: false` with `resolution_class: browse_fixed`. + +## 🎓 Advanced Features + +The `sdo_advanced_examples.py` script includes: + +1. 
**Multi-wavelength comparison sets** - Download complementary wavelengths for analysis +2. **Active region monitoring** - Track solar flares and active regions +3. **Space weather quick check** - Rapid assessment tool +4. **Prominence monitoring** - Track eruptions and filaments +5. **Continuous monitoring** - Automated periodic downloads +6. **Monitoring daemon generator** - Create long-running monitoring scripts + +```bash +python sdo_advanced_examples.py +``` + +## 📂 Output Structure + +``` +sdo_data/ +├── SDO_AIA_171_20260206_123456.jpg # Solar image +├── SDO_AIA_171_20260206_123456.json # Metadata +├── SDO_AIA_304_20260206_123457.jpg +├── SDO_AIA_304_20260206_123457.json +└── historical_20260206_123000Z/ + ├── manifest.json + └── 20260206_123000Z/ + ├── SDO_AIA_171_20260206_123000Z.png + └── SDO_AIA_171_20260206_123000Z.json +``` + +Each JSON file contains: +- Source and wavelength information +- Exact observation timestamp +- Requested historical timestamp and closest actual observation timestamp +- Time delta between requested and actual observation +- Download metadata +- Direct image URL +- Render provenance fields (`requested_image_width`, `requested_image_type`, `render_settings_applied`, `resolution_class`) + +## 🔬 About NASA's SDO + +The **Solar Dynamics Observatory** is a NASA mission launched in February 2010 to study the Sun's atmosphere and magnetic activity. 
It provides: + +- 🛰️ **24/7 observations** from geosynchronous orbit +- 📸 **4K images every 12 seconds** in 10 wavelengths +- 🧲 **Magnetic field measurements** of the Sun's surface +- ☀️ **Real-time space weather monitoring** +- 📊 **Over 20 million images captured** since launch + +Learn more at [sdo.gsfc.nasa.gov](https://sdo.gsfc.nasa.gov/) + +## 📖 Documentation + +- **[SDO_GUIDE.md](SDO_GUIDE.md)** - Comprehensive guide with detailed examples +- **[QUICK_REFERENCE.txt](QUICK_REFERENCE.txt)** - Quick command reference card +- **[NASA SDO Website](https://sdo.gsfc.nasa.gov/)** - Official mission website +- **[Helioviewer.org](https://helioviewer.org/)** - Interactive solar image viewer + +## 🛠️ Requirements + +- Python 3.7+ +- `requests`, `sunpy`, `drms`, and `astropy` (all installed via requirements.txt) + +## 🤝 Contributing + +Contributions are welcome! Feel free to: +- Report bugs +- Suggest new features +- Submit pull requests +- Improve documentation + +## 📄 License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
+ +## 🌟 Acknowledgments + +- **NASA/SDO** and the AIA, EVE, and HMI science teams for providing open access to solar data +- **Helioviewer Project** for API access and tools +- All solar physics researchers and space weather forecasters + +## šŸ”— Useful Links + +- [SDO Mission Overview](https://sdo.gsfc.nasa.gov/mission/) +- [Space Weather Prediction Center](https://www.swpc.noaa.gov/) +- [Solar Data Analysis Center](https://umbra.nascom.nasa.gov/) +- [Helioviewer API Docs](https://api.helioviewer.org/docs/) + +--- + +**Made with ā˜€ļø for solar physics research, education, and space weather monitoring** + +*If you find this tool useful, please ⭐ star this repository!* diff --git a/tools/sdo_data_fetcher/requirements.txt b/tools/sdo_data_fetcher/requirements.txt new file mode 100644 index 0000000..c3a0f78 --- /dev/null +++ b/tools/sdo_data_fetcher/requirements.txt @@ -0,0 +1,4 @@ +requests>=2.31.0 +sunpy +drms +astropy diff --git a/tools/sdo_data_fetcher/sdo_advanced_examples.py b/tools/sdo_data_fetcher/sdo_advanced_examples.py new file mode 100644 index 0000000..f3532e6 --- /dev/null +++ b/tools/sdo_data_fetcher/sdo_advanced_examples.py @@ -0,0 +1,292 @@ +""" +Advanced SDO Examples - Building on the basic fetcher +Demonstrates monitoring, time-series, and composite image creation +""" + +import time +from datetime import datetime, timezone +from pathlib import Path +from sdo_fetcher_v2 import SDOFetcher + + +def continuous_monitor(interval_seconds=300, sources=None, provider="auto"): + """ + Continuously monitor and download SDO images at specified intervals + + Args: + interval_seconds: Time between downloads (default: 5 minutes) + sources: List of sources to monitor (default: AIA_171) + """ + if sources is None: + sources = ["AIA_171"] + + fetcher = SDOFetcher(output_dir="monitoring") + + print(f"Starting continuous monitoring...") + print(f"Sources: {', '.join(sources)}") + print(f"Interval: {interval_seconds} seconds") + print(f"Provider mode: {provider}") + 
print(f"Press Ctrl+C to stop\n") + + iteration = 0 + try: + while True: + iteration += 1 + timestamp = datetime.now(timezone.utc).isoformat() + + print(f"\n{'='*60}") + print(f"Iteration #{iteration} at {timestamp}") + print(f"{'='*60}") + + for source in sources: + try: + result = fetcher.get_latest_image_direct(source, provider=provider) + if result: + print(f"āœ“ {source} downloaded successfully via {result.get('provider_name', result.get('provider', 'unknown'))}") + else: + print(f"āœ— {source} failed") + except Exception as e: + print(f"āœ— Error downloading {source}: {e}") + + print(f"\nWaiting {interval_seconds} seconds until next download...") + time.sleep(interval_seconds) + + except KeyboardInterrupt: + print(f"\n\nMonitoring stopped. Downloaded {iteration} sets of images.") + print(f"Images saved in: {fetcher.output_dir}") + + +def download_comparison_set(): + """ + Download a comparison set of multiple wavelengths + Useful for multi-wavelength solar analysis + """ + print("\n" + "="*60) + print("Downloading Multi-Wavelength Comparison Set") + print("="*60 + "\n") + + # Select complementary wavelengths + sources = [ + "AIA_171", # Quiet corona + "AIA_193", # Active regions + "AIA_304", # Prominences + "AIA_211", # Active regions (hotter) + "HMI_Magnetogram", # Magnetic field + "HMI_Continuum", # Visible surface + ] + + fetcher = SDOFetcher(output_dir="comparison_set") + results = fetcher.download_multiple(sources, provider="auto") + + print("\n" + "="*60) + print("Comparison Set Complete!") + print("="*60) + print(f"Downloaded {len(results)} images") + print("\nUse these for:") + print(" - Multi-wavelength composite images") + print(" - Temperature analysis") + print(" - Active region identification") + print(" - Prominence and filament studies") + print("="*60 + "\n") + + return results + + +def download_active_region_set(): + """ + Download wavelengths optimal for observing active regions and flares + """ + print("\n" + "="*60) + print("Downloading 
Active Region / Flare Observation Set") + print("="*60 + "\n") + + # Wavelengths best for active regions and flares + sources = [ + "AIA_94", # Hot flare plasma + "AIA_131", # Flaring regions + "AIA_193", # Active regions + "AIA_211", # Active regions + "HMI_Magnetogram", # Magnetic field + ] + + fetcher = SDOFetcher(output_dir="active_regions") + results = fetcher.download_multiple(sources, provider="auto") + + print("\nActive region monitoring complete!") + print("Check these images for:") + print(" - Solar flares (bright spots in 94ƅ and 131ƅ)") + print(" - Active region structure (193ƅ, 211ƅ)") + print(" - Sunspot magnetic complexity (HMI Magnetogram)") + + return results + + +def quick_space_weather_check(): + """ + Quick download for space weather assessment + """ + print("\n" + "="*60) + print("SPACE WEATHER QUICK CHECK") + print("="*60 + "\n") + + fetcher = SDOFetcher(output_dir="space_weather") + + # Get the most relevant images for space weather + sources = ["AIA_193", "HMI_Magnetogram"] + + print("Downloading key space weather indicators...") + results = fetcher.download_multiple(sources, provider="auto") + + if len(results) == 2: + print("\n" + "="*60) + print("READY FOR ANALYSIS") + print("="*60) + print("\nCheck the images for:") + print(" šŸ“ø AIA 193: Active regions and coronal holes") + print(" 🧲 HMI Magnetogram: Complex magnetic fields (flare potential)") + print("\nLook for:") + print(" āš ļø Dark regions = coronal holes → fast solar wind") + print(" āš ļø Bright active regions = potential for flares") + print(" āš ļø Complex magnetograms = higher flare risk") + print("="*60 + "\n") + + return results + + +def download_prominence_monitoring(): + """ + Download wavelengths optimal for prominence/filament observation + """ + print("\n" + "="*60) + print("Prominence/Filament Monitoring Set") + print("="*60 + "\n") + + # Best wavelengths for prominences + sources = [ + "AIA_304", # Primary prominence wavelength + "AIA_171", # Context (corona) + 
"HMI_Continuum", # Visible disk + ] + + fetcher = SDOFetcher(output_dir="prominences") + results = fetcher.download_multiple(sources, provider="auto") + + print("\nProminence monitoring complete!") + print("304ƅ is best for seeing prominences on the solar limb") + + return results + + +def create_monitoring_script(): + """ + Generate a standalone monitoring script + """ + script_content = '''#!/usr/bin/env python3 +""" +Automated SDO Monitoring Script +Runs continuously and downloads images every 15 minutes +""" + +import time +from datetime import datetime, timezone +from sdo_fetcher_v2 import SDOFetcher +import logging + +# Setup logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + handlers=[ + logging.FileHandler('sdo_monitor.log'), + logging.StreamHandler() + ] +) + +def main(): + fetcher = SDOFetcher(output_dir="continuous_monitoring") + sources = ["AIA_171", "AIA_193", "HMI_Magnetogram"] + interval = 900 # 15 minutes + + logging.info("Starting SDO continuous monitoring") + logging.info(f"Sources: {sources}") + logging.info(f"Interval: {interval} seconds") + + iteration = 0 + while True: + try: + iteration += 1 + logging.info(f"=== Iteration {iteration} ===") + + for source in sources: + try: + result = fetcher.get_latest_image_direct(source, provider="auto") + if result: + logging.info(f"āœ“ Downloaded {source} via {result.get('provider_name', result.get('provider', 'unknown'))}") + except Exception as e: + logging.error(f"āœ— Failed to download {source}: {e}") + + logging.info(f"Waiting {interval} seconds...") + time.sleep(interval) + + except KeyboardInterrupt: + logging.info("Monitoring stopped by user") + break + except Exception as e: + logging.error(f"Unexpected error: {e}") + time.sleep(60) # Wait 1 minute before retrying + +if __name__ == "__main__": + main() +''' + + with open("monitoring_daemon.py", 'w', encoding='utf-8') as f: + f.write(script_content) + + print("\nāœ“ Created 
'monitoring_daemon.py'") + print("Run it with: python monitoring_daemon.py") + print("It will continuously download SDO images every 15 minutes") + + +def main(): + """Main menu for advanced examples""" + print("\n" + "="*60) + print("SDO Advanced Examples") + print("="*60) + print("\n1. Download multi-wavelength comparison set") + print("2. Download active region/flare observation set") + print("3. Quick space weather check") + print("4. Download prominence monitoring set") + print("5. Start continuous monitoring (Ctrl+C to stop)") + print("6. Create monitoring daemon script") + print("7. Exit") + + choice = input("\nSelect option (1-7): ").strip() + + if choice == "1": + download_comparison_set() + elif choice == "2": + download_active_region_set() + elif choice == "3": + quick_space_weather_check() + elif choice == "4": + download_prominence_monitoring() + elif choice == "5": + sources = input("Enter sources (comma-separated, or press Enter for AIA_171): ").strip() + if sources: + sources = [s.strip() for s in sources.split(",")] + else: + sources = ["AIA_171"] + interval = input("Enter interval in seconds (default 300): ").strip() + interval = int(interval) if interval else 300 + provider = input("Enter provider (auto/lmsal/jsoc/nasa/helioviewer, default auto): ").strip() or "auto" + continuous_monitor(interval, sources, provider) + elif choice == "6": + create_monitoring_script() + elif choice == "7": + print("Goodbye!") + else: + print("Invalid choice") + + +if __name__ == "__main__": + main() diff --git a/tools/sdo_data_fetcher/sdo_data_fetcher.py b/tools/sdo_data_fetcher/sdo_data_fetcher.py new file mode 100644 index 0000000..af007d9 --- /dev/null +++ b/tools/sdo_data_fetcher/sdo_data_fetcher.py @@ -0,0 +1,194 @@ +""" +SDO (Solar Dynamic Observatory) Data Fetcher + +This script fetches the latest solar images from NASA's Solar Dynamic Observatory +using the Helioviewer API. It supports multiple instruments and wavelengths. 
+""" + +import requests +from datetime import datetime, timedelta, timezone +import json +from pathlib import Path +from typing import Optional, Dict +import argparse +from sdo_provider import SDOProviderClient, SDO_SOURCES + + +class SDODataFetcher: + """Fetches latest SDO data from Helioviewer API""" + + BASE_URL = "https://api.helioviewer.org/v2/" + SDO_SOURCES = SDO_SOURCES + + def __init__(self, output_dir: str = "sdo_data"): + """Initialize the fetcher with an output directory""" + self.output_dir = Path(output_dir) + self.output_dir.mkdir(exist_ok=True) + self.provider_client = SDOProviderClient(output_dir=output_dir) + + def get_latest_available_date(self, source: str = "AIA_171", provider: str = "auto") -> Optional[str]: + """Query the API for the latest available SDO observation time""" + timestamp = self.provider_client.get_latest_timestamp(source=source, provider=provider) + if timestamp: + print(f"Latest SDO data available: {timestamp}") + return timestamp + + fallback = (datetime.now(timezone.utc) - timedelta(minutes=30)).strftime("%Y-%m-%dT%H:%M:%S.000Z") + print(f"Using fallback date: {fallback}") + return fallback + + def get_latest_image(self, source: str = "AIA_171", image_scale: float = 2.4, provider: str = "auto") -> Optional[Dict]: + """ + Fetch the latest SDO image + + Args: + source: SDO source identifier (e.g., 'AIA_171', 'HMI_Magnetogram') + image_scale: Resolution in arcseconds per pixel (lower = higher resolution) + + Returns: + Dictionary with image metadata and file path + """ + _ = image_scale + if source not in self.SDO_SOURCES: + raise ValueError(f"Invalid source. 
Choose from: {list(self.SDO_SOURCES.keys())}") + + print(f"Fetching latest {source} data...") + print(f"Description: {self.SDO_SOURCES[source]['description']}") + return self.provider_client.download_latest_image(source=source, provider=provider) + + def get_latest_data_timestamp(self, source: str = "AIA_171", provider: str = "auto") -> Optional[str]: + """Get the timestamp of the latest available SDO data""" + try: + return self.provider_client.get_latest_timestamp(source=source, provider=provider) + except requests.exceptions.RequestException as e: + print(f"Error getting latest timestamp: {e}") + return None + + def download_multiple_wavelengths(self, sources: list = None, provider: str = "auto"): + """ + Download images from multiple SDO sources + + Args: + sources: List of source identifiers (defaults to common wavelengths) + """ + if sources is None: + sources = ["AIA_171", "AIA_193", "AIA_304", "HMI_Magnetogram"] + + results = [] + for source in sources: + print(f"\n{'='*60}") + result = self.get_latest_image(source, provider=provider) + if result: + results.append(result) + + print(f"\n{'='*60}") + print(f"Downloaded {len(results)} images successfully!") + return results + + @staticmethod + def list_available_sources(): + """Print all available SDO sources""" + print("\nAvailable SDO Data Sources:") + print("="*60) + for key, value in SDODataFetcher.SDO_SOURCES.items(): + print(f"{key:20} - {value['description']}") + print("\nAvailable providers:") + SDOProviderClient.list_providers() + + +def main(): + """Main CLI interface""" + parser = argparse.ArgumentParser( + description="Fetch latest SDO (Solar Dynamic Observatory) data", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Download latest AIA 171 ƅ image + python sdo_data_fetcher.py + + # Download specific wavelength + python sdo_data_fetcher.py --source AIA_304 + + # Download multiple wavelengths + python sdo_data_fetcher.py --multiple + + # List all available sources 
+ python sdo_data_fetcher.py --list + + # Download to specific directory + python sdo_data_fetcher.py --output my_sdo_images + """ + ) + + parser.add_argument( + '--source', '-s', + type=str, + default='AIA_171', + help='SDO source to fetch (default: AIA_171)' + ) + + parser.add_argument( + '--output', '-o', + type=str, + default='sdo_data', + help='Output directory for downloaded data (default: sdo_data)' + ) + + parser.add_argument( + '--scale', + type=float, + default=2.4, + help='Image scale in arcseconds per pixel (default: 2.4)' + ) + + parser.add_argument( + '--multiple', '-m', + action='store_true', + help='Download multiple common wavelengths' + ) + + parser.add_argument( + '--list', '-l', + action='store_true', + help='List all available SDO sources' + ) + + parser.add_argument( + '--timestamp', '-t', + action='store_true', + help='Get the timestamp of latest available data' + ) + + parser.add_argument( + '--provider', '-p', + type=str, + default='auto', + help='Data provider: auto, lmsal, jsoc, nasa, helioviewer' + ) + + args = parser.parse_args() + + # List available sources and exit + if args.list: + SDODataFetcher.list_available_sources() + return + + # Initialize fetcher + fetcher = SDODataFetcher(output_dir=args.output) + + # Get latest timestamp + if args.timestamp: + timestamp = fetcher.get_latest_data_timestamp(source=args.source, provider=args.provider) + if timestamp: + print(f"Latest SDO data available at: {timestamp}") + return + + # Download data + if args.multiple: + fetcher.download_multiple_wavelengths(provider=args.provider) + else: + fetcher.get_latest_image(source=args.source, image_scale=args.scale, provider=args.provider) + + +if __name__ == "__main__": + main() diff --git a/tools/sdo_data_fetcher/sdo_fetcher_v2.py b/tools/sdo_data_fetcher/sdo_fetcher_v2.py new file mode 100644 index 0000000..1e396d5 --- /dev/null +++ b/tools/sdo_data_fetcher/sdo_fetcher_v2.py @@ -0,0 +1,231 @@ +""" +SDO Data Fetcher v2 - Alternative Implementation 
class SDOFetcher:
    """Convenience front end over ``SDOProviderClient``.

    Every download method delegates to ``self.provider_client``; this class
    only adds default source lists and console summaries.
    """

    # Re-exported so callers can reach the catalogue as SDOFetcher.SDO_SOURCES.
    SDO_SOURCES = SDO_SOURCES

    def __init__(self, output_dir: str = "sdo_data"):
        """Create the output directory and the underlying provider client.

        Args:
            output_dir: Directory that downloaded files are written to.
        """
        self.output_dir = Path(output_dir)
        # parents=True keeps nested paths such as "runs/today" working, which
        # is also how SDOProviderClient creates its own output directory.
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.provider_client = SDOProviderClient(output_dir=output_dir)

    def get_latest_image_png(
        self,
        source: str = "AIA_171",
        provider: str = "auto",
        width: int = 1024,
        image_type: str = "png",
    ) -> Optional[Dict]:
        """Fetch the latest image for ``source`` without printing a summary.

        Despite the historical name, the format is whatever ``image_type``
        requests.

        Args:
            source: SDO source identifier (a key of ``SDO_SOURCES``).
            provider: Provider name or fallback-chain alias.
            width: Requested image width in pixels.
            image_type: Requested image format.

        Returns:
            The value returned by ``download_latest_image``: a metadata
            dictionary, or ``None`` when no provider delivered an image.
        """
        return self.provider_client.download_latest_image(
            source=source,
            provider=provider,
            width=width,
            image_type=image_type,
        )

    def get_latest_image_direct(
        self,
        source: str = "AIA_171",
        provider: str = "auto",
        width: int = 1024,
        image_type: str = "png",
    ) -> Optional[Dict]:
        """Fetch the latest image for ``source`` and print a success banner.

        Takes the same arguments and returns the same value as
        ``get_latest_image_png``; the banner is printed only when a result
        came back.
        """
        result = self.provider_client.download_latest_image(
            source=source,
            provider=provider,
            width=width,
            image_type=image_type,
        )
        if result:
            print(f"\n{'='*60}")
            print(f"Success! Downloaded latest SDO {source} image")
            print(f"Provider: {result.get('provider_name', result.get('provider', 'unknown'))}")
            if result.get("observation_time"):
                print(f"Observation time: {result['observation_time']}")
            print(f"{'='*60}\n")
        return result

    def download_multiple(
        self,
        sources: Optional[list] = None,
        provider: str = "auto",
        width: int = 1024,
        image_type: str = "png",
    ):
        """Download the latest image for each of several sources.

        Args:
            sources: Source identifiers; ``None`` selects a default set of
                three AIA channels plus the HMI magnetogram.
            provider: Provider name or fallback-chain alias.
            width: Requested image width in pixels.
            image_type: Requested image format.

        Returns:
            List of metadata dictionaries for the sources that succeeded.
        """
        if sources is None:
            sources = ["AIA_171", "AIA_193", "AIA_304", "HMI_Magnetogram"]

        print(f"\nDownloading {len(sources)} different SDO images...")
        print("="*60)

        results = []
        for source in sources:
            result = self.get_latest_image_direct(
                source,
                provider=provider,
                width=width,
                image_type=image_type,
            )
            if result:
                results.append(result)

        print(f"\n{'='*60}")
        print(f"Successfully downloaded {len(results)}/{len(sources)} images")
        print(f"{'='*60}\n")

        return results

    def download_at_time(
        self,
        source: str = "AIA_171",
        target_time: Optional[str] = None,
        timezone_mode: str = "utc",
        width: int = 1024,
        image_type: str = "png",
    ) -> Optional[Dict]:
        """Download the image closest to a requested date/time.

        Raises:
            ValueError: If ``target_time`` is missing or empty.
        """
        if not target_time:
            raise ValueError("target_time is required")
        return self.provider_client.download_image_at(
            source=source,
            target_time=target_time,
            timezone_mode=timezone_mode,
            width=width,
            image_type=image_type,
        )

    def download_time_series(
        self,
        sources: Optional[list] = None,
        start_time: Optional[str] = None,
        timezone_mode: str = "utc",
        hours: float = 1.0,
        cadence_minutes: int = 15,
        width: int = 1024,
        image_type: str = "png",
    ):
        """Download a forward-only historical sample window.

        Returns:
            The manifest dictionary produced by ``download_samples``.

        Raises:
            ValueError: If ``start_time`` is missing or empty.
        """
        if not start_time:
            raise ValueError("start_time is required")
        return self.provider_client.download_samples(
            sources=sources,
            start_time=start_time,
            timezone_mode=timezone_mode,
            hours=hours,
            cadence_minutes=cadence_minutes,
            width=width,
            image_type=image_type,
        )

    @staticmethod
    def list_sources():
        """Print every available source key with its name and wavelength."""
        print("\n" + "="*60)
        print("Available SDO Data Sources")
        print("="*60)
        for key, info in SDOFetcher.SDO_SOURCES.items():
            print(f"  {key:20} - {info['name']} ({info['wavelength']})")
        print("="*60 + "\n")
def main():
    """Command-line entry point for the SDO fetcher.

    Modes, in order of precedence:
      * ``--list``: print sources and providers, then exit.
      * ``--datetime``: historical fetch for ``--source`` (or every source
        with ``--all``). A positive ``--hours`` and ``--cadence`` produce a
        time series; otherwise a single closest image per source is fetched.
      * ``--all`` / ``--multiple``: latest image for every source / for the
        default multi-wavelength set.
      * otherwise: latest image for ``--source``.
    """
    parser = argparse.ArgumentParser(
        description="SDO Data Fetcher v2 - Fetch latest solar images from NASA's SDO",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument('--source', '-s', default='AIA_171',
                        help='SDO source (default: AIA_171)')
    parser.add_argument('--output', '-o', default='sdo_data',
                        help='Output directory (default: sdo_data)')
    parser.add_argument('--multiple', '-m', action='store_true',
                        help='Download multiple wavelengths')
    parser.add_argument('--list', '-l', action='store_true',
                        help='List available sources')
    parser.add_argument('--provider', '-p', default='auto',
                        help='Data provider: auto, auto_highres, lmsal, jsoc, nasa, helioviewer')
    parser.add_argument('--datetime', '--date', dest='target_datetime',
                        help='Target date/time for historical fetch, e.g. 2026-02-06T12:30:00Z')
    parser.add_argument('--timezone', choices=['utc', 'local'], default='utc',
                        help='Interpret timezone-naive --datetime as UTC or local time (default: utc)')
    parser.add_argument('--all', action='store_true',
                        help='Download all available SDO wavelengths/sources')
    parser.add_argument('--hours', type=float, default=1.0,
                        help='Forward time window in hours for --datetime (default: 1)')
    parser.add_argument('--cadence', type=int, default=15,
                        help='Sample cadence in minutes for --datetime (default: 15)')
    parser.add_argument('--width', type=int, default=1024,
                        help='Requested image width in pixels for Helioviewer downloads (default: 1024)')
    parser.add_argument('--image-type', choices=['png', 'jpg', 'webp'], default='png',
                        help='Requested image type for Helioviewer downloads (default: png)')

    args = parser.parse_args()

    if args.list:
        SDOFetcher.list_sources()
        SDOProviderClient.list_providers()
        return

    fetcher = SDOFetcher(output_dir=args.output)

    if args.target_datetime:
        sources = list(SDO_SOURCES.keys()) if args.all else [args.source]
        if args.hours > 0 and args.cadence > 0:
            manifest = fetcher.download_time_series(
                sources=sources,
                start_time=args.target_datetime,
                timezone_mode=args.timezone,
                hours=args.hours,
                cadence_minutes=args.cadence,
                width=args.width,
                image_type=args.image_type,
            )
            print(f"Manifest: {manifest['manifest_filepath']}")
        else:
            # A non-positive window or cadence means "just the closest frame".
            for source in sources:
                fetcher.download_at_time(
                    source=source,
                    target_time=args.target_datetime,
                    timezone_mode=args.timezone,
                    width=args.width,
                    image_type=args.image_type,
                )
        return

    if args.all:
        # Previously --all was honoured only together with --datetime and was
        # silently ignored here, downloading a single source instead.
        fetcher.download_multiple(
            sources=list(SDO_SOURCES.keys()),
            provider=args.provider,
            width=args.width,
            image_type=args.image_type,
        )
    elif args.multiple:
        fetcher.download_multiple(provider=args.provider, width=args.width, image_type=args.image_type)
    else:
        fetcher.get_latest_image_direct(
            source=args.source,
            provider=args.provider,
            width=args.width,
            image_type=args.image_type,
        )


if __name__ == "__main__":
    main()
a/tools/sdo_data_fetcher/sdo_provider.py b/tools/sdo_data_fetcher/sdo_provider.py new file mode 100644 index 0000000..4dfeabf --- /dev/null +++ b/tools/sdo_data_fetcher/sdo_provider.py @@ -0,0 +1,745 @@ +""" +Shared SDO data provider logic with automatic fallback. +""" + +from datetime import datetime, timedelta, timezone +import json +from pathlib import Path +import time +from typing import Callable, Dict, Iterable, List, Optional, Union + +import requests + + +SDO_SOURCES = { + "AIA_94": { + "sourceId": 8, + "name": "AIA 94", + "wavelength": "94ƅ", + "description": "AIA 94 ƅ - Hot flare plasma", + "nasa_code": "0094", + "lmsal_code": "0094", + }, + "AIA_131": { + "sourceId": 9, + "name": "AIA 131", + "wavelength": "131ƅ", + "description": "AIA 131 ƅ - Flaring regions", + "nasa_code": "0131", + "lmsal_code": "0131", + }, + "AIA_171": { + "sourceId": 10, + "name": "AIA 171", + "wavelength": "171ƅ", + "description": "AIA 171 ƅ - Quiet corona and coronal loops", + "nasa_code": "0171", + "lmsal_code": "0171", + }, + "AIA_193": { + "sourceId": 11, + "name": "AIA 193", + "wavelength": "193ƅ", + "description": "AIA 193 ƅ - Hot plasma in active regions", + "nasa_code": "0193", + "lmsal_code": "0193", + }, + "AIA_211": { + "sourceId": 12, + "name": "AIA 211", + "wavelength": "211ƅ", + "description": "AIA 211 ƅ - Active regions", + "nasa_code": "0211", + "lmsal_code": "0211", + }, + "AIA_304": { + "sourceId": 13, + "name": "AIA 304", + "wavelength": "304ƅ", + "description": "AIA 304 ƅ - Chromosphere and prominence", + "nasa_code": "0304", + "lmsal_code": "0304", + }, + "AIA_335": { + "sourceId": 14, + "name": "AIA 335", + "wavelength": "335ƅ", + "description": "AIA 335 ƅ - Active regions", + "nasa_code": "0335", + "lmsal_code": "0335", + }, + "AIA_1600": { + "sourceId": 15, + "name": "AIA 1600", + "wavelength": "1600ƅ", + "description": "AIA 1600 ƅ - Upper photosphere", + "nasa_code": "1600", + "lmsal_code": "1600", + }, + "AIA_1700": { + "sourceId": 16, + "name": "AIA 
def parse_target_datetime(value: Union[str, datetime], timezone_mode: str = "utc") -> datetime:
    """Parse a target time and return an aware UTC datetime.

    Args:
        value: A ``datetime`` or an ISO-8601 style string. A trailing ``Z``
            (either case) and a space between date and time are accepted.
        timezone_mode: How to interpret a value that carries no UTC offset:
            ``"utc"`` or ``"local"`` (case-insensitive). Ignored when the
            value already has an offset.

    Returns:
        The same instant expressed in UTC.

    Raises:
        TypeError: If ``value`` is neither a string nor a ``datetime``.
        ValueError: If the string is empty or not parseable, or if
            ``timezone_mode`` is unknown for a naive value.
    """
    if isinstance(value, datetime):
        parsed = value
    elif isinstance(value, str):
        text = value.strip()
        if not text:
            raise ValueError("Target datetime cannot be empty")
        # fromisoformat on older Python versions rejects the "Z" suffix.
        if text.endswith(("Z", "z")):
            text = text[:-1] + "+00:00"
        if "T" not in text and " " in text:
            text = text.replace(" ", "T", 1)
        parsed = datetime.fromisoformat(text)
    else:
        raise TypeError("Target datetime must be a str or datetime")

    if parsed.tzinfo is None:
        mode = timezone_mode.lower()
        if mode == "local":
            # astimezone() on a naive value assumes the system local zone.
            return parsed.astimezone(timezone.utc)
        if mode != "utc":
            # Previously any unknown mode was silently read as UTC, which
            # shifts the requested instant without warning.
            raise ValueError("timezone_mode must be utc or local")
        parsed = parsed.replace(tzinfo=timezone.utc)

    return parsed.astimezone(timezone.utc)


def format_utc_datetime(value: datetime) -> str:
    """Format a datetime for Helioviewer APIs (``YYYY-MM-DDTHH:MM:SSZ``)."""
    return value.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def utc_slug(value: datetime) -> str:
    """Create a filesystem-safe UTC timestamp (``YYYYMMDD_HHMMSSZ``)."""
    return value.astimezone(timezone.utc).strftime("%Y%m%d_%H%M%SZ")


def _parse_helioviewer_datetime(value: Optional[str]) -> Optional[datetime]:
    """Parse a date string from a Helioviewer response into aware UTC.

    Values without an offset are taken to be UTC.

    Returns:
        The parsed datetime, or ``None`` when the value is missing, not a
        string, or not parseable. Returning ``None`` instead of raising lets
        the caller fall back to the raw string rather than aborting after the
        image has already been written to disk.
    """
    if not value or not isinstance(value, str):
        return None
    text = value.strip().replace(" ", "T", 1)
    if text.endswith(("Z", "z")):
        text = text[:-1] + "+00:00"
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)
def download_image_at(
    self,
    source: str,
    target_time: Union[str, datetime],
    timezone_mode: str = "utc",
    width: int = 1024,
    image_type: str = "png",
    output_subdir: Optional[str] = None,
) -> Optional[Dict]:
    """Download the image closest to a requested UTC or local target time.

    Method of ``SDOProviderClient``. Asks Helioviewer's ``getClosestImage``
    for the nearest frame, downloads it through ``downloadImage``, and writes
    a JSON metadata file next to the image.

    Args:
        source: Key of ``SDO_SOURCES``.
        target_time: Requested instant (string or datetime).
        timezone_mode: How to read a target time without a UTC offset.
        width: Requested image width in pixels.
        image_type: Requested image format.
        output_subdir: Optional sub-directory below ``self.output_dir``.

    Returns:
        The metadata dictionary, or ``None`` when Helioviewer reports no
        image id.

    Raises:
        ValueError: If ``source`` is unknown (helpers may raise it as well).
    """
    if source not in SDO_SOURCES:
        raise ValueError(f"Invalid source. Choose from: {list(SDO_SOURCES.keys())}")

    width, image_type = self._normalize_render_params(width, image_type)

    target_dt = parse_target_datetime(target_time, timezone_mode=timezone_mode)
    source_info = SDO_SOURCES[source]
    requested_time = format_utc_datetime(target_dt)

    print(f"\nFetching {source} closest to {requested_time}...")
    print(f"Wavelength: {source_info['wavelength']}")
    print("Provider: Helioviewer API")

    info_response = self._request_with_retries(
        "https://api.helioviewer.org/v2/getClosestImage/",
        params={
            "date": requested_time,
            "sourceId": source_info["sourceId"],
        },
        timeout=45,
    )
    info_response.raise_for_status()
    image_info = info_response.json()
    image_id = image_info.get("id")

    if not image_id:
        return None

    response = self._request_with_retries(
        "https://api.helioviewer.org/v2/downloadImage/",
        params={
            "id": image_id,
            "width": width,
            "type": image_type,
        },
        timeout=90,
        stream=True,
    )
    try:
        response.raise_for_status()

        target_dir = self.output_dir / output_subdir if output_subdir else self.output_dir
        target_dir.mkdir(parents=True, exist_ok=True)
        # The file is named after the requested time, not the observed one.
        filepath = target_dir / f"SDO_{source}_{utc_slug(target_dt)}.{image_type}"

        with open(filepath, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    finally:
        # A streamed response holds its connection until it is closed; a
        # long time series would otherwise accumulate open connections.
        response.close()

    actual_dt = _parse_helioviewer_datetime(image_info.get("date"))
    signed_delta = None
    abs_delta = None
    if actual_dt:
        signed_delta = (actual_dt - target_dt).total_seconds()
        abs_delta = abs(signed_delta)

    # Fall back to the raw API value when the date could not be parsed.
    observed = format_utc_datetime(actual_dt) if actual_dt else image_info.get("date")

    metadata = {
        "source": source,
        "name": source_info["name"],
        "wavelength": source_info["wavelength"],
        "description": source_info["description"],
        "provider": "helioviewer",
        "provider_name": PROVIDER_LABELS["helioviewer"],
        "filepath": str(filepath),
        "download_time": datetime.now(timezone.utc).isoformat(),
        "image_url": response.url,
        "content_type": response.headers.get("Content-Type"),
        "requested_time": requested_time,
        "observation_time": observed,
        "actual_observation_time": observed,
        "delta_seconds": signed_delta,
        "abs_delta_seconds": abs_delta,
        "image_id": image_id,
        "image_width": width,
        "image_type": image_type,
        "helioviewer_metadata": image_info,
    }

    metadata_file = filepath.with_suffix(".json")
    metadata["metadata_filepath"] = str(metadata_file)
    with open(metadata_file, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)

    # Status glyphs restored from mis-decoded UTF-8.
    print(f"✓ Image saved: {filepath}")
    print(f"✓ Metadata saved: {metadata_file}")
    if metadata["actual_observation_time"]:
        print(f"✓ Observation time: {metadata['actual_observation_time']}")

    return metadata
def download_samples(
    self,
    sources: Optional[List[str]],
    start_time: Union[str, datetime],
    timezone_mode: str = "utc",
    hours: float = 1.0,
    cadence_minutes: int = 15,
    width: int = 1024,
    image_type: str = "png",
    output_subdir: Optional[str] = None,
    progress_callback: Optional[Callable[[Dict], None]] = None,
) -> Dict:
    """Download a forward-only time series for the selected SDO sources.

    Method of ``SDOProviderClient``. Samples start at ``start_time`` and
    repeat every ``cadence_minutes`` up to and including
    ``start_time + hours``. Each sample gets its own sub-directory, and a
    ``manifest.json`` summarising the run is written to the run directory.

    Args:
        sources: Source keys; ``None`` selects every key of ``SDO_SOURCES``.
        start_time: First sample instant.
        timezone_mode: How to read a start time without a UTC offset.
        hours: Length of the window; must be positive.
        cadence_minutes: Spacing between samples; must be positive.
        width: Requested image width in pixels.
        image_type: Requested image format.
        output_subdir: Run directory name below ``self.output_dir``;
            defaults to ``historical_<start slug>``.
        progress_callback: Called once per (sample, source) pair with an
            event dict of type ``"result"`` or ``"error"``.

    Returns:
        The manifest dictionary, including ``manifest_filepath``.

    Raises:
        ValueError: For unknown sources or non-positive ``hours`` /
            ``cadence_minutes``.
    """
    if sources is None:
        sources = list(SDO_SOURCES.keys())
    invalid_sources = [source for source in sources if source not in SDO_SOURCES]
    if invalid_sources:
        raise ValueError(f"Invalid sources: {invalid_sources}")
    if hours <= 0:
        raise ValueError("hours must be greater than zero")
    if cadence_minutes <= 0:
        raise ValueError("cadence_minutes must be greater than zero")

    start_dt = parse_target_datetime(start_time, timezone_mode=timezone_mode)
    end_dt = start_dt + timedelta(hours=hours)
    step = timedelta(minutes=cadence_minutes)
    sample_times = []
    current_dt = start_dt
    while current_dt <= end_dt:
        sample_times.append(current_dt)
        current_dt += step

    run_subdir = output_subdir or f"historical_{utc_slug(start_dt)}"
    run_dir = self.output_dir / run_subdir
    run_dir.mkdir(parents=True, exist_ok=True)

    total = len(sample_times) * len(sources)
    completed = 0
    results = []
    errors = []

    def describe_failure(source, sample_time, message):
        # One place builds the error record shared by both failure paths.
        return {
            "source": source,
            "requested_time": format_utc_datetime(sample_time),
            "error": message,
        }

    for sample_time in sample_times:
        sample_subdir = f"{run_subdir}/{utc_slug(sample_time)}"
        for source in sources:
            result = None
            try:
                result = self.download_image_at(
                    source=source,
                    target_time=sample_time,
                    timezone_mode="utc",  # sample times are already aware UTC
                    width=width,
                    image_type=image_type,
                    output_subdir=sample_subdir,
                )
                error = None if result else describe_failure(source, sample_time, "No image found")
            except Exception as exc:
                # One failed sample must not abort the rest of the run; the
                # failure is recorded in the manifest instead.
                error = describe_failure(source, sample_time, str(exc))

            completed += 1
            if error is None:
                results.append(result)
                event = {"type": "result", "completed": completed, "total": total, "result": result}
            else:
                errors.append(error)
                event = {"type": "error", "completed": completed, "total": total, "error": error}

            if progress_callback:
                progress_callback(event)

    manifest = {
        "provider": "helioviewer",
        "provider_name": PROVIDER_LABELS["helioviewer"],
        "start_time": format_utc_datetime(start_dt),
        "end_time": format_utc_datetime(end_dt),
        "timezone_mode": timezone_mode,
        "hours": hours,
        "cadence_minutes": cadence_minutes,
        "sample_times": [format_utc_datetime(sample_time) for sample_time in sample_times],
        "sources": sources,
        "total_requested": total,
        "successful_downloads": len(results),
        "failed_downloads": len(errors),
        "output_dir": str(run_dir),
        "results": results,
        "errors": errors,
        "created_at": datetime.now(timezone.utc).isoformat(),
    }
    manifest_file = run_dir / "manifest.json"
    manifest["manifest_filepath"] = str(manifest_file)
    with open(manifest_file, "w", encoding="utf-8") as f:
        json.dump(manifest, f, indent=2)

    # Status glyphs restored from mis-decoded UTF-8.
    print(f"\n✓ Historical fetch complete: {len(results)}/{total} images")
    print(f"✓ Manifest saved: {manifest_file}")
    return manifest
def _request_with_retries(self, url: str, retries: int = 2, **kwargs) -> requests.Response:
    """GET ``url`` through the shared session, retrying transient failures.

    Method of ``SDOProviderClient``. Waits 1s, 2s, ... between attempts.

    Args:
        url: Address to request.
        retries: Number of additional attempts after the first; negative
            values are treated as zero.
        **kwargs: Passed through to ``self.session.get``.

    Returns:
        A response whose status has already passed ``raise_for_status``.

    Raises:
        requests.exceptions.RequestException: The last failure once the
            attempts are exhausted, or immediately for a client error that a
            retry cannot fix.
    """
    # A negative count used to skip the loop entirely and end in
    # `raise None`, which is a TypeError rather than a request error.
    max_retries = max(0, retries)
    attempt = 0
    while True:
        response = None
        try:
            response = self.session.get(url, **kwargs)
            response.raise_for_status()
            return response
        except requests.exceptions.RequestException as exc:
            if response is not None:
                # Release the connection of the failed (possibly streamed)
                # response before retrying or giving up.
                response.close()
            status = getattr(getattr(exc, "response", None), "status_code", None)
            # 4xx answers other than timeout/rate-limit repeat identically,
            # so retrying them only adds sleep time.
            permanent = status is not None and 400 <= status < 500 and status not in (408, 429)
            if permanent or attempt >= max_retries:
                raise
            attempt += 1
            wait_seconds = attempt
            print(f"Helioviewer request failed, retrying in {wait_seconds}s: {exc}")
            time.sleep(wait_seconds)
def get_latest_timestamp(self, source: str = "AIA_171", provider: str = "auto") -> Optional[str]:
    """Return the first timestamp any provider in the resolved order reports.

    Method of ``SDOProviderClient``. This is a best-effort lookup: a provider
    that raises for any reason is skipped and the next one is tried.

    Raises:
        ValueError: If ``source`` is not a key of ``SDO_SOURCES``.
    """
    if source not in SDO_SOURCES:
        raise ValueError(f"Invalid source. Choose from: {list(SDO_SOURCES.keys())}")

    for name in self._resolve_provider_order(provider):
        try:
            lookup = getattr(self, f"_timestamp_from_{name}")
            found = lookup(source)
        except Exception:
            # Deliberately broad: any provider failure means "try the next".
            continue
        if found:
            return found
    return None
def _download_from_lmsal(self, source: str, width: int = 1024, image_type: str = "png") -> Optional[Dict]:
    """Download the newest LMSAL "Sun In Time" browse image for ``source``.

    Method of ``SDOProviderClient``. Walks back from today (UTC) over four
    calendar days and, for each day, tries the http and then the https URL.
    The image is a fixed JPEG; ``width`` and ``image_type`` are only recorded
    in the metadata as the values that were requested.

    Returns:
        Metadata from ``_save_response``, or ``None`` when the source has no
        ``lmsal_code`` or no candidate URL could be downloaded.
    """
    code = SDO_SOURCES[source].get("lmsal_code")
    if not code:
        return None

    # Read the clock once so all four candidates share the same "today".
    today = datetime.now(timezone.utc)
    for day_offset in range(0, 4):
        candidate_date = today - timedelta(days=day_offset)
        date_path = candidate_date.strftime("%Y/%m/%d")
        # NOTE(review): http is tried before https, as in the original;
        # confirm whether that order is intentional.
        urls = [
            f"http://suntoday.lmsal.com/sdomedia/SunInTime/{date_path}/t{code}.jpg",
            f"https://suntoday.lmsal.com/sdomedia/SunInTime/{date_path}/t{code}.jpg",
        ]

        for url in urls:
            try:
                response = self.session.get(url, timeout=30, stream=True)
            except requests.exceptions.RequestException:
                continue

            try:
                if response.status_code == 404:
                    continue

                response.raise_for_status()
                return self._save_response(
                    response=response,
                    source=source,
                    provider="lmsal",
                    image_url=url,
                    extension=".jpg",
                    observation_time=response.headers.get("Last-Modified") or candidate_date.strftime("%Y-%m-%d"),
                    extra_metadata={
                        "date_path": date_path,
                        "requested_image_width": width,
                        "requested_image_type": image_type,
                        "render_settings_applied": False,
                        "resolution_class": "browse_fixed",
                    },
                )
            except requests.exceptions.RequestException:
                continue
            finally:
                # Close on every path; previously only the 404 branch
                # released the streamed connection.
                response.close()

    return None
def _download_from_nasa(self, source: str, width: int = 1024, image_type: str = "png") -> Optional[Dict]:
    """Download the latest 1024px browse JPEG from the NASA SDO site.

    Method of ``SDOProviderClient``. Tries the http and then the https URL.
    The image is a fixed JPEG; ``width`` and ``image_type`` are only recorded
    in the metadata as the values that were requested.

    Returns:
        Metadata from ``_save_response``, or ``None`` when the source has no
        ``nasa_code`` or both URLs failed.
    """
    nasa_code = SDO_SOURCES[source].get("nasa_code")
    if not nasa_code:
        return None

    urls = [
        f"http://sdo.gsfc.nasa.gov/assets/img/latest/latest_1024_{nasa_code}.jpg",
        f"https://sdo.gsfc.nasa.gov/assets/img/latest/latest_1024_{nasa_code}.jpg",
    ]

    for url in urls:
        try:
            response = self.session.get(url, timeout=30, stream=True)
        except requests.exceptions.RequestException:
            continue

        try:
            response.raise_for_status()
            return self._save_response(
                response=response,
                source=source,
                provider="nasa",
                image_url=url,
                extension=".jpg",
                observation_time=response.headers.get("Last-Modified"),
                extra_metadata={
                    "requested_image_width": width,
                    "requested_image_type": image_type,
                    "render_settings_applied": False,
                    "resolution_class": "browse_fixed",
                },
            )
        except requests.exceptions.RequestException:
            continue
        finally:
            # Streamed responses keep their connection until closed.
            response.close()

    return None
"image_width": width, + "image_type": image_type, + "requested_image_width": width, + "requested_image_type": image_type, + "render_settings_applied": True, + "resolution_class": "rendered", + }, + ) + + def _timestamp_from_lmsal(self, source: str) -> Optional[str]: + code = SDO_SOURCES[source].get("lmsal_code") + if not code: + return None + + for day_offset in range(0, 4): + candidate_date = datetime.now(timezone.utc) - timedelta(days=day_offset) + date_path = candidate_date.strftime("%Y/%m/%d") + urls = [ + f"http://suntoday.lmsal.com/sdomedia/SunInTime/{date_path}/t{code}.jpg", + f"https://suntoday.lmsal.com/sdomedia/SunInTime/{date_path}/t{code}.jpg", + ] + for url in urls: + try: + response = self.session.head(url, timeout=15) + if response.status_code == 200: + return response.headers.get("Last-Modified") or candidate_date.strftime("%Y-%m-%d") + except requests.exceptions.RequestException: + continue + return None + + def _timestamp_from_jsoc(self, source: str) -> Optional[str]: + timestamp_key = SDO_SOURCES[source].get("jsoc_timestamp_key") + if not timestamp_key: + return None + + url = "https://jsoc1.stanford.edu/data/hmi/images/latest/image_times_UTC" + response = self.session.get(url, timeout=15) + response.raise_for_status() + + for line in response.text.splitlines(): + if ":" not in line: + continue + key, value = line.split(":", 1) + if key.strip().lower() == timestamp_key: + return value.strip() + return None + + def _timestamp_from_nasa(self, source: str) -> Optional[str]: + nasa_code = SDO_SOURCES[source].get("nasa_code") + if not nasa_code: + return None + + urls = [ + f"http://sdo.gsfc.nasa.gov/assets/img/latest/latest_1024_{nasa_code}.jpg", + f"https://sdo.gsfc.nasa.gov/assets/img/latest/latest_1024_{nasa_code}.jpg", + ] + + for url in urls: + try: + response = self.session.head(url, timeout=15) + response.raise_for_status() + return response.headers.get("Last-Modified") + except requests.exceptions.RequestException: + continue + + return None 
def _save_response(
    self,
    response: requests.Response,
    source: str,
    provider: str,
    image_url: str,
    extension: str,
    observation_time: Optional[str] = None,
    extra_metadata: Optional[Dict] = None,
) -> Dict:
    """Stream a response body to disk and write a JSON metadata file beside it.

    Method of ``SDOProviderClient``. The image is stored in
    ``self.output_dir`` as ``SDO_<source>_<UTC timestamp><extension>``.

    Args:
        response: Response whose body is the image.
        source: Key of ``SDO_SOURCES``.
        provider: Key of ``PROVIDER_LABELS``.
        image_url: URL recorded in the metadata.
        extension: File suffix including the leading dot.
        observation_time: Provider-reported observation time, if any.
        extra_metadata: Additional keys merged into the metadata.

    Returns:
        The metadata dictionary that was written.
    """
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    filepath = self.output_dir / f"SDO_{source}_{timestamp}{extension}"

    try:
        with open(filepath, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    except Exception:
        # Do not leave a truncated image behind that looks like a valid one.
        try:
            filepath.unlink()
        except OSError:
            pass
        raise
    finally:
        # Release the connection; headers stay readable after close().
        response.close()

    metadata = {
        "source": source,
        "name": SDO_SOURCES[source]["name"],
        "wavelength": SDO_SOURCES[source]["wavelength"],
        "description": SDO_SOURCES[source]["description"],
        "provider": provider,
        "provider_name": PROVIDER_LABELS[provider],
        "filepath": str(filepath),
        "download_time": datetime.now(timezone.utc).isoformat(),
        "image_url": image_url,
        "observation_time": observation_time,
        "content_type": response.headers.get("Content-Type"),
        "last_modified": response.headers.get("Last-Modified"),
    }

    if extra_metadata:
        metadata.update(extra_metadata)

    metadata_file = filepath.with_suffix(".json")
    with open(metadata_file, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)

    # Status glyphs restored from mis-decoded UTF-8.
    print(f"✓ Image saved: {filepath}")
    print(f"✓ Metadata saved: {metadata_file}")
    print(f"✓ Provider used: {PROVIDER_LABELS[provider]}")

    return metadata
+Dependency-free local web UI for historical SDO data review. + +Run with: + python sdo_web_ui.py +""" + +from datetime import datetime, timezone +from http import HTTPStatus +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +import json +from pathlib import Path +import threading +import time +from typing import Dict +from urllib.parse import unquote, urlparse + +from sdo_provider import SDOProviderClient, SDO_SOURCES, parse_target_datetime + + +HOST = "127.0.0.1" +PORT = 8765 +OUTPUT_DIR = Path("sdo_data") +MAX_HOURS = 12 +MAX_SAMPLES = 96 +MAX_WIDTH = 2048 + +JOBS: Dict[str, Dict] = {} +JOBS_LOCK = threading.Lock() + + +INDEX_HTML = """ + + + + + SDO Solar Moment Console + + + +
+

SDO Solar Moment Console

+

Intel-blue retro mission control for replaying SDO flare windows. Pick a start time, fetch forward samples, and review every available wavelength.

+
+ +
+
+

Acquisition Controls

+
+
+
+ + +
+
+ + +
+
+ + + + +
+
+ + +
+
+ + +
+
+ +
+
+ + +
+
+ + +
+
+ + +
+ + +
+
+ +
+

Review Deck

+
+
StatusIdle
+
Progress0 / 0
+
Images0 OK
+
Failures0 ERR
+
+
+
READY. Awaiting target time.
+
+
+
def calculate_total(hours: float, cadence_minutes: int, source_count: int) -> int:
    """Return how many images a fetch request will attempt.

    One sample is taken at the start time and one more for every whole
    cadence step that fits inside the window, end point included. The result
    is that sample count multiplied by ``source_count``.

    Args:
        hours: Window length in hours; a negative window yields one sample.
        cadence_minutes: Spacing between samples in minutes.
        source_count: Number of sources fetched per sample.

    Raises:
        ValueError: If ``cadence_minutes`` is not positive or ``hours`` is
            not finite. Both cases used to loop forever.
    """
    import math

    if cadence_minutes <= 0:
        raise ValueError("cadence_minutes must be greater than zero")
    window_minutes = hours * 60
    if not math.isfinite(window_minutes):
        raise ValueError("hours must be a finite number")

    # Floor division counts the whole steps directly instead of looping.
    steps = max(0, int(window_minutes // cadence_minutes))
    return (steps + 1) * source_count
def validate_fetch_payload(payload: Dict) -> Dict:
    """Validate a ``/api/fetch`` request body and normalise it into job params.

    Args:
        payload: Decoded JSON body. Recognised keys: ``date``, ``time``,
            ``timezone_mode``, ``image_type``, ``hours``, ``cadence_minutes``,
            ``width``, ``all_sources``, ``sources``.

    Returns:
        Dictionary with ``target_time``, ``start_time_utc``,
        ``timezone_mode``, ``hours``, ``cadence_minutes``, ``width``,
        ``image_type``, ``sources`` and ``total``.

    Raises:
        ValueError: For any missing, malformed or out-of-range field.
        TypeError: If a numeric field holds a value that cannot be converted
            to a number, such as ``null``.
    """
    if not isinstance(payload, dict):
        # A JSON array or scalar has no .get(); fail with a readable message.
        raise ValueError("Request body must be a JSON object")

    date_value = str(payload.get("date", "")).strip()
    time_value = str(payload.get("time", "")).strip()
    timezone_mode = str(payload.get("timezone_mode", "utc")).strip().lower()
    image_type = str(payload.get("image_type", "png")).strip().lower()

    if not date_value or not time_value:
        raise ValueError("Date and time are required")
    if timezone_mode not in {"utc", "local"}:
        raise ValueError("timezone_mode must be utc or local")
    if image_type not in {"png", "jpg", "webp"}:
        raise ValueError("image_type must be png, jpg, or webp")

    hours = float(payload.get("hours", 1))
    cadence_minutes = int(payload.get("cadence_minutes", 15))
    width = int(payload.get("width", 1024))

    # Written as a positive range test so NaN is rejected too; the previous
    # `hours <= 0 or hours > MAX_HOURS` is False for NaN and let it through.
    if not 0 < hours <= MAX_HOURS:
        raise ValueError(f"hours must be between 0 and {MAX_HOURS}")
    if cadence_minutes <= 0:
        raise ValueError("cadence_minutes must be greater than zero")
    if width <= 0 or width > MAX_WIDTH:
        raise ValueError(f"width must be between 1 and {MAX_WIDTH}")

    if payload.get("all_sources", True):
        sources = list(SDO_SOURCES.keys())
    else:
        sources = [str(source) for source in payload.get("sources", [])]
    if not sources:
        raise ValueError("Select at least one source")
    invalid = [source for source in sources if source not in SDO_SOURCES]
    if invalid:
        raise ValueError(f"Invalid sources: {invalid}")

    total = calculate_total(hours, cadence_minutes, len(sources))
    if total > MAX_SAMPLES * len(SDO_SOURCES):
        raise ValueError("Request is too large; reduce duration, cadence, or source count")

    target_time = f"{date_value}T{time_value}"
    # Parsed here so a malformed date/time is rejected before a job starts.
    start_dt = parse_target_datetime(target_time, timezone_mode=timezone_mode)

    return {
        "target_time": target_time,
        "start_time_utc": start_dt.isoformat(),
        "timezone_mode": timezone_mode,
        "hours": hours,
        "cadence_minutes": cadence_minutes,
        "width": width,
        "image_type": image_type,
        "sources": sources,
        "total": total,
    }
class SDORequestHandler(BaseHTTPRequestHandler):
    """HTTP handler for the local SDO web console.

    Routes:
      * ``GET /``              - the single-page UI.
      * ``GET /api/sources``   - list of source keys.
      * ``GET /api/job/<id>``  - snapshot of a job's state.
      * ``GET /files/<path>``  - a file below ``OUTPUT_DIR``.
      * ``POST /api/fetch``    - validate a request and start a download job.
    """

    server_version = "SDOWebUI/1.0"

    # Upper bound for a POST body; fetch requests are small JSON objects.
    max_request_bytes = 1024 * 1024

    def log_message(self, format, *args):
        """Print one access-log line with a local wall-clock prefix."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        print(f"[{timestamp}] {self.address_string()} {format % args}")

    def do_GET(self):
        """Dispatch GET requests by path; unknown paths answer 404."""
        parsed = urlparse(self.path)
        if parsed.path == "/":
            body = build_index()
            self.send_response(HTTPStatus.OK)
            self.send_header("Content-Type", "text/html; charset=utf-8")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)
            return

        if parsed.path == "/api/sources":
            json_response(self, {"sources": list(SDO_SOURCES.keys())})
            return

        if parsed.path.startswith("/api/job/"):
            job_id = parsed.path.rsplit("/", 1)[-1]
            with JOBS_LOCK:
                job = JOBS.get(job_id)
                # The JSON round-trip takes a deep copy while the lock is
                # held, so the worker thread cannot mutate what we serialise.
                payload = json.loads(json.dumps(job)) if job else None
            if not payload:
                json_response(self, {"error": "Job not found"}, HTTPStatus.NOT_FOUND)
                return
            json_response(self, payload)
            return

        if parsed.path.startswith("/files/"):
            self.serve_file(parsed.path[len("/files/"):])
            return

        text_response(self, "Not found", HTTPStatus.NOT_FOUND)

    def do_POST(self):
        """Validate a fetch request, register a job and start its worker.

        Answers 202 with the job id on success and 400 with an ``error``
        message when the body or its parameters are invalid.
        """
        parsed = urlparse(self.path)
        if parsed.path != "/api/fetch":
            text_response(self, "Not found", HTTPStatus.NOT_FOUND)
            return

        try:
            length = int(self.headers.get("Content-Length", "0"))
            # read(-1) would block until the client disconnects, and an
            # unbounded length would buffer an arbitrarily large body.
            if length < 0 or length > self.max_request_bytes:
                raise ValueError("Invalid Content-Length")
            payload = json.loads(self.rfile.read(length).decode("utf-8"))
            params = validate_fetch_payload(payload)
        except Exception as exc:
            json_response(self, {"error": str(exc)}, HTTPStatus.BAD_REQUEST)
            return

        job = {
            "id": None,
            "status": "queued",
            "params": params,
            "created_at": datetime.now(timezone.utc).isoformat(),
            "started_at": None,
            "completed_at": None,
            "completed": 0,
            "total": params["total"],
            "results": [],
            "errors": [],
            "manifest": None,
        }

        with JOBS_LOCK:
            # The id depends on len(JOBS), so it must be computed and
            # registered atomically; otherwise two concurrent requests can
            # receive the same id and overwrite each other.
            job_id = f"{int(time.time())}_{len(JOBS) + 1}"
            job["id"] = job_id
            JOBS[job_id] = job

        thread = threading.Thread(target=run_job, args=(job_id,), daemon=True)
        thread.start()
        json_response(self, {"job_id": job_id, "total": params["total"]}, HTTPStatus.ACCEPTED)

    def serve_file(self, encoded_path: str):
        """Send a file located below ``OUTPUT_DIR``.

        Args:
            encoded_path: URL-encoded path taken from the request, resolved
                relative to the current working directory.

        Anything that resolves outside ``OUTPUT_DIR`` or is not a regular
        file answers 404.
        """
        requested = Path(unquote(encoded_path))
        try:
            resolved = requested.resolve()
            output_root = OUTPUT_DIR.resolve()
            # resolve() collapses ".." and symlinks before the containment
            # test, which is what blocks path traversal.
            if output_root not in resolved.parents and resolved != output_root:
                raise ValueError("Path outside output directory")
            if not resolved.is_file():
                raise FileNotFoundError(str(requested))
        except Exception:
            text_response(self, "File not found", HTTPStatus.NOT_FOUND)
            return

        suffix = resolved.suffix.lower()
        content_type = {
            ".png": "image/png",
            ".jpg": "image/jpeg",
            ".jpeg": "image/jpeg",
            ".webp": "image/webp",
            ".json": "application/json; charset=utf-8",
        }.get(suffix, "application/octet-stream")

        data = resolved.read_bytes()
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(data)))
        self.end_headers()
        self.wfile.write(data)
Console.") + finally: + server.server_close() + + +if __name__ == "__main__": + main()