Source code for scripts.run_prim_files_creator

# -*- coding: utf-8 -*-
"""
PRIM Files Creator Wrapper for DVC Pipeline

This script executes the first stage of PRIM analysis: creating the input files
for PRIM from the RDM experiment results.

Usage:
    python scripts/run_prim_files_creator.py

Dependencies:
    - src/Results/ must contain the RDM output CSVs
    - src/workflow/4_PRIM/Population.xlsx
    - src/workflow/4_PRIM/prim_files_creator_cntrl.xlsx
    - src/workflow/4_PRIM/PRIM_t3f2.yaml

Author: AFR_RDM Team
"""

import os
import sys
import json
import time
from pathlib import Path

def generate_metrics(output_dir, metrics_file):
    """
    Generate metrics JSON file for DVC tracking.
    """
    metrics = {
        "stage": "prim_files_creator",
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "output_files": 0,
        "experiment_data_files": 0
    }

    # Count output files
    if output_dir.exists():
        experiment_data = output_dir / "experiment_data"
        if experiment_data.exists():
            metrics["experiment_data_files"] = len(list(experiment_data.glob("*")))

        pickle_files = list(output_dir.glob("**/*.pickle"))
        metrics["output_files"] = len(pickle_files)

    # Write metrics
    metrics_file.parent.mkdir(parents=True, exist_ok=True)
    with open(metrics_file, 'w') as f:
        json.dump(metrics, f, indent=2)

    print(f"Metrics written to {metrics_file}")
    print(f"  - Experiment data files: {metrics['experiment_data_files']}")
    print(f"  - Pickle files: {metrics['output_files']}")

[docs] def main(): """Main execution function""" print("=" * 70) print("AFR_RDM - PRIM Files Creator") print("=" * 70) # Paths project_root = Path(__file__).parent.parent prim_dir = project_root / 'src' / 'workflow' / '4_PRIM' prim_script = prim_dir / 't3f2_prim_files_creator.py' sdiscovery_dir = prim_dir / 't3b_sdiscovery' metrics_file = prim_dir / 'prim_files_creator_metrics.json' results_dir = project_root / 'src' / 'Results' # Verify prerequisites if not prim_script.exists(): print(f"Error: {prim_script} not found") sys.exit(1) if not results_dir.exists() or len(list(results_dir.glob("*.csv"))) < 2: print(f"Error: RDM results not found in {results_dir}") print("Please run 'python run.py rdm' first.") sys.exit(1) start_time = time.time() try: print(f"PRIM directory: {prim_dir}") print(f"Script: {prim_script.name}") print() # Ensure experiment_data directory structure exists # This directory is used by t3f2_prim_files_creator.py to store/read pickle files experiment_data_dir = sdiscovery_dir / 'experiment_data' / '1_Experiment' experiment_data_dir.mkdir(parents=True, exist_ok=True) print(f"✓ Verified experiment_data directory: {experiment_data_dir}") import subprocess # Step 1: Execute t3f1_prim_structure.py first (defines PRIM structure) prim_structure_script = sdiscovery_dir / 't3f1_prim_structure.py' if prim_structure_script.exists(): print("\nStep 1: Executing t3f1_prim_structure.py...") print("-" * 70) sys.stdout.flush() original_dir = os.getcwd() os.chdir(sdiscovery_dir) subprocess.run( [sys.executable, "-u", str(prim_structure_script)], check=True ) os.chdir(original_dir) print("-" * 70) print("✓ t3f1_prim_structure.py completed.") else: print(f"Warning: {prim_structure_script} not found, skipping...") # Step 2: Execute t3f2_prim_files_creator.py (creates PRIM input files) print("\nStep 2: Executing t3f2_prim_files_creator.py...") print("-" * 70) sys.stdout.flush() # Change to PRIM directory (script expects to be run from there) original_dir = os.getcwd() os.chdir(prim_dir) # Execute the PRIM files creator script # Use -u flag for unbuffered Python output result = subprocess.run( [sys.executable, "-u", str(prim_script)], check=True ) os.chdir(original_dir) print("-" * 70) print("✓ t3f2_prim_files_creator.py completed.") # Change back to original directory os.chdir(original_dir) # Generate metrics elapsed_time = time.time() - start_time print(f"\nExecution time: {elapsed_time:.2f} seconds") generate_metrics(sdiscovery_dir, metrics_file) print("\nPRIM Files Creator completed successfully!") except subprocess.CalledProcessError as e: print(f"\nError during PRIM files creation: {e}") sys.exit(1) except Exception as e: print(f"\nError: {e}") import traceback traceback.print_exc() sys.exit(1)
if __name__ == "__main__": main()