diff --git a/docker/morphocluster/Dockerfile b/docker/morphocluster/Dockerfile index 77afe45..05cacca 100644 --- a/docker/morphocluster/Dockerfile +++ b/docker/morphocluster/Dockerfile @@ -39,6 +39,7 @@ COPY pyproject.toml uv.lock versioneer.py setup.cfg MANIFEST.in README.rst ./ COPY tests ./tests COPY morphocluster ./morphocluster COPY migrations ./migrations +COPY data ./data # Install the application with dependencies from lockfile RUN uv sync --frozen diff --git a/morphocluster/api.py b/morphocluster/api.py index 74cbe4c..3d34de3 100644 --- a/morphocluster/api.py +++ b/morphocluster/api.py @@ -325,6 +325,646 @@ def upload_files(path=""): raise werkzeug.exceptions.BadRequest() +# =============================================================================== +# /upload - Data Pipeline Upload Interface +# =============================================================================== + + +@api.route("/upload", methods=["POST"]) +def upload_archives(): + """ + Upload data archives for processing pipeline. + Saves files to FILES_DIR and returns file information. + """ + uploaded_files = request.files.getlist("files") + + if not uploaded_files: + raise werkzeug.exceptions.BadRequest("No files provided") + + result = {"message": "Files uploaded successfully", "files": []} + + for upload_file in uploaded_files: + if upload_file.filename: + # Use the same security function as the existing upload + filename = secure_path_and_name(upload_file.filename) + + # Save to FILES_DIR (same location validation expects) + server_path = os.path.join(app.config["FILES_DIR"], filename) + + # Ensure directory exists + os.makedirs(os.path.dirname(server_path), exist_ok=True) + + # Save the file + upload_file.save(server_path) + + # Get actual file size + file_size = os.path.getsize(server_path) + + result["files"].append( + { + "name": filename, + "size": file_size, + "id": filename, # Use filename as ID for validation + "status": "uploaded", + } + ) + + return jsonify(result), 200 + + +@api.route("/files//validate", methods=["GET"]) +def validate_archive(file_id): + """ + Validate an uploaded archive file. + Checks ZIP structure, required files, and detects format. 
+ """ + import zipfile + import csv + import chardet + from pathlib import Path + + try: + # Decode URL-encoded filename + from urllib.parse import unquote + + filename = unquote(file_id) + + # Find the uploaded file + upload_path = Path(app.config["FILES_DIR"]) / filename + + if not upload_path.exists(): + return ( + jsonify( + { + "is_valid": False, + "error": f"File {filename} not found", + "validation_warnings": [], + } + ), + 404, + ) + + result = { + "is_valid": False, + "format": "unknown", + "needs_conversion": False, + "file_count": 0, + "image_count": 0, + "detected_encoding": None, + "detected_delimiter": None, + "validation_warnings": [], + } + + # Check if it's a ZIP file + if not zipfile.is_zipfile(upload_path): + result["error"] = "File is not a valid ZIP archive" + return jsonify(result), 200 + + # Examine ZIP contents + with zipfile.ZipFile(upload_path, "r") as zip_file: + file_list = zip_file.namelist() + result["file_count"] = len(file_list) + + # Count image files + image_extensions = (".jpg", ".jpeg", ".png", ".tiff", ".tif") + image_files = [f for f in file_list if f.lower().endswith(image_extensions)] + result["image_count"] = len(image_files) + + # Look for metadata files + csv_files = [f for f in file_list if f.endswith(".csv")] + tsv_files = [f for f in file_list if f.endswith(".tsv")] + + # Detect format based on files present + if "index.csv" in file_list: + result["format"] = "standard" + result["needs_conversion"] = False + metadata_file = "index.csv" + elif tsv_files or any("ecotaxa" in f.lower() for f in csv_files): + result["format"] = "ecotaxa" + result["needs_conversion"] = True + metadata_file = tsv_files[0] if tsv_files else csv_files[0] + elif csv_files: + result["format"] = "csv" + result["needs_conversion"] = True + metadata_file = csv_files[0] + else: + result["validation_warnings"].append("No metadata file (CSV/TSV) found") + metadata_file = None + + # Analyze metadata file if found + if metadata_file: + try: + with zip_file.open(metadata_file) as csv_data: + # Detect encoding + raw_data = csv_data.read(10000) # Read first 10KB + encoding_result = chardet.detect(raw_data) + result["detected_encoding"] = encoding_result.get( + "encoding", "utf-8" + ) + + # Detect delimiter + sample_text = raw_data.decode( + result["detected_encoding"], errors="ignore" + ) + sample_lines = sample_text.split("\n")[:5] + + if sample_lines: + # Count delimiters in first few lines + delimiters = [",", "\t", ";", "|"] + delimiter_counts = {} + + for line in sample_lines: + for delim in delimiters: + delimiter_counts[delim] = delimiter_counts.get( + delim, 0 + ) + line.count(delim) + + # Choose most common delimiter + if delimiter_counts: + result["detected_delimiter"] = max( + delimiter_counts, key=delimiter_counts.get + ) + + except Exception as e: + result["validation_warnings"].append( + f"Could not analyze metadata file: {str(e)}" + ) + + # Validation checks + if result["image_count"] == 0: + result["validation_warnings"].append("No image files found") + + if metadata_file is None: + result["validation_warnings"].append("No metadata file found") + + # Archive is valid if it has images and metadata + result["is_valid"] = result["image_count"] > 0 and metadata_file is not None + + return jsonify(result), 200 + + except Exception as e: + return ( + jsonify( + { + "is_valid": False, + "error": f"Validation failed: {str(e)}", + "validation_warnings": [], + } + ), + 500, + ) + + +@api.route("/files//preview", methods=["GET"]) +def preview_archive(file_id): + """ + Preview 
archive contents and extract sample data from CSV/TSV files. + """ + import zipfile + import csv + import chardet + from pathlib import Path + from urllib.parse import unquote + + try: + filename = unquote(file_id) + upload_path = Path(app.config["FILES_DIR"]) / filename + + if not upload_path.exists(): + return jsonify({"error": f"File {filename} not found"}), 404 + + if not zipfile.is_zipfile(upload_path): + return jsonify({"error": "File is not a valid ZIP archive"}), 400 + + result = { + "files": [], + "total_rows": 0, + "detected_encoding": None, + "detected_delimiter": None, + "columns": [], + "sample_rows": [], + } + + with zipfile.ZipFile(upload_path, "r") as zip_file: + # Get all files in archive + file_list = zip_file.namelist() + result["files"] = sorted(file_list) + + # Find metadata file (CSV/TSV) + csv_files = [f for f in file_list if f.endswith(".csv")] + tsv_files = [f for f in file_list if f.endswith(".tsv")] + + metadata_file = None + if "index.csv" in file_list: + metadata_file = "index.csv" + elif tsv_files: + metadata_file = tsv_files[0] + elif csv_files: + metadata_file = csv_files[0] + + if metadata_file: + try: + with zip_file.open(metadata_file) as csv_data: + # Detect encoding + raw_data = csv_data.read( + 50000 + ) # Read first 50KB for better detection + encoding_result = chardet.detect(raw_data) + detected_encoding = encoding_result.get("encoding", "utf-8") + + # Handle common encoding issues + if detected_encoding.lower() in [ + "ascii", + "windows-1252", + "iso-8859-1", + ]: + detected_encoding = "utf-8" + + result["detected_encoding"] = detected_encoding + + # Decode text and detect delimiter + try: + text = raw_data.decode(detected_encoding, errors="replace") + except UnicodeDecodeError: + text = raw_data.decode("utf-8", errors="replace") + + # Detect delimiter by analyzing first few lines + lines = text.split("\n")[:10] + non_empty_lines = [ + line.strip() for line in lines if line.strip() + ] + + if non_empty_lines: + # Count delimiters in header and first few data rows + delimiters = [",", "\t", ";", "|"] + delimiter_scores = {} + + for delim in delimiters: + scores = [] + for line in non_empty_lines[:5]: # Check first 5 lines + count = line.count(delim) + scores.append(count) + + # Prefer delimiters that appear consistently + if scores and max(scores) > 0: + consistency = ( + len(set(scores)) == 1 + ) # All lines have same count + delimiter_scores[delim] = (max(scores), consistency) + + if delimiter_scores: + # Choose delimiter with highest count and consistency + best_delim = max( + delimiter_scores, + key=lambda x: ( + delimiter_scores[x][1], + delimiter_scores[x][0], + ), + ) + result["detected_delimiter"] = best_delim + + # Parse CSV and extract sample data + if result["detected_delimiter"]: + # Re-read file from beginning for CSV parsing + zip_file.seek(0) # Reset zip file position + with zip_file.open(metadata_file) as csv_data: + text_data = csv_data.read().decode( + detected_encoding, errors="replace" + ) + lines = text_data.split("\n") + + # Parse with detected delimiter + csv_reader = csv.DictReader( + lines, delimiter=result["detected_delimiter"] + ) + + # Get column names + if csv_reader.fieldnames: + result["columns"] = [ + {"key": col.strip(), "label": col.strip()} + for col in csv_reader.fieldnames + if col + ] + + # Get sample rows (first 5) + sample_rows = [] + row_count = 0 + + for row in csv_reader: + row_count += 1 + if len(sample_rows) < 5: + # Clean up row data + clean_row = {} + for key, value in row.items(): + if key: # Skip 
empty keys + clean_row[key.strip()] = ( + str(value).strip() if value else "" + ) + if clean_row: # Only add non-empty rows + sample_rows.append(clean_row) + + result["sample_rows"] = sample_rows + result["total_rows"] = row_count + + except Exception as e: + result["error"] = f"Could not parse metadata file: {str(e)}" + + return jsonify(result), 200 + + except Exception as e: + return jsonify({"error": f"Preview failed: {str(e)}"}), 500 + + +@api.route("/files//convert", methods=["POST"]) +def convert_ecotaxa_format(file_id): + """ + Start EcoTaxa format conversion background job for uploaded archive. + """ + from urllib.parse import unquote + from morphocluster.background import convert_ecotaxa_job + + filename = unquote(file_id) + parameters = request.get_json() or {} + + try: + # Queue the background job + job = convert_ecotaxa_job.queue(filename, parameters) + + # Initialize job metadata + job.meta["status"] = "queued" + job.meta["progress"] = 0 + job.meta["current_step"] = "Waiting in queue..." + job.meta["created_at"] = datetime.now().isoformat() + job.meta["job_type"] = "format_conversion" + job.meta["archive_name"] = filename + job.meta["parameters"] = parameters + job.save_meta() + + result = { + "job_id": job.id, + "status": "queued", + "message": "EcoTaxa conversion job queued", + "parameters": parameters, + } + + return jsonify(result), 202 + + except Exception as e: + return ( + jsonify({"error": f"Failed to queue EcoTaxa conversion job: {str(e)}"}), + 500, + ) + + +@api.route("/files//extract", methods=["POST"]) +def extract_features(file_id): + """ + Start feature extraction background job for uploaded archive. + """ + from urllib.parse import unquote + from morphocluster.background import extract_features_job + + filename = unquote(file_id) + parameters = request.get_json() or {} + + try: + # Queue the background job + job = extract_features_job.queue(filename, parameters) + + # Initialize job metadata + job.meta["status"] = "queued" + job.meta["progress"] = 0 + job.meta["current_step"] = "Waiting in queue..." + job.meta["created_at"] = datetime.now().isoformat() + job.meta["job_type"] = "feature_extraction" + job.meta["archive_name"] = filename + job.meta["parameters"] = parameters + job.save_meta() + + result = { + "job_id": job.id, + "status": "queued", + "message": "Feature extraction job queued", + "parameters": parameters, + } + + return jsonify(result), 202 + + except Exception as e: + return ( + jsonify({"error": f"Failed to queue feature extraction job: {str(e)}"}), + 500, + ) + + +@api.route("/files//cluster", methods=["POST"]) +def create_clustering_project(file_id): + """ + Start initial clustering background job to create a new MorphoCluster project. + """ + from urllib.parse import unquote + from morphocluster.background import initial_clustering_job + + filename = unquote(file_id) + parameters = request.get_json() or {} + + # Extract feature file from parameters or construct default name + feature_file = parameters.get("feature_file") + if not feature_file: + # Construct feature file name based on archive name + archive_stem = pathlib.Path(filename).stem + feature_file = f"{archive_stem}_features.h5" + + try: + # Queue the background job + job = initial_clustering_job.queue(filename, feature_file, parameters) + + # Initialize job metadata + job.meta["status"] = "queued" + job.meta["progress"] = 0 + job.meta["current_step"] = "Waiting in queue..." 
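When the request body does not name a feature file, the clustering endpoint falls back to a name derived from the archive stem, as above. A minimal standalone illustration of that derivation (the archive name is a placeholder):

    import pathlib

    archive_name = "plankton_2024.zip"  # placeholder upload name
    feature_file = f"{pathlib.Path(archive_name).stem}_features.h5"
    # -> "plankton_2024_features.h5", the file later expected by initial_clustering_job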
+ job.meta["created_at"] = datetime.now().isoformat() + job.meta["job_type"] = "initial_clustering" + job.meta["archive_name"] = filename + job.meta["feature_file"] = feature_file + job.meta["parameters"] = parameters + job.save_meta() + + result = { + "job_id": job.id, + "status": "queued", + "message": "Initial clustering job queued", + "parameters": parameters, + "feature_file": feature_file, + } + + return jsonify(result), 202 + + except Exception as e: + return ( + jsonify({"error": f"Failed to queue initial clustering job: {str(e)}"}), + 500, + ) + + +@api.route("/jobs/user", methods=["GET"]) +def get_user_jobs(): + """ + Get all jobs from the RQ queue. + Returns jobs with status, progress, and metadata. + """ + try: + from rq import Queue + from morphocluster.extensions import rq + + queue = rq.get_queue() + all_jobs = [] + + # Get jobs from different registries + try: + # Active/queued jobs + for job in queue.jobs: + job_data = _format_job_data(job) + if job_data: + all_jobs.append(job_data) + + # Running jobs (currently being executed) + started_registry = queue.started_job_registry + for job_id in started_registry.get_job_ids(0, 20): + job = queue.fetch_job(job_id) + if job: + job_data = _format_job_data(job) + if job_data: + all_jobs.append(job_data) + + # Recently finished jobs + finished_registry = queue.finished_job_registry + for job_id in finished_registry.get_job_ids(0, 20): + job = queue.fetch_job(job_id) + if job: + job_data = _format_job_data(job) + if job_data: + all_jobs.append(job_data) + + # Failed jobs + failed_registry = queue.failed_job_registry + for job_id in failed_registry.get_job_ids(0, 20): + job = queue.fetch_job(job_id) + if job: + job_data = _format_job_data(job) + if job_data: + all_jobs.append(job_data) + + except Exception as e: + print(f"Error fetching jobs: {e}") + + # Sort by creation time (newest first) + all_jobs.sort(key=lambda x: x.get("created_at", ""), reverse=True) + + return jsonify(all_jobs), 200 + + except Exception as e: + print(f"Error in get_user_jobs: {e}") + return jsonify([]), 200 + + +def _format_job_data(job): + """Format RQ job data for frontend consumption.""" + try: + if not job: + return None + + # Determine job status based on RQ job state + # Priority: RQ's built-in states take precedence over meta status + if job.is_failed: + status = "failed" + elif job.is_finished: + status = "completed" + elif job.is_started: + # Job is actively running - always show "running" status + status = "running" + elif job.is_queued: + status = "queued" + else: + # Fallback for edge cases + if job.started_at: + status = "running" + else: + status = "queued" + + job_data = { + "id": job.id, + "job_type": job.meta.get("job_type", "unknown"), + "status": status, + "progress": job.meta.get("progress", 0), + "created_at": job.meta.get("created_at"), + "current_step": job.meta.get("current_step"), + "parameters": job.meta.get("parameters", {}), + "archive_name": job.meta.get("archive_name"), + "logs": job.meta.get("logs", []), + } + + # Add completion/failure details + if job.meta.get("completed_at"): + job_data["completed_at"] = job.meta["completed_at"] + job_data["result"] = job.meta.get("result") + + if job.meta.get("failed_at"): + job_data["failed_at"] = job.meta["failed_at"] + job_data["error_message"] = job.meta.get("error_message") + + # Add timing info + if job.started_at: + job_data["started_at"] = job.started_at.isoformat() + if job.ended_at: + job_data["ended_at"] = job.ended_at.isoformat() + + return job_data + + except Exception as e: + 
print(f"Error formatting job {job.id if job else 'None'}: {e}") + return None + + +@api.route("/jobs//status", methods=["GET"]) +def get_job_status(job_id): + """ + Mock endpoint for getting individual job status. + Returns mock job status for frontend testing. + """ + # Mock job status based on job_id + if job_id == "job_001": + job = { + "id": job_id, + "status": "completed", + "progress": 100, + "result_url": "/files/converted_sample", + } + elif job_id == "job_002": + job = { + "id": job_id, + "status": "running", + "progress": 65, + "current_step": "Processing batch 650/1000", + "eta": 180, + } + else: + job = {"id": job_id, "status": "pending", "progress": 0} + + return jsonify(job), 200 + + +@api.route("/jobs/", methods=["DELETE"]) +def cancel_job(job_id): + """ + Mock endpoint for cancelling a job. + Returns mock cancellation response for frontend testing. + """ + result = {"message": f"Job {job_id} cancellation requested", "status": "cancelling"} + + return jsonify(result), 200 + + # =============================================================================== # /projects # =============================================================================== @@ -409,6 +1049,42 @@ def save_project(project_id): return jsonify({"url": tree_url}) +@api.route("/projects//recluster", methods=["POST"]) +def recluster_project(project_id): + """ + Start re-clustering background job for an existing project. + """ + from morphocluster.background import reclustering_job + + parameters = request.get_json() or {} + + try: + # Queue the background job + job = reclustering_job.queue(project_id, parameters) + + # Initialize job metadata + job.meta["status"] = "queued" + job.meta["progress"] = 0 + job.meta["current_step"] = "Waiting in queue..." + job.meta["created_at"] = datetime.now().isoformat() + job.meta["job_type"] = "reclustering" + job.meta["project_id"] = project_id + job.meta["parameters"] = parameters + job.save_meta() + + result = { + "job_id": job.id, + "status": "queued", + "message": "Re-clustering job queued", + "parameters": parameters, + } + + return jsonify(result), 202 + + except Exception as e: + return jsonify({"error": f"Failed to queue re-clustering job: {str(e)}"}), 500 + + # =============================================================================== # /nodes # =============================================================================== @@ -1448,3 +2124,160 @@ def get_job(job_id): result = JobSchema().dump(data) return jsonify(result) + + +# =============================================================================== +# Uploaded Archives Management +# =============================================================================== + + +@api.route("/uploaded-archives", methods=["GET"]) +def get_uploaded_archives(): + """Get all uploaded archives for the current user/session.""" + from morphocluster.models import uploaded_archives + + with database.engine.connect() as conn: + result = conn.execute( + uploaded_archives.select().order_by(uploaded_archives.c.upload_date.desc()) + ).fetchall() + + archives = [] + for row in result: + archive_data = { + "id": row.id, + "filename": row.filename, + "original_filename": row.original_filename, + "file_size": row.file_size, + "upload_date": row.upload_date.isoformat() if row.upload_date else None, + "status": row.status, + "is_valid": row.is_valid, + "needs_conversion": row.needs_conversion, + "validation_data": row.validation_data, + "feature_file": row.feature_file, + "project_id": row.project_id, + "error_message": 
row.error_message, + "metadata": row.metadata or "{}", + } + archives.append(archive_data) + + return jsonify(archives) + + +@api.route("/uploaded-archives", methods=["POST"]) +def save_uploaded_archive(): + """Save a new uploaded archive record.""" + from morphocluster.models import uploaded_archives + import json + + data = request.get_json() + + insert_data = { + "filename": data.get("filename"), + "original_filename": data.get("original_filename"), + "file_size": data.get("file_size", 0), + "status": data.get("status", "uploaded"), + "is_valid": data.get("is_valid", False), + "needs_conversion": data.get("needs_conversion", False), + "validation_data": data.get("validation_data"), + "feature_file": data.get("feature_file"), + "project_id": data.get("project_id"), + "error_message": data.get("error_message"), + "metadata": data.get("metadata", "{}"), + } + + with database.engine.connect() as conn: + with conn.begin(): + result = conn.execute(uploaded_archives.insert().values(**insert_data)) + archive_id = result.inserted_primary_key[0] + + # Return the created archive with ID + row = conn.execute( + uploaded_archives.select().where(uploaded_archives.c.id == archive_id) + ).fetchone() + + return jsonify( + { + "id": row.id, + "filename": row.filename, + "original_filename": row.original_filename, + "file_size": row.file_size, + "upload_date": ( + row.upload_date.isoformat() if row.upload_date else None + ), + "status": row.status, + "is_valid": row.is_valid, + "needs_conversion": row.needs_conversion, + "validation_data": row.validation_data, + "feature_file": row.feature_file, + "project_id": row.project_id, + "error_message": row.error_message, + "metadata": row.metadata or "{}", + } + ) + + +@api.route("/uploaded-archives/", methods=["PUT"]) +def update_uploaded_archive(archive_id): + """Update an uploaded archive record.""" + from morphocluster.models import uploaded_archives + import json + + data = request.get_json() + + update_data = {} + if "status" in data: + update_data["status"] = data["status"] + if "feature_file" in data: + update_data["feature_file"] = data["feature_file"] + if "project_id" in data: + update_data["project_id"] = data["project_id"] + if "error" in data: + update_data["error_message"] = data["error"] + if "metadata" in data: + # Handle metadata - if it's already a string, use it directly + # If it's an object, JSON encode it + metadata = data["metadata"] + if isinstance(metadata, str): + update_data["metadata"] = metadata + else: + update_data["metadata"] = json.dumps(metadata) + if "needs_conversion" in data: + update_data["needs_conversion"] = data["needs_conversion"] + if "filename" in data: + update_data["filename"] = data["filename"] + + with database.engine.connect() as conn: + with conn.begin(): + conn.execute( + uploaded_archives.update() + .where(uploaded_archives.c.id == archive_id) + .values(**update_data) + ) + + # Return updated archive + row = conn.execute( + uploaded_archives.select().where(uploaded_archives.c.id == archive_id) + ).fetchone() + + if not row: + raise werkzeug.exceptions.NotFound("Archive not found") + + return jsonify( + { + "id": row.id, + "filename": row.filename, + "original_filename": row.original_filename, + "file_size": row.file_size, + "upload_date": ( + row.upload_date.isoformat() if row.upload_date else None + ), + "status": row.status, + "is_valid": row.is_valid, + "needs_conversion": row.needs_conversion, + "validation_data": row.validation_data, + "feature_file": row.feature_file, + "project_id": row.project_id, + 
"error_message": row.error_message, + "metadata": row.metadata or "{}", + } + ) diff --git a/morphocluster/background.py b/morphocluster/background.py index a212510..f99d779 100644 --- a/morphocluster/background.py +++ b/morphocluster/background.py @@ -1,12 +1,63 @@ import datetime as dt import os +import zipfile +import csv +import time +from pathlib import Path import flask_rq2 from flask import current_app as app from morphocluster.extensions import database, rq from morphocluster.processing.recluster import Recluster -from morphocluster.processing.tree import Tree +from morphocluster.processing.tree import Tree as ProcessingTree +from morphocluster.tree import Tree + + +class JobLogger: + """Logger for background jobs that stores logs in job metadata""" + + def __init__(self, job): + self.job = job + # Initialize logs list if not exists + if "logs" not in self.job.meta: + self.job.meta["logs"] = [] + + def log(self, message, level="info"): + """Add a log entry with timestamp""" + log_entry = { + "timestamp": dt.datetime.now().isoformat(), + "level": level, + "message": str(message), + } + + # Add to logs array + if "logs" not in self.job.meta: + self.job.meta["logs"] = [] + + self.job.meta["logs"].append(log_entry) + + # Keep only last 50 logs to prevent metadata bloat + if len(self.job.meta["logs"]) > 50: + self.job.meta["logs"] = self.job.meta["logs"][-50:] + + # Also print for console output + print(f"[{level.upper()}] {message}") + + # Save metadata + self.job.save_meta() + + def info(self, message): + self.log(message, "info") + + def warning(self, message): + self.log(message, "warning") + + def error(self, message): + self.log(message, "error") + + def success(self, message): + self.log(message, "success") def validate_background_job(fun): @@ -97,3 +148,832 @@ def recluster_project(project_id, min_cluster_size): print("Project ID: {}".format(project_id)) print("Done.") + + +# =============================================================================== +# Upload Pipeline Background Jobs +# =============================================================================== + + +@rq.job(timeout=3600) # 1 hour timeout +def extract_features_job(filename, parameters=None): + """ + Background job for extracting features from uploaded archive using MorphoCluster's real feature extraction. + """ + from rq import get_current_job + + job = get_current_job() + logger = JobLogger(job) + + logger.info(f"Starting feature extraction for {filename}") + + # Import required modules + from morphocluster.processing.extract_features import extract_features + import zipfile + + if parameters is None: + parameters = {} + + # Create application context for Flask app access + from morphocluster import create_app + + app_instance = create_app() + with app_instance.app_context(): + try: + files_dir = Path(app_instance.config["FILES_DIR"]) + archive_path = files_dir / filename + + if not archive_path.exists(): + raise FileNotFoundError(f"Archive {filename} not found") + + # Create features output filename + features_filename = f"{archive_path.stem}_features.h5" + features_path = archive_path.parent / features_filename + + # Step 1: Validate archive + job.meta["progress"] = 5 + job.meta["current_step"] = "Validating archive structure..." + job.save_meta() + + logger.info("Validating archive structure and contents") + + # Check if archive has index.csv + with zipfile.ZipFile(archive_path, "r") as zip_file: + file_list = zip_file.namelist() + logger.info( + f"Archive contents: {file_list[:10]}..." 
+ ) # Show first 10 files for debugging + + if "index.csv" not in file_list: + # Check if this is an unconverted EcoTaxa file - suggest conversion + ecotaxa_files = [ + f + for f in file_list + if f.startswith("ecotaxa_") and f.endswith(".tsv") + ] + if ecotaxa_files: + raise ValueError( + f"Archive appears to be in EcoTaxa format (found {ecotaxa_files[0]}). Please convert it first." + ) + else: + raise ValueError( + f"Archive must contain index.csv file. Found files: {', '.join(file_list[:5])}" + ) + + image_files = [ + f + for f in file_list + if f.lower().endswith((".jpg", ".jpeg", ".png", ".tiff", ".tif")) + ] + total_images = len(image_files) + + logger.success(f"Archive validation passed. Found {total_images} images") + + # Step 2: Setup parameters + job.meta["progress"] = 10 + job.meta["current_step"] = "Setting up feature extraction parameters..." + job.save_meta() + + # Extract parameters with defaults + normalize = parameters.get("normalize", True) + batch_size = parameters.get("batch_size", 512) + model_file = parameters.get("model_file", None) + + # Set default model file if not specified + if model_file is None: + model_file = "/code/data/model_state.pth" + + # Parse input_mean and input_std - handle both string and list formats + def parse_mean_std(value, default): + if isinstance(value, str): + if value.strip(): + return tuple(map(float, value.split(","))) + else: + return default + elif isinstance(value, (list, tuple)): + return tuple(value) + else: + return default + + input_mean = parse_mean_std(parameters.get("input_mean"), (0, 0, 0)) + input_std = parse_mean_std(parameters.get("input_std"), (1, 1, 1)) + + logger.info( + f"Using parameters: normalize={normalize}, batch_size={batch_size}, model_file={model_file}" + ) + logger.info(f"Input normalization: mean={input_mean}, std={input_std}") + + # Step 3: Start feature extraction with progress tracking + job.meta["progress"] = 15 + job.meta["current_step"] = ( + "Starting feature extraction (this may take several minutes)..." 
+ ) + job.meta["total_images"] = total_images + job.save_meta() + + # Define progress callback for feature extraction + def update_extraction_progress(current_batch, total_batches): + """Update job progress during feature extraction""" + # Map from 15% to 95% based on batch progress + progress = 15 + int((current_batch / total_batches) * 80) + job.meta["progress"] = progress + job.meta["current_step"] = ( + f"Extracting features: batch {current_batch}/{total_batches}" + ) + job.save_meta() + + # Run MorphoCluster's real feature extraction + extract_features( + archive_fn=str(archive_path), + features_fn=str(features_path), + parameters_fn=model_file, # None for pretrained ImageNet + normalize=normalize, + batch_size=batch_size, + cuda=True, # Use GPU if available + input_mean=input_mean, + input_std=input_std, + progress_callback=update_extraction_progress, + ) + + # Step 4: Complete + job.meta["status"] = "completed" + job.meta["progress"] = 100 + job.meta["current_step"] = "Feature extraction completed" + job.meta["completed_at"] = dt.datetime.now().isoformat() + + # Create result with actual feature file info + result = { + "feature_file": features_filename, + "feature_path": str(features_path), + "total_images": total_images, + "feature_dimensions": 32, # ResNet18 with 32-dim bottleneck + "model_used": f"ResNet18 with 32-dim bottleneck: {model_file}", + "normalize": normalize, + "batch_size": batch_size, + } + + job.meta["result"] = result + job.save_meta() + + logger.success(f"Feature extraction completed for {filename}") + logger.info(f"Features saved to: {features_path}") + return result + + except Exception as e: + logger.error(f"Feature extraction failed: {str(e)}") + job.meta["status"] = "failed" + job.meta["error_message"] = str(e) + job.meta["failed_at"] = dt.datetime.now().isoformat() + job.save_meta() + raise + + +@rq.job(timeout=1800) # 30 minutes timeout +def convert_ecotaxa_job(filename, parameters=None): + """ + Background job for converting EcoTaxa format to standard format. + Uses MorphoCluster's existing fix_ecotaxa functionality. + """ + from rq import get_current_job + import shutil + + job = get_current_job() + logger = JobLogger(job) + + logger.info(f"Starting EcoTaxa conversion for {filename}") + + if parameters is None: + parameters = {} + + # Create application context for Flask app access + from morphocluster import create_app + + app_instance = create_app() + with app_instance.app_context(): + try: + # Always use the original file for conversion, not the _converted version + original_filename = filename + if filename.endswith("_converted.zip"): + original_filename = filename.replace("_converted.zip", ".zip") + logger.info( + f"Converting from original file: {original_filename} instead of {filename}" + ) + + archive_path = Path(app_instance.config["FILES_DIR"]) / original_filename + + if not archive_path.exists(): + raise FileNotFoundError( + f"Original archive {original_filename} not found" + ) + + # Step 1: Analyze parameters + job.meta["progress"] = 10 + job.meta["current_step"] = "Analyzing EcoTaxa format and parameters..." 
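This job is queued from the new /files/&lt;file_id&gt;/convert endpoint with the same parameters dictionary; a minimal client-side sketch using requests (host, archive name, and option values are placeholders):

    import requests

    resp = requests.post(
        "http://localhost:5000/api/files/ecotaxa_export.zip/convert",  # placeholder host/archive
        json={"encoding": "utf-8", "delimiter": "\t", "force_overwrite": True},
    )
    print(resp.json()["job_id"])  # the queued job can then be followed via /api/jobs/user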
+ job.save_meta() + + encoding = parameters.get("encoding") + delimiter = parameters.get("delimiter") + force = parameters.get("force_overwrite", parameters.get("force", False)) + + logger.info(f"Conversion parameters:") + logger.info(f" encoding: {encoding}") + logger.info(f" delimiter: {delimiter}") + logger.info(f" force: {force}") + logger.info(f" raw parameters: {parameters}") + + # Step 2: Create working copy for conversion + job.meta["progress"] = 20 + job.meta["current_step"] = "Creating working copy..." + job.save_meta() + + # Create a copy to work on (fix_ecotaxa modifies in place) + work_path = archive_path.with_suffix(".converting.zip") + shutil.copy2(archive_path, work_path) + + # Step 3: Run EcoTaxa conversion using existing MorphoCluster function + job.meta["progress"] = 40 + job.meta["current_step"] = "Converting EcoTaxa format to standard format..." + job.save_meta() + + try: + # Call fix_ecotaxa function directly (it's a Click command) + from click.testing import CliRunner + from morphocluster.scripts import fix_ecotaxa + + runner = CliRunner() + args = [str(work_path)] + if encoding: + args.extend(["--encoding", encoding]) + if delimiter: + args.extend(["--delimiter", delimiter]) + if force: + args.append("--force") + + logger.info(f"About to call fix_ecotaxa with:") + logger.info(f" work_path: {work_path}") + logger.info(f" work_path exists: {work_path.exists()}") + logger.info(f" args: {args}") + logger.info(f" encoding: {encoding}") + logger.info(f" delimiter: {delimiter}") + + # Debug: Check the actual file contents before conversion + try: + import zipfile + + with zipfile.ZipFile(work_path, "r") as zf: + ecotaxa_files = [ + f for f in zf.namelist() if "ecotaxa" in f.lower() + ] + logger.info(f" ecotaxa files in work zip: {ecotaxa_files}") + + if ecotaxa_files: + with zf.open(ecotaxa_files[0]) as fp: + first_line = ( + fp.readline().decode(encoding or "ascii").strip() + ) + logger.info(f" first line: {repr(first_line)}") + + actual_delimiter = delimiter or "\t" + columns = first_line.split(actual_delimiter) + logger.info(f" actual columns found: {columns}") + logger.info(f" number of columns: {len(columns)}") + logger.info( + f" delimiter used: {repr(actual_delimiter)}" + ) + logger.info( + f" has object_id: {'object_id' in columns}" + ) + logger.info( + f" has img_file_name: {'img_file_name' in columns}" + ) + + # Show each column individually for debugging + for i, col in enumerate(columns): + logger.info(f" column {i}: {repr(col)}") + except Exception as debug_error: + logger.error(f" debug file inspection failed: {debug_error}") + + result = runner.invoke(fix_ecotaxa, args) + + # Log full output for debugging + logger.info(f"fix_ecotaxa exit_code: {result.exit_code}") + logger.info(f"fix_ecotaxa output: {result.output}") + if result.exception: + logger.error(f"fix_ecotaxa exception: {result.exception}") + + if result.exit_code != 0: + error_msg = f"EcoTaxa conversion failed (exit code {result.exit_code}): {result.output}" + if result.exception: + error_msg += f"\nException: {result.exception}" + raise RuntimeError(error_msg) + except Exception as conversion_error: + # Clean up working file + if work_path.exists(): + work_path.unlink() + raise conversion_error + + # Step 4: Validate conversion result + job.meta["progress"] = 80 + job.meta["current_step"] = "Validating converted archive..." 
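The conversion itself is driven through Click's test runner rather than a subprocess; a condensed sketch of the same invocation pattern (archive path and encoding are placeholders):

    from click.testing import CliRunner
    from morphocluster.scripts import fix_ecotaxa

    runner = CliRunner()
    result = runner.invoke(fix_ecotaxa, ["archive.converting.zip", "--encoding", "utf-8"])
    if result.exit_code != 0:
        raise RuntimeError(result.output)  # surface the Click command's output as the error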
+ job.save_meta() + + # Check that index.csv was created + import zipfile + + with zipfile.ZipFile(work_path, "r") as zf: + if "index.csv" not in zf.namelist(): + raise ValueError("Conversion failed: index.csv not created") + + # Step 5: Replace original with converted version + job.meta["progress"] = 95 + job.meta["current_step"] = "Finalizing converted archive..." + job.save_meta() + + # Move converted file to final location + converted_path = archive_path.with_name( + f"{archive_path.stem}_converted{archive_path.suffix}" + ) + work_path.rename(converted_path) + + # Complete + job.meta["status"] = "completed" + job.meta["progress"] = 100 + job.meta["current_step"] = "EcoTaxa conversion completed" + job.meta["completed_at"] = dt.datetime.now().isoformat() + job.meta["result"] = { + "converted_file": converted_path.name, + "original_file": filename, + "encoding": encoding, + "delimiter": delimiter, + "conversion_method": "morphocluster.scripts.fix_ecotaxa", + } + job.save_meta() + + print(f"EcoTaxa conversion completed: {filename} -> {converted_path.name}") + return job.meta["result"] + + except Exception as e: + print(f"EcoTaxa conversion failed: {str(e)}") + job.meta["status"] = "failed" + job.meta["error_message"] = str(e) + job.meta["failed_at"] = dt.datetime.now().isoformat() + job.save_meta() + raise + + +@rq.job(timeout=7200) # 2 hours timeout +def initial_clustering_job(archive_name, feature_file, parameters=None): + """ + Background job for initial clustering to create a new MorphoCluster project. + """ + from rq import get_current_job + + job = get_current_job() + logger = JobLogger(job) + + logger.info(f"Starting initial clustering for {archive_name}") + + if parameters is None: + parameters = {} + + # Create application context for Flask app access + from morphocluster import create_app + + app_instance = create_app() + with app_instance.app_context(): + try: + files_dir = Path(app_instance.config["FILES_DIR"]) + archive_path = files_dir / archive_name + feature_path = files_dir / feature_file + + if not archive_path.exists(): + raise FileNotFoundError(f"Archive {archive_name} not found") + if not feature_path.exists(): + raise FileNotFoundError(f"Feature file {feature_file} not found") + + # Step 1: Setup parameters + job.meta["progress"] = 10 + job.meta["current_step"] = "Setting up clustering parameters..." + job.save_meta() + + # Extract parameters with defaults + project_name = parameters.get( + "project_name", f"Project-{archive_path.stem}" + ) + description = parameters.get("description", "") + min_cluster_size = parameters.get("min_cluster_size", 128) + min_samples = parameters.get("min_samples", 1) + cluster_selection_method = parameters.get( + "cluster_selection_method", "leaf" + ) + sample_size = parameters.get("sample_size", 0) # 0 = use all + keep_unexplored_ratio = parameters.get("keep_unexplored_ratio", 0.0) + + print( + f"Clustering parameters: min_cluster_size={min_cluster_size}, method={cluster_selection_method}" + ) + + # Step 2: Extract images from archive + job.meta["progress"] = 15 + job.meta["current_step"] = "Extracting images from archive..." 
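The extraction step below relies on the standard index.csv produced by the converter, read directly out of the ZIP; a minimal sketch of that read (the archive name is a placeholder):

    import zipfile
    import pandas as pd

    with zipfile.ZipFile("archive.zip") as zf:  # placeholder archive
        with zf.open("index.csv") as fp:
            index_df = pd.read_csv(fp, dtype=str, usecols=["object_id", "path"])
    # each row maps an object_id to the image path stored inside the archive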
+ job.save_meta() + + import zipfile + import pandas as pd + import h5py + import shutil + from morphocluster import models + + # Create images directory for this archive + images_dir = Path(app_instance.config["IMAGES_DIR"]) + archive_images_dir = images_dir / archive_path.stem + archive_images_dir.mkdir(parents=True, exist_ok=True) + + # Read index.csv from archive to get object_id and path mappings + with zipfile.ZipFile(archive_path, "r") as zf: + with zf.open("index.csv") as fp: + archive_df = pd.read_csv( + fp, dtype=str, usecols=["object_id", "path"] + ) + + # Extract image files + print(f"Extracting {len(archive_df)} images to {archive_images_dir}") + for _, row in archive_df.iterrows(): + image_path = row["path"] + if image_path in zf.namelist(): + # Extract to the archive-specific directory + extracted_path = zf.extract(image_path, archive_images_dir) + + # Move to flat structure if needed (some archives have subdirectories) + final_path = archive_images_dir / Path(image_path).name + if Path(extracted_path) != final_path: + shutil.move(extracted_path, final_path) + + # Step 3: Load objects from archive into database + job.meta["progress"] = 25 + job.meta["current_step"] = "Loading objects into database..." + job.save_meta() + + # Load feature vectors from H5 file + with h5py.File(feature_path, "r") as h5f: + feature_object_ids = h5f["object_id"][:] + features = h5f["features"][:] + + # Convert bytes to strings if necessary + if hasattr(feature_object_ids[0], "decode"): + feature_object_ids = [ + oid.decode("utf-8") for oid in feature_object_ids + ] + else: + feature_object_ids = list(feature_object_ids) + + feature_dims = ( + features.shape[1] + if len(features.shape) > 1 + else len(features[0]) if len(features) > 0 else 0 + ) + print( + f"Archive contains {len(archive_df)} objects, features for {len(feature_object_ids)} objects" + ) + print(f"Feature dimensions: {feature_dims}") + + # Step 3: Insert objects into database with vectors + job.meta["progress"] = 30 + job.meta["current_step"] = "Inserting objects into database..." 
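The feature file written by extract_features_job is an HDF5 container with parallel object_id and features datasets; a minimal sketch of reading it back (the file name is a placeholder):

    import h5py

    with h5py.File("archive_features.h5", "r") as h5f:  # placeholder feature file
        object_ids = [
            oid.decode("utf-8") if isinstance(oid, bytes) else oid
            for oid in h5f["object_id"][:]
        ]
        features = h5f["features"][:]  # shape: (n_objects, n_dims)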
+ job.save_meta() + + # Create object data for database insertion + object_data = [] + feature_dict = dict(zip(feature_object_ids, features)) + + for _, row in archive_df.iterrows(): + object_id = row["object_id"] + original_path = row["path"] + # Update path to point to extracted image in archive subdirectory + extracted_path = f"{archive_path.stem}/{Path(original_path).name}" + vector = feature_dict.get(object_id) + + if vector is not None: + object_data.append( + { + "object_id": object_id, + "path": extracted_path, # Path relative to IMAGES_DIR + "vector": vector, # Keep as numpy array - should be 32 dimensions now + } + ) + + # Insert objects into database + with database.engine.connect() as conn: + with conn.begin(): + # Check if objects already exist to avoid duplicates + existing_objects = conn.execute( + models.objects.select().where( + models.objects.c.object_id.in_( + [obj["object_id"] for obj in object_data] + ) + ) + ).fetchall() + existing_object_ids = {obj.object_id for obj in existing_objects} + + # Only insert new objects + new_objects = [ + obj + for obj in object_data + if obj["object_id"] not in existing_object_ids + ] + + if new_objects: + print(f"Inserting {len(new_objects)} new objects into database") + conn.execute(models.objects.insert(), new_objects) + else: + print("All objects already exist in database") + + # Step 4: Initialize clustering + job.meta["progress"] = 40 + job.meta["current_step"] = "Initializing clustering algorithm..." + job.save_meta() + + recluster = Recluster() + + # Step 5: Load features + job.meta["progress"] = 50 + job.meta["current_step"] = "Loading extracted features..." + job.save_meta() + + recluster.load_features(str(feature_path)) + + # Step 6: Skip init_tree() - let clustering create the tree structure + job.meta["progress"] = 60 + job.meta["current_step"] = "Preparing clustering..." + job.save_meta() + + # Note: Not calling recluster.init_tree() - this was interfering with clustering + + # Step 7: Run clustering + job.meta["progress"] = 70 + job.meta["current_step"] = ( + "Running HDBSCAN clustering (this may take several minutes)..." + ) + job.save_meta() + + # Apply sample size and keep_unexplored_ratio if specified + cluster_kwargs = { + "min_cluster_size": min_cluster_size, + "min_samples": min_samples, + "cluster_selection_method": cluster_selection_method, + } + + if sample_size > 0: + cluster_kwargs["sample_size"] = sample_size + print(f"Using sample size: {sample_size}") + + if keep_unexplored_ratio > 0: + cluster_kwargs["keep_unexplored"] = keep_unexplored_ratio + + recluster.cluster(**cluster_kwargs) + + # Step 8: Get the clustered tree + job.meta["progress"] = 80 + job.meta["current_step"] = "Building project tree structure..." + job.save_meta() + + # Get the first (and only) tree from recluster + tree = recluster.trees[0] + + # Step 9: Load into database + job.meta["progress"] = 90 + job.meta["current_step"] = "Creating project in database..." 
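Taken together, the clustering stage reduces to the Recluster sequence used above; a condensed sketch under the same defaults (the feature path is a placeholder):

    from morphocluster.processing.recluster import Recluster

    rc = Recluster()
    rc.load_features("archive_features.h5")  # placeholder path
    rc.cluster(min_cluster_size=128, min_samples=1, cluster_selection_method="leaf")
    tree = rc.trees[0]  # flat HDBSCAN result, loaded into the database in the next step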
+ job.save_meta() + + with database.engine.connect() as conn: + db_tree = Tree(conn) + + with conn.begin(): + project_id = db_tree.load_project(project_name, tree) + root_id = db_tree.get_root_id(project_id) + + print("Consolidating tree structure...") + db_tree.consolidate_node(root_id) + + # Step 10: Complete + job.meta["status"] = "completed" + job.meta["progress"] = 100 + job.meta["current_step"] = "Project created successfully" + job.meta["completed_at"] = dt.datetime.now().isoformat() + + # Get final statistics + cluster_count = len(tree.nodes) # Number of nodes/clusters + object_count = len(tree.objects) # Number of objects + + result = { + "project_id": project_id, + "project_name": project_name, + "root_id": root_id, + "cluster_count": cluster_count, + "object_count": object_count, + "min_cluster_size": min_cluster_size, + "cluster_selection_method": cluster_selection_method, + "project_url": f"/projects/{project_id}", + } + + job.meta["result"] = result + job.save_meta() + + logger.success(f"Initial clustering completed for {archive_name}") + logger.info( + f"Created project '{project_name}' with {cluster_count} clusters and {object_count} objects" + ) + return result + + except Exception as e: + logger.error(f"Initial clustering failed: {str(e)}") + job.meta["status"] = "failed" + job.meta["error_message"] = str(e) + job.meta["failed_at"] = dt.datetime.now().isoformat() + job.save_meta() + raise + + +@rq.job(timeout=3600) # 1 hour timeout +def reclustering_job(project_id, parameters=None): + """ + Background job for re-clustering an existing project. + """ + print(f"Starting re-clustering for project {project_id}") + from rq import get_current_job + + job = get_current_job() + logger = JobLogger(job) + + if parameters is None: + parameters = {} + + # Create application context for Flask app access + from morphocluster import create_app + + app_instance = create_app() + + with app_instance.app_context(): + try: + from morphocluster.processing.recluster import Recluster + from morphocluster.tree import Tree + from morphocluster import models + import datetime as dt + from pathlib import Path + import h5py + + # Step 1: Setup parameters + job.meta["progress"] = 10 + job.meta["current_step"] = "Setting up re-clustering parameters..." + job.save_meta() + + # Extract parameters with defaults + new_project_name = parameters.get( + "project_name", f"Re-clustered Project {project_id}" + ) + min_cluster_size = parameters.get("min_cluster_size", 32) + min_samples = parameters.get("min_samples", 1) + cluster_selection_method = parameters.get( + "cluster_selection_method", "leaf" + ) + sample_size = parameters.get("sample_size", 0) # 0 = use all + keep_unexplored_ratio = parameters.get("keep_unexplored_ratio", 0.0) + + print( + f"Re-clustering parameters: min_cluster_size={min_cluster_size}, method={cluster_selection_method}" + ) + + # Step 2: Load the existing project and export it + job.meta["progress"] = 20 + job.meta["current_step"] = "Loading existing project..." + job.save_meta() + + with database.engine.connect() as conn: + db_tree = Tree(conn) + existing_project = db_tree.get_project(project_id) + root_id = db_tree.get_root_id(project_id) + + # Export existing tree to temporary file + temp_tree_path = f"/tmp/temp_tree_{project_id}.zip" + db_tree.export_tree(root_id, temp_tree_path) + + # Step 3: Find the feature file (look for existing feature files) + job.meta["progress"] = 30 + job.meta["current_step"] = "Finding feature file..." 
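Re-clustering has no explicit link from a project to its feature file yet, so the job falls back to the newest *_features.h5 in FILES_DIR, as shown below; a standalone sketch of that selection (the directory is a placeholder):

    from pathlib import Path

    files_dir = Path("/data/files")  # placeholder FILES_DIR
    candidates = list(files_dir.glob("*_features.h5"))
    feature_path = max(candidates, key=lambda p: p.stat().st_mtime) if candidates else None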
+ job.save_meta() + + files_dir = Path(app_instance.config["FILES_DIR"]) + # Look for feature files that might match this project + feature_files = list(files_dir.glob("*_features.h5")) + + if not feature_files: + raise FileNotFoundError("No feature files found for re-clustering") + + # Use the most recent feature file (or implement better matching logic) + feature_path = max(feature_files, key=lambda x: x.stat().st_mtime) + print(f"Using feature file: {feature_path}") + + # Step 4: Initialize clustering + job.meta["progress"] = 40 + job.meta["current_step"] = "Initializing re-clustering algorithm..." + job.save_meta() + + recluster = Recluster() + + # Step 5: Load features + job.meta["progress"] = 50 + job.meta["current_step"] = "Loading features..." + job.save_meta() + + recluster.load_features(str(feature_path)) + + # Step 6: Load existing tree + job.meta["progress"] = 60 + job.meta["current_step"] = "Loading existing project tree..." + job.save_meta() + + recluster.load_tree(temp_tree_path) + + # Step 7: Run clustering + job.meta["progress"] = 70 + job.meta["current_step"] = "Running HDBSCAN re-clustering..." + job.save_meta() + + cluster_kwargs = { + "min_cluster_size": min_cluster_size, + "min_samples": min_samples, + "cluster_selection_method": cluster_selection_method, + } + + if sample_size > 0: + cluster_kwargs["sample_size"] = sample_size + + if keep_unexplored_ratio > 0: + cluster_kwargs["keep_unexplored"] = keep_unexplored_ratio + + recluster.cluster(**cluster_kwargs) + + # Step 8: Create new project from re-clustered tree + job.meta["progress"] = 80 + job.meta["current_step"] = "Creating new project..." + job.save_meta() + + # Get the new clustered tree (should be the second tree) + new_tree = recluster.trees[-1] # Get the most recent tree + + # Step 9: Load into database as new project + job.meta["progress"] = 90 + job.meta["current_step"] = "Saving new project to database..." 
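The whole re-clustering flow is triggered from the new /projects/&lt;project_id&gt;/recluster endpoint; a minimal client-side sketch using requests (host and project id are placeholders):

    import requests

    resp = requests.post(
        "http://localhost:5000/api/projects/42/recluster",  # placeholder host/project
        json={"min_cluster_size": 32, "cluster_selection_method": "leaf"},
    )
    print(resp.json())  # {"job_id": ..., "status": "queued", ...}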
+ job.save_meta() + + with database.engine.connect() as conn: + db_tree = Tree(conn) + + with conn.begin(): + new_project_id = db_tree.load_project(new_project_name, new_tree) + new_root_id = db_tree.get_root_id(new_project_id) + + print("Consolidating new tree structure...") + db_tree.consolidate_node(new_root_id) + + # Clean up temporary file + Path(temp_tree_path).unlink(missing_ok=True) + + # Step 10: Complete + job.meta["status"] = "completed" + job.meta["progress"] = 100 + job.meta["current_step"] = "Re-clustering completed successfully" + job.meta["completed_at"] = dt.datetime.now().isoformat() + + # Get final statistics + cluster_count = len(new_tree.nodes) + object_count = len(new_tree.objects) + + result = { + "original_project_id": project_id, + "new_project_id": new_project_id, + "new_project_name": new_project_name, + "new_root_id": new_root_id, + "cluster_count": cluster_count, + "object_count": object_count, + "min_cluster_size": min_cluster_size, + "cluster_selection_method": cluster_selection_method, + "project_url": f"/projects/{new_project_id}", + } + + job.meta["result"] = result + job.save_meta() + + print(f"Re-clustering completed for project {project_id}") + print( + f"Created new project '{new_project_name}' (ID: {new_project_id}) with {cluster_count} clusters" + ) + return result + + except Exception as e: + print(f"Re-clustering failed: {str(e)}") + job.meta["status"] = "failed" + job.meta["error_message"] = str(e) + job.meta["failed_at"] = dt.datetime.now().isoformat() + job.save_meta() + raise diff --git a/morphocluster/frontend/.gitignore b/morphocluster/frontend/.gitignore index c6e316b..e5fa33f 100644 --- a/morphocluster/frontend/.gitignore +++ b/morphocluster/frontend/.gitignore @@ -1,5 +1,6 @@ .DS_Store node_modules +/dist # local env files .env.local @@ -9,6 +10,7 @@ node_modules npm-debug.log* yarn-debug.log* yarn-error.log* +pnpm-debug.log* # Editor directories and files .idea diff --git a/morphocluster/frontend/public/index.html b/morphocluster/frontend/public/index.html index ef21f04..876d0de 100644 --- a/morphocluster/frontend/public/index.html +++ b/morphocluster/frontend/public/index.html @@ -5,7 +5,9 @@ - + + + MorphoCluster diff --git a/morphocluster/frontend/src/assets/styles.css b/morphocluster/frontend/src/assets/styles.css index 54a0b36..73a6675 100644 --- a/morphocluster/frontend/src/assets/styles.css +++ b/morphocluster/frontend/src/assets/styles.css @@ -4,73 +4,380 @@ html, body { overflow: hidden; } -.tooltip kbd { - background-color: #f7f7f9; - color: #212529; -} - .info-hint { position: absolute; top: 0.5em; right: 1em; - /*color: gray;*/ } -/* Dark/Light Mode */ +/* ============================================================================ + Dark/Light Mode System + ============================================================================ */ + +/* Light Mode (Default) */ :root { - --background-color: white; - --color: #212529; + /* Background colors */ + --bg-primary: #ffffff; + --bg-secondary: #f8f9fa; + --bg-tertiary: #e9ecef; + + /* Text colors */ + --text-primary: #212529; + --text-secondary: #6c757d; + --text-muted: #adb5bd; + + /* Border colors */ + --border-color: #dee2e6; + --border-light: #e9ecef; + + /* Card/Panel colors */ + --card-bg: #ffffff; + --card-border: #dee2e6; + + /* Navbar */ + --navbar-bg: #343a40; + --navbar-color: #ffffff; + --navbar-hover-color: #f8f9fa; + + /* Input colors */ + --input-bg: #ffffff; + --input-border: #ced4da; + --input-text: #495057; + --input-placeholder: #6c757d; + + /* Button colors */ 
+ --btn-bg: #ffffff; + --btn-text: #212529; + --btn-border: #dee2e6; + + /* Table colors */ + --table-bg: #ffffff; + --table-hover-bg: #f8f9fa; + --table-border: #dee2e6; + + /* Alert/Badge colors */ + --alert-bg: #f8f9fa; + --badge-bg: #e9ecef; } +/* Dark Mode */ :root.dark-mode { - --background-color: black; - --color: #f7f7f9; + /* Background colors */ + --bg-primary: #1a1a1a; + --bg-secondary: #2d2d2d; + --bg-tertiary: #3a3a3a; + + /* Text colors */ + --text-primary: #e9ecef; + --text-secondary: #adb5bd; + --text-muted: #6c757d; + + /* Border colors */ + --border-color: #495057; + --border-light: #3a3a3a; + + /* Card/Panel colors */ + --card-bg: #2d2d2d; + --card-border: #495057; + + /* Navbar */ + --navbar-bg: #0d0d0d; + --navbar-color: #e9ecef; + --navbar-hover-color: #ffffff; + + /* Input colors */ + --input-bg: #2d2d2d; + --input-border: #495057; + --input-text: #e9ecef; + --input-placeholder: #6c757d; + + /* Button colors */ + --btn-bg: #2d2d2d; + --btn-text: #e9ecef; + --btn-border: #495057; + + /* Table colors */ + --table-bg: #2d2d2d; + --table-hover-bg: #3a3a3a; + --table-border: #495057; + + /* Alert/Badge colors */ + --alert-bg: #2d2d2d; + --badge-bg: #3a3a3a; } +/* ============================================================================ + Apply Theme Variables + ============================================================================ */ + body { - color: var(--color); - background-color: var(--background-color); + color: var(--text-primary); + background-color: var(--bg-primary); + transition: background-color 0.3s ease, color 0.3s ease; } -table, .table { - color: var(--color); +/* Navbar */ +.navbar { + background-color: var(--navbar-bg) !important; + color: var(--navbar-color) !important; + border-bottom: 1px solid var(--border-color); + padding-left: 1rem !important; } -.modal-dialog { - color: black; +.navbar .navbar-brand { + color: var(--navbar-color) !important; + display: flex; + align-items: center; + gap: 0.5rem; } -/* Navbar styling for both modes */ -.navbar { - background-color: var(--navbar-bg, #343a40) !important; - color: var(--navbar-color, #ffffff) !important; +.navbar .navbar-brand .navbar-logo { + height: 32px; + width: 32px; + object-fit: contain; +} + +:root.dark-mode .navbar .navbar-brand .navbar-logo { + filter: brightness(0) invert(1); } -.navbar .navbar-brand, .navbar .nav-link { - color: var(--navbar-color, #ffffff) !important; + color: var(--navbar-color) !important; } .navbar .navbar-brand:hover, .navbar .nav-link:hover { - color: var(--navbar-hover-color, #f8f9fa) !important; + color: var(--navbar-hover-color) !important; } -/* Dark mode navbar */ -:root.dark-mode { - --navbar-bg: #1a1a1a; - --navbar-color: #f7f7f9; - --navbar-hover-color: #ffffff; +/* Cards & Panels */ +.card, +.upload-section, +.job-status-section, +.archive-item, +.job-item { + background-color: var(--card-bg) !important; + border-color: var(--card-border) !important; + color: var(--text-primary) !important; } -/* Light mode navbar */ -:root { - --navbar-bg: #343a40; - --navbar-color: #ffffff; - --navbar-hover-color: #f8f9fa; +/* Tables */ +table, .table { + color: var(--text-primary); + background-color: var(--table-bg); +} + +.table thead th { + border-color: var(--table-border); + background-color: var(--bg-secondary); + color: var(--text-primary); +} + +.table td, .table th { + border-color: var(--table-border); +} + +.table-hover tbody tr:hover { + background-color: var(--table-hover-bg); + color: var(--text-primary); +} + +.table-striped tbody 
tr:nth-of-type(odd) { + background-color: var(--bg-secondary); +} + +/* Forms & Inputs */ +.form-control, +.custom-select, +input[type="text"], +input[type="number"], +input[type="email"], +textarea, +select { + background-color: var(--input-bg) !important; + border-color: var(--input-border) !important; + color: var(--input-text) !important; +} + +.form-control::placeholder { + color: var(--input-placeholder) !important; +} + +.form-control:focus { + background-color: var(--input-bg) !important; + color: var(--input-text) !important; + border-color: #80bdff !important; +} + +/* Buttons */ +.btn-outline-secondary { + color: var(--text-primary) !important; + border-color: var(--btn-border) !important; + background-color: var(--btn-bg) !important; +} + +.btn-outline-secondary:hover { + background-color: var(--bg-tertiary) !important; + color: var(--text-primary) !important; +} + +/* Modals */ +.modal-content { + background-color: var(--card-bg) !important; + color: var(--text-primary) !important; + border-color: var(--card-border) !important; +} + +.modal-header, +.modal-footer { + border-color: var(--border-color) !important; +} + +.modal-header .close { + color: var(--text-primary) !important; +} + +/* Alerts */ +.alert { + background-color: var(--alert-bg) !important; + border-color: var(--border-color) !important; +} + +.alert-warning { + background-color: #fff3cd !important; + color: #856404 !important; +} + +:root.dark-mode .alert-warning { + background-color: #3d3417 !important; + color: #ffecb3 !important; +} + +.alert-danger { + background-color: #f8d7da !important; + color: #721c24 !important; +} + +:root.dark-mode .alert-danger { + background-color: #3d1a1f !important; + color: #f8d7da !important; +} + +/* Badges */ +.badge { + background-color: var(--badge-bg) !important; + color: var(--text-primary) !important; +} + +/* Containers & Sections */ +.container, +.container-fluid { + color: var(--text-primary); +} + +/* Text colors */ +.text-muted { + color: var(--text-secondary) !important; +} + +h1, h2, h3, h4, h5, h6 { + color: var(--text-primary); +} + +small { + color: var(--text-secondary); +} + +/* Borders */ +.border { + border-color: var(--border-color) !important; +} + +hr { + border-color: var(--border-color); +} + +/* Dropdown menus */ +.dropdown-menu { + background-color: var(--card-bg); + border-color: var(--card-border); +} + +.dropdown-item { + color: var(--text-primary); +} + +.dropdown-item:hover { + background-color: var(--bg-secondary); + color: var(--text-primary); +} + +/* Tooltips */ +.tooltip kbd { + background-color: var(--bg-tertiary); + color: var(--text-primary); +} + +/* Upload page specific */ +.upload-container { + background-color: var(--bg-secondary) !important; +} + +.archives-list { + background-color: var(--card-bg) !important; + border-color: var(--card-border) !important; +} + +/* Job Status specific */ +.job-status-container { + background-color: var(--card-bg) !important; + border-color: var(--card-border) !important; +} + +.no-jobs { + color: var(--text-secondary); +} + +/* Progress bars - keep colors for status indication */ +.progress { + background-color: var(--bg-tertiary); +} + +/* List groups */ +.list-group-item { + background-color: var(--card-bg) !important; + border-color: var(--card-border) !important; + color: var(--text-primary) !important; +} + +.list-group-item:hover { + background-color: var(--bg-secondary) !important; +} + +/* Ensure light text on dark buttons */ +:root.dark-mode .btn-primary, +:root.dark-mode .btn-success, 
diff --git a/morphocluster/frontend/src/components/ClusterModal.vue b/morphocluster/frontend/src/components/ClusterModal.vue
new file mode 100644
index 0000000..c615b50
--- /dev/null
+++ b/morphocluster/frontend/src/components/ClusterModal.vue
@@ -0,0 +1,454 @@
diff --git a/morphocluster/frontend/src/components/DarkModeControl.vue b/morphocluster/frontend/src/components/DarkModeControl.vue
index 1444ff6..5d14844 100644
--- a/morphocluster/frontend/src/components/DarkModeControl.vue
+++ b/morphocluster/frontend/src/components/DarkModeControl.vue
@@ -1,40 +1,80 @@
diff --git a/morphocluster/frontend/src/components/FeatureModal.vue b/morphocluster/frontend/src/components/FeatureModal.vue
new file mode 100644
index 0000000..788d81b
--- /dev/null
+++ b/morphocluster/frontend/src/components/FeatureModal.vue
@@ -0,0 +1,598 @@
diff --git a/morphocluster/frontend/src/components/FormatModal.vue b/morphocluster/frontend/src/components/FormatModal.vue
new file mode 100644
index 0000000..027c83e
--- /dev/null
+++ b/morphocluster/frontend/src/components/FormatModal.vue
@@ -0,0 +1,432 @@
diff --git a/morphocluster/frontend/src/components/JobStatus.vue b/morphocluster/frontend/src/components/JobStatus.vue
new file mode 100644
index 0000000..1e747cb
--- /dev/null
+++ b/morphocluster/frontend/src/components/JobStatus.vue
@@ -0,0 +1,592 @@
diff --git a/morphocluster/frontend/src/components/UploadZone.vue b/morphocluster/frontend/src/components/UploadZone.vue
new file mode 100644
index 0000000..e04760a
--- /dev/null
+++ b/morphocluster/frontend/src/components/UploadZone.vue
@@ -0,0 +1,490 @@
diff --git a/morphocluster/frontend/src/helpers/api.js b/morphocluster/frontend/src/helpers/api.js
index 7f9929d..e48fa0e 100644
--- a/morphocluster/frontend/src/helpers/api.js
+++ b/morphocluster/frontend/src/helpers/api.js
@@ -142,4 +142,84 @@ export function getUnfilledNodes(project_id) {
 
 export function log(action, node_id = null, reverse_action = null, data = null) {
     return axios.post(`/api/log`, { action, node_id, reverse_action, data });
+}
+
+// Upload and Processing Pipeline
+
+export function uploadArchives(files) {
+    const formData = new FormData();
+    files.forEach(file => {
+        formData.append('files', file);
+    });
+    return axios.post('/api/upload', formData, {
+        headers: {
+            'Content-Type': 'multipart/form-data',
+        },
+    }).then(response => response.data);
+}
+
+export function validateArchive(fileName) {
+    return axios.get(`/api/files/${fileName}/validate`)
+        .then(response => response.data);
+}
+
+export function previewArchive(fileName) {
+    return axios.get(`/api/files/${fileName}/preview`)
+        .then(response => response.data);
+}
+
+export function convertEcoTaxaFormat(fileName, parameters) {
+    return axios.post(`/api/files/${fileName}/convert`, parameters)
+        .then(response => response.data);
+}
+
+export function extractFeatures(fileName, parameters) {
+    return axios.post(`/api/files/${fileName}/extract`, parameters)
+        .then(response => response.data);
+}
+
+export function createProjectFromFeatures(featuresId, parameters) {
+    return axios.post(`/api/features/${featuresId}/cluster`, parameters)
+        .then(response => response.data);
+}
+
+export function reclusterProject(projectId, parameters) {
+    return axios.post(`/api/projects/${projectId}/recluster`, parameters)
+        .then(response => response.data);
+}
+
+// Job Management
+
+export function getUserJobs() {
+    return axios.get('/api/jobs/user')
+        .then(response => response.data);
+}
+
+export function getJobStatus(jobId) {
+    return axios.get(`/api/jobs/${jobId}/status`)
+        .then(response => response.data);
+}
+
+export function cancelJob(jobId) {
+    return axios.delete(`/api/jobs/${jobId}`)
+        .then(response => response.data);
+}
+
+// ===============================================================================
+// Uploaded Archives Management
+// ===============================================================================
+
+export function getUploadedArchives() {
+    return axios.get('/api/uploaded-archives')
+        .then(response => response.data);
+}
+
+export function saveUploadedArchive(archiveData) {
+    return axios.post('/api/uploaded-archives', archiveData)
+        .then(response => response.data);
+}
+
+export function updateUploadedArchive(archiveId, updates) {
+    return axios.put(`/api/uploaded-archives/${archiveId}`, updates)
+        .then(response => response.data);
 }
\ No newline at end of file
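Taken together, these helpers cover the whole pipeline: upload, validate, optionally convert, then feature extraction and clustering. A rough usage sketch (the response field names such as files[0].id, is_valid and needs_conversion are assumptions for illustration, not guaranteed by the API):

// Hypothetical end-to-end use of the helpers above; the response field names are
// assumptions and would need to match the actual server payloads.
import { uploadArchives, validateArchive, convertEcoTaxaFormat } from "./api";

async function uploadAndPrepare(selectedFiles) {
    const upload = await uploadArchives(selectedFiles);
    const archiveId = upload.files[0].id;

    const validation = await validateArchive(archiveId);
    if (!validation.is_valid) {
        throw new Error("Archive failed validation");
    }

    if (validation.needs_conversion) {
        // Conversion runs server-side; extractFeatures/createProjectFromFeatures
        // follow the same promise-returning pattern afterwards.
        await convertEcoTaxaFormat(archiveId, { encoding: validation.detected_encoding });
    }
    return archiveId;
}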
diff --git a/morphocluster/frontend/src/main.js b/morphocluster/frontend/src/main.js
index 9061aac..4ac7433 100644
--- a/morphocluster/frontend/src/main.js
+++ b/morphocluster/frontend/src/main.js
@@ -1,6 +1,7 @@
 import { createApp } from 'vue'
 import App from './App.vue'
 import router from './router'
+import axios from 'axios'
 
 // Bootstrap Vue 3
 import BootstrapVueNext from 'bootstrap-vue-next'
@@ -12,6 +13,9 @@ import './assets/styles.css'
 
 const app = createApp(App)
 
+// Configure axios
+app.config.globalProperties.$axios = axios
+
 app.use(router)
 app.use(BootstrapVueNext)
diff --git a/morphocluster/frontend/src/router.js b/morphocluster/frontend/src/router.js
index d2dafd4..994b1b9 100644
--- a/morphocluster/frontend/src/router.js
+++ b/morphocluster/frontend/src/router.js
@@ -29,6 +29,16 @@ const routes = [
        path: '/p',
        component: () => import(/* webpackChunkName: "projects" */ './views/Projects.vue'),
    },
+    {
+        name: 'upload',
+        path: '/upload',
+        component: () => import(/* webpackChunkName: "upload" */ './views/Upload.vue'),
+    },
+    {
+        name: 'jobs',
+        path: '/jobs',
+        component: () => import(/* webpackChunkName: "jobs" */ './views/JobQueue.vue'),
+    },
    {
        name: 'files',
        path: '/files/:file_path?',
@@ -48,7 +58,7 @@ const routes = [
    },
    {
        path: '/',
-        redirect: '/home'
+        redirect: '/p'
    },
    { path: '/:pathMatch(.*)*', component: NotFound }
 ]
diff --git a/morphocluster/frontend/src/views/Approve.vue b/morphocluster/frontend/src/views/Approve.vue
index 5d313ec..f9ff9b1 100644
--- a/morphocluster/frontend/src/views/Approve.vue
+++ b/morphocluster/frontend/src/views/Approve.vue
@@ -1,9 +1,10 @@
@@ -281,6 +283,14 @@
       // Should members_url be updated (with unique id etc.) on response?
       var updateMembersUrl = false;
 
+      // Guard against null node
+      if (!this.node) {
+        if ($state && $state.complete) {
+          $state.complete();
+        }
+        return;
+      }
+
       if (!this.members_url) {
         const nodes = this.node.children;
         this.members_url = `/api/nodes/${
@@ -402,6 +412,12 @@
     moveupMember(member) {
       console.log("Remove", this.getUniqueId(member));
 
+      // Guard against null node
+      if (!this.node || !this.node.parent_id) {
+        console.error("Cannot move member: node or parent_id is null");
+        return;
+      }
+
       // TODO: Also reject members.
       api.nodeAdoptMembers(this.node.parent_id, [member])
         .then(() => {
@@ -426,6 +442,7 @@
   align-items: stretch;
   flex: 1;
   overflow: hidden;
+  background-color: var(--bg-primary);
 }
 
 #node-info {
@@ -448,6 +465,17 @@
   margin: 0 1em;
 }
 
+/* Dark mode support */
+#approve .navbar-brand .navbar-logo {
+  height: 32px;
+  width: 32px;
+  object-fit: contain;
+}
+
+:root.dark-mode #approve .navbar-brand .navbar-logo {
+  filter: brightness(0) invert(1);
+}
+
 #progress {
   display: flex;
   flex-wrap: nowrap;
diff --git a/morphocluster/frontend/src/views/Bisect.vue b/morphocluster/frontend/src/views/Bisect.vue
index b625757..8be6906 100644
--- a/morphocluster/frontend/src/views/Bisect.vue
+++ b/morphocluster/frontend/src/views/Bisect.vue
@@ -1,9 +1,10 @@
@@ -756,6 +758,16 @@ export default {
diff --git a/morphocluster/frontend/src/views/Home.vue b/morphocluster/frontend/src/views/Home.vue
index 2d87fb8..30e1654 100644
--- a/morphocluster/frontend/src/views/Home.vue
+++ b/morphocluster/frontend/src/views/Home.vue
@@ -12,6 +12,11 @@
         }">Files
+
+        Upload Data
+
diff --git a/morphocluster/frontend/src/views/JobQueue.vue b/morphocluster/frontend/src/views/JobQueue.vue
new file mode 100644
index 0000000..abef0e4
--- /dev/null
+++ b/morphocluster/frontend/src/views/JobQueue.vue
@@ -0,0 +1,445 @@
diff --git a/morphocluster/frontend/src/views/Project.vue b/morphocluster/frontend/src/views/Project.vue
index 98952e7..b7c4141 100644
--- a/morphocluster/frontend/src/views/Project.vue
+++ b/morphocluster/frontend/src/views/Project.vue
@@ -1,13 +1,22 @@