From d7b70fceb320c7d1097cbdd5aa5fd58b30882353 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Mon, 22 Sep 2025 15:16:10 -0400 Subject: [PATCH 01/13] initial ui changes --- morphocluster/api.py | 266 ++++++++ morphocluster/frontend/.gitignore | 2 + .../frontend/src/components/FeatureModal.vue | 596 ++++++++++++++++++ .../frontend/src/components/FormatModal.vue | 399 ++++++++++++ .../frontend/src/components/JobStatus.vue | 512 +++++++++++++++ .../frontend/src/components/UploadZone.vue | 484 ++++++++++++++ morphocluster/frontend/src/helpers/api.js | 61 ++ morphocluster/frontend/src/router.js | 5 + morphocluster/frontend/src/views/Home.vue | 5 + morphocluster/frontend/src/views/Upload.vue | 562 +++++++++++++++++ 10 files changed, 2892 insertions(+) create mode 100644 morphocluster/frontend/src/components/FeatureModal.vue create mode 100644 morphocluster/frontend/src/components/FormatModal.vue create mode 100644 morphocluster/frontend/src/components/JobStatus.vue create mode 100644 morphocluster/frontend/src/components/UploadZone.vue create mode 100644 morphocluster/frontend/src/views/Upload.vue diff --git a/morphocluster/api.py b/morphocluster/api.py index 74cbe4c..2f453c4 100644 --- a/morphocluster/api.py +++ b/morphocluster/api.py @@ -325,6 +325,272 @@ def upload_files(path=""): raise werkzeug.exceptions.BadRequest() +# =============================================================================== +# /upload - Data Pipeline Upload Interface +# =============================================================================== + +@api.route("/upload", methods=["POST"]) +def upload_archives(): + """ + Mock endpoint for uploading data archives. + Returns mock response for frontend testing. + """ + uploaded_files = request.files.getlist("files") + + if not uploaded_files: + raise werkzeug.exceptions.BadRequest("No files provided") + + # Mock response + result = { + "message": "Files uploaded successfully", + "files": [ + { + "name": file.filename, + "size": file.content_length or 1024000, # Mock size if not available + "id": str(uuid.uuid4()), + "status": "uploaded" + } + for file in uploaded_files + ] + } + + return jsonify(result), 200 + + +@api.route("/files//validate", methods=["GET"]) +def validate_archive(file_id): + """ + Mock endpoint for archive validation. + Returns mock validation data for frontend testing. + """ + # Mock validation response + result = { + "is_valid": True, + "format": "ecotaxa" if "ecotaxa" in file_id.lower() else "standard", + "needs_conversion": "ecotaxa" in file_id.lower(), + "file_count": 1250, + "image_count": 1200, + "detected_encoding": "utf-8", + "detected_delimiter": "\t" if "ecotaxa" in file_id.lower() else ",", + "validation_warnings": [] + } + + return jsonify(result), 200 + + +@api.route("/files//preview", methods=["GET"]) +def preview_archive(file_id): + """ + Mock endpoint for archive preview. + Returns mock preview data for frontend testing. 
+ """ + # Mock preview response + result = { + "files": [ + "index.csv", + "images/img001.jpg", + "images/img002.jpg", + "images/img003.jpg", + "images/img004.jpg", + "images/img005.jpg" + ], + "total_rows": 1200, + "detected_encoding": "utf-8", + "detected_delimiter": "," if "standard" in file_id.lower() else "\t", + "columns": [ + {"key": "object_id", "label": "Object ID"}, + {"key": "img_file_name", "label": "Image File"}, + {"key": "object_lat", "label": "Latitude"}, + {"key": "object_lon", "label": "Longitude"}, + {"key": "object_depth", "label": "Depth"} + ], + "sample_rows": [ + { + "object_id": "obj_001", + "img_file_name": "images/img001.jpg", + "object_lat": "45.123", + "object_lon": "-125.456", + "object_depth": "10.5" + }, + { + "object_id": "obj_002", + "img_file_name": "images/img002.jpg", + "object_lat": "45.124", + "object_lon": "-125.457", + "object_depth": "12.1" + }, + { + "object_id": "obj_003", + "img_file_name": "images/img003.jpg", + "object_lat": "45.125", + "object_lon": "-125.458", + "object_depth": "8.9" + } + ] + } + + return jsonify(result), 200 + + +@api.route("/files//convert", methods=["POST"]) +def convert_ecotaxa_format(file_id): + """ + Mock endpoint for EcoTaxa format conversion. + Creates a mock background job for frontend testing. + """ + data = request.get_json() or {} + + # Mock job creation + job_id = str(uuid.uuid4()) + + result = { + "job_id": job_id, + "status": "started", + "message": "EcoTaxa conversion job started", + "parameters": data + } + + return jsonify(result), 202 + + +@api.route("/files//extract", methods=["POST"]) +def extract_features(file_id): + """ + Mock endpoint for feature extraction. + Creates a mock background job for frontend testing. + """ + data = request.get_json() or {} + + # Mock job creation + job_id = str(uuid.uuid4()) + + result = { + "job_id": job_id, + "status": "started", + "message": "Feature extraction job started", + "parameters": data + } + + return jsonify(result), 202 + + +@api.route("/jobs/user", methods=["GET"]) +def get_user_jobs(): + """ + Mock endpoint for getting user jobs. + Returns mock job data for frontend testing. + """ + # Mock jobs with different statuses + jobs = [ + { + "id": "job_001", + "job_type": "format_conversion", + "status": "completed", + "progress": 100, + "created_at": "2024-01-15T10:30:00Z", + "completed_at": "2024-01-15T10:32:15Z", + "parameters": { + "archive_name": "sample_ecotaxa.zip", + "encoding": "utf-8", + "delimiter": "\t" + }, + "result_url": "/files/converted_sample" + }, + { + "id": "job_002", + "job_type": "feature_extraction", + "status": "running", + "progress": 45, + "created_at": "2024-01-15T11:00:00Z", + "current_step": "Processing batch 450/1000", + "eta": 300, + "parameters": { + "archive_name": "marine_plankton.zip", + "model": "resnet50", + "batch_size": 512 + }, + "logs": [ + { + "timestamp": "2024-01-15T11:00:00Z", + "level": "info", + "message": "Starting feature extraction..." + }, + { + "timestamp": "2024-01-15T11:05:30Z", + "level": "info", + "message": "Processed 200 images" + }, + { + "timestamp": "2024-01-15T11:10:15Z", + "level": "info", + "message": "Processed 450 images" + } + ] + }, + { + "id": "job_003", + "job_type": "initial_clustering", + "status": "failed", + "progress": 25, + "created_at": "2024-01-15T09:15:00Z", + "failed_at": "2024-01-15T09:45:30Z", + "error_message": "Insufficient memory for clustering. 
Try reducing batch size.", + "parameters": { + "min_cluster_size": 128, + "method": "EOM" + } + } + ] + + return jsonify(jobs), 200 + + +@api.route("/jobs//status", methods=["GET"]) +def get_job_status(job_id): + """ + Mock endpoint for getting individual job status. + Returns mock job status for frontend testing. + """ + # Mock job status based on job_id + if job_id == "job_001": + job = { + "id": job_id, + "status": "completed", + "progress": 100, + "result_url": "/files/converted_sample" + } + elif job_id == "job_002": + job = { + "id": job_id, + "status": "running", + "progress": 65, + "current_step": "Processing batch 650/1000", + "eta": 180 + } + else: + job = { + "id": job_id, + "status": "pending", + "progress": 0 + } + + return jsonify(job), 200 + + +@api.route("/jobs/", methods=["DELETE"]) +def cancel_job(job_id): + """ + Mock endpoint for cancelling a job. + Returns mock cancellation response for frontend testing. + """ + result = { + "message": f"Job {job_id} cancellation requested", + "status": "cancelling" + } + + return jsonify(result), 200 + + # =============================================================================== # /projects # =============================================================================== diff --git a/morphocluster/frontend/.gitignore b/morphocluster/frontend/.gitignore index c6e316b..e5fa33f 100644 --- a/morphocluster/frontend/.gitignore +++ b/morphocluster/frontend/.gitignore @@ -1,5 +1,6 @@ .DS_Store node_modules +/dist # local env files .env.local @@ -9,6 +10,7 @@ node_modules npm-debug.log* yarn-debug.log* yarn-error.log* +pnpm-debug.log* # Editor directories and files .idea diff --git a/morphocluster/frontend/src/components/FeatureModal.vue b/morphocluster/frontend/src/components/FeatureModal.vue new file mode 100644 index 0000000..1cd0cce --- /dev/null +++ b/morphocluster/frontend/src/components/FeatureModal.vue @@ -0,0 +1,596 @@ + + + + + \ No newline at end of file diff --git a/morphocluster/frontend/src/components/FormatModal.vue b/morphocluster/frontend/src/components/FormatModal.vue new file mode 100644 index 0000000..5adc17e --- /dev/null +++ b/morphocluster/frontend/src/components/FormatModal.vue @@ -0,0 +1,399 @@ + + + + + \ No newline at end of file diff --git a/morphocluster/frontend/src/components/JobStatus.vue b/morphocluster/frontend/src/components/JobStatus.vue new file mode 100644 index 0000000..fbd8150 --- /dev/null +++ b/morphocluster/frontend/src/components/JobStatus.vue @@ -0,0 +1,512 @@ + + + + + \ No newline at end of file diff --git a/morphocluster/frontend/src/components/UploadZone.vue b/morphocluster/frontend/src/components/UploadZone.vue new file mode 100644 index 0000000..bb61135 --- /dev/null +++ b/morphocluster/frontend/src/components/UploadZone.vue @@ -0,0 +1,484 @@ + + + + + \ No newline at end of file diff --git a/morphocluster/frontend/src/helpers/api.js b/morphocluster/frontend/src/helpers/api.js index 7f9929d..d0bfe3c 100644 --- a/morphocluster/frontend/src/helpers/api.js +++ b/morphocluster/frontend/src/helpers/api.js @@ -142,4 +142,65 @@ export function getUnfilledNodes(project_id) { export function log(action, node_id = null, reverse_action = null, data = null) { return axios.post(`/api/log`, { action, node_id, reverse_action, data }); +} + +// Upload and Processing Pipeline + +export function uploadArchives(files) { + const formData = new FormData(); + files.forEach(file => { + formData.append('files', file); + }); + return axios.post('/api/upload', formData, { + headers: { + 'Content-Type': 
'multipart/form-data', + }, + }).then(response => response.data); +} + +export function validateArchive(fileName) { + return axios.get(`/api/files/${fileName}/validate`) + .then(response => response.data); +} + +export function previewArchive(fileName) { + return axios.get(`/api/files/${fileName}/preview`) + .then(response => response.data); +} + +export function convertEcoTaxaFormat(fileName, parameters) { + return axios.post(`/api/files/${fileName}/convert`, parameters) + .then(response => response.data); +} + +export function extractFeatures(fileName, parameters) { + return axios.post(`/api/files/${fileName}/extract`, parameters) + .then(response => response.data); +} + +export function createProjectFromFeatures(featuresId, parameters) { + return axios.post(`/api/features/${featuresId}/cluster`, parameters) + .then(response => response.data); +} + +export function reclusterProject(projectId, parameters) { + return axios.post(`/api/projects/${projectId}/recluster`, parameters) + .then(response => response.data); +} + +// Job Management + +export function getUserJobs() { + return axios.get('/api/jobs/user') + .then(response => response.data); +} + +export function getJobStatus(jobId) { + return axios.get(`/api/jobs/${jobId}/status`) + .then(response => response.data); +} + +export function cancelJob(jobId) { + return axios.delete(`/api/jobs/${jobId}`) + .then(response => response.data); } \ No newline at end of file diff --git a/morphocluster/frontend/src/router.js b/morphocluster/frontend/src/router.js index d2dafd4..3ecd376 100644 --- a/morphocluster/frontend/src/router.js +++ b/morphocluster/frontend/src/router.js @@ -29,6 +29,11 @@ const routes = [ path: '/p', component: () => import(/* webpackChunkName: "projects" */ './views/Projects.vue'), }, + { + name: 'upload', + path: '/upload', + component: () => import(/* webpackChunkName: "upload" */ './views/Upload.vue'), + }, { name: 'files', path: '/files/:file_path?', diff --git a/morphocluster/frontend/src/views/Home.vue b/morphocluster/frontend/src/views/Home.vue index 2d87fb8..30e1654 100644 --- a/morphocluster/frontend/src/views/Home.vue +++ b/morphocluster/frontend/src/views/Home.vue @@ -12,6 +12,11 @@ }"> Files + + Upload Data + diff --git a/morphocluster/frontend/src/views/Upload.vue b/morphocluster/frontend/src/views/Upload.vue new file mode 100644 index 0000000..1d13e8c --- /dev/null +++ b/morphocluster/frontend/src/views/Upload.vue @@ -0,0 +1,562 @@ + + + + + \ No newline at end of file From e79e7ed88be90fb47dbfe5d1417727e8c146e939 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Mon, 22 Sep 2025 15:27:54 -0400 Subject: [PATCH 02/13] simplify upload buttons --- morphocluster/frontend/src/components/UploadZone.vue | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/morphocluster/frontend/src/components/UploadZone.vue b/morphocluster/frontend/src/components/UploadZone.vue index bb61135..0cc327f 100644 --- a/morphocluster/frontend/src/components/UploadZone.vue +++ b/morphocluster/frontend/src/components/UploadZone.vue @@ -57,11 +57,9 @@
-
-            Process Files
-
-
-            Upload More
+
+            Upload More Files
From e0afef4ec7d036d2d86cd249d1cf774138d0fc68 Mon Sep 17 00:00:00 2001 From: John Walsh Date: Wed, 24 Sep 2025 17:18:02 -0400 Subject: [PATCH 03/13] complete upload -> ecotaxa conversion -> feature extraction -> cluster functionality --- docker/morphocluster/Dockerfile | 1 + morphocluster/api.py | 644 +++++++++++++----- morphocluster/background.py | 513 +++++++++++++- .../frontend/src/components/ClusterModal.vue | 360 ++++++++++ .../frontend/src/components/FeatureModal.vue | 14 +- .../frontend/src/components/JobStatus.vue | 74 +- .../frontend/src/components/UploadZone.vue | 14 +- morphocluster/frontend/src/main.js | 4 + morphocluster/frontend/src/views/Approve.vue | 14 + morphocluster/frontend/src/views/Project.vue | 57 +- morphocluster/frontend/src/views/Upload.vue | 311 +++++++-- 11 files changed, 1727 insertions(+), 279 deletions(-) create mode 100644 morphocluster/frontend/src/components/ClusterModal.vue diff --git a/docker/morphocluster/Dockerfile b/docker/morphocluster/Dockerfile index 77afe45..05cacca 100644 --- a/docker/morphocluster/Dockerfile +++ b/docker/morphocluster/Dockerfile @@ -39,6 +39,7 @@ COPY pyproject.toml uv.lock versioneer.py setup.cfg MANIFEST.in README.rst ./ COPY tests ./tests COPY morphocluster ./morphocluster COPY migrations ./migrations +COPY data ./data # Install the application with dependencies from lockfile RUN uv sync --frozen diff --git a/morphocluster/api.py b/morphocluster/api.py index 2f453c4..c5689b5 100644 --- a/morphocluster/api.py +++ b/morphocluster/api.py @@ -332,217 +332,541 @@ def upload_files(path=""): @api.route("/upload", methods=["POST"]) def upload_archives(): """ - Mock endpoint for uploading data archives. - Returns mock response for frontend testing. + Upload data archives for processing pipeline. + Saves files to FILES_DIR and returns file information. """ uploaded_files = request.files.getlist("files") if not uploaded_files: raise werkzeug.exceptions.BadRequest("No files provided") - # Mock response result = { "message": "Files uploaded successfully", - "files": [ - { - "name": file.filename, - "size": file.content_length or 1024000, # Mock size if not available - "id": str(uuid.uuid4()), - "status": "uploaded" - } - for file in uploaded_files - ] + "files": [] } + for upload_file in uploaded_files: + if upload_file.filename: + # Use the same security function as the existing upload + filename = secure_path_and_name(upload_file.filename) + + # Save to FILES_DIR (same location validation expects) + server_path = os.path.join(app.config["FILES_DIR"], filename) + + # Ensure directory exists + os.makedirs(os.path.dirname(server_path), exist_ok=True) + + # Save the file + upload_file.save(server_path) + + # Get actual file size + file_size = os.path.getsize(server_path) + + result["files"].append({ + "name": filename, + "size": file_size, + "id": filename, # Use filename as ID for validation + "status": "uploaded" + }) + return jsonify(result), 200 @api.route("/files//validate", methods=["GET"]) def validate_archive(file_id): """ - Mock endpoint for archive validation. - Returns mock validation data for frontend testing. + Validate an uploaded archive file. + Checks ZIP structure, required files, and detects format. 
""" - # Mock validation response - result = { - "is_valid": True, - "format": "ecotaxa" if "ecotaxa" in file_id.lower() else "standard", - "needs_conversion": "ecotaxa" in file_id.lower(), - "file_count": 1250, - "image_count": 1200, - "detected_encoding": "utf-8", - "detected_delimiter": "\t" if "ecotaxa" in file_id.lower() else ",", - "validation_warnings": [] - } + import zipfile + import csv + import chardet + from pathlib import Path - return jsonify(result), 200 + try: + # Decode URL-encoded filename + from urllib.parse import unquote + filename = unquote(file_id) + + # Find the uploaded file + upload_path = Path(app.config["FILES_DIR"]) / filename + + if not upload_path.exists(): + return jsonify({ + "is_valid": False, + "error": f"File {filename} not found", + "validation_warnings": [] + }), 404 + + result = { + "is_valid": False, + "format": "unknown", + "needs_conversion": False, + "file_count": 0, + "image_count": 0, + "detected_encoding": None, + "detected_delimiter": None, + "validation_warnings": [] + } + + # Check if it's a ZIP file + if not zipfile.is_zipfile(upload_path): + result["error"] = "File is not a valid ZIP archive" + return jsonify(result), 200 + + # Examine ZIP contents + with zipfile.ZipFile(upload_path, 'r') as zip_file: + file_list = zip_file.namelist() + result["file_count"] = len(file_list) + + # Count image files + image_extensions = ('.jpg', '.jpeg', '.png', '.tiff', '.tif') + image_files = [f for f in file_list if f.lower().endswith(image_extensions)] + result["image_count"] = len(image_files) + + # Look for metadata files + csv_files = [f for f in file_list if f.endswith('.csv')] + tsv_files = [f for f in file_list if f.endswith('.tsv')] + + # Detect format based on files present + if 'index.csv' in file_list: + result["format"] = "standard" + result["needs_conversion"] = False + metadata_file = 'index.csv' + elif tsv_files or any('ecotaxa' in f.lower() for f in csv_files): + result["format"] = "ecotaxa" + result["needs_conversion"] = True + metadata_file = tsv_files[0] if tsv_files else csv_files[0] + elif csv_files: + result["format"] = "csv" + result["needs_conversion"] = True + metadata_file = csv_files[0] + else: + result["validation_warnings"].append("No metadata file (CSV/TSV) found") + metadata_file = None + + # Analyze metadata file if found + if metadata_file: + try: + with zip_file.open(metadata_file) as csv_data: + # Detect encoding + raw_data = csv_data.read(10000) # Read first 10KB + encoding_result = chardet.detect(raw_data) + result["detected_encoding"] = encoding_result.get('encoding', 'utf-8') + + # Detect delimiter + sample_text = raw_data.decode(result["detected_encoding"], errors='ignore') + sample_lines = sample_text.split('\n')[:5] + + if sample_lines: + # Count delimiters in first few lines + delimiters = [',', '\t', ';', '|'] + delimiter_counts = {} + + for line in sample_lines: + for delim in delimiters: + delimiter_counts[delim] = delimiter_counts.get(delim, 0) + line.count(delim) + + # Choose most common delimiter + if delimiter_counts: + result["detected_delimiter"] = max(delimiter_counts, key=delimiter_counts.get) + + except Exception as e: + result["validation_warnings"].append(f"Could not analyze metadata file: {str(e)}") + + # Validation checks + if result["image_count"] == 0: + result["validation_warnings"].append("No image files found") + + if metadata_file is None: + result["validation_warnings"].append("No metadata file found") + + # Archive is valid if it has images and metadata + result["is_valid"] = 
(result["image_count"] > 0 and metadata_file is not None) + + return jsonify(result), 200 + + except Exception as e: + return jsonify({ + "is_valid": False, + "error": f"Validation failed: {str(e)}", + "validation_warnings": [] + }), 500 @api.route("/files//preview", methods=["GET"]) def preview_archive(file_id): """ - Mock endpoint for archive preview. - Returns mock preview data for frontend testing. + Preview archive contents and extract sample data from CSV/TSV files. """ - # Mock preview response - result = { - "files": [ - "index.csv", - "images/img001.jpg", - "images/img002.jpg", - "images/img003.jpg", - "images/img004.jpg", - "images/img005.jpg" - ], - "total_rows": 1200, - "detected_encoding": "utf-8", - "detected_delimiter": "," if "standard" in file_id.lower() else "\t", - "columns": [ - {"key": "object_id", "label": "Object ID"}, - {"key": "img_file_name", "label": "Image File"}, - {"key": "object_lat", "label": "Latitude"}, - {"key": "object_lon", "label": "Longitude"}, - {"key": "object_depth", "label": "Depth"} - ], - "sample_rows": [ - { - "object_id": "obj_001", - "img_file_name": "images/img001.jpg", - "object_lat": "45.123", - "object_lon": "-125.456", - "object_depth": "10.5" - }, - { - "object_id": "obj_002", - "img_file_name": "images/img002.jpg", - "object_lat": "45.124", - "object_lon": "-125.457", - "object_depth": "12.1" - }, - { - "object_id": "obj_003", - "img_file_name": "images/img003.jpg", - "object_lat": "45.125", - "object_lon": "-125.458", - "object_depth": "8.9" - } - ] - } + import zipfile + import csv + import chardet + from pathlib import Path + from urllib.parse import unquote - return jsonify(result), 200 + try: + filename = unquote(file_id) + upload_path = Path(app.config["FILES_DIR"]) / filename + + if not upload_path.exists(): + return jsonify({"error": f"File {filename} not found"}), 404 + + if not zipfile.is_zipfile(upload_path): + return jsonify({"error": "File is not a valid ZIP archive"}), 400 + + result = { + "files": [], + "total_rows": 0, + "detected_encoding": None, + "detected_delimiter": None, + "columns": [], + "sample_rows": [] + } + + with zipfile.ZipFile(upload_path, 'r') as zip_file: + # Get all files in archive + file_list = zip_file.namelist() + result["files"] = sorted(file_list) + + # Find metadata file (CSV/TSV) + csv_files = [f for f in file_list if f.endswith('.csv')] + tsv_files = [f for f in file_list if f.endswith('.tsv')] + + metadata_file = None + if 'index.csv' in file_list: + metadata_file = 'index.csv' + elif tsv_files: + metadata_file = tsv_files[0] + elif csv_files: + metadata_file = csv_files[0] + + if metadata_file: + try: + with zip_file.open(metadata_file) as csv_data: + # Detect encoding + raw_data = csv_data.read(50000) # Read first 50KB for better detection + encoding_result = chardet.detect(raw_data) + detected_encoding = encoding_result.get('encoding', 'utf-8') + + # Handle common encoding issues + if detected_encoding.lower() in ['ascii', 'windows-1252', 'iso-8859-1']: + detected_encoding = 'utf-8' + + result["detected_encoding"] = detected_encoding + + # Decode text and detect delimiter + try: + text = raw_data.decode(detected_encoding, errors='replace') + except UnicodeDecodeError: + text = raw_data.decode('utf-8', errors='replace') + + # Detect delimiter by analyzing first few lines + lines = text.split('\n')[:10] + non_empty_lines = [line.strip() for line in lines if line.strip()] + + if non_empty_lines: + # Count delimiters in header and first few data rows + delimiters = [',', '\t', ';', '|'] + 
delimiter_scores = {} + + for delim in delimiters: + scores = [] + for line in non_empty_lines[:5]: # Check first 5 lines + count = line.count(delim) + scores.append(count) + + # Prefer delimiters that appear consistently + if scores and max(scores) > 0: + consistency = len(set(scores)) == 1 # All lines have same count + delimiter_scores[delim] = (max(scores), consistency) + + if delimiter_scores: + # Choose delimiter with highest count and consistency + best_delim = max(delimiter_scores, + key=lambda x: (delimiter_scores[x][1], delimiter_scores[x][0])) + result["detected_delimiter"] = best_delim + + # Parse CSV and extract sample data + if result["detected_delimiter"]: + # Re-read file from beginning for CSV parsing + zip_file.seek(0) # Reset zip file position + with zip_file.open(metadata_file) as csv_data: + text_data = csv_data.read().decode(detected_encoding, errors='replace') + lines = text_data.split('\n') + + # Parse with detected delimiter + csv_reader = csv.DictReader( + lines, + delimiter=result["detected_delimiter"] + ) + + # Get column names + if csv_reader.fieldnames: + result["columns"] = [ + {"key": col.strip(), "label": col.strip()} + for col in csv_reader.fieldnames if col + ] + + # Get sample rows (first 5) + sample_rows = [] + row_count = 0 + + for row in csv_reader: + row_count += 1 + if len(sample_rows) < 5: + # Clean up row data + clean_row = {} + for key, value in row.items(): + if key: # Skip empty keys + clean_row[key.strip()] = str(value).strip() if value else "" + if clean_row: # Only add non-empty rows + sample_rows.append(clean_row) + + result["sample_rows"] = sample_rows + result["total_rows"] = row_count + + except Exception as e: + result["error"] = f"Could not parse metadata file: {str(e)}" + + return jsonify(result), 200 + + except Exception as e: + return jsonify({ + "error": f"Preview failed: {str(e)}" + }), 500 @api.route("/files//convert", methods=["POST"]) def convert_ecotaxa_format(file_id): """ - Mock endpoint for EcoTaxa format conversion. - Creates a mock background job for frontend testing. + Start EcoTaxa format conversion background job for uploaded archive. """ - data = request.get_json() or {} + from urllib.parse import unquote + from morphocluster.background import convert_ecotaxa_job - # Mock job creation - job_id = str(uuid.uuid4()) + filename = unquote(file_id) + parameters = request.get_json() or {} - result = { - "job_id": job_id, - "status": "started", - "message": "EcoTaxa conversion job started", - "parameters": data - } + try: + # Queue the background job + job = convert_ecotaxa_job.queue(filename, parameters) + + # Initialize job metadata + job.meta['status'] = 'queued' + job.meta['progress'] = 0 + job.meta['current_step'] = 'Waiting in queue...' + job.meta['created_at'] = datetime.now().isoformat() + job.meta['job_type'] = 'format_conversion' + job.meta['archive_name'] = filename + job.meta['parameters'] = parameters + job.save_meta() + + result = { + "job_id": job.id, + "status": "queued", + "message": "EcoTaxa conversion job queued", + "parameters": parameters + } - return jsonify(result), 202 + return jsonify(result), 202 + + except Exception as e: + return jsonify({ + "error": f"Failed to queue EcoTaxa conversion job: {str(e)}" + }), 500 @api.route("/files//extract", methods=["POST"]) def extract_features(file_id): """ - Mock endpoint for feature extraction. - Creates a mock background job for frontend testing. + Start feature extraction background job for uploaded archive. 
""" - data = request.get_json() or {} + from urllib.parse import unquote + from morphocluster.background import extract_features_job - # Mock job creation - job_id = str(uuid.uuid4()) + filename = unquote(file_id) + parameters = request.get_json() or {} - result = { - "job_id": job_id, - "status": "started", - "message": "Feature extraction job started", - "parameters": data - } + try: + # Queue the background job + job = extract_features_job.queue(filename, parameters) + + # Initialize job metadata + job.meta['status'] = 'queued' + job.meta['progress'] = 0 + job.meta['current_step'] = 'Waiting in queue...' + job.meta['created_at'] = datetime.now().isoformat() + job.meta['job_type'] = 'feature_extraction' + job.meta['archive_name'] = filename + job.meta['parameters'] = parameters + job.save_meta() + + result = { + "job_id": job.id, + "status": "queued", + "message": "Feature extraction job queued", + "parameters": parameters + } + + return jsonify(result), 202 + + except Exception as e: + return jsonify({ + "error": f"Failed to queue feature extraction job: {str(e)}" + }), 500 + + +@api.route("/files//cluster", methods=["POST"]) +def create_clustering_project(file_id): + """ + Start initial clustering background job to create a new MorphoCluster project. + """ + from urllib.parse import unquote + from morphocluster.background import initial_clustering_job + + filename = unquote(file_id) + parameters = request.get_json() or {} + + # Extract feature file from parameters or construct default name + feature_file = parameters.get('feature_file') + if not feature_file: + # Construct feature file name based on archive name + archive_stem = pathlib.Path(filename).stem + feature_file = f"{archive_stem}_features.h5" + + try: + # Queue the background job + job = initial_clustering_job.queue(filename, feature_file, parameters) + + # Initialize job metadata + job.meta['status'] = 'queued' + job.meta['progress'] = 0 + job.meta['current_step'] = 'Waiting in queue...' + job.meta['created_at'] = datetime.now().isoformat() + job.meta['job_type'] = 'initial_clustering' + job.meta['archive_name'] = filename + job.meta['feature_file'] = feature_file + job.meta['parameters'] = parameters + job.save_meta() + + result = { + "job_id": job.id, + "status": "queued", + "message": "Initial clustering job queued", + "parameters": parameters, + "feature_file": feature_file + } + + return jsonify(result), 202 - return jsonify(result), 202 + except Exception as e: + return jsonify({ + "error": f"Failed to queue initial clustering job: {str(e)}" + }), 500 @api.route("/jobs/user", methods=["GET"]) def get_user_jobs(): """ - Mock endpoint for getting user jobs. - Returns mock job data for frontend testing. + Get all jobs from the RQ queue. + Returns jobs with status, progress, and metadata. 
""" - # Mock jobs with different statuses - jobs = [ - { - "id": "job_001", - "job_type": "format_conversion", - "status": "completed", - "progress": 100, - "created_at": "2024-01-15T10:30:00Z", - "completed_at": "2024-01-15T10:32:15Z", - "parameters": { - "archive_name": "sample_ecotaxa.zip", - "encoding": "utf-8", - "delimiter": "\t" - }, - "result_url": "/files/converted_sample" - }, - { - "id": "job_002", - "job_type": "feature_extraction", - "status": "running", - "progress": 45, - "created_at": "2024-01-15T11:00:00Z", - "current_step": "Processing batch 450/1000", - "eta": 300, - "parameters": { - "archive_name": "marine_plankton.zip", - "model": "resnet50", - "batch_size": 512 - }, - "logs": [ - { - "timestamp": "2024-01-15T11:00:00Z", - "level": "info", - "message": "Starting feature extraction..." - }, - { - "timestamp": "2024-01-15T11:05:30Z", - "level": "info", - "message": "Processed 200 images" - }, - { - "timestamp": "2024-01-15T11:10:15Z", - "level": "info", - "message": "Processed 450 images" - } - ] - }, - { - "id": "job_003", - "job_type": "initial_clustering", - "status": "failed", - "progress": 25, - "created_at": "2024-01-15T09:15:00Z", - "failed_at": "2024-01-15T09:45:30Z", - "error_message": "Insufficient memory for clustering. Try reducing batch size.", - "parameters": { - "min_cluster_size": 128, - "method": "EOM" - } + try: + from rq import Queue + from morphocluster.extensions import rq + + queue = rq.get_queue() + all_jobs = [] + + # Get jobs from different registries + try: + # Active/queued jobs + for job in queue.jobs: + job_data = _format_job_data(job) + if job_data: + all_jobs.append(job_data) + + # Recently finished jobs + finished_registry = queue.finished_job_registry + for job_id in finished_registry.get_job_ids(0, 20): + job = queue.fetch_job(job_id) + if job: + job_data = _format_job_data(job) + if job_data: + all_jobs.append(job_data) + + # Failed jobs + failed_registry = queue.failed_job_registry + for job_id in failed_registry.get_job_ids(0, 20): + job = queue.fetch_job(job_id) + if job: + job_data = _format_job_data(job) + if job_data: + all_jobs.append(job_data) + + except Exception as e: + print(f"Error fetching jobs: {e}") + + # Sort by creation time (newest first) + all_jobs.sort(key=lambda x: x.get('created_at', ''), reverse=True) + + return jsonify(all_jobs), 200 + + except Exception as e: + print(f"Error in get_user_jobs: {e}") + return jsonify([]), 200 + + +def _format_job_data(job): + """Format RQ job data for frontend consumption.""" + try: + if not job: + return None + + # Determine job status + if job.is_queued: + status = 'queued' + elif job.is_started: + status = job.meta.get('status', 'running') + elif job.is_finished: + status = job.meta.get('status', 'completed') + elif job.is_failed: + status = 'failed' + else: + status = 'unknown' + + job_data = { + "id": job.id, + "job_type": job.meta.get('job_type', 'unknown'), + "status": status, + "progress": job.meta.get('progress', 0), + "created_at": job.meta.get('created_at'), + "current_step": job.meta.get('current_step'), + "parameters": job.meta.get('parameters', {}), + "archive_name": job.meta.get('archive_name') } - ] - return jsonify(jobs), 200 + # Add completion/failure details + if job.meta.get('completed_at'): + job_data['completed_at'] = job.meta['completed_at'] + job_data['result'] = job.meta.get('result') + + if job.meta.get('failed_at'): + job_data['failed_at'] = job.meta['failed_at'] + job_data['error_message'] = job.meta.get('error_message') + + # Add timing info + if 
job.started_at: + job_data['started_at'] = job.started_at.isoformat() + if job.ended_at: + job_data['ended_at'] = job.ended_at.isoformat() + + return job_data + + except Exception as e: + print(f"Error formatting job {job.id if job else 'None'}: {e}") + return None @api.route("/jobs//status", methods=["GET"]) diff --git a/morphocluster/background.py b/morphocluster/background.py index a212510..675f354 100644 --- a/morphocluster/background.py +++ b/morphocluster/background.py @@ -1,12 +1,17 @@ import datetime as dt import os +import zipfile +import csv +import time +from pathlib import Path import flask_rq2 from flask import current_app as app from morphocluster.extensions import database, rq from morphocluster.processing.recluster import Recluster -from morphocluster.processing.tree import Tree +from morphocluster.processing.tree import Tree as ProcessingTree +from morphocluster.tree import Tree def validate_background_job(fun): @@ -97,3 +102,509 @@ def recluster_project(project_id, min_cluster_size): print("Project ID: {}".format(project_id)) print("Done.") + + +# =============================================================================== +# Upload Pipeline Background Jobs +# =============================================================================== + +@rq.job(timeout=3600) # 1 hour timeout +def extract_features_job(filename, parameters=None): + """ + Background job for extracting features from uploaded archive using MorphoCluster's real feature extraction. + """ + print(f"Starting feature extraction for {filename}") + + # Get current job for progress updates + from rq import get_current_job + from morphocluster.processing.extract_features import extract_features + import zipfile + + job = get_current_job() + + if parameters is None: + parameters = {} + + # Create application context for Flask app access + from morphocluster import create_app + app_instance = create_app() + with app_instance.app_context(): + try: + files_dir = Path(app_instance.config["FILES_DIR"]) + archive_path = files_dir / filename + + if not archive_path.exists(): + raise FileNotFoundError(f"Archive {filename} not found") + + # Create features output filename + features_filename = f"{archive_path.stem}_features.h5" + features_path = archive_path.parent / features_filename + + # Step 1: Validate archive + job.meta['status'] = 'validating' + job.meta['progress'] = 5 + job.meta['current_step'] = 'Validating archive structure...' + job.save_meta() + + # Check if archive has index.csv + with zipfile.ZipFile(archive_path, 'r') as zip_file: + file_list = zip_file.namelist() + print(f"Archive contents: {file_list[:10]}...") # Show first 10 files for debugging + + if 'index.csv' not in file_list: + # Check if this is an unconverted EcoTaxa file - suggest conversion + ecotaxa_files = [f for f in file_list if f.startswith('ecotaxa_') and f.endswith('.tsv')] + if ecotaxa_files: + raise ValueError(f"Archive appears to be in EcoTaxa format (found {ecotaxa_files[0]}). Please convert it first.") + else: + raise ValueError(f"Archive must contain index.csv file. Found files: {', '.join(file_list[:5])}") + + image_files = [f for f in file_list if f.lower().endswith(('.jpg', '.jpeg', '.png', '.tiff', '.tif'))] + total_images = len(image_files) + + print(f"Archive validation passed. Found {total_images} images") + + # Step 2: Setup parameters + job.meta['progress'] = 10 + job.meta['current_step'] = 'Setting up feature extraction parameters...' 
+ job.save_meta() + + # Extract parameters with defaults + normalize = parameters.get('normalize', True) + batch_size = parameters.get('batch_size', 512) + model_file = parameters.get('model_file', None) + + # Set default model file if not specified + if model_file is None: + model_file = '/code/data/model_state.pth' + + # Parse input_mean and input_std - handle both string and list formats + def parse_mean_std(value, default): + if isinstance(value, str): + if value.strip(): + return tuple(map(float, value.split(','))) + else: + return default + elif isinstance(value, (list, tuple)): + return tuple(value) + else: + return default + + input_mean = parse_mean_std(parameters.get('input_mean'), (0, 0, 0)) + input_std = parse_mean_std(parameters.get('input_std'), (1, 1, 1)) + + print(f"Using parameters: normalize={normalize}, batch_size={batch_size}, model_file={model_file}") + print(f"Input normalization: mean={input_mean}, std={input_std}") + + # Step 3: Start feature extraction + job.meta['progress'] = 15 + job.meta['current_step'] = 'Starting feature extraction (this may take several minutes)...' + job.meta['total_images'] = total_images + job.save_meta() + + # Run MorphoCluster's real feature extraction + extract_features( + archive_fn=str(archive_path), + features_fn=str(features_path), + parameters_fn=model_file, # None for pretrained ImageNet + normalize=normalize, + batch_size=batch_size, + cuda=True, # Use GPU if available + input_mean=input_mean, + input_std=input_std + ) + + # Step 4: Complete + job.meta['status'] = 'completed' + job.meta['progress'] = 100 + job.meta['current_step'] = 'Feature extraction completed' + job.meta['completed_at'] = dt.datetime.now().isoformat() + + # Create result with actual feature file info + result = { + 'feature_file': features_filename, + 'feature_path': str(features_path), + 'total_images': total_images, + 'feature_dimensions': 32, # ResNet18 with 32-dim bottleneck + 'model_used': f'ResNet18 with 32-dim bottleneck: {model_file}', + 'normalize': normalize, + 'batch_size': batch_size + } + + job.meta['result'] = result + job.save_meta() + + print(f"Feature extraction completed for {filename}") + print(f"Features saved to: {features_path}") + return result + + except Exception as e: + print(f"Feature extraction failed: {str(e)}") + job.meta['status'] = 'failed' + job.meta['error_message'] = str(e) + job.meta['failed_at'] = dt.datetime.now().isoformat() + job.save_meta() + raise + + +@rq.job(timeout=1800) # 30 minutes timeout +def convert_ecotaxa_job(filename, parameters=None): + """ + Background job for converting EcoTaxa format to standard format. + Uses MorphoCluster's existing fix_ecotaxa functionality. + """ + print(f"Starting EcoTaxa conversion for {filename}") + + from rq import get_current_job + import shutil + + job = get_current_job() + + if parameters is None: + parameters = {} + + # Create application context for Flask app access + from morphocluster import create_app + app_instance = create_app() + with app_instance.app_context(): + try: + archive_path = Path(app_instance.config["FILES_DIR"]) / filename + + if not archive_path.exists(): + raise FileNotFoundError(f"Archive {filename} not found") + + # Step 1: Analyze parameters + job.meta['status'] = 'analyzing' + job.meta['progress'] = 10 + job.meta['current_step'] = 'Analyzing EcoTaxa format and parameters...' 
+ job.save_meta() + + encoding = parameters.get('encoding') + delimiter = parameters.get('delimiter') + force = parameters.get('force', False) + + # Step 2: Create working copy for conversion + job.meta['progress'] = 20 + job.meta['current_step'] = 'Creating working copy...' + job.save_meta() + + # Create a copy to work on (fix_ecotaxa modifies in place) + work_path = archive_path.with_suffix('.converting.zip') + shutil.copy2(archive_path, work_path) + + # Step 3: Run EcoTaxa conversion using existing MorphoCluster function + job.meta['progress'] = 40 + job.meta['current_step'] = 'Converting EcoTaxa format to standard format...' + job.save_meta() + + try: + # Call fix_ecotaxa function directly (it's a Click command) + from click.testing import CliRunner + from morphocluster.scripts import fix_ecotaxa + + runner = CliRunner() + args = [str(work_path)] + if encoding: + args.extend(['--encoding', encoding]) + if delimiter: + args.extend(['--delimiter', delimiter]) + if force: + args.append('--force') + + result = runner.invoke(fix_ecotaxa, args) + if result.exit_code != 0: + raise RuntimeError(f"EcoTaxa conversion failed: {result.output}") + except Exception as conversion_error: + # Clean up working file + if work_path.exists(): + work_path.unlink() + raise conversion_error + + # Step 4: Validate conversion result + job.meta['progress'] = 80 + job.meta['current_step'] = 'Validating converted archive...' + job.save_meta() + + # Check that index.csv was created + import zipfile + with zipfile.ZipFile(work_path, 'r') as zf: + if 'index.csv' not in zf.namelist(): + raise ValueError("Conversion failed: index.csv not created") + + # Step 5: Replace original with converted version + job.meta['progress'] = 95 + job.meta['current_step'] = 'Finalizing converted archive...' + job.save_meta() + + # Move converted file to final location + converted_path = archive_path.with_name(f"{archive_path.stem}_converted{archive_path.suffix}") + work_path.rename(converted_path) + + # Complete + job.meta['status'] = 'completed' + job.meta['progress'] = 100 + job.meta['current_step'] = 'EcoTaxa conversion completed' + job.meta['completed_at'] = dt.datetime.now().isoformat() + job.meta['result'] = { + 'converted_file': converted_path.name, + 'original_file': filename, + 'encoding': encoding, + 'delimiter': delimiter, + 'conversion_method': 'morphocluster.scripts.fix_ecotaxa' + } + job.save_meta() + + print(f"EcoTaxa conversion completed: {filename} -> {converted_path.name}") + return job.meta['result'] + + except Exception as e: + print(f"EcoTaxa conversion failed: {str(e)}") + job.meta['status'] = 'failed' + job.meta['error_message'] = str(e) + job.meta['failed_at'] = dt.datetime.now().isoformat() + job.save_meta() + raise + + +@rq.job(timeout=7200) # 2 hours timeout +def initial_clustering_job(archive_name, feature_file, parameters=None): + """ + Background job for initial clustering to create a new MorphoCluster project. 
+ """ + print(f"Starting initial clustering for {archive_name}") + + from rq import get_current_job + + job = get_current_job() + + if parameters is None: + parameters = {} + + # Create application context for Flask app access + from morphocluster import create_app + app_instance = create_app() + with app_instance.app_context(): + try: + files_dir = Path(app_instance.config["FILES_DIR"]) + archive_path = files_dir / archive_name + feature_path = files_dir / feature_file + + if not archive_path.exists(): + raise FileNotFoundError(f"Archive {archive_name} not found") + if not feature_path.exists(): + raise FileNotFoundError(f"Feature file {feature_file} not found") + + # Step 1: Setup parameters + job.meta['status'] = 'setting_up' + job.meta['progress'] = 10 + job.meta['current_step'] = 'Setting up clustering parameters...' + job.save_meta() + + # Extract parameters with defaults + project_name = parameters.get('project_name', f"Project-{archive_path.stem}") + description = parameters.get('description', '') + min_cluster_size = parameters.get('min_cluster_size', 128) + min_samples = parameters.get('min_samples', 1) + cluster_selection_method = parameters.get('cluster_selection_method', 'leaf') + sample_size = parameters.get('sample_size', 0) # 0 = use all + keep_unexplored_ratio = parameters.get('keep_unexplored_ratio', 0.0) + + print(f"Clustering parameters: min_cluster_size={min_cluster_size}, method={cluster_selection_method}") + + # Step 2: Extract images from archive + job.meta['progress'] = 15 + job.meta['current_step'] = 'Extracting images from archive...' + job.save_meta() + + import zipfile + import pandas as pd + import h5py + import shutil + from morphocluster import models + + # Create images directory for this archive + images_dir = Path(app_instance.config["IMAGES_DIR"]) + archive_images_dir = images_dir / archive_path.stem + archive_images_dir.mkdir(parents=True, exist_ok=True) + + # Read index.csv from archive to get object_id and path mappings + with zipfile.ZipFile(archive_path, 'r') as zf: + with zf.open('index.csv') as fp: + archive_df = pd.read_csv(fp, dtype=str, usecols=["object_id", "path"]) + + # Extract image files + print(f"Extracting {len(archive_df)} images to {archive_images_dir}") + for _, row in archive_df.iterrows(): + image_path = row['path'] + if image_path in zf.namelist(): + # Extract to the archive-specific directory + extracted_path = zf.extract(image_path, archive_images_dir) + + # Move to flat structure if needed (some archives have subdirectories) + final_path = archive_images_dir / Path(image_path).name + if Path(extracted_path) != final_path: + shutil.move(extracted_path, final_path) + + # Step 3: Load objects from archive into database + job.meta['progress'] = 25 + job.meta['current_step'] = 'Loading objects into database...' 
+ job.save_meta() + + # Load feature vectors from H5 file + with h5py.File(feature_path, 'r') as h5f: + feature_object_ids = h5f['object_id'][:] + features = h5f['features'][:] + + # Convert bytes to strings if necessary + if hasattr(feature_object_ids[0], 'decode'): + feature_object_ids = [oid.decode('utf-8') for oid in feature_object_ids] + else: + feature_object_ids = list(feature_object_ids) + + feature_dims = features.shape[1] if len(features.shape) > 1 else len(features[0]) if len(features) > 0 else 0 + print(f"Archive contains {len(archive_df)} objects, features for {len(feature_object_ids)} objects") + print(f"Feature dimensions: {feature_dims}") + + # Step 3: Insert objects into database with vectors + job.meta['progress'] = 30 + job.meta['current_step'] = 'Inserting objects into database...' + job.save_meta() + + # Create object data for database insertion + object_data = [] + feature_dict = dict(zip(feature_object_ids, features)) + + for _, row in archive_df.iterrows(): + object_id = row['object_id'] + original_path = row['path'] + # Update path to point to extracted image in archive subdirectory + extracted_path = f"{archive_path.stem}/{Path(original_path).name}" + vector = feature_dict.get(object_id) + + if vector is not None: + object_data.append({ + 'object_id': object_id, + 'path': extracted_path, # Path relative to IMAGES_DIR + 'vector': vector # Keep as numpy array - should be 32 dimensions now + }) + + # Insert objects into database + with database.engine.connect() as conn: + with conn.begin(): + # Check if objects already exist to avoid duplicates + existing_objects = conn.execute( + models.objects.select().where( + models.objects.c.object_id.in_([obj['object_id'] for obj in object_data]) + ) + ).fetchall() + existing_object_ids = {obj.object_id for obj in existing_objects} + + # Only insert new objects + new_objects = [obj for obj in object_data if obj['object_id'] not in existing_object_ids] + + if new_objects: + print(f"Inserting {len(new_objects)} new objects into database") + conn.execute(models.objects.insert(), new_objects) + else: + print("All objects already exist in database") + + # Step 4: Initialize clustering + job.meta['progress'] = 40 + job.meta['current_step'] = 'Initializing clustering algorithm...' + job.save_meta() + + recluster = Recluster() + + # Step 5: Load features + job.meta['progress'] = 50 + job.meta['current_step'] = 'Loading extracted features...' + job.save_meta() + + recluster.load_features(str(feature_path)) + + # Step 6: Skip init_tree() - let clustering create the tree structure + job.meta['progress'] = 60 + job.meta['current_step'] = 'Preparing clustering...' + job.save_meta() + + # Note: Not calling recluster.init_tree() - this was interfering with clustering + + # Step 7: Run clustering + job.meta['progress'] = 70 + job.meta['current_step'] = 'Running HDBSCAN clustering (this may take several minutes)...' + job.save_meta() + + # Apply sample size and keep_unexplored_ratio if specified + cluster_kwargs = { + 'min_cluster_size': min_cluster_size, + 'min_samples': min_samples, + 'cluster_selection_method': cluster_selection_method, + } + + if sample_size > 0: + cluster_kwargs['sample_size'] = sample_size + print(f"Using sample size: {sample_size}") + + if keep_unexplored_ratio > 0: + cluster_kwargs['keep_unexplored'] = keep_unexplored_ratio + + recluster.cluster(**cluster_kwargs) + + # Step 8: Get the clustered tree + job.meta['progress'] = 80 + job.meta['current_step'] = 'Building project tree structure...' 
+ job.save_meta() + + # Get the first (and only) tree from recluster + tree = recluster.trees[0] + + # Step 9: Load into database + job.meta['progress'] = 90 + job.meta['current_step'] = 'Creating project in database...' + job.save_meta() + + with database.engine.connect() as conn: + db_tree = Tree(conn) + + with conn.begin(): + project_id = db_tree.load_project(project_name, tree) + root_id = db_tree.get_root_id(project_id) + + print("Consolidating tree structure...") + db_tree.consolidate_node(root_id) + + # Step 10: Complete + job.meta['status'] = 'completed' + job.meta['progress'] = 100 + job.meta['current_step'] = 'Project created successfully' + job.meta['completed_at'] = dt.datetime.now().isoformat() + + # Get final statistics + cluster_count = len(tree.nodes) # Number of nodes/clusters + object_count = len(tree.objects) # Number of objects + + result = { + 'project_id': project_id, + 'project_name': project_name, + 'root_id': root_id, + 'cluster_count': cluster_count, + 'object_count': object_count, + 'min_cluster_size': min_cluster_size, + 'cluster_selection_method': cluster_selection_method, + 'project_url': f'/projects/{project_id}' + } + + job.meta['result'] = result + job.save_meta() + + print(f"Initial clustering completed for {archive_name}") + print(f"Created project '{project_name}' with {cluster_count} clusters and {object_count} objects") + return result + + except Exception as e: + print(f"Initial clustering failed: {str(e)}") + job.meta['status'] = 'failed' + job.meta['error_message'] = str(e) + job.meta['failed_at'] = dt.datetime.now().isoformat() + job.save_meta() + raise diff --git a/morphocluster/frontend/src/components/ClusterModal.vue b/morphocluster/frontend/src/components/ClusterModal.vue new file mode 100644 index 0000000..3ccb84d --- /dev/null +++ b/morphocluster/frontend/src/components/ClusterModal.vue @@ -0,0 +1,360 @@ + + + + + \ No newline at end of file diff --git a/morphocluster/frontend/src/components/FeatureModal.vue b/morphocluster/frontend/src/components/FeatureModal.vue index 1cd0cce..9813d2c 100644 --- a/morphocluster/frontend/src/components/FeatureModal.vue +++ b/morphocluster/frontend/src/components/FeatureModal.vue @@ -273,25 +273,19 @@ export default { showAdvanced: false, customModelFile: null, parameters: { - model: 'resnet50', + model: 'resnet18', custom_architecture: '', batch_size: 512, normalize: true, device: 'auto', - input_mean: '', - input_std: '', + input_mean: '0.485,0.456,0.406', + input_std: '0.229,0.224,0.225', image_size: 224, num_workers: 0, output_format: 'hdf5' }, modelOptions: [ - { value: 'resnet50', text: 'ResNet-50 (ImageNet)' }, - { value: 'resnet101', text: 'ResNet-101 (ImageNet)' }, - { value: 'vgg16', text: 'VGG-16 (ImageNet)' }, - { value: 'vgg19', text: 'VGG-19 (ImageNet)' }, - { value: 'densenet121', text: 'DenseNet-121 (ImageNet)' }, - { value: 'mobilenet_v2', text: 'MobileNet V2 (ImageNet)' }, - { value: 'efficientnet_b0', text: 'EfficientNet B0 (ImageNet)' }, + { value: 'resnet18', text: 'ResNet-18 (ImageNet) - Default' }, { value: 'custom', text: 'Upload Custom Model...' 
} ], architectureOptions: [ diff --git a/morphocluster/frontend/src/components/JobStatus.vue b/morphocluster/frontend/src/components/JobStatus.vue index fbd8150..22714a7 100644 --- a/morphocluster/frontend/src/components/JobStatus.vue +++ b/morphocluster/frontend/src/components/JobStatus.vue @@ -10,10 +10,11 @@ v-for="job in jobs" :key="job.id" class="job-item" - :class="{ + :class="{ 'job-completed': job.status === 'completed', 'job-failed': job.status === 'failed', - 'job-running': job.status === 'running' + 'job-running': job.status === 'running', + 'job-queued': job.status === 'queued' }" >
@@ -38,7 +39,7 @@
             variant="outline-secondary"
             @click="removeJob(job.id)"
           >
-
+            ×
@@ -141,7 +142,8 @@ export default { jobs: [], showLogs: false, refreshTimer: null, - isLoading: false + isLoading: false, + removedJobIds: new Set() }; }, computed: { @@ -150,6 +152,7 @@ export default { } }, async mounted() { + this.loadRemovedJobIds(); await this.fetchJobs(); if (this.autoRefresh) { this.startRefresh(); @@ -159,14 +162,36 @@ export default { this.stopRefresh(); }, methods: { + loadRemovedJobIds() { + try { + const stored = localStorage.getItem('removedJobIds'); + if (stored) { + this.removedJobIds = new Set(JSON.parse(stored)); + } + } catch (error) { + console.error('Failed to load removed job IDs:', error); + this.removedJobIds = new Set(); + } + }, + + saveRemovedJobIds() { + try { + localStorage.setItem('removedJobIds', JSON.stringify([...this.removedJobIds])); + } catch (error) { + console.error('Failed to save removed job IDs:', error); + } + }, async fetchJobs() { if (this.isLoading) return; this.isLoading = true; try { const response = await this.$axios.get('/api/jobs/user'); - const newJobs = response.data; - + const allJobs = response.data; + + // Filter out manually removed jobs + const newJobs = allJobs.filter(job => !this.removedJobIds.has(job.id)); + // Check for status changes this.jobs.forEach(oldJob => { const newJob = newJobs.find(j => j.id === oldJob.id); @@ -178,7 +203,17 @@ export default { } } }); - + + // Also check for newly completed jobs that weren't in the old list + newJobs.forEach(newJob => { + const oldJob = this.jobs.find(j => j.id === newJob.id); + if (!oldJob && newJob.status === 'completed') { + this.$emit('job-completed', newJob); + } else if (!oldJob && newJob.status === 'failed') { + this.$emit('job-failed', newJob); + } + }); + this.jobs = newJobs; } catch (error) { console.error('Failed to fetch jobs:', error); @@ -202,6 +237,11 @@ export default { }, removeJob(jobId) { + // Add to removed set so it doesn't reappear + this.removedJobIds.add(jobId); + this.saveRemovedJobIds(); + + // Remove from current jobs array const index = this.jobs.findIndex(j => j.id === jobId); if (index !== -1) { this.jobs.splice(index, 1); @@ -209,6 +249,15 @@ export default { }, clearCompletedJobs() { + // Add completed/failed jobs to removed set + this.jobs.forEach(job => { + if (['completed', 'failed'].includes(job.status)) { + this.removedJobIds.add(job.id); + } + }); + this.saveRemovedJobIds(); + + // Filter out completed/failed jobs this.jobs = this.jobs.filter(job => !['completed', 'failed'].includes(job.status)); }, @@ -250,7 +299,7 @@ export default { getJobDetails(job) { if (job.job_type === 'format_conversion') { - return `Archive: ${job.parameters?.archive_name || 'Unknown'}`; + return `Archive: ${job.archive_name || job.parameters?.archive_name || 'Unknown'}`; } else if (job.job_type === 'feature_extraction') { return `Model: ${job.parameters?.model || 'ImageNet'}, Batch: ${job.parameters?.batch_size || 512}`; } else if (job.job_type === 'initial_clustering' || job.job_type === 'reclustering') { @@ -262,6 +311,7 @@ export default { getStatusIcon(status) { const icons = { 'pending': 'mdi mdi-clock-outline text-warning', + 'queued': 'mdi mdi-clock-outline text-info', 'running': 'mdi mdi-loading mdi-spin text-primary', 'completed': 'mdi mdi-check-circle text-success', 'failed': 'mdi mdi-alert-circle text-danger', @@ -273,6 +323,7 @@ export default { getStatusText(status) { const texts = { 'pending': 'Pending', + 'queued': 'Queued', 'running': 'Running', 'completed': 'Completed', 'failed': 'Failed', @@ -308,6 +359,8 @@ export default { border: 1px 
solid #dee2e6; border-radius: 8px; background: white; + max-height: 600px; + overflow-y: auto; } .no-jobs { @@ -345,6 +398,11 @@ export default { background-color: #f8f9ff; } +.job-item.job-queued { + border-left: 4px solid #17a2b8; + background-color: #f0f9ff; +} + .job-header { display: flex; justify-content: space-between; diff --git a/morphocluster/frontend/src/components/UploadZone.vue b/morphocluster/frontend/src/components/UploadZone.vue index 0cc327f..10671b0 100644 --- a/morphocluster/frontend/src/components/UploadZone.vue +++ b/morphocluster/frontend/src/components/UploadZone.vue @@ -220,6 +220,7 @@ export default { this.$emit('upload-start', files); try { + console.log('UploadZone: Starting axios post to', this.uploadUrl); const response = await axios.post(this.uploadUrl, formData, { headers: { 'Content-Type': 'multipart/form-data', @@ -229,22 +230,29 @@ export default { this.updateProgress(progressEvent); }, }); - + + console.log('UploadZone: Axios post completed, response:', response); + this.uploadedFiles = files.map(file => ({ name: file.name, size: file.size, type: file.type })); - + + console.log('UploadZone: Set uploadedFiles:', this.uploadedFiles); + this.uploadComplete = true; this.isUploading = false; - + + console.log('UploadZone: About to emit upload-complete event'); this.$emit('upload-complete', { files: this.uploadedFiles, response: response.data }); + console.log('UploadZone: Emitted upload-complete event'); } catch (error) { + console.error('UploadZone: Error during upload:', error); this.isUploading = false; if (axios.isCancel(error)) { diff --git a/morphocluster/frontend/src/main.js b/morphocluster/frontend/src/main.js index 9061aac..4ac7433 100644 --- a/morphocluster/frontend/src/main.js +++ b/morphocluster/frontend/src/main.js @@ -1,6 +1,7 @@ import { createApp } from 'vue' import App from './App.vue' import router from './router' +import axios from 'axios' // Bootstrap Vue 3 import BootstrapVueNext from 'bootstrap-vue-next' @@ -12,6 +13,9 @@ import './assets/styles.css' const app = createApp(App) +// Configure axios +app.config.globalProperties.$axios = axios + app.use(router) app.use(BootstrapVueNext) diff --git a/morphocluster/frontend/src/views/Approve.vue b/morphocluster/frontend/src/views/Approve.vue index 5d313ec..776b77e 100644 --- a/morphocluster/frontend/src/views/Approve.vue +++ b/morphocluster/frontend/src/views/Approve.vue @@ -281,6 +281,14 @@ export default { // Should members_url be updated (with unique id etc.) on response? var updateMembersUrl = false; + // Guard against null node + if (!this.node) { + if ($state && $state.complete) { + $state.complete(); + } + return; + } + if (!this.members_url) { const nodes = this.node.children; this.members_url = `/api/nodes/${ @@ -402,6 +410,12 @@ export default { moveupMember(member) { console.log("Remove", this.getUniqueId(member)); + // Guard against null node + if (!this.node || !this.node.parent_id) { + console.error("Cannot move member: node or parent_id is null"); + return; + } + // TODO: Also reject members. 
api.nodeAdoptMembers(this.node.parent_id, [member]) .then(() => { diff --git a/morphocluster/frontend/src/views/Project.vue b/morphocluster/frontend/src/views/Project.vue index 98952e7..9660b58 100644 --- a/morphocluster/frontend/src/views/Project.vue +++ b/morphocluster/frontend/src/views/Project.vue @@ -7,7 +7,7 @@ - + + diff --git a/morphocluster/frontend/src/views/Upload.vue b/morphocluster/frontend/src/views/Upload.vue index f3c9ffc..1f09ca4 100644 --- a/morphocluster/frontend/src/views/Upload.vue +++ b/morphocluster/frontend/src/views/Upload.vue @@ -13,6 +13,9 @@ + @@ -49,12 +52,12 @@ v-for="archive in uploadedArchives" :key="archive.id" class="archive-item" - :class="{ 'needs-conversion': archive.needsConversion }" + :class="{ 'needs-conversion': archive.needs_conversion }" >
-            {{ archive.name }}
-            {{ formatBytes(archive.size) }}
+            {{ archive.original_filename || archive.filename || 'Unknown' }}
+            {{ formatBytes(archive.file_size) || 'Size unknown' }}
             {{ getArchiveStatusText(archive.status) }}
-            {{ archive.format }}
+            {{ getArchiveFormat(archive) }}
-
+
             This archive appears to be in EcoTaxa format and needs conversion.