diff --git a/packtools/sps/validation/aff.py b/packtools/sps/validation/aff.py index d45196b49..fba1584c0 100644 --- a/packtools/sps/validation/aff.py +++ b/packtools/sps/validation/aff.py @@ -1,5 +1,6 @@ from copy import deepcopy from difflib import SequenceMatcher +from gettext import gettext as _ from packtools.sps.models.v2.aff import FulltextAffiliations from packtools.sps.validation.utils import build_response @@ -39,6 +40,7 @@ def get_default_params(self): "country_code_error_level": "CRITICAL", "state_error_level": "WARNING", "city_error_level": "WARNING", + "email_in_original_error_level": "ERROR", "translation_aff_rules": { "id_error_level": "CRITICAL", "label_error_level": "ERROR", @@ -98,15 +100,21 @@ def validate_translations_consistency(self): sub_item="quantity", validation_type="match", is_valid=False, - expected=f"{len(main_affs)} affiliations", - obtained=f"{len(trans_affs)} affiliations", - advice="Ensure translation has same number of affiliations as main text", + expected=_("{} affiliations").format(len(main_affs)), + obtained=_("{} affiliations").format(len(trans_affs)), + advice=_("Ensure translation has same number of affiliations as main text"), error_level=self.params["translation_qty_error_level"], data={ "main_count": len(main_affs), "translation_count": len(trans_affs), "language": lang, }, + advice_text=_('Ensure translation has same number of affiliations ({expected_count}) as main text'), + advice_params={ + "expected_count": len(main_affs), + "obtained_count": len(trans_affs), + "language": lang + } ) def validate_not_translation_affiliations(self): @@ -183,6 +191,7 @@ def get_default_params(self): "country_code_error_level": "CRITICAL", "state_error_level": "WARNING", "city_error_level": "WARNING", + "email_in_original_error_level": "ERROR", "translation_aff_rules": { "id_error_level": "CRITICAL", "label_error_level": "ERROR", @@ -206,12 +215,69 @@ def get_default_params(self): "country_code": 1, } } + @property def info(self): 
def is_autonomous_researcher(self):
    """
    Check if affiliation is for an autonomous/independent researcher.

    IMPORTANT: This is a heuristic solution based on text patterns and
    structure. A semantic solution using XML attributes would be more
    robust and is recommended for future SciELO specification updates.

    Current approach:
    1. Checks the original affiliation text against multilingual
       patterns (PT/ES/EN). Both the text and the patterns are folded
       before comparison: Unicode-normalized (NFKD), casefolded,
       stripped of combining marks and with whitespace runs collapsed.
       Matching is therefore insensitive to case, to accents, to the
       normalization form used in the XML (precomposed vs. combining
       characters) and to line breaks / repeated spaces.
    2. Optionally falls back to a structural heuristic (country present
       without institution) when ``enable_autonomous_heuristics`` is
       truthy in ``self.params``.

    Supports:
    - Portuguese: "Pesquisador Autônomo"
    - Spanish: "Investigador Autónomo/Independiente"
    - English: "Independent/Autonomous Researcher"

    The pattern list can be overridden with the
    ``autonomous_researcher_patterns`` entry of ``self.params``; a
    missing or ``None`` entry selects the built-in defaults, an empty
    list disables text matching.

    Returns
    -------
    bool
        True if affiliation appears to be for an autonomous researcher

    See Also
    --------
    GitHub issue: Proposal for semantic autonomous researcher marking
    """
    # Function-scope imports: the method stays self-contained and does not
    # depend on the module's import block.
    import re
    import unicodedata

    if not self.original:
        return False

    def _fold(text):
        """Return text casefolded, accent-stripped and whitespace-collapsed."""
        decomposed = unicodedata.normalize(
            "NFKD", unicodedata.normalize("NFKD", text).casefold()
        )
        without_marks = "".join(
            char for char in decomposed if not unicodedata.combining(char)
        )
        # Collapse (do not strip) whitespace so that patterns with
        # deliberate leading/trailing spaces keep their meaning.
        return re.sub(r"\s+", " ", without_marks)

    # Method 1: Text pattern matching (multilingual)
    autonomous_patterns = self.params.get("autonomous_researcher_patterns")
    if autonomous_patterns is None:
        autonomous_patterns = [
            # Portuguese
            "pesquisador autônomo", "pesquisador autonomo",
            # Spanish
            "investigador autónomo", "investigador autonomo",
            "investigador independiente",
            # English
            "independent researcher", "autonomous researcher",
        ]

    folded_original = _fold(self.original)
    if any(_fold(pattern) in folded_original for pattern in autonomous_patterns):
        return True

    # Method 2: Structural heuristic (country present, no institution)
    # This catches cases where editors use different terminology.
    # DISABLED BY DEFAULT to avoid false positives (e.g., affiliations
    # missing orgname). Enable explicitly via
    # enable_autonomous_heuristics=True if needed.
    if self.params.get("enable_autonomous_heuristics", False):
        has_country = bool(self.affiliation.get("country_name"))
        has_orgname = bool(self.affiliation.get("orgname"))
        return has_country and not has_orgname

    return False
self.original or ""} ) def validate_orgdiv2(self): @@ -276,11 +352,13 @@ def validate_orgdiv2(self): sub_item='@content-type="orgdiv2"', validation_type="exist", is_valid=bool(orgdiv2), - expected="orgdiv2 affiliation", + expected=_("orgdiv2 affiliation"), obtained=orgdiv2, - advice=f'Mark the second hierarchical subdivision with in for {self.original}', + advice=_('Mark the second hierarchical subdivision with in for {}').format(self.original), data=self.affiliation, error_level=error_level, + advice_text=_('Mark the second hierarchical subdivision with in for {original}'), + advice_params={"original": self.original or ""} ) def validate_label(self): @@ -296,9 +374,11 @@ def validate_label(self): is_valid=bool(label), expected="label", obtained=label, - advice=f'Mark affiliation label with