diff --git a/demo/webcam.py b/demo/webcam.py new file mode 100644 index 0000000..acb265d --- /dev/null +++ b/demo/webcam.py @@ -0,0 +1,39 @@ +from functools import partial + +import capybara as cb +from fire import Fire + +import pyface as pf + +cur_dir = cb.get_curdir(__file__) + + +def process_frame(frame, face_service: pf.FaceService): + timer = cb.Timer() + timer.tic() + faces = face_service([frame])[0] + delta = timer.toc() + frame = faces.gen_info_img() + frame = cb.draw_text(frame, f"FPS: {1 / delta:.0f}", (0, 0), color=(255, 255, 255), text_size=24) + return frame + + +def main(camera_ip: str = 0): + kwargs = {"backend": "cuda"} + face_service = pf.FaceService( + enable_depth=True, + enable_landmark=True, + enable_recognition=True, + enable_gender=True, + detect_kwargs=kwargs, + landmark_kwargs=kwargs, + depth_kwargs=kwargs, + recognition_kwargs=kwargs, + face_bank=cur_dir / "data" / "face_bank", + ) + demo = cb.WebDemo(camera_ip=camera_ip, pipelines=[partial(process_frame, face_service=face_service)]) + demo.run() + + +if __name__ == "__main__": + Fire(main) diff --git a/pyface/components/enums.py b/pyface/components/enums.py index a976b30..56b1ab6 100644 --- a/pyface/components/enums.py +++ b/pyface/components/enums.py @@ -3,23 +3,11 @@ import capybara as cb __all__ = [ - "MouthStatus", - "EyeStatus", "FacePose", "FakeType", ] -class MouthStatus(cb.EnumCheckMixin, Enum): - Close = 0 - Open = 1 - - -class EyeStatus(cb.EnumCheckMixin, Enum): - Close = 0 - Open = 1 - - class FacePose(cb.EnumCheckMixin, Enum): LeftProfile = 0 LeftFrontal = 1 diff --git a/pyface/components/face_depth/tddfav2.py b/pyface/components/face_depth/tddfav2.py index 2dfbde6..6c16952 100644 --- a/pyface/components/face_depth/tddfav2.py +++ b/pyface/components/face_depth/tddfav2.py @@ -304,12 +304,7 @@ def _similar_transform(pts3d, scales): P, alpha_shp, alpha_exp, scales, shifts = [x for x in _parse_tffda_param(param)] if dense_flag: - pts3d = self.bfm_engine( - R=P[..., :3], - offset=P[..., 3:], - alpha_shp=alpha_shp, - alpha_exp=alpha_exp, - )["output"] + pts3d = self.bfm_engine(R=P[..., :3], offset=P[..., 3:], alpha_shp=alpha_shp, alpha_exp=alpha_exp)["output"] else: param = self._u + self._w_shp @ alpha_shp + self._w_exp @ alpha_exp pts3d = P[..., :3] @ param.reshape(3, -1, order="F") @@ -378,7 +373,9 @@ def __call__( # Ensure shapes are compatible for concatenation n = preds["params"].shape[0] if scales.shape[0] != n or shifts.shape[0] != n: - raise ValueError(f"Shape mismatch: preds['params'] has {n} rows, scales has {scales.shape[0]}, shifts has {shifts.shape[0]}") + raise ValueError( + f"Shape mismatch: preds['params'] has {n} rows, scales has {scales.shape[0]}, shifts has {shifts.shape[0]}" + ) params = np.concatenate((preds["params"], scales, shifts), axis=-1) lmk3d68pts = self._gen_3d_landmarks(params) pose_degrees = self._get_pose_degrees(params) diff --git a/pyface/components/face_landmark/coordinate_reg.py b/pyface/components/face_landmark/coordinate_reg.py index 5c612d9..075c705 100644 --- a/pyface/components/face_landmark/coordinate_reg.py +++ b/pyface/components/face_landmark/coordinate_reg.py @@ -41,8 +41,8 @@ def __init__( backend: str = "cuda", session_option: Dict[str, Any] = {}, provider_option: Dict[str, Any] = {}, - mouth_th: Optional[float] = 0.2, - eye_th: Optional[float] = 0.5, + mouth_th: Optional[float] = 0.5, + eye_th: Optional[float] = 0.2, ): if model_path is None: model_path = download_model_and_return_model_fpath( @@ -78,11 +78,7 @@ def preprocess(self, imgs: List[np.ndarray], boxes: List[cb.Box]) -> Dict[str, A Ms.append(M) return blobs, Ms - def postprocess( - self, - preds: np.ndarray, - Ms: List[np.ndarray], - ) -> List[np.ndarray]: + def postprocess(self, preds: np.ndarray, Ms: List[np.ndarray]) -> List[np.ndarray]: h, w = self.metadata["InputSize"][-2:] lmks = [] for pred, M in zip(preds, Ms): @@ -124,11 +120,7 @@ def _calc_eye_score(self, lmks: np.ndarray, eye_mode="left") -> Tuple[np.number, eye_ratio = norm_ratio(eye_w, eye_h, 1, 0) return eye_ratio - def __call__( - self, - imgs: List[np.ndarray], - boxes: List[cb.Box], - ) -> Union[Dict[str, Any], List[Dict[str, Any]]]: + def __call__(self, imgs: List[np.ndarray], boxes: List[cb.Box]) -> Union[Dict[str, Any], List[Dict[str, Any]]]: blobs, Ms = self.preprocess(imgs, boxes) preds = {k: [] for k in self.engine.output_infos.keys()} for blob in blobs: @@ -148,6 +140,9 @@ def __call__( "mouth_score": mouth_score[i], "right_eye_score": right_eye_score[i], "left_eye_score": left_eye_score[i], + "is_mouth_open": mouth_score[i] > self.metadata["Mouth_th"], + "is_right_eye_open": right_eye_score[i] > self.metadata["Eye_th"], + "is_left_eye_open": left_eye_score[i] > self.metadata["Eye_th"], "info": { "model_fpath": self.model_path, "thresholds": { @@ -176,12 +171,7 @@ def draw_result( f"Left Eye: {result['left_eye_score'].round(4)}\n" ) text_size = np.clip(round(face_box.height * 0.05) + 1, 1, max_text_size) - img = cb.draw_text( - img=img, - text=text, - location=face_box.left_bottom, - text_size=text_size, - ) + img = cb.draw_text(img=img, text=text, location=face_box.left_bottom, text_size=text_size) img = cb.draw_points(img, result["lmk"], scales=1, colors=(0, 255, 0)) @@ -195,12 +185,7 @@ def draw_result( text += f"{k}: {v}\n" text_size = np.clip(max(img.shape) // 100 + 1, 12, 20) location = (5, 5) - img = cb.draw_text( - img=img, - text=text, - location=location, - text_size=text_size, - ) + img = cb.draw_text(img=img, text=text, location=location, text_size=text_size) return img diff --git a/pyface/face_service.py b/pyface/face_service.py index ca011fa..03ae509 100644 --- a/pyface/face_service.py +++ b/pyface/face_service.py @@ -14,7 +14,7 @@ build_face_recognition, build_gender_detection, ) -from .object import TDDFA, Encode, Face, Faces, Who +from .object import TDDFA, Attribute, Encode, Eye, Face, FacePose, Faces, Mouth, Who __all__ = ["FaceService"] @@ -92,23 +92,37 @@ def _fill_results_to_faces_list( for box, lmk, score in zip(proposals["boxes"], proposals["lmk5pts"], proposals["scores"]) ] for face in faces: - if gender_results is not None: - face.gender = gender_results[i]["gender"] + if face.attribute is None: + face.attribute = Attribute() + if gender_results is not None: + face.attribute.gender = gender_results[i]["gender"] if lmk_results is not None: lmk_result = lmk_results[i] face.lmk106pt = cb.Keypoints(lmk_result["lmk"]) + face.attribute.right_eye = Eye( + is_open=lmk_result["is_right_eye_open"], + score=lmk_result["right_eye_score"], + ) + face.attribute.left_eye = Eye( + is_open=lmk_result["is_left_eye_open"], + score=lmk_result["left_eye_score"], + ) + face.attribute.mouth = Mouth( + is_open=lmk_result["is_mouth_open"], + score=lmk_result["mouth_score"], + ) if dep_results is not None: dep_result = dep_results[i] face.tddfa = TDDFA( param=dep_result["param"], lmk68pt=dep_result["lmk3d68pt"], depth_img=dep_result["depth_img"], - pose=dep_result["pose"], yaw=dep_result["pose_degree"][0], roll=dep_result["pose_degree"][1], pitch=dep_result["pose_degree"][2], ) + face.attribute.pose = FacePose(dep_result["pose"]) if enc_results is not None: enc_result = enc_results[i] face.encoding = Encode( @@ -116,14 +130,6 @@ def _fill_results_to_faces_list( version=enc_result["info"]["version"], ) face.norm_img = enc_result["norm_img"] - # if fas_results is not None: - # fas_results = fas_results[i] - # face.liveness = Liveness( - # is_true=fas_results["pred_label"] == "Live", - # value=fas_results["score"], - # threshold=fas_results["info"]["threshold"], - # label=fas_results["pred_label"], - # ) i += 1 return faces_list diff --git a/pyface/object.py b/pyface/object.py index 954666a..c48ea39 100644 --- a/pyface/object.py +++ b/pyface/object.py @@ -6,7 +6,7 @@ import numpy as np from pybase64 import b64encode -from .components.enums import EyeStatus, FacePose, FakeType +from .components.enums import FacePose, FakeType __all__ = [ "Eye", @@ -17,6 +17,7 @@ "Face", "Faces", "Liveness", + "Attribute", "sort_face_by_size", "drop_too_small_faces", # "faces_to_schema", @@ -26,8 +27,14 @@ @dataclass() class Eye(cb.DataclassToJsonMixin, cb.DataclassCopyMixin): - left: Optional[EyeStatus] = field(default=None) - right: Optional[EyeStatus] = field(default=None) + is_open: Optional[bool] = field(default=None) + score: Optional[float] = field(default=None) + + +@dataclass() +class Mouth(cb.DataclassToJsonMixin, cb.DataclassCopyMixin): + is_open: Optional[bool] = field(default=None) + score: Optional[float] = field(default=None) @dataclass() @@ -50,7 +57,6 @@ class TDDFA(cb.DataclassToJsonMixin, cb.DataclassCopyMixin): param: Optional[np.ndarray] = field(default=None) lmk68pt: Optional[np.ndarray] = field(default=None) depth_img: Optional[np.ndarray] = field(default=None) - pose: Optional[FacePose] = field(default=None) yaw: Optional[float] = field(default=None) roll: Optional[float] = field(default=None) pitch: Optional[float] = field(default=None) @@ -69,11 +75,21 @@ class Who(cb.DataclassToJsonMixin, cb.DataclassCopyMixin): recognized_level: Optional[int] = field(default=None) +@dataclass() +class Attribute(cb.DataclassToJsonMixin, cb.DataclassCopyMixin): + age: Optional[int] = field(default=None) + gender: Optional[str] = field(default=None) + race: Optional[str] = field(default=None) + pose: Optional[FacePose] = field(default=None) + left_eye: Optional[Eye] = field(default=None) + right_eye: Optional[Eye] = field(default=None) + mouth: Optional[Mouth] = field(default=None) + + @dataclass() class Face(cb.DataclassToJsonMixin, cb.DataclassCopyMixin): box: cb.Box score: Union[float, np.number] = field(default=1.0) - gender: Optional[str] = field(default=None) lmk5pt: Optional[cb.Keypoints] = field(default=None) norm_img: Optional[np.ndarray] = field(default=None) tddfa: Optional[TDDFA] = field(default=None) @@ -81,14 +97,14 @@ class Face(cb.DataclassToJsonMixin, cb.DataclassCopyMixin): who: Optional[Who] = field(default=None) lmk106pt: Optional[cb.Keypoints] = field(default=None) liveness: Optional[Liveness] = field(default=None) + attribute: Optional[Attribute] = field(default=None) jsonable_func = { "vector": lambda x: b64encode(x.astype("float32").tobytes()).decode("utf-8") if x is not None else None, - "norm_img": lambda x: cb.img_to_b64str(x, cb.ImgCode.PNG) if x is not None else None, + "norm_img": lambda x: cb.img_to_b64str(x, cb.IMGTYP.PNG) if x is not None else None, } # pose: Optional[FacePose] = field(default=None) # blur: Optional[WhetherOrNot] = field(default=None) # occlusion: Optional[Occlusion] = field(default=None) - # attribute: Optional[Attribute] = field(default=None) # lmk68pt: Optional[cb.Keypoints] = field(default=None) # analysis_infos: Optional[dict] = field(default=None) @@ -159,14 +175,14 @@ def gen_info_img(self, mosaic_face: bool = False): zipped = zip( self.box, self.score, - self.gender, self.lmk5pt, + self.attribute, self.tddfa, self.who, self.lmk106pt, self.liveness, ) - for box, score, gender, lmk5pt, tddfa, who, lmk106pt, liveness in zipped: + for box, score, lmk5pt, attribute, tddfa, who, lmk106pt, liveness in zipped: text_size = np.clip(round(box.height / 5), 8, 32) box_line_scale = (box.width / 128).clip(1, 3) point_scale = (box.width / 256).clip(0.3, 2) @@ -193,24 +209,30 @@ def gen_info_img(self, mosaic_face: bool = False): loc = box.left_bottom text_to_draw = "" - if gender is not None: - text_to_draw += f"Gender: {gender}\n" - else: - text_to_draw += "Gender: Unknown\n" - - if who is not None: - who = who.be_jsonable() if isinstance(who, Who) else who - text_to_draw += f"Who: {who['name']}\n" - else: - text_to_draw += "Who: Unknown\n" - - if liveness is not None: - liveness = liveness.be_jsonable() if isinstance(liveness, Liveness) else liveness - text_to_draw += f"FAS: like {liveness['label']}\n" - - if tddfa is not None: - tddfa = tddfa.be_jsonable() if isinstance(tddfa, TDDFA) else tddfa - text_to_draw += f"Yaw: {tddfa['yaw']:.2f}, Roll: {tddfa['roll']:.2f}, Pitch: {tddfa['pitch']:.2f}\n" + if isinstance(attribute, Attribute): + if attribute.gender is not None: + text_to_draw += f"Gender: {attribute.gender}\n" + if attribute.age is not None: + text_to_draw += f"Age: {attribute.age}\n" + if attribute.race is not None: + text_to_draw += f"Race: {attribute.race}\n" + if isinstance(attribute.pose, FacePose): + text_to_draw += f"Pose: {attribute.pose.name}\n" + if isinstance(attribute.right_eye, Eye): + text_to_draw += f"REye: {'open' if attribute.right_eye.is_open else 'close'} " + if isinstance(attribute.left_eye, Eye): + text_to_draw += f"LEye: {'open' if attribute.left_eye.is_open else 'close'} " + if isinstance(attribute.mouth, Mouth): + text_to_draw += f"Mouth: {'open' if attribute.mouth.is_open else 'close'}\n" + + if isinstance(who, Who): + text_to_draw += f"Who: {who.name}\n" + + if isinstance(liveness, Liveness): + text_to_draw += f"FAS: like {liveness.label}\n" + + if isinstance(tddfa, TDDFA): + text_to_draw += f"Yaw: {tddfa.yaw:.2f}, Roll: {tddfa.roll:.2f}, Pitch: {tddfa.pitch:.2f}\n" if lmk106pt is not None: img = cb.draw_points(img, lmk106pt.numpy(), point_scale, colors=(100, 220, 0))