From e2e893965e5ebcaa32887e3b9ac2cb4180b7b365 Mon Sep 17 00:00:00 2001 From: lujunsan Date: Wed, 12 Feb 2025 14:23:02 +0100 Subject: [PATCH 1/3] Test new GHA --- .github/workflows/import_packages.yml | 73 +++++++++++++-------------- 1 file changed, 34 insertions(+), 39 deletions(-) diff --git a/.github/workflows/import_packages.yml b/.github/workflows/import_packages.yml index 400951c8..a5a2495f 100644 --- a/.github/workflows/import_packages.yml +++ b/.github/workflows/import_packages.yml @@ -1,26 +1,19 @@ -# This workflow syncs the vector database -name: Sync vector DB +name: Temp Import Packages on: - workflow_dispatch: - inputs: - enable_artifact_download: - description: 'Enable artifact download step' - type: boolean - required: false - default: true + pull_request: + jobs: - # This workflow contains a single job called "greet" - sync_db: - # The type of runner that the job will run on + temp_sync_db: runs-on: ubuntu-latest + env: + AWS_REGION: us-east-1 - # Steps represent a sequence of tasks that will be executed as part of the job steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5 with: - python-version: '3.12' + python-version: '3.12' - name: Install dependencies run: | python -m pip install --upgrade pip @@ -31,32 +24,34 @@ jobs: git lfs install git lfs pull - - name: Download json data - id: download-json-data - uses: dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8 + - name: Configure AWS Credentials for S3 + uses: aws-actions/configure-aws-credentials@49f33fe638c0cba4fb16037a27915a7ab7740259 with: - repo: stacklok/codegate-data - workflow: ".github/workflows/generate-artifact.yml" - workflow_conclusion: success - name: jsonl-files - path: /tmp/ - name_is_regexp: true - skip_unpack: false - if_no_artifact_found: ignore + role-to-assume: 
arn:aws:iam::781189302813:role/github_actions_codegate_role + aws-region: ${{ env.AWS_REGION }} - - name: Download artifact - if: ${{ github.event.inputs.enable_artifact_download == 'true' }} - id: download-artifact - uses: dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8 - with: - github_token: ${{ github.token }} - workflow: ".github/workflows/import_packages.yml" - workflow_conclusion: success - name: sqlite_data - path: /tmp/ - name_is_regexp: true - skip_unpack: false - if_no_artifact_found: ignore + - name: Download JSONL files from S3 + run: | + echo "Downloading manifest.json from S3..." + aws s3 cp s3://codegate-data-prod/manifest.json ./manifest.json --region $AWS_REGION + echo "Manifest content:" + cat manifest.json + + echo "Parsing manifest..." + MALICIOUS_KEY=$(jq -r '.latest.malicious_packages' manifest.json) + DEPRECATED_KEY=$(jq -r '.latest.deprecated_packages' manifest.json) + ARCHIVED_KEY=$(jq -r '.latest.archived_packages' manifest.json) + + echo "Malicious key: $MALICIOUS_KEY" + echo "Deprecated key: $DEPRECATED_KEY" + echo "Archived key: $ARCHIVED_KEY" + + mkdir -p /tmp/jsonl-files + + # Download and map the S3 files to fixed names in /tmp/jsonl-files + aws s3 cp s3://codegate-data-staging/$MALICIOUS_KEY /tmp/jsonl-files/malicious.jsonl --region $AWS_REGION + aws s3 cp s3://codegate-data-staging/$DEPRECATED_KEY /tmp/jsonl-files/deprecated.jsonl --region $AWS_REGION + aws s3 cp s3://codegate-data-staging/$ARCHIVED_KEY /tmp/jsonl-files/archived.jsonl --region $AWS_REGION - name: Install Poetry run: | From 14144cc86f1467768bfe816b6af7ee81f225ea0b Mon Sep 17 00:00:00 2001 From: lujunsan Date: Wed, 12 Feb 2025 14:30:42 +0100 Subject: [PATCH 2/3] Add permissions --- .github/workflows/import_packages.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/import_packages.yml b/.github/workflows/import_packages.yml index a5a2495f..3d2db28d 100644 --- a/.github/workflows/import_packages.yml +++
b/.github/workflows/import_packages.yml @@ -6,6 +6,9 @@ on: jobs: temp_sync_db: runs-on: ubuntu-latest + permissions: + contents: read + id-token: write env: AWS_REGION: us-east-1 From f4f89dda852c0e2d561559c77538129e473021a2 Mon Sep 17 00:00:00 2001 From: lujunsan Date: Wed, 12 Feb 2025 14:36:02 +0100 Subject: [PATCH 3/3] Update staging to prod refs --- .github/workflows/import_packages.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/import_packages.yml b/.github/workflows/import_packages.yml index 3d2db28d..74bcd86d 100644 --- a/.github/workflows/import_packages.yml +++ b/.github/workflows/import_packages.yml @@ -52,9 +52,9 @@ jobs: mkdir -p /tmp/jsonl-files # Download and map the S3 files to fixed names in /tmp/jsonl-files - aws s3 cp s3://codegate-data-staging/$MALICIOUS_KEY /tmp/jsonl-files/malicious.jsonl --region $AWS_REGION - aws s3 cp s3://codegate-data-staging/$DEPRECATED_KEY /tmp/jsonl-files/deprecated.jsonl --region $AWS_REGION - aws s3 cp s3://codegate-data-staging/$ARCHIVED_KEY /tmp/jsonl-files/archived.jsonl --region $AWS_REGION + aws s3 cp s3://codegate-data-prod/$MALICIOUS_KEY /tmp/jsonl-files/malicious.jsonl --region $AWS_REGION + aws s3 cp s3://codegate-data-prod/$DEPRECATED_KEY /tmp/jsonl-files/deprecated.jsonl --region $AWS_REGION + aws s3 cp s3://codegate-data-prod/$ARCHIVED_KEY /tmp/jsonl-files/archived.jsonl --region $AWS_REGION - name: Install Poetry run: |