diff --git a/.github/workflows/lambda.yml b/.github/workflows/lambda.yml index f06708b9..3019aba9 100644 --- a/.github/workflows/lambda.yml +++ b/.github/workflows/lambda.yml @@ -1,18 +1,30 @@ name: Post-Deploy Lambda - on: deployment_status: - jobs: deploy-lambdas: runs-on: ubuntu-latest + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: us-east-1 steps: - uses: actions/checkout@v3 - + with: + lfs: true + - name: Install Git LFS and pull files + run: | + sudo apt-get install -y git-lfs + git lfs install + git lfs fetch --all + git lfs checkout + - name: Set up Python 3.12 + uses: actions/setup-python@v4 + with: + python-version: "3.12" - name: Show deployment info run: | echo "Deployment environment: ${{ github.event.deployment.environment }}" - - name: Run Lambda production deploy if: > github.event.deployment.environment == 'documentcloud-prod' && @@ -20,7 +32,6 @@ jobs: run: | echo "Deploying production lambda updates" bash config/aws/lambda/codeship_deploy_lambdas.sh prod-lambda - - name: Run Lambda staging deploy if: > github.event.deployment.environment == 'documentcloud-staging' && diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cf291c04..c4fff926 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,5 +1,4 @@ name: CI - on: workflow_dispatch: pull_request: @@ -7,9 +6,8 @@ on: branches: - master - staging - env: - PYENV_VERSION: '3.10' + PYENV_VERSION: '3.12' DJANGO_SECRET_KEY: ${{secrets.SECRET_KEY}} DJANGO_SETTINGS_MODULE: config.settings.test DATABASE_URL: postgres://${PGUSER}:${PGPASSWORD}@127.0.0.1:5432/test @@ -38,15 +36,10 @@ env: DOCUMENT_BUCKET: ${{secrets.DOCUMENT_BUCKET}} REDIS_PROCESSING_PASSWORD: "" SAM_CLI_TELEMETRY: 0 - - - - jobs: check: runs-on: ubuntu-latest - services: redis: image: redis @@ -63,30 +56,19 @@ jobs: submodules: recursive - uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.12" cache: "pip" # caching pip dependencies - - - name: Install pip - run: | - wget https://bootstrap.pypa.io/pip/3.6/get-pip.py - python3 get-pip.py - - name: Install run: | pip install -r requirements/local.txt - - name: Lint run: pylint documentcloud - - name: Isort - run: isort --check-only -rc documentcloud - + run: isort --check-only --diff documentcloud - name: Formatting run: black --check documentcloud --exclude migrations - test: runs-on: ubuntu-latest - services: redis: image: redis @@ -97,7 +79,6 @@ jobs: --health-interval 10s --health-timeout 5s --health-retries 5 - postgres: image: postgres env: @@ -113,28 +94,17 @@ jobs: ports: # Maps tcp port 5432 on service container to the host - 5432:5432 - steps: - - name: Checkout uses: actions/checkout@v4 - - name: Install Python uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.12" cache: "pip" # caching pip dependencies - - - name: Install pip - run: | - wget https://bootstrap.pypa.io/pip/3.6/get-pip.py - python3 get-pip.py - - name: Install requirements run: | pip install -r requirements/local.txt - - - name: Test run: pytest documentcloud -m "not solr" env: @@ -142,4 +112,3 @@ jobs: PG_USER: test PG_PASSWORD: ${{ secrets.PG_PASSWORD }} DATABASE_URL: postgres://test:postgres@127.0.0.1:5432/test - diff --git a/.gitignore b/.gitignore index b1d474b4..83882349 100644 --- a/.gitignore +++ b/.gitignore @@ -306,3 +306,4 @@ CLAUDE.md .claude rootCA.pem +tests.txt diff --git a/.pylintrc b/.pylintrc index 151fce40..69a4dd17 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,7 +1,6 @@ [MASTER] -# https://stackoverflow.com/a/39207275/10952222 -init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.join(os.path.dirname(find_pylintrc()), 'documentcloud'))" -load-plugins=pylint_django, pylint_celery +init-hook="import sys, os; sys.path.insert(0, '/app'); os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.local')" +load-plugins=pylint_django ignore=migrations [FORMAT] @@ -11,7 +10,7 @@ max-args=6 [MESSAGES CONTROL] enable=useless-suppression -disable=missing-docstring,too-many-ancestors,too-few-public-methods,no-else-return,no-member,attribute-defined-outside-init,similarities,import-outside-toplevel,cyclic-import,raise-missing-from,django-not-configured +disable=missing-docstring,too-many-ancestors,too-few-public-methods,no-else-return,no-member,attribute-defined-outside-init,similarities,import-outside-toplevel,cyclic-import,raise-missing-from,django-not-configured,wrong-import-order,wrong-import-position [TYPECHECK] generated-members=REQUEST,acl_users,aq_parent,"[a-zA-Z]+_set{1,2}",save,delete diff --git a/README.md b/README.md index baffe897..7b8f77de 100644 --- a/README.md +++ b/README.md @@ -56,10 +56,7 @@ You must first have these set up and ready to go: ``` 12. Go to [Django admin for DocumentCloud](https://api.dev.documentcloud.org/admin) and add the required static [flat page](https://api.dev.documentcloud.org/admin/flatpages/flatpage/) called `/tipofday/`. It can be blank. Do not prefix the URL with `/pages/`. Specifying the `Site` as `example.com` is alright. 13. Create an initial Minio bucket to simulate AWS S3 locally: - - Reference your DocumentCloud `.django` file for these variables: - - Visit the `MINIO_URL` with a browser, likely at [this address](http://minio.documentcloud.org:9000), and login with the minio `MINIO_ACCESS_KEY` and `MINIO_SECRET_KEY` - - At the bottom right corner click the round plus button and then click the first circle that appears above it to "create bucket". - - Create a bucket called `documents` + - Run `inv initialize-minio` 14. Upload a document: - **Check your memory allocation on Docker is at least 7gb.** A sign that you do not have enough memory allocated is if containers are randomly failing or if your system is swapping heavily, especially when uploading documents. - The "upload" button should not be grayed out (if it is, check your user organization Verified Journalist status above) diff --git a/compose/local/django/Dockerfile b/compose/local/django/Dockerfile index b82c1d06..e895be46 100644 --- a/compose/local/django/Dockerfile +++ b/compose/local/django/Dockerfile @@ -1,33 +1,27 @@ -# FROM python:3.6-alpine -FROM matthewfeickert/docker-python3-ubuntu:3.10.5 - +FROM python:3.12-slim ENV PYTHONUNBUFFERED 1 - USER root RUN apt-get -qq -y update && \ apt-get -qq -y install \ - # Pip dependencies - python3-pip \ + # Build dependencies + build-essential \ # Postgres dependencies libpq-dev postgresql-client python3-psycopg2 \ # Tesseract dependencies - libjpeg-turbo8 libtiff5 \ + libjpeg62-turbo libtiff6 \ # LibreOffice dependencies - libnss3-dev libcurl4-nss-dev libxslt1-dev libpixman-1-0 libxcb-render0-dev && \ - # Symlink bash and python - ln -sf bash /bin/sh && rm -f /usr/bin/python && \ - ln -s /usr/bin/python3 /usr/bin/python && \ - curl https://bootstrap.pypa.io/pip/3.6/get-pip.py -o get-pip.py && \ - python3 get-pip.py --force-reinstall + libnss3-dev libcurl4-openssl-dev libxslt1-dev libpixman-1-0 libxcb-render0-dev \ + # Utilities + curl git # set up makecert root CA RUN curl http://localhost/rootCA.pem > /usr/local/share/ca-certificates/rootCA.crt && update-ca-certificates # Requirements are installed here to ensure they will be cached. COPY ./requirements /requirements -# RUN pip install --upgrade pip && pip install -r /requirements/local.txt -RUN pip install -r /requirements/local.txt + +RUN pip install --upgrade "pip>=26,<27" && pip install -r /requirements/local.txt COPY ./compose/production/django/entrypoint /entrypoint RUN sed -i 's/\r//' /entrypoint && chmod +x /entrypoint @@ -48,11 +42,8 @@ RUN sed -i 's/\r//' /start-flower && chmod +x /start-flower # Entry point # =-=-=-=-=-= -# Temporary measure to get pip-compile to work -# RUN pip install 'pip<19.2' - WORKDIR /app ENV LD_LIBRARY_PATH /app/documentcloud/documents/processing/ocr/tesseract -ENTRYPOINT ["/entrypoint"] +ENTRYPOINT ["/entrypoint"] \ No newline at end of file diff --git a/compose/production/django/Dockerfile b/compose/production/django/Dockerfile index 055b12be..874943e9 100644 --- a/compose/production/django/Dockerfile +++ b/compose/production/django/Dockerfile @@ -1,23 +1,20 @@ - -FROM python:3.6-alpine - +FROM python:3.12-slim ENV PYTHONUNBUFFERED 1 -RUN apk update \ +RUN apt-get -qq -y update && apt-get -qq -y install \ # psycopg2 dependencies - && apk add --virtual build-deps gcc python3-dev musl-dev \ - && apk add postgresql-dev \ + gcc python3-dev libpq-dev \ # Pillow dependencies - && apk add jpeg-dev zlib-dev freetype-dev lcms2-dev openjpeg-dev tiff-dev tk-dev tcl-dev \ + libjpeg62-turbo zlib1g-dev libfreetype6-dev liblcms2-dev libopenjp2-7-dev libtiff6 tk-dev tcl-dev \ # CFFI dependencies - && apk add libffi-dev py-cffi + libffi-dev python3-cffi -RUN addgroup -S django \ - && adduser -S -G django django +RUN groupadd -r django \ + && useradd -r -g django django -# Requirements are installed here to ensure they will be cached. +# Requirements are installed here to ensure they will be cached. COPY ./requirements /requirements -RUN pip install --no-cache-dir -r /requirements/production.txt \ +RUN pip install --upgrade "pip>=26,<27" && pip install --no-cache-dir -r /requirements/production.txt \ && rm -rf /requirements COPY ./compose/production/django/entrypoint /entrypoint diff --git a/config/aws/lambda/cloud-requirements.txt b/config/aws/lambda/cloud-requirements.txt index 8ac82d03..f6d55e7b 100644 --- a/config/aws/lambda/cloud-requirements.txt +++ b/config/aws/lambda/cloud-requirements.txt @@ -1,2 +1,3 @@ -boto3==1.10.14 -smart-open==1.8.4 +boto3==1.40.61 +smart-open==7.5.1 +setuptools<81 diff --git a/config/aws/lambda/info-and-image-requirements.txt b/config/aws/lambda/info-and-image-requirements.txt index b0d872dc..d04b85bf 100644 --- a/config/aws/lambda/info-and-image-requirements.txt +++ b/config/aws/lambda/info-and-image-requirements.txt @@ -1,4 +1,5 @@ -boto3==1.21.21 -smart-open==1.8.4 -pdfplumber==0.5.28 -pikepdf==5.4.0 +boto3==1.40.61 +smart-open==7.5.1 +pdfplumber==0.11.9 +pikepdf==10.5.1 +setuptools<81 diff --git a/config/aws/lambda/template_params.yaml b/config/aws/lambda/template_params.yaml index 197738f5..8e03318d 100644 --- a/config/aws/lambda/template_params.yaml +++ b/config/aws/lambda/template_params.yaml @@ -56,7 +56,7 @@ Resources: ProcessDocFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.process_doc CodeUri: ./awsbin/utils # Trigger function via HTTP @@ -74,7 +74,7 @@ Resources: GetProgressFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.get_progress CodeUri: ./awsbin/utils # Trigger function via HTTP @@ -92,7 +92,7 @@ Resources: ImportDocumentsFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.import_documents CodeUri: ./awsbin/utils # Trigger function via HTTP @@ -110,7 +110,7 @@ Resources: SidekickFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.sidekick CodeUri: ./awsbin/utils # Trigger function via HTTP @@ -128,7 +128,7 @@ Resources: ProcessPdfFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.process_pdf CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -145,7 +145,7 @@ Resources: ProcessPageCacheFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.process_page_cache CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -162,7 +162,7 @@ Resources: ExtractImageFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.extract_image CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -179,7 +179,7 @@ Resources: RedactDocFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.redact_doc CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -196,7 +196,7 @@ Resources: RunTesseractFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.run_tesseract CodeUri: ./awsbin/ocr Environment: @@ -215,7 +215,7 @@ Resources: ConvertDocFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.run_document_conversion CodeUri: ./awsbin/document_conversion # Trigger function via SNS @@ -232,7 +232,7 @@ Resources: AssembleTextFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.assemble_page_text CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -249,7 +249,7 @@ Resources: ExtractTextFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.extract_text_position CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -266,7 +266,7 @@ Resources: StartImportFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.start_import CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -283,7 +283,7 @@ Resources: ImportDocumentFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.import_document CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -300,7 +300,7 @@ Resources: FinishImportFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.finish_import CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -317,7 +317,7 @@ Resources: ModifyDocFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.modify_doc CodeUri: ./awsbin/info_and_image # Trigger function via SNS @@ -334,7 +334,7 @@ Resources: SidekickPreprocessFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.preprocess # https://github.com/model-zoo/scikit-learn-lambda/blob/master/layers.csv Layers: ["arn:aws:lambda:us-east-1:446751924810:layer:python-3-7-scikit-learn-0-23-1:2"] @@ -353,7 +353,7 @@ Resources: RetryErrorsFunction: Type: AWS::Serverless::Function Properties: - Runtime: python3.10 + Runtime: python3.12 Handler: main.retry_errors CodeUri: ./awsbin/utils # Trigger function via SNS diff --git a/config/settings/base.py b/config/settings/base.py index 3106e21f..bcfab1f2 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -93,7 +93,6 @@ "documentcloud.addons.apps.AddOnsConfig", "documentcloud.projects.apps.ProjectsConfig", "documentcloud.statistics.apps.StatisticsConfig", - "documentcloud.sidekick.apps.SidekickConfig", "documentcloud.users.apps.UsersConfig", "documentcloud.entities.apps.EntitiesConfig", ] @@ -464,7 +463,7 @@ BASE_URL = DOCCLOUD_URL PUBLIC_ASSET_URL = env( - "PUBLIC_ASSET_URL", default="http://minio.documentcloud.org:9000/documents/" + "PUBLIC_ASSET_URL", default="https://minio.documentcloud.org/documents/" ) PRIVATE_ASSET_URL = env("PRIVATE_ASSET_URL", default=f"{DOCCLOUD_API_URL}/files/") @@ -497,7 +496,6 @@ PROGRESS_URL = env("PROGRESS_URL", default="") IMPORT_URL = env("IMPORT_URL", default="") PROGRESS_TIMEOUT = env.int("PROGRESS_TIMEOUT", default=1) -SIDEKICK_PROCESSING_URL = env("SIDEKICK_PROCESSING_URL", default="") # Auth LOGIN_URL = "/accounts/login/squarelet" diff --git a/config/settings/local.py b/config/settings/local.py index a9cfe46e..7538f9f5 100644 --- a/config/settings/local.py +++ b/config/settings/local.py @@ -53,7 +53,7 @@ "debug_toolbar.panels.profiling.ProfilingPanel", ], "SHOW_TEMPLATE_CONTEXT": True, - "SHOW_TOOLBAR_CALLBACK": lambda _: True, + "SHOW_TOOLBAR_CALLBACK": lambda _: False, } # Celery diff --git a/config/settings/production.py b/config/settings/production.py index 568f72f3..84332a60 100644 --- a/config/settings/production.py +++ b/config/settings/production.py @@ -147,11 +147,6 @@ class StaticRootS3Boto3Storage(S3Boto3Storage): COMPRESS_STORAGE = "storages.backends.s3boto3.S3Boto3Storage" # https://django-compressor.readthedocs.io/en/latest/settings/#django.conf.settings.COMPRESS_URL COMPRESS_URL = STATIC_URL -# Collectfast -# ------------------------------------------------------------------------------ -# https://github.com/antonagestam/collectfast#installation -INSTALLED_APPS = ["collectfast"] + INSTALLED_APPS # noqa F405 -COLLECTFAST_STRATEGY = "collectfast.strategies.boto3.Boto3Strategy" # LOGGING # ------------------------------------------------------------------------------ diff --git a/config/urls.py b/config/urls.py index 37ebacc1..0727eded 100644 --- a/config/urls.py +++ b/config/urls.py @@ -49,8 +49,6 @@ ProjectMembershipViewSet, ProjectViewSet, ) -from documentcloud.sidekick.routers import SidekickRouter -from documentcloud.sidekick.views import SidekickViewSet from documentcloud.statistics.views import StatisticsViewSet from documentcloud.users.views import MessageView, UserViewSet @@ -76,8 +74,12 @@ class BulkNestedDefaultRouter(BulkRouterMixin, NestedDefaultRouter): documents_router.register("notes", NoteViewSet) documents_router.register("sections", SectionViewSet) documents_router.register("entities", EntityOccurrenceViewSet) -documents_router.register("legacy_entities_2", LegacyEntity2ViewSet) -documents_router.register("legacy_entities", LegacyEntityViewSet) +documents_router.register( + "legacy_entities_2", LegacyEntity2ViewSet, basename="legacyentity2" +) +documents_router.register( + "legacy_entities", LegacyEntityViewSet, basename="legacyentity" +) documents_router.register("dates", EntityDateViewSet) documents_router.register("errors", DocumentErrorViewSet) documents_router.register("data", DataViewSet, basename="data") @@ -90,9 +92,6 @@ class BulkNestedDefaultRouter(BulkRouterMixin, NestedDefaultRouter): projects_router.register("documents", ProjectMembershipViewSet) projects_router.register("users", CollaborationViewSet) -sidekick_router = SidekickRouter(router, "projects", lookup="project") -sidekick_router.register("sidekick", SidekickViewSet) - router.register("documents/search/saved", SavedSearchViewSet, basename="saved_search") urlpatterns = [ @@ -101,7 +100,6 @@ class BulkNestedDefaultRouter(BulkRouterMixin, NestedDefaultRouter): path("api/", include(router.urls)), path("api/", include(documents_router.urls)), path("api/", include(projects_router.urls)), - path("api/", include(sidekick_router.urls)), path("api/schema/", SpectacularAPIView.as_view(), name="schema"), path( "api/schema/redoc/", diff --git a/documentcloud/addons/choices.py b/documentcloud/addons/choices.py index 031b9e85..c4d2094a 100644 --- a/documentcloud/addons/choices.py +++ b/documentcloud/addons/choices.py @@ -1,14 +1,20 @@ # Django +from django.db import models from django.utils.translation import gettext_lazy as _ -# Third Party -from djchoices import ChoiceItem, DjangoChoices +class Event(models.IntegerChoices): + def __new__(cls, value, label=None, api=False): + obj = int.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.api = api + return obj -class Event(DjangoChoices): - # `api` specifies if this attribute should be accessible via the API - disabled = ChoiceItem(0, _("Disabled"), api=True) - hourly = ChoiceItem(1, _("Hourly"), api=True) - daily = ChoiceItem(2, _("Daily"), api=True) - weekly = ChoiceItem(3, _("Weekly"), api=True) - upload = ChoiceItem(4, _("Upload"), api=True) + # pylint:disable = invalid-name + disabled = 0, _("Disabled"), True + hourly = 1, _("Hourly"), True + daily = 2, _("Daily"), True + weekly = 3, _("Weekly"), True + upload = 4, _("Upload"), True diff --git a/documentcloud/addons/models.py b/documentcloud/addons/models.py index 71884a8e..ac640364 100644 --- a/documentcloud/addons/models.py +++ b/documentcloud/addons/models.py @@ -34,6 +34,7 @@ logger = logging.getLogger(__name__) +# pylint:disable=too-many-positional-arguments class AddOn(models.Model): objects = AddOnQuerySet.as_manager() @@ -190,6 +191,7 @@ def dispatch(self, uuid, user, documents, query, parameters, event_id): f"{self.api_url}/dispatches", headers=self.api_headers, json={"event_type": self.name, "client_payload": payload}, + timeout=10, ) resp.raise_for_status() @@ -198,6 +200,7 @@ def update_config(self): resp = requests.get( f"{self.api_url}/contents/config.yaml", headers={**self.api_headers, "Accept": "application/vnd.github.v3.raw"}, + timeout=10, ) if resp.status_code == 404: self.error = True @@ -388,7 +391,7 @@ def find_run_id(self): url = f"{self.addon.api_url}/actions/runs?created=%3E{date_filter}" while url is not None: logger.info("[FIND RUN ID] get %s", url) - resp = requests.get(url, headers=self.addon.api_headers) + resp = requests.get(url, headers=self.addon.api_headers, timeout=10) resp.raise_for_status() url = resp.links.get("next", {}).get("url") resp_json = resp.json() @@ -411,7 +414,9 @@ def find_run_id(self): else: continue - resp = requests.get(jobs_url, headers=self.addon.api_headers) + resp = requests.get( + jobs_url, headers=self.addon.api_headers, timeout=10 + ) resp.raise_for_status() jobs = resp.json()["jobs"] @@ -443,6 +448,7 @@ def set_status(self): resp = requests.get( f"{self.addon.api_url}/actions/runs/{self.run_id}", headers=self.addon.api_headers, + timeout=10, ) if resp.status_code != 200: logger.info( @@ -457,7 +463,7 @@ def set_status(self): # if we failed, check the job status to check for 'cancelled' # which means it timed out resp = requests.get( - resp.json()["jobs_url"], headers=self.addon.api_headers + resp.json()["jobs_url"], headers=self.addon.api_headers, timeout=10 ) if resp.status_code == 200 and len(resp.json()["jobs"]) > 0: status = resp.json()["jobs"][0]["conclusion"] @@ -518,6 +524,7 @@ def cancel(self): resp = requests.post( f"{self.addon.api_url}/actions/runs/{self.run_id}/cancel", headers=self.addon.api_headers, + timeout=10, ) if resp.status_code == 202: return "succeed" @@ -735,6 +742,7 @@ def token(self): "https://api.github.com/app/installations/" f"{self.iid}/access_tokens", headers=headers, + timeout=10, ) resp = resp.json() token = resp["token"] diff --git a/documentcloud/addons/tasks.py b/documentcloud/addons/tasks.py index bbd203a3..d2d4c5b9 100644 --- a/documentcloud/addons/tasks.py +++ b/documentcloud/addons/tasks.py @@ -62,6 +62,7 @@ def set_run_status(uuid): set_run_status.apply_async(args=[uuid], countdown=5) +# pylint:disable=too-many-positional-arguments @shared_task def dispatch(addon_id, uuid, user_id, documents, query, parameters, event_id=None): # pylint: disable=too-many-arguments diff --git a/documentcloud/addons/tests/test_views.py b/documentcloud/addons/tests/test_views.py index 21a285f4..1062d242 100644 --- a/documentcloud/addons/tests/test_views.py +++ b/documentcloud/addons/tests/test_views.py @@ -266,12 +266,12 @@ def test_update_no_addon(self, client): def test_destroy(self, client, mocker): """Destroying an addon run cancels it""" - cancel = mocker.patch.object(AddOnRun, "cancel") + cancel = mocker.patch("documentcloud.addons.views.cancel.delay") run = AddOnRunFactory() client.force_authenticate(user=run.user) response = client.delete(f"/api/addon_runs/{run.uuid}/") assert response.status_code == status.HTTP_204_NO_CONTENT - assert cancel.called_once() + cancel.assert_called_once() def test_filter_site(self, client): """Filter runs by event parameters.site""" diff --git a/documentcloud/addons/views.py b/documentcloud/addons/views.py index 5639626c..6a8a6b7e 100644 --- a/documentcloud/addons/views.py +++ b/documentcloud/addons/views.py @@ -1021,6 +1021,7 @@ def verify_signature(request): uid=data["sender"]["id"], defaults={"name": data["sender"]["login"]} ) if data.get("action") in ["added", "created"]: + repos = [] logger.info("[GITHUB WEBHOOK] %s", data["action"]) installation, _created = GitHubInstallation.objects.get_or_create( iid=data["installation"]["id"], @@ -1039,11 +1040,11 @@ def verify_signature(request): with transaction.atomic(): AddOn.objects.update_or_create( repository=repo["full_name"], - defaults=dict( - github_account=acct, - github_installation=installation, - removed=False, - ), + defaults={ + "github_account": acct, + "github_installation": installation, + "removed": False, + }, ) transaction.on_commit( lambda r=repo: update_config.delay(r["full_name"]) @@ -1182,7 +1183,7 @@ def get(self, request, *args, **kwargs): url += "/" url += kwargs.get("path", "") - response = requests.get(url) + response = requests.get(url, timeout=(10, 30)) return HttpResponse( content=response.content, status=response.status_code, diff --git a/documentcloud/common/environment/aws/processing_token.py b/documentcloud/common/environment/aws/processing_token.py index 739f9d02..69c0068b 100644 --- a/documentcloud/common/environment/aws/processing_token.py +++ b/documentcloud/common/environment/aws/processing_token.py @@ -17,7 +17,7 @@ def authenticate_token(*args, **kwargs): headers = event["headers"] if headers.get(AUTHORIZATION) != f"processing-token {PROCESSING_TOKEN}": - raise Exception("Authentication Failed.") + raise PermissionError("Authentication Failed.") # If all passes, auth succeeded return func(*args, **kwargs) diff --git a/documentcloud/common/environment/aws/storage.py b/documentcloud/common/environment/aws/storage.py index 812baab8..d4f5421b 100644 --- a/documentcloud/common/environment/aws/storage.py +++ b/documentcloud/common/environment/aws/storage.py @@ -58,23 +58,29 @@ def size(self, file_name): return bucket.Object(key).content_length def open(self, file_name, mode="rb", content_type=None, access=None): - + # This logic changed with smart_open 5.0 + # https://github.com/piskvorky/smart_open/blob/develop/CHANGELOG.md#500-30-mar-2021 + # See migration guide here: + # https://github.com/piskvorky/smart_open/blob/develop/MIGRATING_FROM_OLDER_VERSIONS.rst transport_params = { - "resource_kwargs": self.resource_kwargs, - "multipart_upload_kwargs": {}, + "client": self.s3_client, } - - if content_type is None: - # attempt to guess content type if not specified - content_type = mimetypes.guess_type(file_name)[0] - - if content_type is not None: - # set content type if we have one - transport_params["multipart_upload_kwargs"]["ContentType"] = content_type - - if access is not None: - transport_params["multipart_upload_kwargs"]["ACL"] = ACLS[access] - + if "w" in mode: # Setting these kwargs only make sense in a write context + writeable_kwargs = {} + if content_type is None: + # attempt to guess content type if not specified + content_type = mimetypes.guess_type(file_name)[0] + if content_type is not None: + # set content type if we have one + writeable_kwargs["ContentType"] = content_type + if access is not None: + writeable_kwargs["ACL"] = ACLS[access] + if writeable_kwargs: + # Guard against no writeable kwargs provided + transport_params["client_kwargs"] = { + "S3.Client.put_object": writeable_kwargs, + "S3.Client.create_multipart_upload": writeable_kwargs, + } return smart_open.open( f"s3://{file_name}", mode, transport_params=transport_params ) @@ -196,7 +202,7 @@ def exists(self, file_name): def fetch_url(self, url, file_name, access, auth=None): with self.open(file_name, "wb", access=access) as out_file, requests.get( - url, stream=True, auth=auth + url, stream=True, auth=auth, timeout=(10, 60) ) as response: response.raise_for_status() for chunk in response.iter_content(chunk_size=10 * 1024 * 1024): diff --git a/documentcloud/common/environment/gcp/httpsub.py b/documentcloud/common/environment/gcp/httpsub.py index f37ba4ab..02a7040a 100644 --- a/documentcloud/common/environment/gcp/httpsub.py +++ b/documentcloud/common/environment/gcp/httpsub.py @@ -1,4 +1,4 @@ -# pylint: disable=unused-import +# pylint: disable=unused-import, import-error # Third Party from common import session as httpsub diff --git a/documentcloud/common/environment/local/pubsub.py b/documentcloud/common/environment/local/pubsub.py index 5970808d..170ddabf 100644 --- a/documentcloud/common/environment/local/pubsub.py +++ b/documentcloud/common/environment/local/pubsub.py @@ -129,13 +129,6 @@ def finish_import_task(data): return finish_import_process.delay(data) -def sidekick_preprocess_task(data): - # DocumentCloud - from documentcloud.sidekick.tasks import sidekick_preprocess - - return sidekick_preprocess.delay(data) - - def retry_errors_task(data): # DocumentCloud from documentcloud.documents.tasks import retry_errors_local @@ -191,13 +184,6 @@ def retry_errors_task(data): ("documentcloud", env.str("FINISH_IMPORT_TOPIC", default="finish-import")), finish_import_task, ) -publisher.register_internal_callback( - ( - "documentcloud", - env.str("SIDEKICK_PREPROCESS_TOPIC", default="sidekick-preprocess-topic"), - ), - sidekick_preprocess_task, -) publisher.register_internal_callback( ("documentcloud", env.str("RETRY_ERROR_TOPIC", default="retry-error-topic")), retry_errors_task, diff --git a/documentcloud/common/environment/minio/storage.py b/documentcloud/common/environment/minio/storage.py index a2cc3c18..25781c67 100644 --- a/documentcloud/common/environment/minio/storage.py +++ b/documentcloud/common/environment/minio/storage.py @@ -13,8 +13,8 @@ def __init__(self, resource_kwargs=None, minio=True): if resource_kwargs is None: resource_kwargs = { "endpoint_url": env.str("MINIO_URL"), - "aws_access_key_id": env.str("MINIO_ACCESS_KEY"), - "aws_secret_access_key": env.str("MINIO_SECRET_KEY"), + "aws_access_key_id": env.str("MINIO_ROOT_USER"), + "aws_secret_access_key": env.str("MINIO_ROOT_PASSWORD"), "config": Config(signature_version="s3v4"), "region_name": "us-east-1", } diff --git a/documentcloud/common/serverless/tests/test_error_handling.py b/documentcloud/common/serverless/tests/test_error_handling.py index 23c0a8eb..db26b324 100644 --- a/documentcloud/common/serverless/tests/test_error_handling.py +++ b/documentcloud/common/serverless/tests/test_error_handling.py @@ -11,7 +11,6 @@ # Third Party import pytest -from sharedmock.mock import SharedMock # DocumentCloud from documentcloud.common import redis_fields @@ -27,6 +26,7 @@ StorageHandler, Workspace, ) +from sharedmock.mock import SharedMock # Since redis is used in the SharedMock calls, it needs to be pickle-able # in order to be sent across the process boundary. FakeRedis and Mock's both diff --git a/documentcloud/common/serverless/utils.py b/documentcloud/common/serverless/utils.py index 6bed1376..00782dcd 100644 --- a/documentcloud/common/serverless/utils.py +++ b/documentcloud/common/serverless/utils.py @@ -21,7 +21,7 @@ env = environ.Env() -# pylint: disable=import-error +# pylint: disable=import-error, too-many-positional-arguments if not env.str("ENVIRONMENT").startswith("local"): # in production, log errors to sentry diff --git a/documentcloud/conftest.py b/documentcloud/conftest.py index b2c7962f..9a81b0a0 100644 --- a/documentcloud/conftest.py +++ b/documentcloud/conftest.py @@ -24,12 +24,12 @@ from documentcloud.users.tests.factories import UserFactory -def pytest_ignore_collect(path, config): +def pytest_ignore_collect(collection_path, config): """Do not recurse into symlinks when collecting tests Used to ignore symlinks we have in processing to the common module """ # pylint: disable=unused-argument - return path.isdir() and path.islink() + return collection_path.is_dir() and collection_path.is_symlink() @pytest.fixture diff --git a/documentcloud/core/choices.py b/documentcloud/core/choices.py index 6acd9462..21fd5c18 100644 --- a/documentcloud/core/choices.py +++ b/documentcloud/core/choices.py @@ -1,122 +1,117 @@ # Django +from django.db import models from django.utils.translation import gettext_lazy as _ -# Third Party -from djchoices import ChoiceItem, DjangoChoices +class Language(models.TextChoices): + def __new__(cls, value, label=None, ocr_code=None): + obj = str.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.ocr_code = ocr_code if ocr_code is not None else value + return obj -class Language(DjangoChoices): - afrikaans = ChoiceItem("afr", _("Afrikaans"), ocr_code="afr") - amharic = ChoiceItem("amh", _("Amharic"), ocr_code="amh") - arabic = ChoiceItem("ara", _("Arabic"), ocr_code="ara") - assamese = ChoiceItem("asm", _("Assamese"), ocr_code="asm") - azerbaijani = ChoiceItem("aze", _("Azerbaijani"), ocr_code="aze") - azerbaijani_cyrillic = ChoiceItem( - "aze_cyrl", _("Azerbaijani - Cyrillic"), ocr_code="aze_cyrl" - ) - belarusian = ChoiceItem("bel", _("Belarusian"), ocr_code="bel") - bengali = ChoiceItem("ben", _("Bengali"), ocr_code="ben") - tibetan = ChoiceItem("bod", _("Tibetan"), ocr_code="bod") - bosnian = ChoiceItem("bos", _("Bosnian"), ocr_code="bos") - bulgarian = ChoiceItem("bul", _("Bulgarian"), ocr_code="bul") - catalan_valencian = ChoiceItem("cat", _("Catalan; Valencian"), ocr_code="cat") - cebuano = ChoiceItem("ceb", _("Cebuano"), ocr_code="ceb") - czech = ChoiceItem("ces", _("Czech"), ocr_code="ces") - chinese_simplified = ChoiceItem( - "zho", _("Chinese - Simplified"), ocr_code="chi_sim" - ) - chinese_traditional = ChoiceItem( - "tra", _("Chinese - Traditional"), ocr_code="chi_tra" - ) - cherokee = ChoiceItem("chr", _("Cherokee"), ocr_code="chr") - welsh = ChoiceItem("cym", _("Welsh"), ocr_code="cym") - danish = ChoiceItem("dan", _("Danish"), ocr_code="dan") - german = ChoiceItem("deu", _("German"), ocr_code="deu") - dzongkha = ChoiceItem("dzo", _("Dzongkha"), ocr_code="dzo") - greek = ChoiceItem("ell", _("Greek"), ocr_code="ell") - english = ChoiceItem("eng", _("English"), ocr_code="eng") - middle_english = ChoiceItem("enm", _("Middle English"), ocr_code="enm") - esperanto = ChoiceItem("epo", _("Esperanto"), ocr_code="epo") - estonian = ChoiceItem("est", _("Estonian"), ocr_code="est") - basque = ChoiceItem("eus", _("Basque"), ocr_code="eus") - persian = ChoiceItem("fas", _("Persian"), ocr_code="fas") - finnish = ChoiceItem("fin", _("Finnish"), ocr_code="fin") - french = ChoiceItem("fra", _("French"), ocr_code="fra") - german_fraktur = ChoiceItem("frk", _("German Fraktur"), ocr_code="frk") - middle_french = ChoiceItem("frm", _("Middle French"), ocr_code="frm") - irish = ChoiceItem("gle", _("Irish"), ocr_code="gle") - galician = ChoiceItem("glg", _("Galician"), ocr_code="glg") - ancient_greek = ChoiceItem("grc", _("Ancient Greek"), ocr_code="grc") - gujarati = ChoiceItem("guj", _("Gujarati"), ocr_code="guj") - haitian_haitian_creole = ChoiceItem( - "hat", _("Haitian; Haitian Creole"), ocr_code="hat" - ) - hebrew = ChoiceItem("heb", _("Hebrew"), ocr_code="heb") - hindi = ChoiceItem("hin", _("Hindi"), ocr_code="hin") - croatian = ChoiceItem("hrv", _("Croatian"), ocr_code="hrv") - hungarian = ChoiceItem("hun", _("Hungarian"), ocr_code="hun") - inuktitut = ChoiceItem("iku", _("Inuktitut"), ocr_code="iku") - indonesian = ChoiceItem("ind", _("Indonesian"), ocr_code="ind") - icelandic = ChoiceItem("isl", _("Icelandic"), ocr_code="isl") - italian = ChoiceItem("ita", _("Italian"), ocr_code="ita") - italian_old = ChoiceItem("ita_old", _("Italian - Old"), ocr_code="ita_old") - javanese = ChoiceItem("jav", _("Javanese"), ocr_code="jav") - japanese = ChoiceItem("jpn", _("Japanese"), ocr_code="jpn") - kannada = ChoiceItem("kan", _("Kannada"), ocr_code="kan") - georgian = ChoiceItem("kat", _("Georgian"), ocr_code="kat") - georgian_old = ChoiceItem("kat_old", _("Georgian - Old"), ocr_code="kat_old") - kazakh = ChoiceItem("kaz", _("Kazakh"), ocr_code="kaz") - central_khmer = ChoiceItem("khm", _("Central Khmer"), ocr_code="khm") - kirghiz_kyrgyz = ChoiceItem("kir", _("Kirghiz; Kyrgyz"), ocr_code="kir") - korean = ChoiceItem("kor", _("Korean"), ocr_code="kor") - kurdish = ChoiceItem("kur", _("Kurdish"), ocr_code="kur") - lao = ChoiceItem("lao", _("Lao"), ocr_code="lao") - latin = ChoiceItem("lat", _("Latin"), ocr_code="lat") - latvian = ChoiceItem("lav", _("Latvian"), ocr_code="lav") - lithuanian = ChoiceItem("lit", _("Lithuanian"), ocr_code="lit") - malayalam = ChoiceItem("mal", _("Malayalam"), ocr_code="mal") - marathi = ChoiceItem("mar", _("Marathi"), ocr_code="mar") - macedonian = ChoiceItem("mkd", _("Macedonian"), ocr_code="mkd") - maltese = ChoiceItem("mlt", _("Maltese"), ocr_code="mlt") - malay = ChoiceItem("msa", _("Malay"), ocr_code="msa") - burmese = ChoiceItem("mya", _("Burmese"), ocr_code="mya") - nepali = ChoiceItem("nep", _("Nepali"), ocr_code="nep") - dutch_flemish = ChoiceItem("nld", _("Dutch; Flemish"), ocr_code="nld") - norwegian = ChoiceItem("nor", _("Norwegian"), ocr_code="nor") - oriya = ChoiceItem("ori", _("Oriya"), ocr_code="ori") - panjabi_punjabi = ChoiceItem("pan", _("Panjabi; Punjabi"), ocr_code="pan") - polish = ChoiceItem("pol", _("Polish"), ocr_code="pol") - portuguese = ChoiceItem("por", _("Portuguese"), ocr_code="por") - pushto_pashto = ChoiceItem("pus", _("Pushto; Pashto"), ocr_code="pus") - romanian_moldavian_moldovan = ChoiceItem( - "ron", _("Romanian; Moldavian; Moldovan"), ocr_code="ron" - ) - russian = ChoiceItem("rus", _("Russian"), ocr_code="rus") - sanskrit = ChoiceItem("san", _("Sanskrit"), ocr_code="san") - sinhala_sinhalese = ChoiceItem("sin", _("Sinhala; Sinhalese"), ocr_code="sin") - slovak = ChoiceItem("slk", _("Slovak"), ocr_code="slk") - slovenian = ChoiceItem("slv", _("Slovenian"), ocr_code="slv") - spanish_castilian = ChoiceItem("spa", _("Spanish; Castilian"), ocr_code="spa") - spanish_castilian_old = ChoiceItem( - "spa_old", _("Spanish; Castilian - Old"), ocr_code="spa_old" - ) - albanian = ChoiceItem("sqi", _("Albanian"), ocr_code="sqi") - serbian = ChoiceItem("srp", _("Serbian"), ocr_code="srp") - serbian_latin = ChoiceItem("srp_latn", _("Serbian - Latin"), ocr_code="srp_latn") - swahili = ChoiceItem("swa", _("Swahili"), ocr_code="swa") - swedish = ChoiceItem("swe", _("Swedish"), ocr_code="swe") - syriac = ChoiceItem("syr", _("Syriac"), ocr_code="syr") - tamil = ChoiceItem("tam", _("Tamil"), ocr_code="tam") - telugu = ChoiceItem("tel", _("Telugu"), ocr_code="tel") - tajik = ChoiceItem("tgk", _("Tajik"), ocr_code="tgk") - tagalog = ChoiceItem("tgl", _("Tagalog"), ocr_code="tgl") - thai = ChoiceItem("tha", _("Thai"), ocr_code="tha") - tigrinya = ChoiceItem("tir", _("Tigrinya"), ocr_code="tir") - turkish = ChoiceItem("tur", _("Turkish"), ocr_code="tur") - uighur_uyghur = ChoiceItem("uig", _("Uighur; Uyghur"), ocr_code="uig") - ukrainian = ChoiceItem("ukr", _("Ukrainian"), ocr_code="ukr") - urdu = ChoiceItem("urd", _("Urdu"), ocr_code="urd") - uzbek = ChoiceItem("uzb", _("Uzbek"), ocr_code="uzb") - uzbek_cyrillic = ChoiceItem("uzb_cyrl", _("Uzbek - Cyrillic"), ocr_code="uzb_cyrl") - vietnamese = ChoiceItem("vie", _("Vietnamese"), ocr_code="vie") - yiddish = ChoiceItem("yid", _("Yiddish"), ocr_code="yid") + # pylint: disable = invalid-name + afrikaans = "afr", _("Afrikaans") + amharic = "amh", _("Amharic") + arabic = "ara", _("Arabic") + assamese = "asm", _("Assamese") + azerbaijani = "aze", _("Azerbaijani") + azerbaijani_cyrillic = "aze_cyrl", _("Azerbaijani - Cyrillic") + belarusian = "bel", _("Belarusian") + bengali = "ben", _("Bengali") + tibetan = "bod", _("Tibetan") + bosnian = "bos", _("Bosnian") + bulgarian = "bul", _("Bulgarian") + catalan_valencian = "cat", _("Catalan; Valencian") + cebuano = "ceb", _("Cebuano") + czech = "ces", _("Czech") + chinese_simplified = "zho", _("Chinese - Simplified"), "chi_sim" + chinese_traditional = "tra", _("Chinese - Traditional"), "chi_tra" + cherokee = "chr", _("Cherokee") + welsh = "cym", _("Welsh") + danish = "dan", _("Danish") + german = "deu", _("German") + dzongkha = "dzo", _("Dzongkha") + greek = "ell", _("Greek") + english = "eng", _("English") + middle_english = "enm", _("Middle English") + esperanto = "epo", _("Esperanto") + estonian = "est", _("Estonian") + basque = "eus", _("Basque") + persian = "fas", _("Persian") + finnish = "fin", _("Finnish") + french = "fra", _("French") + german_fraktur = "frk", _("German Fraktur") + middle_french = "frm", _("Middle French") + irish = "gle", _("Irish") + galician = "glg", _("Galician") + ancient_greek = "grc", _("Ancient Greek") + gujarati = "guj", _("Gujarati") + haitian_haitian_creole = "hat", _("Haitian; Haitian Creole") + hebrew = "heb", _("Hebrew") + hindi = "hin", _("Hindi") + croatian = "hrv", _("Croatian") + hungarian = "hun", _("Hungarian") + inuktitut = "iku", _("Inuktitut") + indonesian = "ind", _("Indonesian") + icelandic = "isl", _("Icelandic") + italian = "ita", _("Italian") + italian_old = "ita_old", _("Italian - Old") + javanese = "jav", _("Javanese") + japanese = "jpn", _("Japanese") + kannada = "kan", _("Kannada") + georgian = "kat", _("Georgian") + georgian_old = "kat_old", _("Georgian - Old") + kazakh = "kaz", _("Kazakh") + central_khmer = "khm", _("Central Khmer") + kirghiz_kyrgyz = "kir", _("Kirghiz; Kyrgyz") + korean = "kor", _("Korean") + kurdish = "kur", _("Kurdish") + lao = "lao", _("Lao") + latin = "lat", _("Latin") + latvian = "lav", _("Latvian") + lithuanian = "lit", _("Lithuanian") + malayalam = "mal", _("Malayalam") + marathi = "mar", _("Marathi") + macedonian = "mkd", _("Macedonian") + maltese = "mlt", _("Maltese") + malay = "msa", _("Malay") + burmese = "mya", _("Burmese") + nepali = "nep", _("Nepali") + dutch_flemish = "nld", _("Dutch; Flemish") + norwegian = "nor", _("Norwegian") + oriya = "ori", _("Oriya") + panjabi_punjabi = "pan", _("Panjabi; Punjabi") + polish = "pol", _("Polish") + portuguese = "por", _("Portuguese") + pushto_pashto = "pus", _("Pushto; Pashto") + romanian_moldavian_moldovan = "ron", _("Romanian; Moldavian; Moldovan") + russian = "rus", _("Russian") + sanskrit = "san", _("Sanskrit") + sinhala_sinhalese = "sin", _("Sinhala; Sinhalese") + slovak = "slk", _("Slovak") + slovenian = "slv", _("Slovenian") + spanish_castilian = "spa", _("Spanish; Castilian") + spanish_castilian_old = "spa_old", _("Spanish; Castilian - Old") + albanian = "sqi", _("Albanian") + serbian = "srp", _("Serbian") + serbian_latin = "srp_latn", _("Serbian - Latin") + swahili = "swa", _("Swahili") + swedish = "swe", _("Swedish") + syriac = "syr", _("Syriac") + tamil = "tam", _("Tamil") + telugu = "tel", _("Telugu") + tajik = "tgk", _("Tajik") + tagalog = "tgl", _("Tagalog") + thai = "tha", _("Thai") + tigrinya = "tir", _("Tigrinya") + turkish = "tur", _("Turkish") + uighur_uyghur = "uig", _("Uighur; Uyghur") + ukrainian = "ukr", _("Ukrainian") + urdu = "urd", _("Urdu") + uzbek = "uzb", _("Uzbek") + uzbek_cyrillic = "uzb_cyrl", _("Uzbek - Cyrillic") + vietnamese = "vie", _("Vietnamese") + yiddish = "yid", _("Yiddish") diff --git a/documentcloud/core/filters.py b/documentcloud/core/filters.py index 05b971d1..a2e68485 100644 --- a/documentcloud/core/filters.py +++ b/documentcloud/core/filters.py @@ -71,12 +71,12 @@ class ModelMultipleChoiceFilter( class ChoicesFilter(django_filters.TypedMultipleChoiceFilter): """A choices filter configured to work how we want our choice filters to work - `choices` kwarg should be an instanceof DjangoChoices + `choices` kwarg should be an instance of IntegerChoices/TextChoices """ def __init__(self, *args, **kwargs): choices = kwargs.pop("choices") - kwargs["choices"] = list(choices.labels.items()) - kwargs["coerce"] = lambda x: getattr(choices, x) + kwargs["choices"] = [(member.name, member.label) for member in choices] + kwargs["coerce"] = lambda x: next(m for m in choices if m.name == x) kwargs["widget"] = QueryArrayWidget super().__init__(*args, **kwargs) diff --git a/documentcloud/core/management/commands/initialize_minio.py b/documentcloud/core/management/commands/initialize_minio.py new file mode 100644 index 00000000..40c510fb --- /dev/null +++ b/documentcloud/core/management/commands/initialize_minio.py @@ -0,0 +1,57 @@ +# Django +from django.core.management.base import BaseCommand + +# Standard Library +import json + +# Third Party +import boto3 +import environ +from botocore.client import Config +from botocore.exceptions import ClientError + +env = environ.Env() + + +class Command(BaseCommand): + help = "Initialize Minio bucket and policies for local development" + + def handle(self, *args, **options): + if env.str("ENVIRONMENT") != "local-minio": + return + + client = boto3.client( + "s3", + endpoint_url=env.str("MINIO_URL"), + aws_access_key_id=env.str("MINIO_ROOT_USER"), + aws_secret_access_key=env.str("MINIO_ROOT_PASSWORD"), + config=Config(signature_version="s3v4"), + region_name="us-east-1", + ) + + # Create bucket if it doesn't exist + try: + client.head_bucket(Bucket="documents") + self.stdout.write("Bucket already exists") + except ClientError as e: + error_code = e.response["Error"]["Code"] + if error_code == "404": # Bucket doesn't exist, create it + client.create_bucket(Bucket="documents") + self.stdout.write("Created documents bucket") + else: + raise + + # Set public read policy + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": "*", + "Action": "s3:GetObject", + "Resource": "arn:aws:s3:::documents/*", + } + ], + } + client.put_bucket_policy(Bucket="documents", Policy=json.dumps(policy)) + self.stdout.write("Minio initialized successfully") diff --git a/documentcloud/core/versioning.py b/documentcloud/core/versioning.py index 2c459b3e..a743d585 100644 --- a/documentcloud/core/versioning.py +++ b/documentcloud/core/versioning.py @@ -4,7 +4,7 @@ class QueryParameterVersioning(versioning.QueryParameterVersioning): - # pylint: disable=redefined-builtin + # pylint: disable=redefined-builtin, too-many-positional-arguments def reverse( self, viewname, args=None, kwargs=None, request=None, format=None, **extra ): diff --git a/documentcloud/documents/choices.py b/documentcloud/documents/choices.py index 91bcf23d..78a3e428 100644 --- a/documentcloud/documents/choices.py +++ b/documentcloud/documents/choices.py @@ -1,55 +1,87 @@ # Django +from django.db import models from django.utils.translation import gettext_lazy as _ -# Third Party -from djchoices import ChoiceItem, DjangoChoices +class Access(models.IntegerChoices): + def __new__(cls, value, label=None, api=False): + obj = int.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.api = api + return obj -class Access(DjangoChoices): - # `api` specifies if this attribute should be accessible via the API + # pylint: disable = invalid-name # Free and public to all. - public = ChoiceItem(0, _("Public"), api=True) + public = 0, _("Public"), True # Visible to both the owner and her organization. - organization = ChoiceItem(1, _("Organization"), api=True) + organization = 1, _("Organization"), True # The document is only visible to its owner. - private = ChoiceItem(2, _("Private"), api=True) + private = 2, _("Private"), True # The document has been taken down (perhaps temporary). - invisible = ChoiceItem(3, _("Invisible"), api=False) + invisible = 3, _("Invisible"), False -class Status(DjangoChoices): - # `api` specifies if this attribute should be accessible via the API - # The document is in a normal succesful state - success = ChoiceItem(0, _("Success"), api=True) +class Status(models.IntegerChoices): + def __new__(cls, value, label=None, api=False): + obj = int.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.api = api + return obj + + # pylint: disable = invalid-name + # The document is in a normal successful state + success = 0, _("Success"), True # The document is processing, but readable during the operation - readable = ChoiceItem(1, _("Readable"), api=True) + readable = 1, _("Readable"), True # The document is processing, and not currently readable - pending = ChoiceItem(2, _("Pending"), api=True) + pending = 2, _("Pending"), True # There was an error processing the document - error = ChoiceItem(3, _("Error"), api=True) + error = 3, _("Error"), True # There is no file yet - nofile = ChoiceItem(4, _("No file"), api=True) + nofile = 4, _("No file"), True # The file is deleted - deleted = ChoiceItem(5, _("Deleted"), api=False) - - -class EntityKind(DjangoChoices): - unknown = ChoiceItem(0, _("Unknown"), api=True) - person = ChoiceItem(1, _("Person"), api=True) - location = ChoiceItem(2, _("Location"), api=True) - organization = ChoiceItem(3, _("Organization"), api=True) - event = ChoiceItem(4, _("Event"), api=True) - work_of_art = ChoiceItem(5, _("Work_Of_Art"), api=True) - consumer_good = ChoiceItem(6, _("Consumer_Good"), api=True) - other = ChoiceItem(7, _("Other"), api=True) - phone_number = ChoiceItem(9, _("Phone_Number"), api=True) - address = ChoiceItem(10, _("Address"), api=True) - date = ChoiceItem(11, _("Date"), api=True) - number = ChoiceItem(12, _("Number"), api=True) - price = ChoiceItem(13, _("Price"), api=True) - - -class OccurrenceKind(DjangoChoices): - unknown = ChoiceItem(0, _("Unknown"), api=True) - proper = ChoiceItem(1, _("Proper"), api=True) - common = ChoiceItem(2, _("Common"), api=True) + deleted = 5, _("Deleted"), False + + +class EntityKind(models.IntegerChoices): + def __new__(cls, value, label=None, api=False): + obj = int.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.api = api + return obj + + # pylint: disable = invalid-name + unknown = 0, _("Unknown"), True + person = 1, _("Person"), True + location = 2, _("Location"), True + organization = 3, _("Organization"), True + event = 4, _("Event"), True + work_of_art = 5, _("Work_Of_Art"), True + consumer_good = 6, _("Consumer_Good"), True + other = 7, _("Other"), True + phone_number = 9, _("Phone_Number"), True + address = 10, _("Address"), True + date = 11, _("Date"), True + number = 12, _("Number"), True + price = 13, _("Price"), True + + +class OccurrenceKind(models.IntegerChoices): + def __new__(cls, value, label=None, api=False): + obj = int.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.api = api + return obj + + # pylint: disable = invalid-name + unknown = 0, _("Unknown"), True + proper = 1, _("Proper"), True + common = 2, _("Common"), True diff --git a/documentcloud/documents/fields.py b/documentcloud/documents/fields.py index da3145ae..716a4e08 100644 --- a/documentcloud/documents/fields.py +++ b/documentcloud/documents/fields.py @@ -6,11 +6,7 @@ class ChoiceField(serializers.ChoiceField): """Choice field enhanced to use the choices label and ability to omit choices""" def __init__(self, choices, **kwargs): - choices = [ - (choice.value, label) - for label, choice in choices._fields.items() - if choice.api - ] + choices = [(member.value, member.name) for member in choices if member.api] self.choice_map = {label: value for value, label in choices} super().__init__(choices, **kwargs) @@ -23,7 +19,6 @@ def to_representation(self, value): def to_internal_value(self, data): if data == "" and self.allow_blank: return "" - try: return self.choice_map[str(data)] except KeyError: diff --git a/documentcloud/documents/models/document.py b/documentcloud/documents/models/document.py index a61eccd4..50563a45 100644 --- a/documentcloud/documents/models/document.py +++ b/documentcloud/documents/models/document.py @@ -666,8 +666,8 @@ def page_filter(text): "type": "document", "user": self.user_id, "organization": self.organization_id, - "access": Access.attributes[self.access], - "status": Status.attributes[self.status], + "access": Access(self.access).name, + "status": Status(self.status).name, "title": self.title, "slug": self.slug, "source": self.source, @@ -744,6 +744,7 @@ def invalidate_cache(self): "X-Auth-Email": cloudflare_email, "X-Auth-Key": cloudflare_key, }, + timeout=10, ) def index_on_commit(self, **kwargs): diff --git a/documentcloud/documents/models/note.py b/documentcloud/documents/models/note.py index c05e3d8c..a5d925d4 100644 --- a/documentcloud/documents/models/note.py +++ b/documentcloud/documents/models/note.py @@ -165,7 +165,7 @@ def solr(self): "type": "note", "user": self.user_id, "organization": self.organization_id, - "access": Access.attributes[self.access], + "access": Access(self.access).name, "page_count": self.page_number, "title": self.title, "description": self.content, diff --git a/documentcloud/documents/models/saved_search.py b/documentcloud/documents/models/saved_search.py index b5850230..f071a2e2 100644 --- a/documentcloud/documents/models/saved_search.py +++ b/documentcloud/documents/models/saved_search.py @@ -3,6 +3,7 @@ from django.db import models from django.utils.translation import gettext_lazy as _ +# Standard Library from uuid import uuid4 # DocumentCloud diff --git a/documentcloud/documents/modifications.py b/documentcloud/documents/modifications.py index 6fa89acb..1682dc58 100644 --- a/documentcloud/documents/modifications.py +++ b/documentcloud/documents/modifications.py @@ -2,6 +2,7 @@ # Django from django.db import transaction +# Standard Library from collections import defaultdict from copy import copy diff --git a/documentcloud/documents/processing/document_conversion/requirements.txt b/documentcloud/documents/processing/document_conversion/requirements.txt index 8a76067d..37009855 100644 --- a/documentcloud/documents/processing/document_conversion/requirements.txt +++ b/documentcloud/documents/processing/document_conversion/requirements.txt @@ -1,8 +1,8 @@ -r cloud-requirements.txt -django-environ==0.4.5 +django-environ==0.13.0 furl==2.1.0 pebble==4.5.0 redis==3.4.1 -requests==2.22.0 -sentry-sdk==0.14.0 +requests==2.33.0 +sentry-sdk==2.57.0 diff --git a/documentcloud/documents/processing/info_and_image/main.py b/documentcloud/documents/processing/info_and_image/main.py index 090e44bc..6290b426 100755 --- a/documentcloud/documents/processing/info_and_image/main.py +++ b/documentcloud/documents/processing/info_and_image/main.py @@ -31,12 +31,11 @@ # remove this when done with import code # pylint: disable=too-many-lines -# pylint: disable=import-error +# pylint: disable=import-error, too-many-positional-arguments # Imports based on execution context if env.str("ENVIRONMENT").startswith("local"): # DocumentCloud - from documentcloud.documents.processing.info_and_image import graft from documentcloud.common import access_choices, path, redis_fields from documentcloud.common.environment import ( encode_pubsub_data, @@ -45,11 +44,12 @@ storage, ) from documentcloud.common.serverless import utils - from documentcloud.common.serverless.utils import REDIS_TTL from documentcloud.common.serverless.error_handling import ( pubsub_function, pubsub_function_import, ) + from documentcloud.common.serverless.utils import REDIS_TTL + from documentcloud.documents.processing.info_and_image import graft from documentcloud.documents.processing.info_and_image.graft_adapter import ( GraftContext, ) @@ -58,9 +58,9 @@ Workspace, ) else: + # fmt: off # Third Party import graft - # only initialize sentry on serverless import sentry_sdk from common import access_choices, path, redis_fields @@ -71,8 +71,8 @@ storage, ) from common.serverless import utils - from common.serverless.utils import REDIS_TTL from common.serverless.error_handling import pubsub_function, pubsub_function_import + from common.serverless.utils import REDIS_TTL from graft_adapter import GraftContext from pdfium import StorageHandler, Workspace from sentry_sdk.integrations.aws_lambda import AwsLambdaIntegration @@ -328,7 +328,6 @@ def redact_document_and_overwrite(doc_id, slug, access, redactions): def get_redis_pagespec(doc_id): """Get the dimensions of all pages in a convenient format using Redis""" - # pylint: disable=too-many-nested-blocks dimensions_field = redis_fields.dimensions(doc_id) pipeline = REDIS.pipeline() @@ -769,7 +768,7 @@ def extract_single_page(doc_id, slug, access, page, page_number, large_image_pat image_width, max(round(img_buffer.height * (image_width / img_buffer.width)), 1), ), - Image.ANTIALIAS, + Image.LANCZOS, ) mem_file = io.BytesIO() diff --git a/documentcloud/documents/processing/info_and_image/pdfium.py b/documentcloud/documents/processing/info_and_image/pdfium.py index 7a5df3fe..12f81d85 100755 --- a/documentcloud/documents/processing/info_and_image/pdfium.py +++ b/documentcloud/documents/processing/info_and_image/pdfium.py @@ -33,6 +33,7 @@ INT_MAX = 2147483647 # Adapted from https://github.com/gersonkurz/pydfium +# pylint: disable=too-many-positional-arguments class FPDFLibraryConfig(Structure): @@ -129,7 +130,7 @@ def get_image(self): img = PIL.Image.frombuffer( "RGBA", (self.width, self.height), bitmap.contents, "raw", "RGBA", 0, 1 ) - # pylint: disable=invalid-name, unbalanced-tuple-unpacking + # pylint: disable=unbalanced-tuple-unpacking b, g, r, _a = img.split() img = PIL.Image.merge("RGB", (r, g, b)) return img @@ -374,7 +375,7 @@ def get_media_box(self): def set_desired_transform(self, page_object, x, y, width, height): # Get the bounds of the text object - (left, bottom, right, top) = self.get_bounds(page_object) + left, bottom, right, top = self.get_bounds(page_object) # Transform to origin self.workspace.fpdf_page_obj_transform( diff --git a/documentcloud/documents/processing/info_and_image/requirements.txt b/documentcloud/documents/processing/info_and_image/requirements.txt index 571e6581..c544cb93 100755 --- a/documentcloud/documents/processing/info_and_image/requirements.txt +++ b/documentcloud/documents/processing/info_and_image/requirements.txt @@ -1,12 +1,12 @@ -r cloud-requirements.txt -Pillow==9.2.0 -aioboto3==9.6.0 -django-environ==0.4.5 +Pillow==12.1.1 +aioboto3==15.5.0 +django-environ==0.13.0 furl==2.1.0 -listcrunch==0.1.0 +listcrunch==1.0.0 pebble==4.5.0 redis==3.4.1 -requests==2.22.0 -sentry-sdk==0.14.0 +requests==2.33.0 +sentry-sdk==2.57.0 pymupdf==1.25.3 diff --git a/documentcloud/documents/processing/ocr/main.py b/documentcloud/documents/processing/ocr/main.py index 2d17faf9..db391b7a 100755 --- a/documentcloud/documents/processing/ocr/main.py +++ b/documentcloud/documents/processing/ocr/main.py @@ -18,7 +18,7 @@ logger = logging.getLogger() logger.setLevel(logging.INFO) -# pylint: disable=import-error +# pylint: disable=import-error, too-many-positional-arguments # Imports based on execution context if env.str("ENVIRONMENT").startswith("local"): @@ -30,9 +30,9 @@ publisher, storage, ) - from documentcloud.common.utils import graft_page from documentcloud.common.serverless import utils from documentcloud.common.serverless.error_handling import pubsub_function + from documentcloud.common.utils import graft_page from documentcloud.documents.processing.ocr.tess import Tesseract else: # Third Party @@ -45,12 +45,11 @@ publisher, storage, ) - from common.utils import graft_page from common.serverless import utils from common.serverless.error_handling import pubsub_function + from common.utils import graft_page from sentry_sdk.integrations.aws_lambda import AwsLambdaIntegration from sentry_sdk.integrations.redis import RedisIntegration - from tess import Tesseract sentry_sdk.init( diff --git a/documentcloud/documents/processing/ocr/requirements.txt b/documentcloud/documents/processing/ocr/requirements.txt index ab5f786c..dc66da21 100755 --- a/documentcloud/documents/processing/ocr/requirements.txt +++ b/documentcloud/documents/processing/ocr/requirements.txt @@ -1,12 +1,11 @@ -boto3==1.26.115 -smart-open==1.8.4 - -Pillow==9.2.0 +boto3==1.40.61 +smart-open==7.5.1 +Pillow==12.1.1 cpuprofile==1.0.1 -django-environ==0.4.5 +django-environ==0.13.0 furl==2.1.0 pebble==4.5.0 redis==3.4.1 -requests==2.22.0 -sentry-sdk==0.14.0 +requests==2.33.0 +sentry-sdk==2.57.0 pymupdf==1.25.3 diff --git a/documentcloud/documents/processing/ocr/tess.py b/documentcloud/documents/processing/ocr/tess.py index c47b48f8..a2375afd 100755 --- a/documentcloud/documents/processing/ocr/tess.py +++ b/documentcloud/documents/processing/ocr/tess.py @@ -17,6 +17,7 @@ DATA_PATH = TMP_DIRECTORY +# pylint:disable = too-many-positional-arguments class TesseractError(Exception): pass diff --git a/documentcloud/documents/processing/sidekick/__init__.py b/documentcloud/documents/processing/sidekick/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/documentcloud/documents/processing/sidekick/common b/documentcloud/documents/processing/sidekick/common deleted file mode 120000 index f74dff0e..00000000 --- a/documentcloud/documents/processing/sidekick/common +++ /dev/null @@ -1 +0,0 @@ -../../../common \ No newline at end of file diff --git a/documentcloud/documents/processing/sidekick/main.py b/documentcloud/documents/processing/sidekick/main.py deleted file mode 100644 index 133561ae..00000000 --- a/documentcloud/documents/processing/sidekick/main.py +++ /dev/null @@ -1,184 +0,0 @@ -# Standard Library -import logging -import os -import re -from collections import Counter -from urllib.parse import urljoin - -# Third Party -import environ -import numpy as np -import requests -import sklearn.decomposition -from sklearn.feature_extraction.text import TfidfVectorizer - -env = environ.Env() -logger = logging.getLogger() -logger.setLevel(logging.INFO) - -# pylint: disable=import-error - -# Imports based on execution context -if env.str("ENVIRONMENT").startswith("local"): - # DocumentCloud - from documentcloud.common import path - from documentcloud.common.environment import get_pubsub_data, publisher, storage - from documentcloud.common.serverless import utils - from documentcloud.common.serverless.error_handling import pubsub_function -else: - # Third Party - # only initialize sentry on serverless - import sentry_sdk - from common import path - from common.environment import get_pubsub_data, publisher, storage - from common.serverless import utils - from common.serverless.error_handling import pubsub_function - from sentry_sdk.integrations.aws_lambda import AwsLambdaIntegration - from sentry_sdk.integrations.redis import RedisIntegration - - sentry_sdk.init( - dsn=env("SENTRY_DSN"), integrations=[AwsLambdaIntegration(), RedisIntegration()] - ) - -REDIS = utils.get_redis() -API_CALLBACK = env.str("API_CALLBACK") -PROCESSING_TOKEN = env.str("PROCESSING_TOKEN") -VOCAB_SIZE = env.int("VOCAB_SIZE", default=30000) -TOKEN_PATTERN = re.compile(r"(?u)\b\w\w+\b") -EMBEDDING_DIR = env.str("EMBEDDING_DIR", default="embedding") - -SIDEKICK_PREPROCESS_TOPIC = publisher.topic_path( - "documentcloud", - env.str("SIDEKICK_PREPROCESS_TOPIC", default="sidekick-preprocess-topic"), -) - -LANGUAGES = {"eng": "en"} - - -def send_sidekick_update(project_id, json): - """Send an update to the API server for sidekick""" - utils.request(REDIS, "patch", f"projects/{project_id}/sidekick/", json) - - -def load_documents(project_id): - """Load the documents - - Fetch their IDs, slugs and languages via the API - Use the ID and slug to fetch the text from S3 - Return the most common language among the documents as the language to use - for the word embeddings - """ - - logger.info( - "[SIDEKICK PREPROCESS] project_id: %s - fetching project documents", project_id - ) - file_names = [] - languages = Counter() - doc_ids = [] - next_ = urljoin(API_CALLBACK, f"projects/{project_id}/documents/?expand=document") - - while next_: - response = requests.get( - next_, headers={"Authorization": f"processing-token {PROCESSING_TOKEN}"} - ) - response.raise_for_status() - response_json = response.json() - next_ = response_json["next"] - for result in response_json["results"]: - file_names.append( - path.text_path(result["document"]["id"], result["document"]["slug"]) - ) - languages.update([result["document"]["language"]]) - doc_ids.append(result["document"]["id"]) - - language = languages.most_common()[0][0] - - # download the files in parallel - texts = storage.async_download(file_names) - - return texts, doc_ids, language - - -def process_text(project_id, texts): - """Calculate the vocabulary for the corpus based on the document texts""" - - logger.info("[SIDEKICK PREPROCESS] project_id: %s - calculating vocab", project_id) - - logger.info("[SIDEKICK PREPROCESS] project_id: %s - tfidf", project_id) - - # Derive tf-idf data on corpus - vectorizer = TfidfVectorizer( - strip_accents="unicode", stop_words=None, max_features=VOCAB_SIZE - ) - - tfidf = vectorizer.fit_transform(texts) - features = vectorizer.get_feature_names() - - logger.info("[SIDEKICK PREPROCESS] project_id: %s - svd", project_id) - - # Project tf-idf data down in dimensionality - svd_transformer = sklearn.decomposition.TruncatedSVD( - 300, algorithm="randomized", n_iter=5 - ) - doc_svd = svd_transformer.fit_transform(tfidf) - - return tfidf, features, doc_svd - - -def doc_embedding(project_id, language, tfidf, features, doc_svd): - """Calculate the doc embeddings""" - # Third Party - import fasttext - - logger.info("[SIDEKICK PREPROCESS] project_id: %s - doc embeddings", project_id) - - # Load the embedding model - # error if language not present - language = LANGUAGES.get(language, language) - model = fasttext.load_model(os.path.join(EMBEDDING_DIR, f"cc.{language}.300.bin")) - embedding_vectors = np.array( - [model.get_word_vector(feature) for feature in features] - ) - - # scale embedding vectors based on frequency of the words - doc_embeddings = np.dot(tfidf.A, embedding_vectors) - - # Doc vectors are just doc_svd and doc_embeddings concatenated - doc_vectors = np.hstack((doc_svd, doc_embeddings)) - - # Serialize doc vectors to file - with storage.open( - path.sidekick_document_vectors_path(project_id), "wb" - ) as vectors_file: - np.savez_compressed(vectors_file, doc_vectors) - - -def doc_embedding_(project_id, _language, _tfidf, _features, doc_svd, doc_ids): - """Simpler doc embeddings - skip word vectors and just use the doc svd""" - - logger.info("[SIDEKICK PREPROCESS] project_id: %s - doc embeddings", project_id) - - # Serialize doc vectors to file - with storage.open( - path.sidekick_document_vectors_path(project_id), "wb" - ) as vectors_file: - np.savez_compressed(vectors_file, vectors=doc_svd, ids=doc_ids) - - -@pubsub_function(REDIS, SIDEKICK_PREPROCESS_TOPIC) -def preprocess(data, _context=None): - """Preprocess the documents in a project for sidekick""" - - data = get_pubsub_data(data) - project_id = data["project_id"] - - logger.info("[SIDEKICK PREPROCESS] project_id: %s", project_id) - - try: - texts, doc_ids, language = load_documents(project_id) - tfidf, features, doc_svd = process_text(project_id, texts) - doc_embedding_(project_id, language, tfidf, features, doc_svd, doc_ids) - except Exception: # pylint: disable=broad-except - send_sidekick_update(project_id, {"status": "error"}) - else: - send_sidekick_update(project_id, {"status": "success"}) diff --git a/documentcloud/documents/processing/sidekick/requirements.txt b/documentcloud/documents/processing/sidekick/requirements.txt deleted file mode 100644 index 68379dfc..00000000 --- a/documentcloud/documents/processing/sidekick/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ --r cloud-requirements.txt - -aioboto3==6.5.0 -django-environ==0.4.5 -furl==2.1.0 -pebble==4.5.0 -redis==3.4.1 -requests==2.22.0 -sentry-sdk==0.14.0 diff --git a/documentcloud/documents/processing/tests/imagediff.py b/documentcloud/documents/processing/tests/imagediff.py index d065398d..a6833b62 100755 --- a/documentcloud/documents/processing/tests/imagediff.py +++ b/documentcloud/documents/processing/tests/imagediff.py @@ -19,6 +19,7 @@ from .report_generator import ReportGenerator +# pylint: disable=too-many-positional-arguments def same_images( test_image: str, expected_image: str, diff --git a/documentcloud/documents/processing/tests/pipeline_tests/mocks.py b/documentcloud/documents/processing/tests/pipeline_tests/mocks.py index 05089cb9..ef933c6d 100644 --- a/documentcloud/documents/processing/tests/pipeline_tests/mocks.py +++ b/documentcloud/documents/processing/tests/pipeline_tests/mocks.py @@ -8,10 +8,8 @@ from contextlib import ExitStack from unittest.mock import patch -# Third Party -from config import celery_app - # DocumentCloud +from config import celery_app from documentcloud.common import path from documentcloud.common.serverless.utils import get_redis, initialize from documentcloud.documents.processing.tests.pipeline_tests.fake_pdf import FakePage @@ -48,7 +46,7 @@ def patch_env(env): # Mock methods -# pylint: disable=unused-argument +# pylint: disable=unused-argument, too-many-positional-arguments def page_loaded(page): pass diff --git a/documentcloud/documents/processing/tests/report_generator.py b/documentcloud/documents/processing/tests/report_generator.py index b60f25f9..4d3f4de5 100755 --- a/documentcloud/documents/processing/tests/report_generator.py +++ b/documentcloud/documents/processing/tests/report_generator.py @@ -35,9 +35,7 @@ def __init__(self, filename): self.anchor_id: int = 0 # Write the header and style sheet. Declare utf8 encoding. - self.html_file.write( - inspect.cleandoc( - """ + self.html_file.write(inspect.cleandoc(""" @@ -50,9 +48,7 @@ def __init__(self, filename): } - """ - ) - ) + """)) def add_heading(self, text): """Add a primary header to the HTML document with a table of contents entry. diff --git a/documentcloud/documents/processing/utils/requirements.txt b/documentcloud/documents/processing/utils/requirements.txt index 8a76067d..37009855 100644 --- a/documentcloud/documents/processing/utils/requirements.txt +++ b/documentcloud/documents/processing/utils/requirements.txt @@ -1,8 +1,8 @@ -r cloud-requirements.txt -django-environ==0.4.5 +django-environ==0.13.0 furl==2.1.0 pebble==4.5.0 redis==3.4.1 -requests==2.22.0 -sentry-sdk==0.14.0 +requests==2.33.0 +sentry-sdk==2.57.0 diff --git a/documentcloud/documents/search.py b/documentcloud/documents/search.py index facc8e98..a7c28cf6 100644 --- a/documentcloud/documents/search.py +++ b/documentcloud/documents/search.py @@ -132,6 +132,7 @@ def search(user, query_params): "hl.weightMatches": settings.SOLR_HL_WEIGHT_MATCHES, **page_query_data, } + original_text_query = text_query if ( settings.SOLR_QUERY_NOTES and user.is_authenticated @@ -140,7 +141,6 @@ def search(user, query_params): ): # turn note queries on for all pro users # *:* returns all documents, do not enable note queries - original_text_query = text_query text_query = _add_note_query(text_query, user) kwargs["uf"] = "* _query_ -projects_edit_access" query_notes = True diff --git a/documentcloud/documents/serializers.py b/documentcloud/documents/serializers.py index bd936e6f..1b977647 100644 --- a/documentcloud/documents/serializers.py +++ b/documentcloud/documents/serializers.py @@ -850,7 +850,9 @@ class EntityOccurrenceSerializer(serializers.ModelSerializer): def get_occurrences(self, obj): def fix(entity): value = entity.pop("kind", 0) - entity["kind"] = OccurrenceKind.attributes.get(value, value) + entity["kind"] = ( + OccurrenceKind(value).name if value in OccurrenceKind.values else value + ) return entity return [fix(e) for e in obj.occurrences] diff --git a/documentcloud/documents/solr.py b/documentcloud/documents/solr.py index 2adb8936..58fdab09 100644 --- a/documentcloud/documents/solr.py +++ b/documentcloud/documents/solr.py @@ -33,10 +33,10 @@ # Third Party import pysolr import requests -from config import celery_app from dateutil.parser import parse # DocumentCloud +from config import celery_app from documentcloud.common import path from documentcloud.common.environment import storage from documentcloud.core.utils import grouper diff --git a/documentcloud/documents/tasks.py b/documentcloud/documents/tasks.py index dbf74e04..d3ac75de 100644 --- a/documentcloud/documents/tasks.py +++ b/documentcloud/documents/tasks.py @@ -135,7 +135,7 @@ def process(document_pk, user_pk, org_pk, force_ocr, ocr_engine): "slug": document.slug, "extension": document.original_extension, "access": document.access, - "ocr_code": Language.get_choice(document.language).ocr_code, + "ocr_code": Language(document.language).ocr_code, "method": "process_pdf", "user_id": user_pk, "org_id": org_pk, @@ -163,7 +163,7 @@ def redact(document_pk, user_pk, redactions): "doc_id": document_pk, "slug": document.slug, "access": document.access, - "ocr_code": Language.get_choice(document.language).ocr_code, + "ocr_code": Language(document.language).ocr_code, "redactions": redactions, }, redact, diff --git a/documentcloud/documents/tests/test_modifications.py b/documentcloud/documents/tests/test_modifications.py index 9f284cb3..7bd32125 100644 --- a/documentcloud/documents/tests/test_modifications.py +++ b/documentcloud/documents/tests/test_modifications.py @@ -29,7 +29,7 @@ def send_post_process(document, modifications): class TestPostProcess: """Test the page modification post processing""" - # pylint: disable=too-many-arguments + # pylint: disable=too-many-arguments, too-many-positional-arguments @pytest.mark.parametrize("factory,attr", models) @pytest.mark.parametrize( "page_spec,initial_page,final_page,count,page_count", tests diff --git a/documentcloud/documents/tests/test_saved_searches.py b/documentcloud/documents/tests/test_saved_searches.py index 5e7100c3..74889c1a 100644 --- a/documentcloud/documents/tests/test_saved_searches.py +++ b/documentcloud/documents/tests/test_saved_searches.py @@ -2,6 +2,7 @@ # Django from rest_framework import status +# Third Party import pytest # DocumentCloud diff --git a/documentcloud/documents/tests/test_search.py b/documentcloud/documents/tests/test_search.py index 376b6446..166bbadf 100644 --- a/documentcloud/documents/tests/test_search.py +++ b/documentcloud/documents/tests/test_search.py @@ -44,7 +44,7 @@ from documentcloud.users.models import User from documentcloud.users.tests.factories import UserFactory -# pylint: disable=too-many-public-methods +# pylint: disable=too-many-public-methods, too-many-positional-arguments @pytest.fixture(scope="class") @@ -106,6 +106,7 @@ def setup_solr(django_db_setup, django_db_blocker): for note in notes.values(): solr_notes.add([note.solr()]) solr.commit() + solr_notes.commit() yield finally: Document.objects.all().delete() diff --git a/documentcloud/entities/choices.py b/documentcloud/entities/choices.py index bbbf16ed..25d51808 100644 --- a/documentcloud/entities/choices.py +++ b/documentcloud/entities/choices.py @@ -1,13 +1,19 @@ # Django +from django.db import models from django.utils.translation import gettext_lazy as _ -# Third Party -from djchoices import ChoiceItem, DjangoChoices +class EntityAccess(models.IntegerChoices): + def __new__(cls, value, label=None, api=False): + obj = int.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.api = api + return obj -class EntityAccess(DjangoChoices): - # `api` specifies if this attribute should be accessible via the API + # pylint:disable=invalid-name # Free and public to all. - public = ChoiceItem(0, _("Public"), api=True) + public = 0, _("Public"), True # Visible to both the owner and her organization. - private = ChoiceItem(2, _("Private"), api=True) + private = 2, _("Private"), True diff --git a/documentcloud/entities/tests/factories.py b/documentcloud/entities/tests/factories.py index b3c1c2de..febdbde2 100644 --- a/documentcloud/entities/tests/factories.py +++ b/documentcloud/entities/tests/factories.py @@ -1,5 +1,6 @@ # Standard Library +# Standard Library import datetime # Third Party diff --git a/documentcloud/organizations/migrations/0020_remove_organization_members_and_more.py b/documentcloud/organizations/migrations/0020_remove_organization_members_and_more.py new file mode 100644 index 00000000..be1679e7 --- /dev/null +++ b/documentcloud/organizations/migrations/0020_remove_organization_members_and_more.py @@ -0,0 +1,25 @@ +# Generated by Django 4.2.2 on 2026-01-21 20:56 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('organizations', '0019_organization_members_organization_parent_and_more'), + ] + + operations = [ + migrations.RemoveField( + model_name='organization', + name='members', + ), + migrations.RemoveField( + model_name='organization', + name='parent', + ), + migrations.RemoveField( + model_name='organization', + name='share_resources', + ), + ] diff --git a/documentcloud/organizations/migrations/0021_organization_members_organization_parent_and_more.py b/documentcloud/organizations/migrations/0021_organization_members_organization_parent_and_more.py new file mode 100644 index 00000000..e26b4cfd --- /dev/null +++ b/documentcloud/organizations/migrations/0021_organization_members_organization_parent_and_more.py @@ -0,0 +1,47 @@ +# Generated by Django 4.2.2 on 2026-01-21 21:29 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("organizations", "0020_remove_organization_members_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="organization", + name="members", + field=models.ManyToManyField( + blank=True, + help_text="Organizations which are members of this organization (useful for trade associations or other member groups)", + related_name="groups", + to=settings.SQUARELET_ORGANIZATION_MODEL, + ), + ), + migrations.AddField( + model_name="organization", + name="parent", + field=models.ForeignKey( + blank=True, + help_text="The parent organization", + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name="children", + to=settings.SQUARELET_ORGANIZATION_MODEL, + verbose_name="parent", + ), + ), + migrations.AddField( + model_name="organization", + name="share_resources", + field=models.BooleanField( + default=True, + help_text="Share resources (subscriptions, credits) with all children and member organizations. Global toggle that applies to all relationships.", + verbose_name="share resources", + ), + ), + ] diff --git a/documentcloud/projects/choices.py b/documentcloud/projects/choices.py index 992f11b2..d6b4f423 100644 --- a/documentcloud/projects/choices.py +++ b/documentcloud/projects/choices.py @@ -1,15 +1,21 @@ # Django +from django.db import models from django.utils.translation import gettext_lazy as _ -# Third Party -from djchoices import ChoiceItem, DjangoChoices +class CollaboratorAccess(models.IntegerChoices): + def __new__(cls, value, label=None, api=False): + obj = int.__new__(cls, value) + obj._value_ = value + if label is not None: + obj._label_ = label + obj.api = api + return obj -class CollaboratorAccess(DjangoChoices): - # `api` specifies if this attribute should be accessible via the API + # pylint:disable = invalid-name # This collaborator has read access - view = ChoiceItem(0, _("View"), api=True) + view = 0, _("View"), True # This collaborator can edit the documents in the project - edit = ChoiceItem(1, _("Edit"), api=True) - # This collaborator can edit the documents and the project itself - admin = ChoiceItem(2, _("Admin"), api=True) + edit = 1, _("Edit"), True + # This collaborator can edit the documents and the project itself + admin = 2, _("Admin"), True diff --git a/documentcloud/sidekick/__init__.py b/documentcloud/sidekick/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/documentcloud/sidekick/apps.py b/documentcloud/sidekick/apps.py deleted file mode 100644 index 1a707e73..00000000 --- a/documentcloud/sidekick/apps.py +++ /dev/null @@ -1,12 +0,0 @@ -# Django -from django.apps import AppConfig - - -class SidekickConfig(AppConfig): - name = "documentcloud.sidekick" - - def ready(self): - # pylint: disable=unused-import - # load signals - # DocumentCloud - import documentcloud.sidekick.signals diff --git a/documentcloud/sidekick/choices.py b/documentcloud/sidekick/choices.py deleted file mode 100644 index 80e6c0b3..00000000 --- a/documentcloud/sidekick/choices.py +++ /dev/null @@ -1,11 +0,0 @@ -# Django -from django.utils.translation import gettext_lazy as _ - -# Third Party -from djchoices import ChoiceItem, DjangoChoices - - -class Status(DjangoChoices): - success = ChoiceItem(0, _("Success"), api=True) - pending = ChoiceItem(1, _("Pending"), api=True) - error = ChoiceItem(2, _("Error"), api=True) diff --git a/documentcloud/sidekick/lego.py b/documentcloud/sidekick/lego.py deleted file mode 100644 index 7dc52604..00000000 --- a/documentcloud/sidekick/lego.py +++ /dev/null @@ -1,139 +0,0 @@ -"""Implements metric lego learning""" - -# Adapted from https://bitbucket.org/muckdoc/muckdoc/ -# Look into faster version, e.g. from -# https://github.com/fcaldas/MetricLearning/blob/master/lego_functions.py - -# Third Party -import numba -import numpy as np -from scipy import stats - -# Use mathy names in this file -# pylint: disable=invalid-name - - -def lego_learn(doc_vectors, constraints, positive_docs): - updated_doc_vectors, mean_vec = get_mean_vec( - doc_vectors, constraints, positive_docs - ) - doc_dists = fast_cosine_matrix(mean_vec, updated_doc_vectors) - doc_percentiles = stats.rankdata(doc_dists, "average") / len(doc_dists) - return doc_dists, doc_percentiles - - -@numba.njit -def update(X_i, X_j, y, A, u=7, l=10, gamma=0.08): - # pylint: disable=too-many-arguments - diff = X_i - X_j - d = np.dot(diff, np.dot(A, diff)) - if (d > u and y == 1) or (d < l and y == -1): - target = u * (y == 1) + l * (y == -1) - _y = ( - (gamma * d * target - 1) - + np.sqrt((gamma * d * target - 1) ** 2 + 4 * gamma * d * d) - ) / (2 * gamma * d) - return A - ( - (gamma * (_y - target)) / (1 + gamma * (_y - target) * d) - ) * np.outer(np.dot(A, diff), np.dot(A, diff)) - else: - return A - - -@numba.njit(parallel=True) -def fast_cosine_matrix(u, M): - # From https://stackoverflow.com/a/47316253 - scores = np.zeros(M.shape[0]) - for i in numba.prange(M.shape[0]): # pylint: disable=not-an-iterable - v = M[i] - m = u.shape[0] - udotv = 0 - u_norm = 0 - v_norm = 0 - for j in range(m): - if (np.isnan(u[j])) or (np.isnan(v[j])): - continue - - udotv += u[j] * v[j] - u_norm += u[j] * u[j] - v_norm += v[j] * v[j] - - u_norm = np.sqrt(u_norm) - v_norm = np.sqrt(v_norm) - - if (u_norm == 0) or (v_norm == 0): - ratio = 1.0 - else: - ratio = udotv / (u_norm * v_norm) - scores[i] = ratio - return scores - - -@numba.njit -def get_mean_vec_(A_updated, doc_vectors, positive_doc_vectors): - L = np.linalg.cholesky(A_updated) - # mean with axis is not supported in numba, so accomplish with sum - mean_vec = np.sum(np.dot(positive_doc_vectors, L), 0) / L.shape[0] - - # Mean vector ordered list - updated_doc_vectors = np.dot(doc_vectors, L) - return updated_doc_vectors, mean_vec - - -def get_mean_vec(doc_vectors, constraints, positive_docs): - - if len(constraints) == 0: - # No constraints, go purely off positive docs - positive_doc_vectors = doc_vectors[positive_docs] - mean_vec = np.mean(positive_doc_vectors, axis=0) - return doc_vectors, mean_vec - else: - A_updated = batch_update(doc_vectors, constraints) - return get_mean_vec_(A_updated, doc_vectors, doc_vectors[positive_docs]) - - -def lego(u, v, y, r=0.5, A_prev=None): - - m = len(u) # number of features - # make into colume vectors [m,1] - u = u[:, np.newaxis] - v = v[:, np.newaxis] - if A_prev is None: - A_prev = np.identity(m) - - # find the current distance (mahalanobis) between u and v - z = u - v - y_current = float(np.dot(z.T, np.dot(A_prev, z))) # y_hat in paper - - # find y_bar, which is an approximation of distance using the new metric - y_bar_up = ( - r * y * y_current - - 1 - + np.sqrt((r * y * y_current - 1) ** 2 + 4 * r * y_current**2) - ) - y_bar_down = 2 * r * y_current - y_bar = y_bar_up / y_bar_down - y_bar = float(np.nan_to_num(y_bar)) - - # calculate the new metric matrix A_new using y_bar - A_new_up = r * (y_bar - y) * np.dot(A_prev, np.dot(np.dot(z, z.T), A_prev)) - A_new_down = 1 + r * (y_bar - y) * y_current - A_new = A_prev - A_new_up / A_new_down - - return A_new - - -# iterates through the constraints and updates the A matrix -def batch_update(doc_vectors, constraints): - A_ = np.identity(doc_vectors.shape[1]) - - for doc_u, doc_v, same_class in constraints: - u_t = doc_vectors[doc_u] - v_t = doc_vectors[doc_v] - if same_class == 1: - y_t = 1 - else: - y_t = -1 - A_ = update(u_t, v_t, y_t, A_) - - return A_ diff --git a/documentcloud/sidekick/local_tasks.py b/documentcloud/sidekick/local_tasks.py deleted file mode 100644 index 3659d6cc..00000000 --- a/documentcloud/sidekick/local_tasks.py +++ /dev/null @@ -1,10 +0,0 @@ -# Django -from celery import shared_task - -# DocumentCloud -from documentcloud.documents.processing.sidekick.main import preprocess - - -@shared_task -def sidekick_preprocess(data): - preprocess(data) diff --git a/documentcloud/sidekick/migrations/0001_initial.py b/documentcloud/sidekick/migrations/0001_initial.py deleted file mode 100644 index 8abdd5f8..00000000 --- a/documentcloud/sidekick/migrations/0001_initial.py +++ /dev/null @@ -1,25 +0,0 @@ -# Generated by Django 2.2.5 on 2021-07-13 20:22 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - initial = True - - dependencies = [ - ('projects', '0012_auto_20210407_1801'), - ] - - operations = [ - migrations.CreateModel( - name='Sidekick', - fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('status', models.IntegerField(choices=[(0, 'Uninitialized'), (1, 'Processing'), (2, 'Initialized'), (3, 'Error')], default=0, help_text='The status of this sidekick', verbose_name='status')), - ('tag_name', models.CharField(help_text='The name of the tag to use to associate a document as a positive or negative match for this sidekick', max_length=50, verbose_name='tag name')), - ('project', models.OneToOneField(help_text='The project this sidekick is for', on_delete=django.db.models.deletion.CASCADE, related_name='sidekick', to='projects.Project', verbose_name='project')), - ], - ), - ] diff --git a/documentcloud/sidekick/migrations/0002_auto_20210723_2029.py b/documentcloud/sidekick/migrations/0002_auto_20210723_2029.py deleted file mode 100644 index 8b53850b..00000000 --- a/documentcloud/sidekick/migrations/0002_auto_20210723_2029.py +++ /dev/null @@ -1,22 +0,0 @@ -# Generated by Django 2.2.5 on 2021-07-23 20:29 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('sidekick', '0001_initial'), - ] - - operations = [ - migrations.RemoveField( - model_name='sidekick', - name='tag_name', - ), - migrations.AlterField( - model_name='sidekick', - name='status', - field=models.IntegerField(choices=[(0, 'Success'), (1, 'Pending'), (2, 'Error')], default=1, help_text='The status of this sidekick', verbose_name='status'), - ), - ] diff --git a/documentcloud/sidekick/migrations/__init__.py b/documentcloud/sidekick/migrations/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/documentcloud/sidekick/models.py b/documentcloud/sidekick/models.py deleted file mode 100644 index 8822ba06..00000000 --- a/documentcloud/sidekick/models.py +++ /dev/null @@ -1,45 +0,0 @@ -# Django -from django.db import models -from django.utils.translation import gettext_lazy as _ - -# Third Party -import numpy as np - -# DocumentCloud -from documentcloud.common import path -from documentcloud.common.environment import storage -from documentcloud.sidekick.choices import Status - -VOCAB_SIZE = 30_000 - - -def file_path(instance, file_name): - return f"sidekick/{instance.pk}/{file_name}" - - -class Sidekick(models.Model): - """Online learning for documents in a project""" - - project = models.OneToOneField( - verbose_name=_("project"), - to="projects.Project", - on_delete=models.CASCADE, - related_name="sidekick", - help_text=_("The project this sidekick is for"), - ) - status = models.IntegerField( - _("status"), - choices=Status.choices, - default=Status.pending, - help_text=_("The status of this sidekick"), - ) - - def get_document_vectors(self): - """Fetch the pre-preocessed document vectors from storage""" - with storage.open( - path.sidekick_document_vectors_path(self.project_id), "rb" - ) as vectors_file: - doc_vector_obj = np.load(vectors_file) - - # Grab document vector matrix - return (doc_vector_obj.get("vectors"), doc_vector_obj.get("ids")) diff --git a/documentcloud/sidekick/routers.py b/documentcloud/sidekick/routers.py deleted file mode 100644 index 77a1c6dc..00000000 --- a/documentcloud/sidekick/routers.py +++ /dev/null @@ -1,34 +0,0 @@ -# Django -from rest_framework.routers import DynamicRoute, Route - -# Third Party -from rest_framework_nested.routers import NestedDefaultRouter - - -class SidekickRouter(NestedDefaultRouter): - """Route list URL to detail views""" - - routes = [ - # List route. - Route( - url=r"^{prefix}{trailing_slash}$", - mapping={ - "get": "retrieve", - "put": "update", - "patch": "partial_update", - "post": "create", - "delete": "destroy", - }, - name="{basename}-detail", - detail=True, - initkwargs={"suffix": "Instance"}, - ), - # Dynamically generated list routes. Generated using - # @action(detail=False) decorator on methods of the viewset. - DynamicRoute( - url=r"^{prefix}/{url_path}{trailing_slash}$", - name="{basename}-{url_name}", - detail=True, - initkwargs={}, - ), - ] diff --git a/documentcloud/sidekick/rules.py b/documentcloud/sidekick/rules.py deleted file mode 100644 index 29ef7e83..00000000 --- a/documentcloud/sidekick/rules.py +++ /dev/null @@ -1,24 +0,0 @@ -# Third Party -from rules import add_perm, is_authenticated, predicate - -# DocumentCloud -from documentcloud.core.rules import skip_if_not_obj -from documentcloud.projects import rules as projects_rules - - -@predicate -@skip_if_not_obj -def can_view(user, sidekick): - return projects_rules.can_view(user, sidekick.project) - - -@predicate -@skip_if_not_obj -def can_change(user, sidekick): - return projects_rules.can_change(user, sidekick.project) - - -add_perm("sidekick.view_sidekick", can_view) -add_perm("sidekick.add_sidekick", is_authenticated) -add_perm("sidekick.change_sidekick", is_authenticated & can_change) -add_perm("sidekick.delete_sidekick", is_authenticated & can_change) diff --git a/documentcloud/sidekick/serializers.py b/documentcloud/sidekick/serializers.py deleted file mode 100644 index b26f4ef1..00000000 --- a/documentcloud/sidekick/serializers.py +++ /dev/null @@ -1,29 +0,0 @@ -# Django -from rest_framework import serializers - -# DocumentCloud -from documentcloud.documents.fields import ChoiceField -from documentcloud.sidekick.choices import Status -from documentcloud.sidekick.models import Sidekick - - -class SidekickSerializer(serializers.ModelSerializer): - status = ChoiceField( - Status, read_only=True, help_text=Sidekick._meta.get_field("status").help_text - ) - - class Meta: - model = Sidekick - fields = ["status"] - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - # Allow writing to status from processing lambda - context = kwargs.get("context", {}) - request = context.get("request") - has_request_auth = ( - request and hasattr(request, "auth") and request.auth is not None - ) - if has_request_auth and "processing" in request.auth.get("permissions", []): - self.fields["status"].read_only = False diff --git a/documentcloud/sidekick/sidekick.py b/documentcloud/sidekick/sidekick.py deleted file mode 100644 index e69de29b..00000000 diff --git a/documentcloud/sidekick/signals.py b/documentcloud/sidekick/signals.py deleted file mode 100644 index 2fc2f554..00000000 --- a/documentcloud/sidekick/signals.py +++ /dev/null @@ -1,18 +0,0 @@ -# Django -from django.db.models.signals import post_delete -from django.dispatch import receiver - -# DocumentCloud -from documentcloud.common import path -from documentcloud.common.environment import storage -from documentcloud.sidekick.models import Sidekick - - -@receiver( - post_delete, - sender=Sidekick, - dispatch_uid="documentcloud.core.signals.delete_vectors", -) -def delete_vectors(instance, **kwargs): - """Delete vector files when deleting a sidekick instance""" - storage.delete(path.sidekick_document_vectors_path(instance.project_id)) diff --git a/documentcloud/sidekick/tasks.py b/documentcloud/sidekick/tasks.py deleted file mode 100644 index b0fdea92..00000000 --- a/documentcloud/sidekick/tasks.py +++ /dev/null @@ -1,164 +0,0 @@ -# Django -from celery import shared_task -from django.conf import settings -from django.db import transaction - -# Standard Library -import logging -import sys -from itertools import combinations - -# Third Party -from requests.exceptions import RequestException - -# DocumentCloud -from documentcloud.common.environment import httpsub -from documentcloud.documents.models import Document -from documentcloud.documents.tasks import solr_index_batch -from documentcloud.sidekick import lego -from documentcloud.sidekick.choices import Status -from documentcloud.sidekick.models import Sidekick - -logger = logging.getLogger(__name__) - -if settings.ENVIRONMENT.startswith("local"): - # pylint: disable=unused-import - # DocumentCloud - from documentcloud.sidekick.local_tasks import sidekick_preprocess - - -def _httpsub_submit(url, project_pk, json, task_): - """Helper to reliably submit a task to lambda via HTTP""" - logger.info( - "Submitting project %s for %s. Retry: %d", - project_pk, - task_.name, - task_.request.retries, - ) - try: - response = httpsub.post(url, json=json) - response.raise_for_status() - logger.info("Submitted project %s for %s succesfully.", project_pk, task_.name) - except RequestException as exc: - if task_.request.retries >= task_.max_retries: - Sidekick.objects.filter(project_id=project_pk).update(status=Status.error) - logger.error( - "Submitting project %s for %s failed: %s", - project_pk, - task_.name, - exc, - exc_info=sys.exc_info(), - ) - else: - raise - - -@shared_task( - autoretry_for=(RequestException,), - retry_backoff=30, - retry_kwargs={"max_retries": settings.HTTPSUB_RETRY_LIMIT}, -) -def preprocess(project_pk): - """Start the sidekick pre-processing""" - _httpsub_submit( - settings.SIDEKICK_PROCESSING_URL, - project_pk, - {"project_id": project_pk}, - preprocess, - ) - - -@shared_task -def lego_learn(sidekick_id, tag_name): - """Start the lego learning""" - - logger.info("[LEGO LEARN] %s %s", sidekick_id, tag_name) - - with transaction.atomic(): - try: - sidekick = Sidekick.objects.get(pk=sidekick_id) - except Sidekick.DoesNotExist: - logger.warning("Sidekick does not exist: %s", sidekick_id) - return - - if sidekick.status != Status.success: - logger.warning( - "Sidekick not in successful state: %s %s", sidekick_id, sidekick.status - ) - return - - sidekick.status = Status.pending - sidekick.save() - - try: - doc_vectors, doc_ids = sidekick.get_document_vectors() - except ValueError: - sidekick.status = Status.error - sidekick.save() - return - - doc_ids = list(doc_ids) - - positive_doc_indices = _load_doc_indices(sidekick, doc_ids, tag_name, "true") - negative_doc_indices = _load_doc_indices(sidekick, doc_ids, tag_name, "false") - - logger.info( - "[LEGO LEARN] positive: %d negative: %d", - len(positive_doc_indices), - len(negative_doc_indices), - ) - - logger.info("[LEGO LEARN] positive: %s", positive_doc_indices) - - # constraints - # list of triples of the form (id0, id1, constraint) - # where constraint is 1 if both id0 and id1 are positive docs - # (positively correlated) - # and constraint is 0 is one is positive and one is negative (negatively correlated) - constraints = [] - for doc0, doc1 in combinations(positive_doc_indices, 2): - constraints.append((doc0, doc1, 1)) - for doc0 in positive_doc_indices: - for doc1 in negative_doc_indices: - constraints.append((doc0, doc1, 0)) - - logger.info("[LEGO LEARN] constraints: %s", constraints) - - # percentiles - # list of percentiles corresponding to document index - dists, percentiles = lego.lego_learn(doc_vectors, constraints, positive_doc_indices) - - logger.info("[LEGO LEARN] dists: %s", dists) - logger.info("[LEGO LEARN] percentiles: %s", percentiles) - - documents = Document.objects.in_bulk(doc_ids) - for doc_id, dist in zip(doc_ids, dists): - documents[doc_id].data[f"{tag_name}_score"] = [str(dist)] - documents[doc_id].data[f"{tag_name}_likely"] = ( - ["Likely"] - if dist > 0.75 - else (["Unlikely"] if dist < -0.75 else ["Uncertain"]) - ) - documents[doc_id].solr_dirty = True - with transaction.atomic(): - Document.objects.bulk_update(documents.values(), ["data", "solr_dirty"]) - transaction.on_commit( - lambda: solr_index_batch.delay( - [int(i) for i in doc_ids], - field_updates={ - f"data_{tag_name}_score": "set", - f"data_{tag_name}_likely": "set", - }, - ) - ) - - sidekick.status = Status.success - sidekick.save() - - -def _load_doc_indices(sidekick, doc_ids, tag_name, tag_value): - """Load the document indices for the given tag name and value""" - docs = sidekick.project.documents.filter( - data__contains={tag_name: [tag_value]}, pk__in=doc_ids - ).values_list("pk", flat=True) - return [doc_ids.index(d) for d in docs] diff --git a/documentcloud/sidekick/tests/__init__.py b/documentcloud/sidekick/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/documentcloud/sidekick/tests/test_views.py b/documentcloud/sidekick/tests/test_views.py deleted file mode 100644 index b86f6c8b..00000000 --- a/documentcloud/sidekick/tests/test_views.py +++ /dev/null @@ -1,114 +0,0 @@ -# Django -from django.conf import settings -from rest_framework import status - -# Standard Library -import json - -# Third Party -import pytest - -# DocumentCloud -from documentcloud.sidekick.choices import Status -from documentcloud.sidekick.models import Sidekick - - -@pytest.mark.django_db() -class TestSidekickAPI: - def test_create(self, client, project): - """Create a new sidekick""" - client.force_authenticate(user=project.user) - response = client.post(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_201_CREATED - response_json = json.loads(response.content) - assert response_json == {"status": "pending"} - - def test_create_no_perm(self, client, project, user): - """Create a new sidekick for a project you are not an editor for""" - client.force_authenticate(user=user) - response = client.post(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_404_NOT_FOUND - - def test_create_reprocess(self, client, project): - """Use `create` to reprocess an existing sidekick""" - Sidekick.objects.create(project=project, status=Status.success) - client.force_authenticate(user=project.user) - response = client.post(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_201_CREATED - response_json = json.loads(response.content) - assert response_json == {} - - def test_create_reprocess_pending(self, client, project): - """It is an error to attempt to re-process while already processing""" - Sidekick.objects.create(project=project, status=Status.pending) - client.force_authenticate(user=project.user) - response = client.post(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_400_BAD_REQUEST - response_json = json.loads(response.content) - assert response_json == ["Already processing"] - - def test_retrieve(self, client, project): - """Retrieve a sidekick""" - client.force_authenticate(user=project.user) - Sidekick.objects.create(project=project) - response = client.get(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_200_OK - - def test_retrieve_no_exist(self, client, project): - """Retrieve a sidekick but no sidekick""" - client.force_authenticate(user=project.user) - response = client.get(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_404_NOT_FOUND - - def test_retrieve_no_perm(self, client, project, user): - """Retrieve a sidekick but no permissions""" - client.force_authenticate(user=user) - response = client.get(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_404_NOT_FOUND - - def test_delete(self, client, project, mocker): - """Delete a sidekick""" - mock_delete = mocker.patch("documentcloud.common.environment.storage.delete") - Sidekick.objects.create(project=project, status=Status.success) - client.force_authenticate(user=project.user) - response = client.delete(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_204_NO_CONTENT - mock_delete.assert_called_once() - - def test_delete_no_perm(self, client, project, user): - """Delete a sidekick without permissions""" - Sidekick.objects.create(project=project, status=Status.success) - client.force_authenticate(user=user) - response = client.delete(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_404_NOT_FOUND - - def test_delete_no_exist(self, client, project): - """Delete a sidekick with no sidekick""" - client.force_authenticate(user=project.user) - response = client.delete(f"/api/projects/{project.pk}/sidekick/") - assert response.status_code == status.HTTP_404_NOT_FOUND - - def test_update(self, client, project): - """Update a sidekick - nothing to update for normal users""" - Sidekick.objects.create(project=project, status=Status.success) - client.force_authenticate(user=project.user) - response = client.patch(f"/api/projects/{project.pk}/sidekick/", {}) - assert response.status_code == status.HTTP_200_OK - - def test_update_processing_token(self, client, project): - """Update a sidekick with a processing token""" - Sidekick.objects.create(project=project, status=Status.pending) - response = client.patch( - f"/api/projects/{project.pk}/sidekick/", - {"status": "success"}, - HTTP_AUTHORIZATION=f"processing-token {settings.PROCESSING_TOKEN}", - ) - assert response.status_code == status.HTTP_200_OK - response_json = json.loads(response.content) - assert response_json == {"status": "success"} - - def test_update_no_exist(self, client, project): - """Update a sidekick that doesn't exist""" - client.force_authenticate(user=project.user) - response = client.patch(f"/api/projects/{project.pk}/sidekick/", {}) - assert response.status_code == status.HTTP_404_NOT_FOUND diff --git a/documentcloud/sidekick/views.py b/documentcloud/sidekick/views.py deleted file mode 100644 index ca27767f..00000000 --- a/documentcloud/sidekick/views.py +++ /dev/null @@ -1,82 +0,0 @@ -# Django -from django.db import transaction -from django.db.utils import IntegrityError -from django.http.response import Http404 -from rest_framework import serializers, status, viewsets -from rest_framework.decorators import action -from rest_framework.generics import get_object_or_404 -from rest_framework.response import Response - -# DocumentCloud -from documentcloud.core.permissions import ( - DjangoObjectPermissionsOrAnonReadOnly, - SidekickPermissions, -) -from documentcloud.projects.models import Project -from documentcloud.sidekick.choices import Status -from documentcloud.sidekick.models import Sidekick -from documentcloud.sidekick.serializers import SidekickSerializer -from documentcloud.sidekick.tasks import lego_learn, preprocess - - -class SidekickViewSet(viewsets.ModelViewSet): - serializer_class = SidekickSerializer - queryset = Sidekick.objects.none() - permission_classes = (DjangoObjectPermissionsOrAnonReadOnly | SidekickPermissions,) - - def get_object(self): - """There is always at most one sidekick associated with a project""" - valid_token = ( - hasattr(self.request, "auth") - and self.request.auth is not None - and "processing" in self.request.auth.get("permissions", []) - ) - # Processing scope can access all documents - if valid_token: - projects = Project.objects.all() - else: - projects = Project.objects.get_editable(self.request.user) - project = get_object_or_404(projects, pk=self.kwargs["project_pk"]) - - try: - return project.sidekick - except Sidekick.DoesNotExist: - raise Http404 - - def perform_create(self, serializer): - """Specify the project""" - project = get_object_or_404( - Project.objects.get_editable(self.request.user), - pk=self.kwargs["project_pk"], - ) - try: - # try saving and processing the sidekick if one does not exist - with transaction.atomic(): - sidekick = serializer.save(project=project) - preprocess.delay(self.kwargs["project_pk"]) - except IntegrityError: - # a sidekick already exists, select it for updating - with transaction.atomic(): - sidekick = Sidekick.objects.select_for_update().get( - project_id=self.kwargs["project_pk"] - ) - if sidekick.status == Status.pending: - # if it is already processing then error - raise serializers.ValidationError("Already processing") - - # set to processing and begin the processing - sidekick.status = Status.pending - sidekick.save() - preprocess.delay(self.kwargs["project_pk"]) - - @action(detail=True, methods=["post"]) - def learn(self, request, project_pk=None): - """Activate lego learning""" - # pylint: disable=unused-argument - sidekick = self.get_object() - if "tagname" not in request.data: - raise serializers.ValidationError("Missing tagname") - - lego_learn.delay(sidekick.pk, request.data["tagname"]) - - return Response("OK", status=status.HTTP_200_OK) diff --git a/documentcloud/users/migrations/0001_initial_squashed_0010_user_bio_alter_user_email_alter_user_username.py b/documentcloud/users/migrations/0001_initial_squashed_0010_user_bio_alter_user_email_alter_user_username.py new file mode 100644 index 00000000..64f3bc5e --- /dev/null +++ b/documentcloud/users/migrations/0001_initial_squashed_0010_user_bio_alter_user_email_alter_user_username.py @@ -0,0 +1,475 @@ +# Generated by Django 5.2.13 on 2026-04-14 18:44 + +import django.contrib.postgres.fields.citext +import django.contrib.postgres.operations +import django.utils.timezone +import documentcloud.core.fields +import documentcloud.users.managers +import squarelet_auth.fields +import uuid +from django.db import migrations, models + + +class Migration(migrations.Migration): + + replaces = [ + ("users", "0001_initial"), + ("users", "0002_auto_20200128_1418"), + ("users", "0003_auto_20200214_1640"), + ("users", "0004_auto_20200306_2000"), + ("users", "0005_auto_20200523_1534"), + ("users", "0006_auto_20200925_2001"), + ("users", "0007_auto_20211102_1707"), + ("users", "0009_user_mailkey"), + ("users", "0010_user_bio_alter_user_email_alter_user_username"), + ] + + dependencies = [ + ("auth", "0011_update_proxy_permissions"), + ] + + operations = [ + django.contrib.postgres.operations.CITextExtension(), + django.contrib.postgres.operations.CreateCollation( + name="case_insensitive", + locale="und-u-ks-level2", + provider="icu", + deterministic=False, + ), + migrations.CreateModel( + name="User", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("password", models.CharField(max_length=128, verbose_name="password")), + ( + "last_login", + models.DateTimeField( + blank=True, null=True, verbose_name="last login" + ), + ), + ( + "is_superuser", + models.BooleanField( + default=False, + help_text="Designates that this user has all permissions without explicitly assigning them.", + verbose_name="superuser status", + ), + ), + ( + "uuid", + models.UUIDField( + db_index=True, + default=uuid.uuid4, + editable=False, + help_text="Unique ID to link users across MuckRock's sites", + unique=True, + verbose_name="UUID", + ), + ), + ( + "name", + models.CharField( + help_text="The user's full name", + max_length=255, + verbose_name="full name", + ), + ), + ( + "email", + models.EmailField( + db_collation="case_insensitive", + help_text="The user's primary email address", + max_length=254, + null=True, + unique=True, + verbose_name="email", + ), + ), + ( + "username", + models.CharField( + db_collation="case_insensitive", + help_text="A unique public identifier for the user", + max_length=150, + unique=True, + verbose_name="username", + ), + ), + ( + "avatar_url", + models.URLField( + blank=True, + help_text="A URL which points to an avatar for the user", + max_length=255, + verbose_name="avatar url", + ), + ), + ( + "is_staff", + models.BooleanField( + default=False, + help_text="Designates whether the user can log into this admin site.", + verbose_name="staff status", + ), + ), + ( + "is_active", + models.BooleanField( + default=True, + help_text="Designates whether this user should be treated as active. Unselect this instead of deleting accounts.", + verbose_name="active", + ), + ), + ( + "email_failed", + models.BooleanField( + default=False, + help_text="Has an email we sent to this user's email address failed?", + verbose_name="email failed", + ), + ), + ( + "email_verified", + models.BooleanField( + default=False, + help_text="Has this user's email address been verified?", + verbose_name="email verified", + ), + ), + ( + "created_at", + documentcloud.core.fields.AutoCreatedField( + default=django.utils.timezone.now, + editable=False, + help_text="Timestamp of when the user was created", + verbose_name="created at", + ), + ), + ( + "updated_at", + documentcloud.core.fields.AutoLastModifiedField( + default=django.utils.timezone.now, + editable=False, + help_text="Timestamp of when the user was last updated", + verbose_name="updated at", + ), + ), + ( + "use_autologin", + models.BooleanField( + default=True, + help_text="Links you receive in emails from us will contain a token to automatically log you in", + verbose_name="use autologin", + ), + ), + ( + "groups", + models.ManyToManyField( + blank=True, + help_text="The groups this user belongs to. A user will get all permissions granted to each of their groups.", + related_name="user_set", + related_query_name="user", + to="auth.group", + verbose_name="groups", + ), + ), + ( + "user_permissions", + models.ManyToManyField( + blank=True, + help_text="Specific permissions for this user.", + related_name="user_set", + related_query_name="user", + to="auth.permission", + verbose_name="user permissions", + ), + ), + ], + options={ + "abstract": False, + "ordering": ("username",), + }, + managers=[ + ("objects", documentcloud.users.managers.UserManager()), + ], + ), + migrations.RunSQL( + sql="ALTER SEQUENCE users_user_id_seq RESTART WITH 100000", + ), + migrations.AlterField( + model_name="user", + name="created_at", + field=squarelet_auth.fields.AutoCreatedField( + default=django.utils.timezone.now, + editable=False, + help_text="Timestamp of when the user was created", + verbose_name="created at", + ), + ), + migrations.AlterField( + model_name="user", + name="updated_at", + field=squarelet_auth.fields.AutoLastModifiedField( + default=django.utils.timezone.now, + editable=False, + help_text="Timestamp of when the user was last updated", + verbose_name="updated at", + ), + ), + migrations.AddField( + model_name="user", + name="document_language", + field=models.CharField( + blank=True, + choices=[ + ("afr", "Afrikaans"), + ("amh", "Amharic"), + ("ara", "Arabic"), + ("asm", "Assamese"), + ("aze", "Azerbaijani"), + ("aze_cyrl", "Azerbaijani - Cyrillic"), + ("bel", "Belarusian"), + ("ben", "Bengali"), + ("bod", "Tibetan"), + ("bos", "Bosnian"), + ("bul", "Bulgarian"), + ("cat", "Catalan; Valencian"), + ("ceb", "Cebuano"), + ("ces", "Czech"), + ("zho", "Chinese - Simplified"), + ("tra", "Chinese - Traditional"), + ("chr", "Cherokee"), + ("cym", "Welsh"), + ("dan", "Danish"), + ("deu", "German"), + ("dzo", "Dzongkha"), + ("ell", "Greek"), + ("eng", "English"), + ("enm", "Middle English"), + ("epo", "Esperanto"), + ("est", "Estonian"), + ("eus", "Basque"), + ("fas", "Persian"), + ("fin", "Finnish"), + ("fra", "French"), + ("frk", "German Fraktur"), + ("frm", "Middle French"), + ("gle", "Irish"), + ("glg", "Galician"), + ("grc", "Ancient Greek"), + ("guj", "Gujarati"), + ("hat", "Haitian; Haitian Creole"), + ("heb", "Hebrew"), + ("hin", "Hindi"), + ("hrv", "Croatian"), + ("hun", "Hungarian"), + ("iku", "Inuktitut"), + ("ind", "Indonesian"), + ("isl", "Icelandic"), + ("ita", "Italian"), + ("ita_old", "Italian - Old"), + ("jav", "Javanese"), + ("jpn", "Japanese"), + ("kan", "Kannada"), + ("kat", "Georgian"), + ("kat_old", "Georgian - Old"), + ("kaz", "Kazakh"), + ("khm", "Central Khmer"), + ("kir", "Kirghiz; Kyrgyz"), + ("kor", "Korean"), + ("kur", "Kurdish"), + ("lao", "Lao"), + ("lat", "Latin"), + ("lav", "Latvian"), + ("lit", "Lithuanian"), + ("mal", "Malayalam"), + ("mar", "Marathi"), + ("mkd", "Macedonian"), + ("mlt", "Maltese"), + ("msa", "Malay"), + ("mya", "Burmese"), + ("nep", "Nepali"), + ("nld", "Dutch; Flemish"), + ("nor", "Norwegian"), + ("ori", "Oriya"), + ("pan", "Panjabi; Punjabi"), + ("pol", "Polish"), + ("por", "Portuguese"), + ("pus", "Pushto; Pashto"), + ("ron", "Romanian; Moldavian; Moldovan"), + ("rus", "Russian"), + ("san", "Sanskrit"), + ("sin", "Sinhala; Sinhalese"), + ("slk", "Slovak"), + ("slv", "Slovenian"), + ("spa", "Spanish; Castilian"), + ("spa_old", "Spanish; Castilian - Old"), + ("sqi", "Albanian"), + ("srp", "Serbian"), + ("srp_latn", "Serbian - Latin"), + ("swa", "Swahili"), + ("swe", "Swedish"), + ("syr", "Syriac"), + ("tam", "Tamil"), + ("tel", "Telugu"), + ("tgk", "Tajik"), + ("tgl", "Tagalog"), + ("tha", "Thai"), + ("tir", "Tigrinya"), + ("tur", "Turkish"), + ("uig", "Uighur; Uyghur"), + ("ukr", "Ukrainian"), + ("urd", "Urdu"), + ("uzb", "Uzbek"), + ("uzb_cyrl", "Uzbek - Cyrillic"), + ("vie", "Vietnamese"), + ("yid", "Yiddish"), + ], + default="eng", + help_text="The default language for documents uploaded by this user", + max_length=8, + verbose_name="document language", + ), + ), + migrations.AddField( + model_name="user", + name="language", + field=models.CharField( + blank=True, + choices=[ + ("afr", "Afrikaans"), + ("amh", "Amharic"), + ("ara", "Arabic"), + ("asm", "Assamese"), + ("aze", "Azerbaijani"), + ("aze_cyrl", "Azerbaijani - Cyrillic"), + ("bel", "Belarusian"), + ("ben", "Bengali"), + ("bod", "Tibetan"), + ("bos", "Bosnian"), + ("bul", "Bulgarian"), + ("cat", "Catalan; Valencian"), + ("ceb", "Cebuano"), + ("ces", "Czech"), + ("zho", "Chinese - Simplified"), + ("tra", "Chinese - Traditional"), + ("chr", "Cherokee"), + ("cym", "Welsh"), + ("dan", "Danish"), + ("deu", "German"), + ("dzo", "Dzongkha"), + ("ell", "Greek"), + ("eng", "English"), + ("enm", "Middle English"), + ("epo", "Esperanto"), + ("est", "Estonian"), + ("eus", "Basque"), + ("fas", "Persian"), + ("fin", "Finnish"), + ("fra", "French"), + ("frk", "German Fraktur"), + ("frm", "Middle French"), + ("gle", "Irish"), + ("glg", "Galician"), + ("grc", "Ancient Greek"), + ("guj", "Gujarati"), + ("hat", "Haitian; Haitian Creole"), + ("heb", "Hebrew"), + ("hin", "Hindi"), + ("hrv", "Croatian"), + ("hun", "Hungarian"), + ("iku", "Inuktitut"), + ("ind", "Indonesian"), + ("isl", "Icelandic"), + ("ita", "Italian"), + ("ita_old", "Italian - Old"), + ("jav", "Javanese"), + ("jpn", "Japanese"), + ("kan", "Kannada"), + ("kat", "Georgian"), + ("kat_old", "Georgian - Old"), + ("kaz", "Kazakh"), + ("khm", "Central Khmer"), + ("kir", "Kirghiz; Kyrgyz"), + ("kor", "Korean"), + ("kur", "Kurdish"), + ("lao", "Lao"), + ("lat", "Latin"), + ("lav", "Latvian"), + ("lit", "Lithuanian"), + ("mal", "Malayalam"), + ("mar", "Marathi"), + ("mkd", "Macedonian"), + ("mlt", "Maltese"), + ("msa", "Malay"), + ("mya", "Burmese"), + ("nep", "Nepali"), + ("nld", "Dutch; Flemish"), + ("nor", "Norwegian"), + ("ori", "Oriya"), + ("pan", "Panjabi; Punjabi"), + ("pol", "Polish"), + ("por", "Portuguese"), + ("pus", "Pushto; Pashto"), + ("ron", "Romanian; Moldavian; Moldovan"), + ("rus", "Russian"), + ("san", "Sanskrit"), + ("sin", "Sinhala; Sinhalese"), + ("slk", "Slovak"), + ("slv", "Slovenian"), + ("spa", "Spanish; Castilian"), + ("spa_old", "Spanish; Castilian - Old"), + ("sqi", "Albanian"), + ("srp", "Serbian"), + ("srp_latn", "Serbian - Latin"), + ("swa", "Swahili"), + ("swe", "Swedish"), + ("syr", "Syriac"), + ("tam", "Tamil"), + ("tel", "Telugu"), + ("tgk", "Tajik"), + ("tgl", "Tagalog"), + ("tha", "Thai"), + ("tir", "Tigrinya"), + ("tur", "Turkish"), + ("uig", "Uighur; Uyghur"), + ("ukr", "Ukrainian"), + ("urd", "Urdu"), + ("uzb", "Uzbek"), + ("uzb_cyrl", "Uzbek - Cyrillic"), + ("vie", "Vietnamese"), + ("yid", "Yiddish"), + ], + default="eng", + help_text="The interface language for this user", + max_length=8, + verbose_name="language", + ), + ), + migrations.AddField( + model_name="user", + name="mailkey", + field=models.UUIDField( + help_text="Mail key for uploading documents via email", + null=True, + verbose_name="mailkey", + ), + ), + migrations.AddField( + model_name="user", + name="bio", + field=models.TextField( + blank=True, + help_text="Public bio for the user, in Markdown", + verbose_name="bio", + ), + ), + ] \ No newline at end of file diff --git a/documentcloud/users/migrations/0008_user_active_addons.py b/documentcloud/users/migrations/0008_user_active_addons.py deleted file mode 100644 index bcdc6bb5..00000000 --- a/documentcloud/users/migrations/0008_user_active_addons.py +++ /dev/null @@ -1,19 +0,0 @@ -# Generated by Django 3.2.9 on 2022-03-30 19:14 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('addons', '0005_auto_20220330_1908'), - ('users', '0007_auto_20211102_1707'), - ] - - operations = [ - migrations.AddField( - model_name='user', - name='active_addons', - field=models.ManyToManyField(help_text='Add-Ons shown for this user', related_name='users', to='addons.AddOn', verbose_name='active add-ons'), - ), - ] diff --git a/documentcloud/users/migrations/0009_user_mailkey.py b/documentcloud/users/migrations/0009_user_mailkey.py index bbdacf07..4ff5c95b 100644 --- a/documentcloud/users/migrations/0009_user_mailkey.py +++ b/documentcloud/users/migrations/0009_user_mailkey.py @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ('users', '0008_user_active_addons'), + ('users', '0007_auto_20211102_1707'), ] operations = [ diff --git a/documentcloud/users/migrations/0013_user_active_addons.py b/documentcloud/users/migrations/0013_user_active_addons.py new file mode 100644 index 00000000..1058a8ac --- /dev/null +++ b/documentcloud/users/migrations/0013_user_active_addons.py @@ -0,0 +1,36 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("users", "0001_initial_squashed_0010_user_bio_alter_user_email_alter_user_username"), + ("users", "0007_auto_20211102_1707"), + ] + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.AddField( + model_name="user", + name="active_addons", + field=models.ManyToManyField( + help_text="Add-Ons shown for this user", + related_name="users", + to="addons.addon", + verbose_name="active add-ons", + ), + ), + ], + database_operations=[ + migrations.RunSQL( + sql=""" + CREATE TABLE IF NOT EXISTS users_user_active_addons ( + id serial NOT NULL PRIMARY KEY, + user_id integer NOT NULL REFERENCES users_user(id) DEFERRABLE INITIALLY DEFERRED, + addon_id bigint NOT NULL REFERENCES addons_addon(id) DEFERRABLE INITIALLY DEFERRED + ) + """, + reverse_sql="DROP TABLE IF EXISTS users_user_active_addons", + ), + ], + ) + ] \ No newline at end of file diff --git a/documentcloud/users/migrations/0014_merge_20260414_1910.py b/documentcloud/users/migrations/0014_merge_20260414_1910.py new file mode 100644 index 00000000..7fb57b4d --- /dev/null +++ b/documentcloud/users/migrations/0014_merge_20260414_1910.py @@ -0,0 +1,13 @@ +# Generated by Django 5.2.13 on 2026-04-14 19:10 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("users", "0012_default_pinned_projects"), + ("users", "0013_user_active_addons"), + ] + + operations = [] diff --git a/initialize_dotenvs.py b/initialize_dotenvs.py index 829d0316..24547538 100755 --- a/initialize_dotenvs.py +++ b/initialize_dotenvs.py @@ -70,9 +70,10 @@ def random_string(n): { "name": "MinIO", "envvars": [ - ("MINIO_ACCESS_KEY", lambda: random_string(64)), - ("MINIO_SECRET_KEY", lambda: random_string(64)), - ("MINIO_URL", "http://minio.documentcloud.org:9000"), + ("MINIO_ROOT_USER", lambda: random_string(64)), + ("MINIO_ROOT_PASSWORD", lambda: random_string(64)), + ("MINIO_URL", "https://minio.documentcloud.org"), + ("AWS_CA_BUNDLE", "/etc/ssl/certs/ca-certificates.crt"), ], }, ], diff --git a/local.yml b/local.yml index 1e9a049f..05387510 100644 --- a/local.yml +++ b/local.yml @@ -50,16 +50,18 @@ services: image: redis:5.0 documentcloud_minio: - image: minio/minio:RELEASE.2019-10-12T01-39-57Z + image: minio/minio:RELEASE.2024-12-18T13-15-44Z volumes: - local_minio_data:/data ports: - "9000:9000" - command: server /data + - "9001:9001" + command: server /data --console-address ":9001" env_file: - ./.envs/.local/.django networks: default: + squarelet_default: aliases: - minio.documentcloud.org diff --git a/requirements/base.in b/requirements/base.in index f4c90fda..ce70f316 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -18,12 +18,11 @@ daily-active-users # Django # ------------------------------------------------------------------------------ -django<5 +django<6 django-environ django-model-utils django-compressor django-redis -django-choices django-filter django-extensions django-autoslug @@ -40,34 +39,35 @@ django-parler # Django REST Framework djangorestframework rest-social-auth -social-auth-core[openidconnect] +social-auth-core django-cors-headers djangorestframework_simplejwt drf-nested-routers -cryptography # support RS256 for JWT +cryptography>=46.0.7 # support RS256 for JWT drf-flex-fields django-parler-rest # Cloud providers gcsfs google-cloud-pubsub -boto3 smart-open -aioboto3 +aioboto3>=15 +boto3 # to resolve version issues -wrapt==1.11.2 +wrapt>=1.14.0,<2 # Processing cpuprofile listcrunch numpy +opencv-python-headless # Typing typing-extensions # Solr --e git+https://github.com/MuckRock/pysolr.git@958db93824d6ba58c3183cee43f8cfd8c26165ba#egg=pysolr +pysolr luqum # entity extraction @@ -78,12 +78,12 @@ pdfplumber pikepdf pymupdf -# sidekick -numba -scipy - - +# Logging logzio-python-handler # Documentation -drf-spectacular \ No newline at end of file +drf-spectacular + +# Python 2 compatibility some packages still require +six==1.17.0 + diff --git a/requirements/base.txt b/requirements/base.txt index 8ae79beb..2660a3cb 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,31 +1,35 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile requirements/base.in # --e git+https://github.com/MuckRock/pysolr.git@958db93824d6ba58c3183cee43f8cfd8c26165ba#egg=pysolr +aioboto3==15.5.0 # via -r requirements/base.in -aioboto3==9.6.0 - # via -r requirements/base.in -aiobotocore[boto3]==2.3.0 +aiobotocore[boto3]==2.25.1 # via aioboto3 -aiohttp==3.8.1 - # via aiobotocore +aiofiles==25.1.0 + # via aioboto3 +aiohappyeyeballs==2.6.1 + # via aiohttp +aiohttp==3.13.5 + # via + # aiobotocore + # gcsfs aioitertools==0.10.0 # via aiobotocore -aiosignal==1.2.0 +aiosignal==1.4.0 # via aiohttp amqp==5.3.1 # via kombu -argon2-cffi==19.1.0 +argon2-cffi==25.1.0 # via -r requirements/base.in -asgiref==3.7.2 +argon2-cffi-bindings==25.1.0 + # via argon2-cffi +asgiref==3.11.1 # via django asttokens==2.0.5 # via stack-data -async-timeout==4.0.2 - # via aiohttp attrs==19.3.0 # via # aiohttp @@ -36,35 +40,28 @@ billiard==4.2.1 # via celery bleach==6.0.0 # via -r requirements/base.in -boto==2.49.0 - # via smart-open -boto3==1.21.21 +boto3==1.40.61 # via # -r requirements/base.in # aiobotocore - # smart-open -botocore==1.24.21 +botocore==1.40.61 # via # aiobotocore # boto3 # s3transfer -cachetools==3.1.1 - # via google-auth celery==5.4.0 # via # -r requirements/base.in # squarelet-auth -certifi==2019.6.16 +certifi==2026.2.25 # via requests -cffi==1.15.1 +cffi==2.0.0 # via - # argon2-cffi + # argon2-cffi-bindings # cryptography -chardet==3.0.4 - # via pdfminer-six charset-normalizer==2.1.0 # via - # aiohttp + # pdfminer-six # requests click==8.1.7 # via @@ -80,9 +77,11 @@ click-repl==0.3.0 # via celery cpuprofile==1.0.1 # via -r requirements/base.in -cryptography==35.0.0 +cryptography==46.0.7 # via # -r requirements/base.in + # google-auth + # pdfminer-six # social-auth-core cssselect==1.1.0 # via premailer @@ -94,18 +93,18 @@ decorator==4.4.0 # via # gcsfs # ipython -defusedxml==0.6.0 +defusedxml==0.7.1 # via # python3-openid # social-auth-core -deprecation==2.1.0 +deprecated==1.3.1 # via pikepdf -django==4.2.2 +django==5.2.13 # via # -r requirements/base.in # daily-active-users # django-appconf - # django-choices + # django-compressor # django-cors-headers # django-debug-toolbar # django-extensions @@ -120,26 +119,25 @@ django==4.2.2 # drf-nested-routers # drf-spectacular # rest-social-auth + # social-auth-app-django # squarelet-auth django-appconf==1.0.3 # via django-compressor django-autoslug==1.9.8 # via -r requirements/base.in -django-choices==1.7.2 - # via -r requirements/base.in -django-compressor==4.4 +django-compressor==4.6.0 # via -r requirements/base.in django-cors-headers==3.13.0 # via -r requirements/base.in django-cprofile-middleware==1.0.5 # via -r requirements/base.in -django-debug-toolbar==4.1.0 +django-debug-toolbar==6.2.0 # via -r requirements/base.in -django-environ==0.4.5 +django-environ==0.13.0 # via -r requirements/base.in django-extensions==3.2.3 # via -r requirements/base.in -django-filter==21.1 +django-filter==25.2 # via -r requirements/base.in django-ipware==4.0.2 # via -r requirements/base.in @@ -157,9 +155,9 @@ django-redis==5.0.0 # via # -r requirements/base.in # python-redis-lock -django-robots==5.0 +django-robots==6.1 # via -r requirements/base.in -djangorestframework==3.14.0 +djangorestframework==3.16.1 # via # -r requirements/base.in # django-parler-rest @@ -167,7 +165,7 @@ djangorestframework==3.14.0 # drf-nested-routers # drf-spectacular # rest-social-auth -djangorestframework-simplejwt==4.8.0 +djangorestframework-simplejwt==5.5.1 # via -r requirements/base.in dogslow==1.2 # via -r requirements/base.in @@ -177,60 +175,89 @@ drf-nested-routers==0.91 # via -r requirements/base.in drf-spectacular==0.28.0 # via -r requirements/base.in -ecdsa==0.13.2 - # via python-jose executing==0.8.3 # via stack-data -frozenlist==1.3.0 +frozenlist==1.8.0 # via # aiohttp # aiosignal -fsspec==0.5.2 +fsspec==2026.3.0 # via gcsfs furl==2.1.0 # via -r requirements/base.in -gcsfs==0.3.1 +gcsfs==2026.3.0 # via -r requirements/base.in -google-api-core[grpc]==1.23.0 +google-api-core[grpc]==2.29.0 # via + # google-cloud-core # google-cloud-language # google-cloud-pubsub -google-auth==1.23.0 + # google-cloud-storage + # google-cloud-storage-control +google-auth==2.49.1 # via # gcsfs # google-api-core # google-auth-oauthlib -google-auth-oauthlib==0.4.1 + # google-cloud-core + # google-cloud-language + # google-cloud-pubsub + # google-cloud-storage + # google-cloud-storage-control +google-auth-oauthlib==1.3.1 # via gcsfs -google-cloud-language==2.0.0 +google-cloud-core==2.5.1 + # via google-cloud-storage +google-cloud-language==2.19.0 # via -r requirements/base.in -google-cloud-pubsub==1.0.2 +google-cloud-pubsub==2.36.0 # via -r requirements/base.in -googleapis-common-protos[grpc]==1.6.0 +google-cloud-storage==3.10.1 + # via gcsfs +google-cloud-storage-control==1.6.1 + # via gcsfs +google-crc32c==1.8.0 + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.8.2 + # via google-cloud-storage +googleapis-common-protos[grpc]==1.73.0 # via # google-api-core # grpc-google-iam-v1 -grpc-google-iam-v1==0.12.3 + # grpcio-status +grpc-google-iam-v1==0.14.4 # via google-cloud-pubsub -grpcio==1.47.0 +grpcio==1.80.0 # via # google-api-core + # google-cloud-language + # google-cloud-pubsub # googleapis-common-protos # grpc-google-iam-v1 + # grpcio-status +grpcio-status==1.63.0rc1 + # via + # google-api-core + # google-cloud-pubsub html2text==2020.1.16 # via -r requirements/base.in -idna==2.8 +idna==3.7 # via # requests # yarl +importlib-metadata==8.4.0 + # via opentelemetry-api inflection==0.5.1 # via drf-spectacular -ipython==8.5.0 +ipython==8.10.0 # via -r requirements/base.in jedi==0.18.1 # via ipython jmespath==0.9.4 # via + # aiobotocore # boto3 # botocore jsonschema==3.2.0 @@ -239,52 +266,53 @@ jsonschema==3.2.0 # drf-spectacular kombu==5.4.2 # via celery -libcst==0.4.1 - # via google-cloud-language listcrunch==1.0.0 # via -r requirements/base.in -llvmlite==0.38.1 - # via numba -logzio-python-handler==4.1.1 +logzio-python-handler==4.1.9 # via -r requirements/base.in luqum==0.8.1 # via -r requirements/base.in -lxml==4.9.1 +lxml==6.0.2 # via # pikepdf # premailer -markdown==3.4.1 +markdown==3.8.1 # via -r requirements/base.in matplotlib-inline==0.1.3 # via ipython -multidict==4.7.5 +multidict==6.7.1 # via + # aiobotocore # aiohttp # yarl -mypy-extensions==0.4.3 - # via typing-inspect -numba==0.55.2 - # via -r requirements/base.in -numpy==1.22.4 +numpy==2.2.6 # via # -r requirements/base.in - # numba - # scipy + # opencv-python-headless oauthlib==3.1.0 # via # requests-oauthlib # social-auth-core +opencv-python-headless==4.13.0.92 + # via -r requirements/base.in +opentelemetry-api==1.40.0 + # via + # google-cloud-pubsub + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-sdk==1.40.0 + # via google-cloud-pubsub +opentelemetry-semantic-conventions==0.61b0 + # via opentelemetry-sdk orderedmultidict==1.0.1 # via furl packaging==24.1 - # via - # deprecation - # pikepdf + # via pikepdf parso==0.8.3 # via jedi -pdfminer-six==20200517 +pdfminer-six==20251230 # via pdfplumber -pdfplumber==0.5.28 +pdfplumber==0.11.9 # via -r requirements/base.in pebble==4.5.0 # via -r requirements/base.in @@ -292,9 +320,9 @@ pexpect==4.8.0 # via ipython pickleshare==0.7.5 # via ipython -pikepdf==5.4.0 +pikepdf==10.5.1 # via -r requirements/base.in -pillow==9.2.0 +pillow==12.1.1 # via # -r requirements/base.in # pdfplumber @@ -307,12 +335,25 @@ prompt-toolkit==3.0.38 # via # click-repl # ipython -proto-plus==1.13.0 - # via google-cloud-language -protobuf==4.25.2 +propcache==0.4.1 + # via + # aiohttp + # yarl +proto-plus==1.27.1 # via # google-api-core + # google-cloud-language + # google-cloud-pubsub + # google-cloud-storage-control +protobuf==5.29.6 + # via + # google-api-core + # google-cloud-language + # google-cloud-pubsub + # google-cloud-storage-control # googleapis-common-protos + # grpc-google-iam-v1 + # grpcio-status # logzio-python-handler # proto-plus ptyprocess==0.6.0 @@ -320,32 +361,30 @@ ptyprocess==0.6.0 pure-eval==0.2.2 # via stack-data pyasn1==0.4.7 - # via - # pyasn1-modules - # python-jose - # rsa + # via pyasn1-modules pyasn1-modules==0.2.7 # via google-auth pycparser==2.19 # via cffi -pycryptodome==3.10.1 - # via pdfminer-six -pygments==2.5.2 +pygments==2.20.0 # via ipython -pyjwt==2.3.0 +pyjwt==2.12.1 # via # djangorestframework-simplejwt # social-auth-core pymupdf==1.25.3 # via -r requirements/base.in +pypdfium2==5.7.0 + # via pdfplumber pyrsistent==0.18.0 # via jsonschema -python-dateutil==2.8.2 +pysolr==3.11.0 + # via -r requirements/base.in +python-dateutil==2.9.0.post0 # via + # aiobotocore # botocore # celery -python-jose==3.3.0 - # via social-auth-core python-redis-lock[django]==3.3.1 # via -r requirements/base.in python-slugify==3.0.3 @@ -353,15 +392,10 @@ python-slugify==3.0.3 python3-openid==3.1.0 # via social-auth-core pytz==2022.1 - # via - # -r requirements/base.in - # djangorestframework - # google-api-core -pyyaml==5.3.1 - # via - # drf-spectacular - # libcst -rcssmin==1.1.1 + # via -r requirements/base.in +pyyaml==6.0.3 + # via drf-spectacular +rcssmin==1.2.2 # via # -r requirements/base.in # django-compressor @@ -370,62 +404,50 @@ redis==3.4.1 # -r requirements/base.in # django-redis # python-redis-lock -requests==2.28.1 +requests==2.33.0 # via # gcsfs # google-api-core + # google-cloud-storage # logzio-python-handler # premailer # pysolr # requests-oauthlib - # smart-open # social-auth-core # squarelet-auth requests-oauthlib==1.2.0 # via # google-auth-oauthlib # social-auth-core -rest-social-auth==8.1.0 +rest-social-auth==9.0.0 # via -r requirements/base.in -rjsmin==1.2.1 +rjsmin==1.2.5 # via django-compressor -rsa==4.0 - # via - # google-auth - # python-jose rules==2.1 # via -r requirements/base.in -s3transfer==0.5.2 +s3transfer==0.14.0 # via boto3 -scipy==1.8.1 - # via -r requirements/base.in -six==1.15.0 +six==1.17.0 # via - # argon2-cffi + # -r requirements/base.in # asttokens # bleach # django-appconf - # django-choices # furl - # google-api-core - # google-auth - # grpcio # jsonschema # orderedmultidict # python-dateutil -smart-open==1.8.4 +smart-open==7.5.1 # via -r requirements/base.in -social-auth-app-django==5.0.0 +social-auth-app-django==5.6.0 # via rest-social-auth -social-auth-core[openidconnect]==4.4.2 +social-auth-core[openidconnect]==4.7.0 # via # -r requirements/base.in # rest-social-auth # social-auth-app-django # squarelet-auth -sortedcontainers==2.4.0 - # via pdfminer-six -sqlparse==0.4.4 +sqlparse==0.5.4 # via # django # django-debug-toolbar @@ -442,11 +464,11 @@ traitlets==5.3.0 typing-extensions==4.15.0 # via # -r requirements/base.in - # asgiref - # libcst - # typing-inspect -typing-inspect==0.6.0 - # via libcst + # aiosignal + # grpcio + # opentelemetry-api + # opentelemetry-sdk + # opentelemetry-semantic-conventions tzdata==2024.2 # via # celery @@ -455,7 +477,7 @@ unidecode==1.1.1 # via -r requirements/base.in uritemplate==4.1.1 # via drf-spectacular -urllib3==1.26.10 +urllib3==2.6.3 # via # botocore # requests @@ -464,18 +486,20 @@ vine==5.1.0 # amqp # celery # kombu -wand==0.6.6 - # via pdfplumber wcwidth==0.1.8 # via prompt-toolkit webencodings==0.5.1 # via bleach -wrapt==1.11.2 +wrapt==1.17.3 # via # -r requirements/base.in # aiobotocore -yarl==1.4.2 + # deprecated + # smart-open +yarl==1.23.0 # via aiohttp +zipp==3.19.1 + # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/requirements/local.in b/requirements/local.in index 843be959..784fd276 100644 --- a/requirements/local.in +++ b/requirements/local.in @@ -5,11 +5,12 @@ ipdb Sphinx psycopg2 --no-binary psycopg2 pip-tools +pip-audit # Testing # ------------------------------------------------------------------------------ pytest -pytest-sugar +pytest-sugar>=1 pytest-mock -e git+https://github.com/elritsch/python-sharedmock#egg=sharedmock pytest-watch @@ -20,7 +21,7 @@ flake8 coverage black pylint-django -pylint-celery +pylint>=3,<4 # Django # ------------------------------------------------------------------------------ @@ -34,10 +35,7 @@ fakeredis # ------------------------------------------------------------------------------ python-Levenshtein Unidecode -opencv-python requests-mock -fasttext==0.9.3 -scikit-learn # Documentation # ------------------------------------------------------------------------------ diff --git a/requirements/local.txt b/requirements/local.txt index 660893b7..0e77b19f 100644 --- a/requirements/local.txt +++ b/requirements/local.txt @@ -1,123 +1,120 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile requirements/local.in # --no-binary psycopg2 --e git+https://github.com/MuckRock/pysolr.git@958db93824d6ba58c3183cee43f8cfd8c26165ba#egg=pysolr - # via -r requirements/./base.txt -e git+https://github.com/elritsch/python-sharedmock#egg=sharedmock # via -r requirements/local.in -aioboto3==9.6.0 - # via -r requirements/./base.txt -aiobotocore[boto3]==2.3.0 +aioboto3==15.5.0 + # via -r requirements/base.txt +aiobotocore[boto3]==2.25.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aioboto3 -aiohttp==3.8.1 +aiofiles==25.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # aioboto3 +aiohappyeyeballs==2.6.1 + # via + # -r requirements/base.txt + # aiohttp +aiohttp==3.13.5 + # via + # -r requirements/base.txt # aiobotocore + # gcsfs aioitertools==0.10.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore -aiosignal==1.2.0 +aiosignal==1.4.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp -alabaster==0.7.12 +alabaster==1.0.0 # via sphinx amqp==5.3.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # kombu -argon2-cffi==19.1.0 - # via -r requirements/./base.txt -asgiref==3.7.2 +argon2-cffi==25.1.0 + # via -r requirements/base.txt +argon2-cffi-bindings==25.1.0 # via - # -r requirements/./base.txt - # django -astroid==2.11.7 + # -r requirements/base.txt + # argon2-cffi +asgiref==3.11.1 # via - # pylint - # pylint-celery + # -r requirements/base.txt + # django +astroid==3.3.11 + # via pylint asttokens==2.0.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt # stack-data -async-timeout==4.0.2 - # via - # -r requirements/./base.txt - # aiohttp attrs==19.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp # jsonschema - # pytest -babel==2.7.0 +babel==2.18.0 # via sphinx backcall==0.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython billiard==4.2.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery -black==24.8.0 +black==26.3.1 # via -r requirements/local.in bleach==6.0.0 - # via -r requirements/./base.txt -boto==2.49.0 + # via -r requirements/base.txt +boolean-py==5.0 + # via license-expression +boto3==1.40.61 # via - # -r requirements/./base.txt - # smart-open -boto3==1.21.21 - # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore - # smart-open -botocore==1.24.21 +botocore==1.40.61 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore # boto3 # s3transfer build==1.2.2.post1 # via pip-tools -cachetools==3.1.1 +cachecontrol[filecache]==0.14.4 # via - # -r requirements/./base.txt - # google-auth + # cachecontrol + # pip-audit celery==5.4.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # squarelet-auth -certifi==2019.6.16 +certifi==2026.2.25 # via - # -r requirements/./base.txt + # -r requirements/base.txt # requests -cffi==1.15.1 +cffi==2.0.0 # via - # -r requirements/./base.txt - # argon2-cffi + # -r requirements/base.txt + # argon2-cffi-bindings # cryptography -chardet==3.0.4 - # via - # -r requirements/./base.txt - # pdfminer-six charset-normalizer==2.1.0 # via - # -r requirements/./base.txt - # aiohttp + # -r requirements/base.txt + # pdfminer-six # requests click==8.1.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt # black # celery # click-didyoumean @@ -126,15 +123,15 @@ click==8.1.7 # pip-tools click-didyoumean==0.3.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery click-plugins==1.1.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery click-repl==0.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery colorama==0.4.6 # via pytest-watch @@ -143,44 +140,49 @@ coverage==4.5.4 # -r requirements/local.in # django-coverage-plugin cpuprofile==1.0.1 - # via -r requirements/./base.txt -cryptography==35.0.0 + # via -r requirements/base.txt +cryptography==46.0.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # google-auth + # pdfminer-six # social-auth-core cssselect==1.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # premailer cssutils==1.0.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # premailer +cyclonedx-python-lib==7.6.2 + # via pip-audit daily-active-users==0.1.2 - # via -r requirements/./base.txt + # via -r requirements/base.txt decorator==4.4.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs # ipdb # ipython -defusedxml==0.6.0 +defusedxml==0.7.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # py-serializable # python3-openid # social-auth-core -deprecation==2.1.0 +deprecated==1.3.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pikepdf -dill==0.3.5.1 +dill==0.4.1 # via pylint -django==4.2.2 +django==5.2.13 # via - # -r requirements/./base.txt + # -r requirements/base.txt # daily-active-users # django-appconf - # django-choices + # django-compressor # django-cors-headers # django-debug-toolbar # django-extensions @@ -195,82 +197,77 @@ django==4.2.2 # drf-nested-routers # drf-spectacular # rest-social-auth + # social-auth-app-django # squarelet-auth django-appconf==1.0.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-compressor django-autoslug==1.9.8 - # via -r requirements/./base.txt -django-choices==1.7.2 - # via -r requirements/./base.txt -django-compressor==4.4 - # via -r requirements/./base.txt + # via -r requirements/base.txt +django-compressor==4.6.0 + # via -r requirements/base.txt django-cors-headers==3.13.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-coverage-plugin==2.0.3 # via -r requirements/local.in django-cprofile-middleware==1.0.5 - # via -r requirements/./base.txt -django-debug-toolbar==4.1.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt +django-debug-toolbar==6.2.0 + # via -r requirements/base.txt django-environ==0.4.5 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-extensions==3.2.3 - # via -r requirements/./base.txt -django-filter==21.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt +django-filter==25.2 + # via -r requirements/base.txt django-ipware==4.0.2 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-model-utils==3.2.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-parler==2.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-parler-rest django-parler-rest==2.2 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-premailer==0.2.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-redis==5.0.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # python-redis-lock -django-robots==5.0 - # via -r requirements/./base.txt -djangorestframework==3.14.0 +django-robots==6.1 + # via -r requirements/base.txt +djangorestframework==3.16.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-parler-rest # djangorestframework-simplejwt # drf-nested-routers # drf-spectacular # rest-social-auth -djangorestframework-simplejwt==4.8.0 - # via -r requirements/./base.txt +djangorestframework-simplejwt==5.5.1 + # via -r requirements/base.txt docopt==0.6.2 # via pytest-watch -docutils==0.15.2 +docutils==0.21.2 # via sphinx dogslow==1.2 - # via -r requirements/./base.txt + # via -r requirements/base.txt drf-flex-fields==1.0.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt drf-nested-routers==0.91 - # via -r requirements/./base.txt + # via -r requirements/base.txt drf-spectacular==0.28.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # -r requirements/local.in -ecdsa==0.13.2 - # via - # -r requirements/./base.txt - # python-jose entrypoints==0.3 # via flake8 executing==0.8.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # stack-data factory-boy==2.12.0 # via -r requirements/local.in @@ -278,308 +275,377 @@ faker==2.0.1 # via factory-boy fakeredis==1.0.5 # via -r requirements/local.in -fasttext==0.9.3 - # via -r requirements/local.in +filelock==3.25.2 + # via cachecontrol flake8==3.7.8 # via -r requirements/local.in -frozenlist==1.3.0 +frozenlist==1.8.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp # aiosignal -fsspec==0.5.2 +fsspec==2026.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs furl==2.1.0 - # via -r requirements/./base.txt -gcsfs==0.3.1 - # via -r requirements/./base.txt -google-api-core[grpc]==1.23.0 + # via -r requirements/base.txt +gcsfs==2026.3.0 + # via -r requirements/base.txt +google-api-core[grpc]==2.29.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # google-cloud-core # google-cloud-language # google-cloud-pubsub -google-auth==1.23.0 + # google-cloud-storage + # google-cloud-storage-control +google-auth==2.49.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs # google-api-core # google-auth-oauthlib -google-auth-oauthlib==0.4.1 + # google-cloud-core + # google-cloud-language + # google-cloud-pubsub + # google-cloud-storage + # google-cloud-storage-control +google-auth-oauthlib==1.3.1 + # via + # -r requirements/base.txt + # gcsfs +google-cloud-core==2.5.1 + # via + # -r requirements/base.txt + # google-cloud-storage +google-cloud-language==2.19.0 + # via -r requirements/base.txt +google-cloud-pubsub==2.36.0 + # via -r requirements/base.txt +google-cloud-storage==3.10.1 + # via + # -r requirements/base.txt + # gcsfs +google-cloud-storage-control==1.6.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs -google-cloud-language==2.0.0 - # via -r requirements/./base.txt -google-cloud-pubsub==1.0.2 - # via -r requirements/./base.txt -googleapis-common-protos[grpc]==1.6.0 +google-crc32c==1.8.0 + # via + # -r requirements/base.txt + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.8.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # google-cloud-storage +googleapis-common-protos[grpc]==1.73.0 + # via + # -r requirements/base.txt # google-api-core # grpc-google-iam-v1 -grpc-google-iam-v1==0.12.3 + # grpcio-status +grpc-google-iam-v1==0.14.4 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-cloud-pubsub -grpcio==1.47.0 +grpcio==1.80.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-api-core + # google-cloud-language + # google-cloud-pubsub # googleapis-common-protos # grpc-google-iam-v1 + # grpcio-status +grpcio-status==1.63.0rc1 + # via + # -r requirements/base.txt + # google-api-core + # google-cloud-pubsub html2text==2020.1.16 - # via -r requirements/./base.txt -idna==2.8 + # via -r requirements/base.txt +html5lib==1.1 + # via pip-audit +idna==3.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt # requests # yarl -imagesize==1.1.0 +imagesize==2.0.0 # via sphinx -importlib-metadata==6.0.0 - # via pluggy +importlib-metadata==8.4.0 + # via + # -r requirements/base.txt + # opentelemetry-api inflection==0.5.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular iniconfig==1.1.1 # via pytest ipdb==0.13.9 # via -r requirements/local.in -ipython==8.5.0 +ipython==8.10.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipdb -isort==4.3.21 +isort==6.1.0 # via pylint jedi==0.18.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython -jinja2==2.10.1 +jinja2==3.1.6 # via sphinx jmespath==0.9.4 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # aiobotocore # boto3 # botocore -joblib==1.0.1 - # via scikit-learn jsonschema==3.2.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular kombu==5.4.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery -lazy-object-proxy==1.4.2 - # via astroid -libcst==0.4.1 - # via - # -r requirements/./base.txt - # google-cloud-language +levenshtein==0.27.3 + # via python-levenshtein +license-expression==30.4.4 + # via cyclonedx-python-lib listcrunch==1.0.0 - # via -r requirements/./base.txt -llvmlite==0.38.1 - # via - # -r requirements/./base.txt - # numba -logzio-python-handler==4.1.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt +logzio-python-handler==4.1.9 + # via -r requirements/base.txt luqum==0.8.1 - # via -r requirements/./base.txt -lxml==4.9.1 + # via -r requirements/base.txt +lxml==6.0.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pikepdf # premailer -markdown==3.4.1 - # via -r requirements/./base.txt -markupsafe==1.1.1 - # via jinja2 +markdown==3.8.1 + # via -r requirements/base.txt +markdown-it-py==4.0.0 + # via rich +markupsafe==3.0.3 + # via + # jinja2 + # werkzeug matplotlib-inline==0.1.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython mccabe==0.6.1 # via # flake8 # pylint -multidict==4.7.5 +mdurl==0.1.2 + # via markdown-it-py +msgpack==1.1.2 + # via cachecontrol +multidict==6.7.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # aiobotocore # aiohttp # yarl mypy-extensions==0.4.3 + # via black +numpy==2.2.6 # via - # -r requirements/./base.txt - # black - # typing-inspect -numba==0.55.2 - # via -r requirements/./base.txt -numpy==1.22.4 - # via - # -r requirements/./base.txt - # fasttext - # numba - # opencv-python - # scikit-learn - # scipy + # -r requirements/base.txt + # opencv-python-headless oauthlib==3.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # requests-oauthlib # social-auth-core -opencv-python==4.6.0.66 - # via -r requirements/local.in +opencv-python-headless==4.13.0.92 + # via -r requirements/base.txt +opentelemetry-api==1.40.0 + # via + # -r requirements/base.txt + # google-cloud-pubsub + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-sdk==1.40.0 + # via + # -r requirements/base.txt + # google-cloud-pubsub +opentelemetry-semantic-conventions==0.61b0 + # via + # -r requirements/base.txt + # opentelemetry-sdk orderedmultidict==1.0.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # furl +packageurl-python==0.17.6 + # via cyclonedx-python-lib packaging==24.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # black # build - # deprecation # pikepdf + # pip-audit + # pip-requirements-parser # pytest - # pytest-sugar # sphinx + # wheel parso==0.8.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # jedi -pathspec==0.9.0 +pathspec==1.0.4 # via black -pdfminer-six==20200517 +pdfminer-six==20251230 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pdfplumber -pdfplumber==0.5.28 - # via -r requirements/./base.txt +pdfplumber==0.11.9 + # via -r requirements/base.txt pebble==4.5.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt pexpect==4.8.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython pickleshare==0.7.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython -pikepdf==5.4.0 - # via -r requirements/./base.txt -pillow==9.2.0 +pikepdf==10.5.1 + # via -r requirements/base.txt +pillow==12.1.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pdfplumber # pikepdf -pip-tools==7.4.1 +pip-api==0.0.34 + # via pip-audit +pip-audit==2.7.3 + # via -r requirements/local.in +pip-requirements-parser==32.0.1 + # via pip-audit +pip-tools==7.5.3 # via -r requirements/local.in platformdirs==2.5.2 # via # black # pylint -pluggy==0.12.0 +pluggy==1.6.0 # via pytest ply==3.11 # via - # -r requirements/./base.txt + # -r requirements/base.txt # luqum premailer==3.0.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-premailer prompt-toolkit==3.0.38 # via - # -r requirements/./base.txt + # -r requirements/base.txt # click-repl # ipython -proto-plus==1.13.0 +propcache==0.4.1 + # via + # -r requirements/base.txt + # aiohttp + # yarl +proto-plus==1.27.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # google-api-core # google-cloud-language -protobuf==4.25.2 + # google-cloud-pubsub + # google-cloud-storage-control +protobuf==5.29.6 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-api-core + # google-cloud-language + # google-cloud-pubsub + # google-cloud-storage-control # googleapis-common-protos + # grpc-google-iam-v1 + # grpcio-status # logzio-python-handler # proto-plus psycopg2==2.9.6 # via -r requirements/local.in ptyprocess==0.6.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pexpect pure-eval==0.2.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # stack-data -py==1.8.2 - # via pytest +py-serializable==1.1.2 + # via cyclonedx-python-lib pyasn1==0.4.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pyasn1-modules - # python-jose - # rsa pyasn1-modules==0.2.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-auth -pybind11==2.6.2 - # via fasttext pycodestyle==2.5.0 # via flake8 pycparser==2.19 # via - # -r requirements/./base.txt + # -r requirements/base.txt # cffi -pycryptodome==3.10.1 - # via - # -r requirements/./base.txt - # pdfminer-six pyflakes==2.1.1 # via flake8 -pygments==2.5.2 +pygments==2.20.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython + # pytest + # rich # sphinx -pyjwt==2.3.0 +pyjwt==2.12.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # djangorestframework-simplejwt # social-auth-core -pylint==2.14.5 +pylint==3.3.9 # via - # pylint-celery + # -r requirements/local.in # pylint-django # pylint-plugin-utils -pylint-celery==0.3 - # via -r requirements/local.in -pylint-django==2.5.3 +pylint-django==2.7.0 # via -r requirements/local.in -pylint-plugin-utils==0.7 - # via - # pylint-celery - # pylint-django +pylint-plugin-utils==0.9.0 + # via pylint-django pymupdf==1.25.3 - # via -r requirements/./base.txt + # via -r requirements/base.txt +pyparsing==3.3.2 + # via pip-requirements-parser +pypdfium2==5.7.0 + # via + # -r requirements/base.txt + # pdfplumber pyproject-hooks==1.2.0 # via # build # pip-tools pyrsistent==0.18.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # jsonschema -pytest==7.1.2 +pysolr==3.11.0 + # via -r requirements/base.txt +pytest==9.0.3 # via # -r requirements/local.in # pytest-django @@ -590,62 +656,60 @@ pytest-django==4.5.2 # via -r requirements/local.in pytest-mock==3.8.2 # via -r requirements/local.in -pytest-sugar==0.9.2 +pytest-sugar==1.1.1 # via -r requirements/local.in pytest-watch==4.2.0 # via -r requirements/local.in -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # aiobotocore # botocore # celery # faker -python-jose==3.3.0 - # via - # -r requirements/./base.txt - # social-auth-core -python-levenshtein==0.12.0 +python-levenshtein==0.27.3 # via -r requirements/local.in python-redis-lock[django]==3.3.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt python-slugify==3.0.3 - # via -r requirements/./base.txt + # via -r requirements/base.txt python3-openid==3.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # social-auth-core +pytokens==0.4.1 + # via black pytz==2022.1 + # via -r requirements/base.txt +pyyaml==6.0.3 # via - # -r requirements/./base.txt - # babel - # djangorestframework - # google-api-core -pyyaml==5.3.1 - # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular - # libcst -rcssmin==1.1.1 +rapidfuzz==3.14.5 + # via levenshtein +rcssmin==1.2.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-compressor redis==3.4.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-redis # fakeredis # python-redis-lock -requests==2.28.1 +requests==2.33.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # cachecontrol # gcsfs # google-api-core + # google-cloud-storage # logzio-python-handler + # pip-audit # premailer # pysolr # requests-mock # requests-oauthlib - # smart-open # social-auth-core # sphinx # squarelet-auth @@ -653,184 +717,160 @@ requests-mock==1.9.3 # via -r requirements/local.in requests-oauthlib==1.2.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-auth-oauthlib # social-auth-core -rest-social-auth==8.1.0 - # via -r requirements/./base.txt -rjsmin==1.2.1 +rest-social-auth==9.0.0 + # via -r requirements/base.txt +rich==14.3.3 + # via pip-audit +rjsmin==1.2.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-compressor -rsa==4.0 - # via - # -r requirements/./base.txt - # google-auth - # python-jose rules==2.1 - # via -r requirements/./base.txt -s3transfer==0.5.2 + # via -r requirements/base.txt +s3transfer==0.14.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # boto3 -scikit-learn==1.1.1 - # via -r requirements/local.in -scipy==1.8.1 - # via - # -r requirements/./base.txt - # scikit-learn -six==1.15.0 +six==1.17.0 # via - # -r requirements/./base.txt - # argon2-cffi + # -r requirements/base.txt # asttokens # bleach # django-appconf - # django-choices # django-coverage-plugin # faker # fakeredis # furl - # google-api-core - # google-auth - # grpcio + # html5lib # jsonschema # orderedmultidict # python-dateutil # requests-mock -smart-open==1.8.4 - # via -r requirements/./base.txt -snowballstemmer==1.9.1 +smart-open==7.5.1 + # via -r requirements/base.txt +snowballstemmer==3.0.1 # via sphinx -social-auth-app-django==5.0.0 +social-auth-app-django==5.6.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # rest-social-auth -social-auth-core[openidconnect]==4.4.2 +social-auth-core[openidconnect]==4.7.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # rest-social-auth # social-auth-app-django # squarelet-auth sortedcontainers==2.4.0 # via - # -r requirements/./base.txt + # cyclonedx-python-lib # fakeredis - # pdfminer-six -sphinx==2.2.0 +sphinx==8.1.3 # via -r requirements/local.in -sphinxcontrib-applehelp==1.0.1 +sphinxcontrib-applehelp==2.0.0 # via sphinx -sphinxcontrib-devhelp==1.0.1 +sphinxcontrib-devhelp==2.0.0 # via sphinx -sphinxcontrib-htmlhelp==1.0.2 +sphinxcontrib-htmlhelp==2.1.0 # via sphinx sphinxcontrib-jsmath==1.0.1 # via sphinx -sphinxcontrib-qthelp==1.0.2 +sphinxcontrib-qthelp==2.0.0 # via sphinx -sphinxcontrib-serializinghtml==1.1.3 +sphinxcontrib-serializinghtml==2.0.0 # via sphinx -sqlparse==0.4.4 +sqlparse==0.5.4 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django # django-debug-toolbar squarelet-auth==0.1.14 - # via -r requirements/./base.txt + # via -r requirements/base.txt stack-data==0.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython -termcolor==1.1.0 +termcolor==3.3.0 # via pytest-sugar text-unidecode==1.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # faker # python-slugify -threadpoolctl==2.2.0 - # via scikit-learn toml==0.10.2 - # via ipdb -tomli==2.0.1 # via - # black - # build - # pip-tools - # pylint - # pytest + # ipdb + # pip-audit tomlkit==0.11.4 # via pylint traitlets==5.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython # matplotlib-inline typing-extensions==4.15.0 # via - # -r requirements/./base.txt - # asgiref - # black - # libcst - # typing-inspect -typing-inspect==0.6.0 - # via - # -r requirements/./base.txt - # libcst + # -r requirements/base.txt + # aiosignal + # grpcio + # opentelemetry-api + # opentelemetry-sdk + # opentelemetry-semantic-conventions tzdata==2024.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery # kombu unidecode==1.1.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # -r requirements/local.in uritemplate==4.1.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular -urllib3==1.26.10 +urllib3==2.6.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # botocore # requests vine==5.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # amqp # celery # kombu -wand==0.6.6 - # via - # -r requirements/./base.txt - # pdfplumber watchdog==6.0.0 # via pytest-watch wcwidth==0.1.8 # via - # -r requirements/./base.txt + # -r requirements/base.txt # prompt-toolkit webencodings==0.5.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # bleach -werkzeug==2.1.2 + # html5lib +werkzeug==3.1.8 # via -r requirements/local.in -wheel==0.45.1 +wheel==0.46.2 # via pip-tools -wrapt==1.11.2 +wrapt==1.17.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore - # astroid -yarl==1.4.2 + # deprecated + # smart-open +yarl==1.23.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp -zipp==3.11.0 - # via importlib-metadata +zipp==3.19.1 + # via + # -r requirements/base.txt + # importlib-metadata # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/requirements/production.in b/requirements/production.in index ec9a1a87..e292b828 100644 --- a/requirements/production.in +++ b/requirements/production.in @@ -2,7 +2,6 @@ gunicorn psycopg2 --no-binary psycopg2 -Collectfast sentry-sdk scout-apm @@ -12,4 +11,5 @@ django-storages[boto3] django-anymail[mailgun] django-celery-email +# Documentation drf-spectacular \ No newline at end of file diff --git a/requirements/production.txt b/requirements/production.txt index ded6e691..5c5da4a4 100644 --- a/requirements/production.txt +++ b/requirements/production.txt @@ -1,173 +1,163 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile requirements/production.in # --no-binary psycopg2 --e git+https://github.com/MuckRock/pysolr.git@958db93824d6ba58c3183cee43f8cfd8c26165ba#egg=pysolr - # via -r requirements/./base.txt -aioboto3==9.6.0 - # via -r requirements/./base.txt -aiobotocore[boto3]==2.3.0 +aioboto3==15.5.0 + # via -r requirements/base.txt +aiobotocore[boto3]==2.25.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aioboto3 -aiohttp==3.8.1 +aiofiles==25.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # aioboto3 +aiohappyeyeballs==2.6.1 + # via + # -r requirements/base.txt + # aiohttp +aiohttp==3.13.5 + # via + # -r requirements/base.txt # aiobotocore + # gcsfs aioitertools==0.10.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore -aiosignal==1.2.0 +aiosignal==1.4.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp amqp==5.3.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # kombu -argon2-cffi==19.1.0 - # via -r requirements/./base.txt -asgiref==3.7.2 +argon2-cffi==25.1.0 + # via -r requirements/base.txt +argon2-cffi-bindings==25.1.0 + # via + # -r requirements/base.txt + # argon2-cffi +asgiref==3.11.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django # scout-apm asttokens==2.0.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt # stack-data -async-timeout==4.0.2 - # via - # -r requirements/./base.txt - # aiohttp attrs==19.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp # jsonschema backcall==0.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython billiard==4.2.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery bleach==6.0.0 - # via -r requirements/./base.txt -boto==2.49.0 + # via -r requirements/base.txt +boto3==1.40.61 # via - # -r requirements/./base.txt - # smart-open -boto3==1.21.21 - # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore # django-storages - # smart-open -botocore==1.24.21 +botocore==1.40.61 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore # boto3 # s3transfer -cachetools==3.1.1 - # via - # -r requirements/./base.txt - # google-auth celery==5.4.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-celery-email # squarelet-auth -certifi==2019.6.16 +certifi==2026.2.25 # via - # -r requirements/./base.txt + # -r requirements/base.txt # requests + # scout-apm # sentry-sdk - # urllib3 -cffi==1.15.1 +cffi==2.0.0 # via - # -r requirements/./base.txt - # argon2-cffi + # -r requirements/base.txt + # argon2-cffi-bindings # cryptography -chardet==3.0.4 - # via - # -r requirements/./base.txt - # pdfminer-six charset-normalizer==2.1.0 # via - # -r requirements/./base.txt - # aiohttp + # -r requirements/base.txt + # pdfminer-six # requests click==8.1.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery # click-didyoumean # click-plugins # click-repl click-didyoumean==0.3.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery click-plugins==1.1.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery click-repl==0.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery -collectfast==2.2.0 - # via -r requirements/production.in cpuprofile==1.0.1 - # via -r requirements/./base.txt -cryptography==35.0.0 + # via -r requirements/base.txt +cryptography==46.0.7 # via - # -r requirements/./base.txt - # django-anymail - # pyopenssl + # -r requirements/base.txt + # google-auth + # pdfminer-six # social-auth-core - # urllib3 cssselect==1.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # premailer cssutils==1.0.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # premailer daily-active-users==0.1.2 - # via -r requirements/./base.txt + # via -r requirements/base.txt decorator==4.4.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs # ipython -defusedxml==0.6.0 +defusedxml==0.7.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # python3-openid # social-auth-core -deprecation==2.1.0 +deprecated==1.3.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pikepdf -django==4.2.2 +django==5.2.13 # via - # -r requirements/./base.txt - # collectfast + # -r requirements/base.txt # daily-active-users # django-anymail # django-appconf # django-celery-email - # django-choices + # django-compressor # django-cors-headers # django-debug-toolbar # django-extensions @@ -183,268 +173,318 @@ django==4.2.2 # drf-nested-routers # drf-spectacular # rest-social-auth + # social-auth-app-django # squarelet-auth -django-anymail[mailgun]==10.0 +django-anymail[mailgun]==14.0 # via -r requirements/production.in django-appconf==1.0.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-celery-email # django-compressor django-autoslug==1.9.8 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-celery-email==3.0.0 # via -r requirements/production.in -django-choices==1.7.2 - # via -r requirements/./base.txt -django-compressor==4.4 - # via -r requirements/./base.txt +django-compressor==4.6.0 + # via -r requirements/base.txt django-cors-headers==3.13.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-cprofile-middleware==1.0.5 - # via -r requirements/./base.txt -django-debug-toolbar==4.1.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt +django-debug-toolbar==6.2.0 + # via -r requirements/base.txt django-environ==0.4.5 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-extensions==3.2.3 - # via -r requirements/./base.txt -django-filter==21.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt +django-filter==25.2 + # via -r requirements/base.txt django-ipware==4.0.2 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-model-utils==3.2.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-parler==2.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-parler-rest django-parler-rest==2.2 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-premailer==0.2.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt django-redis==5.0.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # python-redis-lock -django-robots==5.0 - # via -r requirements/./base.txt -django-storages[boto3]==1.12.3 - # via - # -r requirements/production.in - # collectfast -djangorestframework==3.14.0 +django-robots==6.1 + # via -r requirements/base.txt +django-storages[boto3]==1.14.6 + # via -r requirements/production.in +djangorestframework==3.16.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-parler-rest # djangorestframework-simplejwt # drf-nested-routers # drf-spectacular # rest-social-auth -djangorestframework-simplejwt==4.8.0 - # via -r requirements/./base.txt +djangorestframework-simplejwt==5.5.1 + # via -r requirements/base.txt dogslow==1.2 - # via -r requirements/./base.txt + # via -r requirements/base.txt drf-flex-fields==1.0.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt drf-nested-routers==0.91 - # via -r requirements/./base.txt + # via -r requirements/base.txt drf-spectacular==0.28.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # -r requirements/production.in -ecdsa==0.13.2 - # via - # -r requirements/./base.txt - # python-jose executing==0.8.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # stack-data -frozenlist==1.3.0 +frozenlist==1.8.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp # aiosignal -fsspec==0.5.2 +fsspec==2026.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs furl==2.1.0 - # via -r requirements/./base.txt -gcsfs==0.3.1 - # via -r requirements/./base.txt -google-api-core[grpc]==1.23.0 + # via -r requirements/base.txt +gcsfs==2026.3.0 + # via -r requirements/base.txt +google-api-core[grpc]==2.29.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # google-cloud-core # google-cloud-language # google-cloud-pubsub -google-auth==1.23.0 + # google-cloud-storage + # google-cloud-storage-control +google-auth==2.49.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs # google-api-core # google-auth-oauthlib -google-auth-oauthlib==0.4.1 + # google-cloud-core + # google-cloud-language + # google-cloud-pubsub + # google-cloud-storage + # google-cloud-storage-control +google-auth-oauthlib==1.3.1 + # via + # -r requirements/base.txt + # gcsfs +google-cloud-core==2.5.1 + # via + # -r requirements/base.txt + # google-cloud-storage +google-cloud-language==2.19.0 + # via -r requirements/base.txt +google-cloud-pubsub==2.36.0 + # via -r requirements/base.txt +google-cloud-storage==3.10.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # gcsfs -google-cloud-language==2.0.0 - # via -r requirements/./base.txt -google-cloud-pubsub==1.0.2 - # via -r requirements/./base.txt -googleapis-common-protos[grpc]==1.6.0 +google-cloud-storage-control==1.6.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # gcsfs +google-crc32c==1.8.0 + # via + # -r requirements/base.txt + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.8.2 + # via + # -r requirements/base.txt + # google-cloud-storage +googleapis-common-protos[grpc]==1.73.0 + # via + # -r requirements/base.txt # google-api-core # grpc-google-iam-v1 -grpc-google-iam-v1==0.12.3 + # grpcio-status +grpc-google-iam-v1==0.14.4 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-cloud-pubsub -grpcio==1.47.0 +grpcio==1.80.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-api-core + # google-cloud-language + # google-cloud-pubsub # googleapis-common-protos # grpc-google-iam-v1 + # grpcio-status +grpcio-status==1.63.0rc1 + # via + # -r requirements/base.txt + # google-api-core + # google-cloud-pubsub gunicorn==20.1.0 # via -r requirements/production.in html2text==2020.1.16 - # via -r requirements/./base.txt -idna==2.8 + # via -r requirements/base.txt +idna==3.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # django-anymail # requests - # urllib3 # yarl +importlib-metadata==8.4.0 + # via + # -r requirements/base.txt + # opentelemetry-api inflection==0.5.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular -ipython==8.5.0 - # via -r requirements/./base.txt +ipython==8.10.0 + # via -r requirements/base.txt jedi==0.18.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython jmespath==0.9.4 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # aiobotocore # boto3 # botocore jsonschema==3.2.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular kombu==5.4.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery -libcst==0.4.1 - # via - # -r requirements/./base.txt - # google-cloud-language listcrunch==1.0.0 - # via -r requirements/./base.txt -llvmlite==0.38.1 - # via - # -r requirements/./base.txt - # numba -logzio-python-handler==4.1.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt +logzio-python-handler==4.1.9 + # via -r requirements/base.txt luqum==0.8.1 - # via -r requirements/./base.txt -lxml==4.9.1 + # via -r requirements/base.txt +lxml==6.0.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pikepdf # premailer -markdown==3.4.1 - # via -r requirements/./base.txt +markdown==3.8.1 + # via -r requirements/base.txt matplotlib-inline==0.1.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython -multidict==4.7.5 +multidict==6.7.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # aiobotocore # aiohttp # yarl -mypy-extensions==0.4.3 - # via - # -r requirements/./base.txt - # typing-inspect -numba==0.55.2 - # via -r requirements/./base.txt -numpy==1.22.4 +numpy==2.2.6 # via - # -r requirements/./base.txt - # numba - # scipy + # -r requirements/base.txt + # opencv-python-headless oauthlib==3.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # requests-oauthlib # social-auth-core +opencv-python-headless==4.13.0.92 + # via -r requirements/base.txt +opentelemetry-api==1.40.0 + # via + # -r requirements/base.txt + # google-cloud-pubsub + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-sdk==1.40.0 + # via + # -r requirements/base.txt + # google-cloud-pubsub +opentelemetry-semantic-conventions==0.61b0 + # via + # -r requirements/base.txt + # opentelemetry-sdk orderedmultidict==1.0.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # furl packaging==24.1 # via - # -r requirements/./base.txt - # deprecation + # -r requirements/base.txt # pikepdf parso==0.8.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # jedi -pdfminer-six==20200517 +pdfminer-six==20251230 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pdfplumber -pdfplumber==0.5.28 - # via -r requirements/./base.txt +pdfplumber==0.11.9 + # via -r requirements/base.txt pebble==4.5.0 - # via -r requirements/./base.txt + # via -r requirements/base.txt pexpect==4.8.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython pickleshare==0.7.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython -pikepdf==5.4.0 - # via -r requirements/./base.txt -pillow==9.2.0 +pikepdf==10.5.1 + # via -r requirements/base.txt +pillow==12.1.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pdfplumber # pikepdf ply==3.11 # via - # -r requirements/./base.txt + # -r requirements/base.txt # luqum premailer==3.0.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-premailer prompt-toolkit==3.0.38 # via - # -r requirements/./base.txt + # -r requirements/base.txt # click-repl # ipython -proto-plus==1.13.0 +propcache==0.4.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # aiohttp + # yarl +proto-plus==1.27.1 + # via + # -r requirements/base.txt + # google-api-core # google-cloud-language -protobuf==4.25.2 + # google-cloud-pubsub + # google-cloud-storage-control +protobuf==5.29.6 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-api-core + # google-cloud-language + # google-cloud-pubsub + # google-cloud-storage-control # googleapis-common-protos + # grpc-google-iam-v1 + # grpcio-status # logzio-python-handler # proto-plus psutil==5.7.3 @@ -453,201 +493,172 @@ psycopg2==2.9.6 # via -r requirements/production.in ptyprocess==0.6.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pexpect pure-eval==0.2.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # stack-data pyasn1==0.4.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt # pyasn1-modules - # python-jose - # rsa pyasn1-modules==0.2.7 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-auth pycparser==2.19 # via - # -r requirements/./base.txt + # -r requirements/base.txt # cffi -pycryptodome==3.10.1 - # via - # -r requirements/./base.txt - # pdfminer-six -pygments==2.5.2 +pygments==2.20.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython -pyjwt==2.3.0 +pyjwt==2.12.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # djangorestframework-simplejwt # social-auth-core pymupdf==1.25.3 - # via -r requirements/./base.txt -pyopenssl==19.1.0 - # via urllib3 + # via -r requirements/base.txt +pypdfium2==5.7.0 + # via + # -r requirements/base.txt + # pdfplumber pyrsistent==0.18.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # jsonschema -python-dateutil==2.8.2 +pysolr==3.11.0 + # via -r requirements/base.txt +python-dateutil==2.9.0.post0 # via - # -r requirements/./base.txt + # -r requirements/base.txt + # aiobotocore # botocore # celery -python-jose==3.3.0 - # via - # -r requirements/./base.txt - # social-auth-core python-redis-lock[django]==3.3.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt python-slugify==3.0.3 - # via -r requirements/./base.txt + # via -r requirements/base.txt python3-openid==3.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # social-auth-core pytz==2022.1 + # via -r requirements/base.txt +pyyaml==6.0.3 # via - # -r requirements/./base.txt - # djangorestframework - # google-api-core -pyyaml==5.3.1 - # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular - # libcst -rcssmin==1.1.1 +rcssmin==1.2.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-compressor redis==3.4.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-redis # python-redis-lock -requests==2.28.1 +requests==2.33.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-anymail # gcsfs # google-api-core + # google-cloud-storage # logzio-python-handler # premailer # pysolr # requests-oauthlib - # smart-open # social-auth-core # squarelet-auth requests-oauthlib==1.2.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # google-auth-oauthlib # social-auth-core -rest-social-auth==8.1.0 - # via -r requirements/./base.txt -rjsmin==1.2.1 +rest-social-auth==9.0.0 + # via -r requirements/base.txt +rjsmin==1.2.5 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django-compressor -rsa==4.0 - # via - # -r requirements/./base.txt - # google-auth - # python-jose rules==2.1 - # via -r requirements/./base.txt -s3transfer==0.5.2 + # via -r requirements/base.txt +s3transfer==0.14.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # boto3 -scipy==1.8.1 - # via -r requirements/./base.txt -scout-apm==2.17.0 +scout-apm==3.5.3 # via -r requirements/production.in -sentry-sdk==1.0.0 +sentry-sdk==2.57.0 # via -r requirements/production.in -six==1.15.0 +six==1.17.0 # via - # -r requirements/./base.txt - # argon2-cffi + # -r requirements/base.txt # asttokens # bleach # django-appconf - # django-choices # furl - # google-api-core - # google-auth - # grpcio # jsonschema # orderedmultidict - # pyopenssl # python-dateutil -smart-open==1.8.4 - # via -r requirements/./base.txt -social-auth-app-django==5.0.0 +smart-open==7.5.1 + # via -r requirements/base.txt +social-auth-app-django==5.6.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # rest-social-auth -social-auth-core[openidconnect]==4.4.2 +social-auth-core[openidconnect]==4.7.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # rest-social-auth # social-auth-app-django # squarelet-auth -sortedcontainers==2.4.0 - # via - # -r requirements/./base.txt - # pdfminer-six -sqlparse==0.4.4 +sqlparse==0.5.4 # via - # -r requirements/./base.txt + # -r requirements/base.txt # django # django-debug-toolbar squarelet-auth==0.1.14 - # via -r requirements/./base.txt + # via -r requirements/base.txt stack-data==0.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython text-unidecode==1.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # python-slugify traitlets==5.3.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # ipython # matplotlib-inline typing-extensions==4.15.0 # via - # -r requirements/./base.txt - # asgiref - # collectfast - # libcst - # typing-inspect -typing-inspect==0.6.0 - # via - # -r requirements/./base.txt - # libcst + # -r requirements/base.txt + # aiosignal + # grpcio + # opentelemetry-api + # opentelemetry-sdk + # opentelemetry-semantic-conventions tzdata==2024.2 # via - # -r requirements/./base.txt + # -r requirements/base.txt # celery # kombu unidecode==1.1.1 - # via -r requirements/./base.txt + # via -r requirements/base.txt uritemplate==4.1.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # drf-spectacular -urllib3[secure]==1.26.10 +urllib3==2.6.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # botocore # django-anymail # requests @@ -655,31 +666,33 @@ urllib3[secure]==1.26.10 # sentry-sdk vine==5.1.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # amqp # celery # kombu -wand==0.6.6 - # via - # -r requirements/./base.txt - # pdfplumber wcwidth==0.1.8 # via - # -r requirements/./base.txt + # -r requirements/base.txt # prompt-toolkit webencodings==0.5.1 # via - # -r requirements/./base.txt + # -r requirements/base.txt # bleach -wrapt==1.11.2 +wrapt==1.17.3 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiobotocore + # deprecated # scout-apm -yarl==1.4.2 + # smart-open +yarl==1.23.0 # via - # -r requirements/./base.txt + # -r requirements/base.txt # aiohttp +zipp==3.19.1 + # via + # -r requirements/base.txt + # importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/tasks.py b/tasks.py index 32b8b283..c9aa4e3b 100755 --- a/tasks.py +++ b/tasks.py @@ -31,7 +31,7 @@ def test( slow_switch = "" if slow else '-m "not slow"' warnings = "-e PYTHONWARNINGS=always" if warnings else "" filters = f"-k {keywords}" if keywords else "" - + c.run(f"{COMPOSE_PREFIX} up -d documentcloud_test_solr") c.run( COMPOSE_RUN_OPT_USER.format( opt=f"-e DJANGO_SETTINGS_MODULE=config.settings.test {warnings}", @@ -120,9 +120,9 @@ def format(c): cmd="black documentcloud --exclude migrations && " "black config/urls.py && " "black config/settings && " - "isort -rc documentcloud && " - "isort -rc config/urls.py && " - "isort -rc config/settings" + "isort documentcloud && " + "isort config/urls.py && " + "isort config/settings" ) ) @@ -233,6 +233,10 @@ def download_tesseract_data(c): """Download Tesseract data files. Needed to be able to do OCR locally.""" c.run("cd config/aws/lambda; ./build.sh") +@task +def initialize_minio(c): + """Initialize Minio bucket and policies for local development""" + c.run(DJANGO_RUN.format(cmd="python manage.py initialize_minio")) @task def deploy_lambdas(c, staging=False):